diff --git a/app/src/main/java/io/legado/app/constant/AppPattern.kt b/app/src/main/java/io/legado/app/constant/AppPattern.kt index b664fee7c..450347735 100644 --- a/app/src/main/java/io/legado/app/constant/AppPattern.kt +++ b/app/src/main/java/io/legado/app/constant/AppPattern.kt @@ -5,14 +5,14 @@ import java.util.regex.Pattern @Suppress("RegExpRedundantEscape") object AppPattern { val JS_PATTERN: Pattern = - Pattern.compile("([\\w\\W]+?)|@js:([\\w\\W]*)", Pattern.CASE_INSENSITIVE) + Pattern.compile("([\\w\\W]*?|@js:[\\w\\W]*$)", Pattern.CASE_INSENSITIVE) val EXP_PATTERN: Pattern = Pattern.compile("\\{\\{([\\w\\W]*?)\\}\\}") - - //匹配格式化后的图片格式 - val imgPattern: Pattern = Pattern.compile("]+)\">") + //图片有data-开头的数据属性时优先用数据属性作为src,没有数据属性时才匹配src + val imgPattern: Pattern = + Pattern.compile("]*data-)[^>]*src|[^>]*data-)[^=]*= *\"([^\"{]+(?:\\{(?:[^{}]|\\{[^{}]*\\})*\\})?)\"[^>]*>", Pattern.CASE_INSENSITIVE) val nameRegex = Regex("\\s+作\\s*者.*|\\s+\\S+\\s+著") val authorRegex = Regex("^.*?作\\s*者[::\\s]*|\\s+著") val fileNameRegex = Regex("[\\\\/:*?\"<>|.]") val splitGroupRegex = Regex("[,;,;]") -} \ No newline at end of file +} diff --git a/app/src/main/java/io/legado/app/data/entities/BookChapter.kt b/app/src/main/java/io/legado/app/data/entities/BookChapter.kt index 67bba2e5d..bdf56417c 100644 --- a/app/src/main/java/io/legado/app/data/entities/BookChapter.kt +++ b/app/src/main/java/io/legado/app/data/entities/BookChapter.kt @@ -5,6 +5,7 @@ import androidx.room.Entity import androidx.room.ForeignKey import androidx.room.Ignore import androidx.room.Index +import io.legado.app.model.analyzeRule.AnalyzeUrl import io.legado.app.utils.GSON import io.legado.app.utils.MD5Utils import io.legado.app.utils.NetworkUtils @@ -12,32 +13,33 @@ import io.legado.app.utils.fromJsonObject import kotlinx.parcelize.IgnoredOnParcel import kotlinx.parcelize.Parcelize + @Parcelize @Entity( - tableName = "chapters", - primaryKeys = ["url", "bookUrl"], - indices = [(Index(value = ["bookUrl"], unique = false)), - (Index(value = ["bookUrl", "index"], unique = true))], - foreignKeys = [(ForeignKey( - entity = Book::class, - parentColumns = ["bookUrl"], - childColumns = ["bookUrl"], - onDelete = ForeignKey.CASCADE - ))] + tableName = "chapters", + primaryKeys = ["url", "bookUrl"], + indices = [(Index(value = ["bookUrl"], unique = false)), + (Index(value = ["bookUrl", "index"], unique = true))], + foreignKeys = [(ForeignKey( + entity = Book::class, + parentColumns = ["bookUrl"], + childColumns = ["bookUrl"], + onDelete = ForeignKey.CASCADE + ))] ) // 删除书籍时自动删除章节 data class BookChapter( - var url: String = "", // 章节地址 - var title: String = "", // 章节标题 - var baseUrl: String = "", //用来拼接相对url - var bookUrl: String = "", // 书籍地址 - var index: Int = 0, // 章节序号 - var resourceUrl: String? = null, // 音频真实URL - var tag: String? = null, // - var start: Long? = null, // 章节起始位置 - var end: Long? = null, // 章节终止位置 - var startFragmentId: String? = null, //EPUB书籍当前章节的fragmentId - var endFragmentId: String? = null, //EPUB书籍下一章节的fragmentId - var variable: String? = null //变量 + var url: String = "", // 章节地址 + var title: String = "", // 章节标题 + var baseUrl: String = "", //用来拼接相对url + var bookUrl: String = "", // 书籍地址 + var index: Int = 0, // 章节序号 + var resourceUrl: String? = null, // 音频真实URL + var tag: String? = null, // + var start: Long? = null, // 章节起始位置 + var end: Long? = null, // 章节终止位置 + var startFragmentId: String? = null, //EPUB书籍当前章节的fragmentId + var endFragmentId: String? = null, //EPUB书籍下一章节的fragmentId + var variable: String? = null //变量 ) : Parcelable { @delegate:Transient @@ -61,13 +63,13 @@ data class BookChapter( return false } - fun getAbsoluteURL():String{ - val pos = url.indexOf(',') - return if(pos == -1) NetworkUtils.getAbsoluteURL(baseUrl,url) - else NetworkUtils.getAbsoluteURL( - baseUrl, - url.substring(0, pos) - ) + url.substring(pos) + fun getAbsoluteURL(): String { + val urlArray = url.split(AnalyzeUrl.splitUrlRegex) + var absoluteUrl = NetworkUtils.getAbsoluteURL(baseUrl, urlArray[0]) + if (urlArray.size > 1) { + absoluteUrl = "$absoluteUrl,${urlArray[1]}" + } + return absoluteUrl } fun getFileName(): String = String.format("%05d-%s.nb", index, MD5Utils.md5Encode16(title)) diff --git a/app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeRule.kt b/app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeRule.kt index 3efbd2f02..57b3d25a1 100644 --- a/app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeRule.kt +++ b/app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeRule.kt @@ -25,9 +25,7 @@ import kotlin.collections.HashMap @Keep @Suppress("unused", "RegExpRedundantEscape") class AnalyzeRule(val ruleData: RuleDataInterface) : JsExtensions { - - var book: BaseBook? = if (ruleData is BaseBook) ruleData else null - + var book: BaseBook? = null var chapter: BookChapter? = null var nextChapterUrl: String? = null var content: Any? = null @@ -44,11 +42,18 @@ class AnalyzeRule(val ruleData: RuleDataInterface) : JsExtensions { private var objectChangedJS = false private var objectChangedJP = false + init { + if (ruleData is BaseBook) { + book = ruleData + } + } + @JvmOverloads fun setContent(content: Any?, baseUrl: String? = null): AnalyzeRule { - if (content == null) throw AssertionError("内容不可空(Content cannot be null)") + if (content == null) throw AssertionError("Content cannot be null") this.content = content setBaseUrl(baseUrl) + isJSON = content.toString().isJson() objectChangedXP = true objectChangedJS = true objectChangedJP = true @@ -64,8 +69,7 @@ class AnalyzeRule(val ruleData: RuleDataInterface) : JsExtensions { fun setRedirectUrl(url: String): URL? { kotlin.runCatching { - val pos = url.indexOf(',') - redirectUrl = URL( if(pos == -1) url else url.substring(0,pos)) + redirectUrl = URL(url.split(AnalyzeUrl.splitUrlRegex, 1)[0]) } return redirectUrl } @@ -192,9 +196,9 @@ class AnalyzeRule(val ruleData: RuleDataInterface) : JsExtensions { @JvmOverloads fun getString( - ruleList: List, - isUrl: Boolean = false, - value: String? = null + ruleList: List, + isUrl: Boolean = false, + value: String? = null ): String { var result: Any? = value val content = this.content @@ -258,8 +262,8 @@ class AnalyzeRule(val ruleData: RuleDataInterface) : JsExtensions { result?.let { result = when (sourceRule.mode) { Mode.Regex -> AnalyzeByRegex.getElement( - result.toString(), - sourceRule.rule.splitNotBlank("&&") + result.toString(), + sourceRule.rule.splitNotBlank("&&") ) Mode.Js -> evalJS(sourceRule.rule, it) Mode.Json -> getAnalyzeByJSonPath(it).getObject(sourceRule.rule) @@ -289,8 +293,8 @@ class AnalyzeRule(val ruleData: RuleDataInterface) : JsExtensions { result?.let { result = when (sourceRule.mode) { Mode.Regex -> AnalyzeByRegex.getElements( - result.toString(), - sourceRule.rule.splitNotBlank("&&") + result.toString(), + sourceRule.rule.splitNotBlank("&&") ) Mode.Js -> evalJS(sourceRule.rule, result) Mode.Json -> getAnalyzeByJSonPath(it).getList(sourceRule.rule) @@ -356,72 +360,58 @@ class AnalyzeRule(val ruleData: RuleDataInterface) : JsExtensions { /** * 分解规则生成规则列表 */ - fun splitSourceRule(ruleStr: String?): List { - if (ruleStr.isNullOrEmpty()) return ArrayList() + fun splitSourceRule(ruleStr: String?, mode: Mode = Mode.Default): List { + var vRuleStr = ruleStr val ruleList = ArrayList() + if (vRuleStr.isNullOrEmpty()) return ruleList //检测Mode - var mMode: Mode = Mode.Default - fun mode(ruleStr0:String)=when { - ruleStr0.startsWith("@@") -> { - mMode = Mode.Default - ruleStr0.substring(2) + var mMode: Mode = mode + when { + vRuleStr.startsWith("@@") -> { + vRuleStr = vRuleStr.substring(2) } - ruleStr0.startsWith("@XPath:", true) -> { + vRuleStr.startsWith("@XPath:", true) -> { mMode = Mode.XPath - ruleStr0.substring(7) + vRuleStr = vRuleStr.substring(7) } - ruleStr0.startsWith("/") -> {//XPath特征很明显,无需配置单独的识别标头 - mMode = Mode.XPath - ruleStr0 - } - ruleStr0.startsWith("@Json:", true) -> { + vRuleStr.startsWith("@Json:", true) -> { mMode = Mode.Json - ruleStr0.substring(6) + vRuleStr = vRuleStr.substring(6) } - ruleStr0.startsWith(":") -> { //:与伪类选择器冲突,改成?更合理 + vRuleStr.startsWith(":") -> { mMode = Mode.Regex isRegex = true - ruleStr0.substring(1) - } - ( ruleStr0[1] == '.' || ruleStr0[1] == '[') && ruleStr0[0] == '$' || content.toString().isJson() -> { - mMode = Mode.Json - ruleStr0 - } - else -> { - mMode = Mode.Default - ruleStr0 + vRuleStr = vRuleStr.substring(1) } + isRegex -> mMode = Mode.Regex + isJSON -> mMode = Mode.Json } //拆分为规则列表 var start = 0 var tmp: String - val jsMatcher = JS_PATTERN.matcher(ruleStr) - - while (jsMatcher.find()){ + val jsMatcher = JS_PATTERN.matcher(vRuleStr) + while (jsMatcher.find()) { if (jsMatcher.start() > start) { - tmp = ruleStr.substring(start, jsMatcher.start()).trim { it <= ' ' } - if (tmp.isNotEmpty()) { - ruleList.add(SourceRule(mode(tmp), mMode)) + tmp = vRuleStr.substring(start, jsMatcher.start()).trim { it <= ' ' } + if (!TextUtils.isEmpty(tmp)) { + ruleList.add(SourceRule(tmp, mMode)) } } - ruleList.add(SourceRule(jsMatcher.group(2)?:jsMatcher.group(1), Mode.Js)) + ruleList.add(SourceRule(jsMatcher.group(), Mode.Js)) start = jsMatcher.end() } - - if (ruleStr.length > start){ - tmp = ruleStr.substring(start).trim { it <= ' ' } - if (tmp.isNotEmpty()) { - ruleList.add(SourceRule(mode(tmp), mMode)) + if (vRuleStr.length > start) { + tmp = vRuleStr.substring(start).trim { it <= ' ' } + if (!TextUtils.isEmpty(tmp)) { + ruleList.add(SourceRule(tmp, mMode)) } } - return ruleList } /** * 规则类 */ - inner class SourceRule internal constructor(ruleStr: String, mainMode: Mode = Mode.Default) { internal var mode: Mode internal var rule: String @@ -437,24 +427,62 @@ class AnalyzeRule(val ruleData: RuleDataInterface) : JsExtensions { init { this.mode = mainMode + if (mode == Mode.Js) { + rule = if (ruleStr.startsWith("")) { + ruleStr.substring(4, ruleStr.lastIndexOf("<")) + } else { + ruleStr.substring(4) + } + } else { + when { + ruleStr.startsWith("@CSS:", true) -> { + mode = Mode.Default + rule = ruleStr + } + ruleStr.startsWith("@@") -> { + mode = Mode.Default + rule = ruleStr.substring(2) + } + ruleStr.startsWith("@XPath:", true) -> { + mode = Mode.XPath + rule = ruleStr.substring(7) + } + ruleStr.startsWith("//") -> {//XPath特征很明显,无需配置单独的识别标头 + mode = Mode.XPath + rule = ruleStr + } + ruleStr.startsWith("@Json:", true) -> { + mode = Mode.Json + rule = ruleStr.substring(6) + } + ruleStr.startsWith("$.") -> { + mode = Mode.Json + rule = ruleStr + } + else -> rule = ruleStr + } + } //分离put - rule = splitPutRule(ruleStr, putMap) + rule = splitPutRule(rule, putMap) //@get,{{ }}, 拆分 var start = 0 var tmp: String val evalMatcher = evalPattern.matcher(rule) - - if(evalMatcher.find()){ - - var modeX = mode == Mode.Js || mode == Mode.Regex - if (evalMatcher.start() > 0 ) { - tmp = rule.substring(0, evalMatcher.start()) - modeX = modeX || tmp.contains("##") + while (evalMatcher.find()) { + if (evalMatcher.start() > start) { + tmp = rule.substring(start, evalMatcher.start()) + if (mode != Mode.Js && mode != Mode.Regex + && start == 0 && !tmp.contains("##") + ) { + mode = Mode.Regex + } splitRegex(tmp) + } else if (mode != Mode.Js && mode != Mode.Regex + && evalMatcher.start() == 0 + ) { + mode = Mode.Regex } - if(!modeX)mode = Mode.Regex tmp = evalMatcher.group() - when { tmp.startsWith("@get:", true) -> { ruleType.add(getRuleType) @@ -468,32 +496,8 @@ class AnalyzeRule(val ruleData: RuleDataInterface) : JsExtensions { splitRegex(tmp) } } - start = evalMatcher.end() - - while (evalMatcher.find()){ - if (evalMatcher.start() > start) { - tmp = rule.substring(start, evalMatcher.start()) - splitRegex(tmp) - } - tmp = evalMatcher.group() - when { - tmp.startsWith("@get:", true) -> { - ruleType.add(getRuleType) - ruleParam.add(tmp.substring(6, tmp.lastIndex)) - } - tmp.startsWith("{{") -> { - ruleType.add(jsRuleType) - ruleParam.add(tmp.substring(2, tmp.length - 2)) - } - else -> { - splitRegex(tmp) - } - } - start = evalMatcher.end() - } } - if (rule.length > start) { tmp = rule.substring(start) splitRegex(tmp) @@ -508,22 +512,19 @@ class AnalyzeRule(val ruleData: RuleDataInterface) : JsExtensions { var tmp: String val ruleStrArray = ruleStr.split("##") val regexMatcher = regexPattern.matcher(ruleStrArray[0]) - - if(regexMatcher.find()) { + while (regexMatcher.find()) { if (mode != Mode.Js && mode != Mode.Regex) { mode = Mode.Regex } - do{ - if (regexMatcher.start() > start) { - tmp = ruleStr.substring(start, regexMatcher.start()) - ruleType.add(defaultRuleType) - ruleParam.add(tmp) - } - tmp = regexMatcher.group() - ruleType.add(tmp.substring(1).toInt()) + if (regexMatcher.start() > start) { + tmp = ruleStr.substring(start, regexMatcher.start()) + ruleType.add(defaultRuleType) ruleParam.add(tmp) - start = regexMatcher.end() - }while (regexMatcher.find()) + } + tmp = regexMatcher.group() + ruleType.add(tmp.substring(1).toInt()) + ruleParam.add(tmp) + start = regexMatcher.end() } if (ruleStr.length > start) { tmp = ruleStr.substring(start) @@ -566,8 +567,8 @@ class AnalyzeRule(val ruleData: RuleDataInterface) : JsExtensions { jsEval == null -> Unit jsEval is String -> infoVal.insert(0, jsEval) jsEval is Double && jsEval % 1.0 == 0.0 -> infoVal.insert( - 0, - String.format("%.0f", jsEval) + 0, + String.format("%.0f", jsEval) ) else -> infoVal.insert(0, jsEval.toString()) } @@ -614,8 +615,8 @@ class AnalyzeRule(val ruleData: RuleDataInterface) : JsExtensions { fun put(key: String, value: String): String { chapter?.putVariable(key, value) - ?: book?.putVariable(key, value) - ?: ruleData.putVariable(key, value) + ?: book?.putVariable(key, value) + ?: ruleData.putVariable(key, value) return value } @@ -629,9 +630,9 @@ class AnalyzeRule(val ruleData: RuleDataInterface) : JsExtensions { } } return chapter?.variableMap?.get(key) - ?: book?.variableMap?.get(key) - ?: ruleData.variableMap[key] - ?: "" + ?: book?.variableMap?.get(key) + ?: ruleData.variableMap[key] + ?: "" } /** @@ -682,8 +683,9 @@ class AnalyzeRule(val ruleData: RuleDataInterface) : JsExtensions { companion object { private val putPattern = Pattern.compile("@put:(\\{[^}]+?\\})", Pattern.CASE_INSENSITIVE) + private val getPattern = Pattern.compile("@get:\\{([^}]+?)\\}", Pattern.CASE_INSENSITIVE) private val evalPattern = - Pattern.compile("@get:\\{[^}]+?\\}|\\{\\{[\\w\\W]*?\\}\\}", Pattern.CASE_INSENSITIVE) + Pattern.compile("@get:\\{[^}]+?\\}|\\{\\{[\\w\\W]*?\\}\\}", Pattern.CASE_INSENSITIVE) private val regexPattern = Pattern.compile("\\$\\d{1,2}") private val titleNumPattern = Pattern.compile("(第)(.+?)(章)") } diff --git a/app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeUrl.kt b/app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeUrl.kt index 919f970e9..73e90e782 100644 --- a/app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeUrl.kt +++ b/app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeUrl.kt @@ -1,11 +1,13 @@ package io.legado.app.model.analyzeRule import android.annotation.SuppressLint +import android.text.TextUtils import androidx.annotation.Keep import com.bumptech.glide.load.model.GlideUrl import com.bumptech.glide.load.model.LazyHeaders import io.legado.app.constant.AppConst.SCRIPT_ENGINE import io.legado.app.constant.AppConst.UA_NAME +import io.legado.app.constant.AppPattern.EXP_PATTERN import io.legado.app.constant.AppPattern.JS_PATTERN import io.legado.app.data.entities.BaseBook import io.legado.app.data.entities.BookChapter @@ -39,6 +41,7 @@ class AnalyzeUrl( headerMapF: Map? = null ) : JsExtensions { companion object { + val splitUrlRegex = Regex(",\\s*(?=\\{)") private val pagePattern = Pattern.compile("<(.*?)>") } @@ -55,8 +58,7 @@ class AnalyzeUrl( private var retry: Int = 0 init { - val pos = baseUrl.indexOf(',') - if(pos != -1)baseUrl = baseUrl.substring(0,pos) + baseUrl = baseUrl.split(splitUrlRegex, 1)[0] headerMapF?.let { headerMap.putAll(it) if (it.containsKey("proxy")) { @@ -72,24 +74,39 @@ class AnalyzeUrl( } private fun analyzeJs() { + val ruleList = arrayListOf() var start = 0 var tmp: String val jsMatcher = JS_PATTERN.matcher(ruleUrl) while (jsMatcher.find()) { if (jsMatcher.start() > start) { tmp = - ruleUrl.substring(start, jsMatcher.start()).trim { it <= ' ' } - if (tmp.isNotEmpty()) { - ruleUrl = tmp.replace("@result", ruleUrl) + ruleUrl.substring(start, jsMatcher.start()).replace("\n", "").trim { it <= ' ' } + if (!TextUtils.isEmpty(tmp)) { + ruleList.add(tmp) } } - ruleUrl = evalJS(jsMatcher.group(2)?:jsMatcher.group(1), ruleUrl) as String + ruleList.add(jsMatcher.group()) start = jsMatcher.end() } if (ruleUrl.length > start) { - tmp = ruleUrl.substring(start).trim { it <= ' ' } - if (tmp.isNotEmpty()) { - ruleUrl = tmp.replace("@result", ruleUrl) + tmp = ruleUrl.substring(start).replace("\n", "").trim { it <= ' ' } + if (!TextUtils.isEmpty(tmp)) { + ruleList.add(tmp) + } + } + for (rule in ruleList) { + var ruleStr = rule + when { + ruleStr.startsWith("") -> { + ruleStr = ruleStr.substring(4, ruleStr.lastIndexOf("<")) + ruleUrl = evalJS(ruleStr, ruleUrl) as String + } + ruleStr.startsWith("@js", true) -> { + ruleStr = ruleStr.substring(4) + ruleUrl = evalJS(ruleStr, ruleUrl) as String + } + else -> ruleUrl = ruleStr.replace("@result", ruleUrl) } } } @@ -97,12 +114,23 @@ class AnalyzeUrl( /** * 替换关键字,页数,JS */ - private fun replaceKeyPageJs() { //先替换内嵌规则再替换页数规则,避免内嵌规则中存在大于小于号时,规则被切错 + private fun replaceKeyPageJs() { + //page + page?.let { + val matcher = pagePattern.matcher(ruleUrl) + while (matcher.find()) { + val pages = matcher.group(1)!!.split(",") + ruleUrl = if (page <= pages.size) { + ruleUrl.replace(matcher.group(), pages[page - 1].trim { it <= ' ' }) + } else { + ruleUrl.replace(matcher.group(), pages.last().trim { it <= ' ' }) + } + } + } //js if (ruleUrl.contains("{{") && ruleUrl.contains("}}")) { - - val analyze = RuleAnalyzer(ruleUrl) //创建解析 - + var jsEval: Any + val sb = StringBuffer() val bindings = SimpleBindings() bindings["java"] = this bindings["cookie"] = CookieStore @@ -113,28 +141,21 @@ class AnalyzeUrl( bindings["speakText"] = speakText bindings["speakSpeed"] = speakSpeed bindings["book"] = book - - //替换所有内嵌{{js}} - val url = analyze.innerRule("{{","}}"){ - when(val jsEval = SCRIPT_ENGINE.eval(it, bindings)){ - is String -> jsEval - jsEval is Double && jsEval % 1.0 == 0.0 -> String.format("%.0f", jsEval) - else -> jsEval.toString() - } - } - if(url.isNotEmpty())ruleUrl = url - } - //page - page?.let { - val matcher = pagePattern.matcher(ruleUrl) - while (matcher.find()) { - val pages = matcher.group(1)!!.split(",") - ruleUrl = if (page < pages.size) { //pages[pages.size - 1]等同于pages.last() - ruleUrl.replace(matcher.group(), pages[page - 1].trim { it <= ' ' }) + val expMatcher = EXP_PATTERN.matcher(ruleUrl) + while (expMatcher.find()) { + jsEval = expMatcher.group(1)?.let { + SCRIPT_ENGINE.eval(it, bindings) + } ?: "" + if (jsEval is String) { + expMatcher.appendReplacement(sb, jsEval) + } else if (jsEval is Double && jsEval % 1.0 == 0.0) { + expMatcher.appendReplacement(sb, String.format("%.0f", jsEval)) } else { - ruleUrl.replace(matcher.group(), pages.last().trim { it <= ' ' }) + expMatcher.appendReplacement(sb, jsEval.toString()) } } + expMatcher.appendTail(sb) + ruleUrl = sb.toString() } } @@ -142,20 +163,15 @@ class AnalyzeUrl( * 处理URL */ private fun initUrl() { - - var pos = ruleUrl.indexOf(',') - - urlHasQuery = if(pos == -1) ruleUrl else ruleUrl.substring(0,pos) - - url = NetworkUtils.getAbsoluteURL(baseUrl,urlHasQuery ) - + var urlArray = ruleUrl.split(splitUrlRegex, 2) + url = NetworkUtils.getAbsoluteURL(baseUrl, urlArray[0]) + urlHasQuery = urlArray[0] NetworkUtils.getBaseUrl(url)?.let { baseUrl = it } - - if(pos != -1 ) { - GSON.fromJsonObject(ruleUrl.substring(pos + 1).trim{ it < '!'})?.let { option -> - + if (urlArray.size > 1) { + val option = GSON.fromJsonObject(urlArray[1]) + option?.let { _ -> option.method?.let { if (it.equals("POST", true)) method = RequestMethod.POST } @@ -185,17 +201,16 @@ class AnalyzeUrl( retry = option.retry } } - headerMap[UA_NAME] ?: let { headerMap[UA_NAME] = AppConfig.userAgent } when (method) { RequestMethod.GET -> { if (!useWebView) { - pos = url.indexOf('?') - if(pos != -1) { - analyzeFields(url.substring(pos + 1)) - url = url.substring(0,pos) + urlArray = url.split("?") + url = urlArray[0] + if (urlArray.size > 1) { + analyzeFields(urlArray[1]) } } } @@ -218,7 +233,7 @@ class AnalyzeUrl( for (query in queryS) { val queryM = query.splitNotBlank("=") val value = if (queryM.size > 1) queryM[1] else "" - if (charset.isNullOrEmpty()) { + if (TextUtils.isEmpty(charset)) { if (NetworkUtils.hasUrlEncoded(value)) { fieldMap[queryM[0]] = value } else { diff --git a/app/src/main/java/io/legado/app/model/analyzeRule/RuleAnalyzer.kt b/app/src/main/java/io/legado/app/model/analyzeRule/RuleAnalyzer.kt index 050625dfc..2b6462827 100644 --- a/app/src/main/java/io/legado/app/model/analyzeRule/RuleAnalyzer.kt +++ b/app/src/main/java/io/legado/app/model/analyzeRule/RuleAnalyzer.kt @@ -1,9 +1,5 @@ package io.legado.app.model.analyzeRule -import io.legado.app.utils.isJson -import java.util.ArrayList -import java.util.regex.Pattern - //通用的规则切分处理 class RuleAnalyzer(data: String, code: Boolean = false) { @@ -13,10 +9,12 @@ class RuleAnalyzer(data: String, code: Boolean = false) { private var start = 0 //当前处理字段的开始 private var startX = 0 //当前规则的开始 - private var rule = ArrayList() //分割出的规则列表 + private var rule = arrayOf() //分割出的规则列表 private var step: Int = 0 //分割字符的长度 var elementsType = "" //当前分割字符串 - var innerType = true //是否为内嵌{{}} + + //设置平衡组函数,json或JavaScript时设置成chompCodeBalanced,否则为chompRuleBalanced + val chompBalanced = if (code) ::chompCodeBalanced else ::chompRuleBalanced fun trim() { // 修剪当前规则之前的"@"或者空白符 if(queue[pos] == '@' || queue[pos] < '!') { //在while里重复设置start和startX会拖慢执行速度,所以先来个判断是否存在需要修剪的字段,最后再一次性设置start和startX @@ -34,8 +32,8 @@ class RuleAnalyzer(data: String, code: Boolean = false) { } /** - * 从剩余字串中拉出一个字符串,直到但不包括匹配序列 - * @param seq 查找的字符串 **区分大小写** + * 从剩余字串中拉出一个字符串,直到但不包括匹配序列,或剩余字串用完。 + * @param seq 分隔字符 **区分大小写** * @return 是否找到相应字段。 */ fun consumeTo(seq: String): Boolean { @@ -68,31 +66,6 @@ class RuleAnalyzer(data: String, code: Boolean = false) { pos++ //逐个试探 } - return false - } - /** - * 从剩余字串中拉出一个字符串,直到但不包括匹配序列(匹配参数列表中一项即为匹配),或剩余字串用完。 - * @param seq 匹配字符串序列 - * @return 成功返回true并设置间隔,失败则直接返回fasle - */ - fun chompToAny(vararg seq: String): Boolean { - var pos = pos //声明新变量记录匹配位置,不更改类本身的位置 - - while (pos != queue.length) { - - for (s in seq) { - if (queue.regionMatches(pos, s, 0, s.length)) { - rule += queue.substring(this.pos, pos) - pos += s.length //跳过分隔符 - ruleTypeList += s //追加类型到列表 - start = this.pos - this.pos = pos //匹配成功, 同步处理位置到类 - return true //匹配就返回 true - } - } - - pos++ //逐个试探 - } return false } @@ -117,6 +90,75 @@ class RuleAnalyzer(data: String, code: Boolean = false) { return -1 } + //其中js只要符合语法,就不用避开任何阅读关键字,自由发挥 + fun chompJsBalanced( + f: ((Char) -> Boolean?) = { + when (it) { + '{' -> true //开始嵌套一层 + '}' -> false //闭合一层嵌套 + else -> null + } + } + ): Boolean { + var pos = pos //声明变量记录临时处理位置 + var depth = 0 //嵌套深度 + var bracketsDepth = 0 //[]嵌套深度 + + var inSingleQuote = false //单引号 + var inDoubleQuote = false //双引号 + var inOtherQuote = false //js原始字串分隔字符 + var regex = false //正则 + var commit = false //单行注释 + var commits = false //多行注释 + + do { + if (pos == queue.length) break + var c = queue[pos++] + if (c != '\\') { //非转义字符 + if (c == '\'' && !commits && !commit && !regex && !inDoubleQuote && !inOtherQuote) inSingleQuote = + !inSingleQuote //匹配具有语法功能的单引号 + else if (c == '"' && !commits && !commit && !regex && !inSingleQuote && !inOtherQuote) inDoubleQuote = + !inDoubleQuote //匹配具有语法功能的双引号 + else if (c == '`' && !commits && !commit && !regex && !inSingleQuote && !inDoubleQuote) inOtherQuote = + !inOtherQuote //匹配具有语法功能的'`' + else if (c == '/' && !commits && !commit && !regex && !inSingleQuote && !inDoubleQuote && !inOtherQuote) { //匹配注释或正则起点 + c = queue[pos++] + when (c) { + '/' -> commit = true //匹配单行注释起点 + '*' -> commits = true //匹配多行注释起点 + else -> regex = true //匹配正则起点 + } + } else if (commits && c == '*') { //匹配多行注释终点 + c = queue[pos++] + if (c == '/') commits = false + } else if (regex && c == '/') { //正则的终点或[]平衡 + + when (c) { + '/' -> regex = false//匹配正则终点 + + //为了保证当open为( 且 close 为 )时,正则中[(]或[)]的合法性。故对[]这对在任何规则中都平衡的成对符号做匹配。 + // 注:正则里[(]、[)]、[{]、[}]都是合法的,所以只有[]必须平衡。 + '[' -> bracketsDepth++ //开始嵌套一层[] + ']' -> bracketsDepth-- //闭合一层嵌套[] + } + + } else if (c == '\n') commit = false + + if (commits || commit || regex || inSingleQuote || inDoubleQuote || inOtherQuote) continue //语法单元未匹配结束,直接进入下个循环 + + val fn = f(c) ?: continue + if (fn) depth++ else depth-- //嵌套或者闭合 + + } else pos++ + + } while (depth > 0 || bracketsDepth > 0) //拉出全部符合js语法的字段 + + return if (depth > 0 || bracketsDepth > 0) false else { + this.pos = pos //同步位置 + true + } + } + /** * 拉出一个非内嵌代码平衡组,存在转义文本 */ @@ -194,7 +236,7 @@ class RuleAnalyzer(data: String, code: Boolean = false) { * 不用正则,不到最后不切片也不用中间变量存储,只在序列中标记当前查找字段的开头结尾,到返回时才切片,高效快速准确切割规则 * 解决jsonPath自带的"&&"和"||"与阅读的规则冲突,以及规则正则或字符串中包含"&&"、"||"、"%%"、"@"导致的冲突 */ - tailrec fun splitRule(vararg split: String): ArrayList { //首段匹配,elementsType为空 + tailrec fun splitRule(vararg split: String): Array { //首段匹配,elementsType为空 if (split.size == 1) { elementsType = split[0] //设置分割字串 @@ -218,7 +260,7 @@ class RuleAnalyzer(data: String, code: Boolean = false) { if (st == -1) { - rule = arrayListOf(queue.substring(startX, end)) //压入分隔的首段规则到数组 + rule = arrayOf(queue.substring(startX, end)) //压入分隔的首段规则到数组 elementsType = queue.substring(end, end + step) //设置组合类型 pos = end + step //跳过分隔符 @@ -235,7 +277,7 @@ class RuleAnalyzer(data: String, code: Boolean = false) { if (st > end) { //先匹配到st1pos,表明分隔字串不在选择器中,将选择器前分隔字串分隔的字段依次压入数组 - rule = arrayListOf(queue.substring(startX, end)) //压入分隔的首段规则到数组 + rule = arrayOf(queue.substring(startX, end)) //压入分隔的首段规则到数组 elementsType = queue.substring(end, end + step) //设置组合类型 pos = end + step //跳过分隔符 @@ -269,7 +311,7 @@ class RuleAnalyzer(data: String, code: Boolean = false) { } @JvmName("splitRuleNext") - private tailrec fun splitRule(): ArrayList { //二段匹配被调用,elementsType非空(已在首段赋值),直接按elementsType查找,比首段采用的方式更快 + private tailrec fun splitRule(): Array { //二段匹配被调用,elementsType非空(已在首段赋值),直接按elementsType查找,比首段采用的方式更快 val end = pos //记录分隔位置 pos = start //重回开始,启动另一种查找 @@ -294,7 +336,7 @@ class RuleAnalyzer(data: String, code: Boolean = false) { if (st > end) { //先匹配到st1pos,表明分隔字串不在选择器中,将选择器前分隔字串分隔的字段依次压入数组 - rule += arrayListOf(queue.substring(startX, end)) //压入分隔的首段规则到数组 + rule += arrayOf(queue.substring(startX, end)) //压入分隔的首段规则到数组 pos = end + step //跳过分隔符 while (consumeTo(elementsType) && pos < st) { //循环切分规则压入数组 @@ -331,9 +373,9 @@ class RuleAnalyzer(data: String, code: Boolean = false) { /** * 替换内嵌规则 - * @param inner 起始标志,如{$. + * @param inner 起始标志,如{$. 或 {{ * @param startStep 不属于规则部分的前置字符长度,如{$.中{不属于规则的组成部分,故startStep为1 - * @param endStep 不属于规则部分的后置字符长度 + * @param endStep 不属于规则部分的后置字符长度,如}}长度为2 * @param fr 查找到内嵌规则时,用于解析的函数 * * */ @@ -343,13 +385,14 @@ class RuleAnalyzer(data: String, code: Boolean = false) { endStep: Int = 1, fr: (String) -> String? ): String { + val st = StringBuilder() while (consumeTo(inner)) { //拉取成功返回true,ruleAnalyzes里的字符序列索引变量pos后移相应位置,否则返回false,且isEmpty为true val posPre = pos //记录consumeTo匹配位置 if (chompCodeBalanced('{', '}')) { val frv = fr(queue.substring(posPre + startStep, pos - endStep)) - if (!frv.isNullOrEmpty()) { + if (frv != null) { st.append(queue.substring(startX, posPre) + frv) //压入内嵌规则前的内容,及内嵌规则解析得到的字符串 startX = pos //记录下次规则起点 continue //获取内容成功,继续选择下个内嵌规则 @@ -363,206 +406,64 @@ class RuleAnalyzer(data: String, code: Boolean = false) { }.toString() } - /** - * 替换内嵌规则 - * @param fr 查找到内嵌规则时,用于解析的函数 - * - * */ - fun innerRule( - startStr:String, - endStr:String, - fr: (String) -> String? - ): String { - - val st = StringBuilder() - while (consumeTo(startStr)) { //拉取成功返回true,ruleAnalyzes里的字符序列索引变量pos后移相应位置,否则返回false,且isEmpty为true - pos += startStr.length //跳过开始字符串 - val posPre = pos //记录consumeTo匹配位置 - if (consumeTo(endStr)) { - val frv = fr(queue.substring(posPre, pos)) - st.append(queue.substring(startX, posPre - startStr.length) + frv) //压入内嵌规则前的内容,及内嵌规则解析得到的字符串 - pos += endStr.length //跳过结束字符串 - startX = pos //记录下次规则起点 - } - } - - return if(startX == 0) queue else st.apply { - append(queue.substring(startX)) - }.toString() - } - - val ruleTypeList = ArrayList() - //设置平衡组函数,json或JavaScript时设置成chompCodeBalanced,否则为chompRuleBalanced - val chompBalanced = if (code) ::chompCodeBalanced else ::chompRuleBalanced - enum class Mode { - XPath, Json, Default, Js, Regex - } - /** - * 不用正则,不到最后不切片也不用中间变量存储,只在序列中标记当前查找字段的开头结尾,到返回时才切片,高效快速准确切割规则 - * 解决jsonPath自带的"&&"和"||"与阅读的规则冲突,以及规则正则或字符串中包含"&&"、"||"、"%%"、"@"导致的冲突 - */ - tailrec fun splitAnyRule(): ArrayList { //首段匹配,elementsType为空 - - if (!consumeToAny(* STARTSTR)) { //未找到分隔符 - rule += queue.substring(startX) - return rule - } - - val end = pos //记录分隔位置 - pos = start //重回开始,启动另一种查找 - - do { - val st = findToAny('[', '(') //查找筛选器位置 - - if (st == -1) { - - rule += arrayOf(queue.substring(startX, end)) //压入分隔的首段规则到数组 - - ruleTypeList += queue.substring(end, end + step) //追加类型到类型列表 - pos = end + step //跳过分隔符 - - while (!chompToAny(elementsType)) { //循环切分规则压入数组 - rule += queue.substring(pos) //将剩余字段压入数组末尾 - return rule - } - } - - if (st > end) { //先匹配到st1pos,表明分隔字串不在选择器中,将选择器前分隔字串分隔的字段依次压入数组 - - rule += arrayOf(queue.substring(startX, end)) //压入分隔的首段规则到数组 - - ruleTypeList += queue.substring(end, end + step) //设置组合类型 - pos = end + step //跳过分隔符 - - while (!chompToAny(elementsType) && pos >= st) { //循环切分规则压入数组 - if (pos > st) { - startX = start - } else { //执行到此,证明后面再无分隔字符 - rule += queue.substring(pos) //将剩余字段压入数组末尾 - return rule - } - } - } - - pos = st //位置回到筛选器处 - val next = if (queue[pos] == '[') ']' else ')' //平衡组末尾字符 - - if (!chompBalanced(queue[pos], next)){ - ruleTypeList.clear() - rule.clear() - consumeToAny("","@js:") - rule += queue.substring(0,pos) - ruleTypeList += queue.substring(pos, pos + 4) //设置组合类型 - } - - } while (end > pos) - - start = pos //设置开始查找筛选器位置的起始位置 - - return splitAnyRule() //递归调用首段匹配 - } - - var isJSON = false - - var isUrl = false - var isUrlList = false - - var isMulu = false - var isreverse = false - var isAllInOne= false - - var isFind = false - private val findName = ArrayList() - - var replaceRegex = "" - var replacement = "" - var replaceFirst = false - val putMap = HashMap() - private val ruleParam = ArrayList() - private val ruleType = ArrayList() - private val getRuleType = -2 - private val jsRuleType = -1 - private val defaultRuleType = 0 - - @JvmOverloads - fun setContent(cont: String,type:String = ""): RuleAnalyzer { - queue = cont - when(type){ - "mulu" -> { - if(queue[0] =='-'){ //目录反转 - isreverse = true - startX++ - pos++ - }else if(queue[0] =='?'){ //AllInOne - isAllInOne = true - startX++ - pos++ - } - isMulu = true - } - "find" -> { - pos = queue.indexOf("::") - findName.add(queue.substring(startX,pos)) - pos+=2 - isFind = true - } - "url" -> { - - isUrl = true - } - "urlList" -> { - - isUrlList = true - } - else -> { - isJSON = queue.toString().isJson() - } - } - - return this - } - companion object { - /** * 转义字符 */ private const val ESC = '\\' + /** + * 阅读共有分隔字串起始部分 + * "##","@@","{{","{[","", "@js:" + */ + val splitList = arrayOf("##", "@@", "{{", "{[", "", "@js:") + + /** + * 发现‘名称-链接’分隔字串 + * "::" + */ + const val splitListFaXian = "::" + + /** + * 目录专有起始字符 + * "-" + */ + const val splitListMulu = "-" + + /** + * 结果为元素列表的 all in one 模式起始字符 + * "+" + */ + const val splitListTongYi = "+" - val validKeys = arrayOf("class", "id", "tag", "text", "children") + /** + * 结果为元素列表的项的同规则组合结构 + * "||","&&","%%" + */ + val splitListReSplit = arrayOf("||", "&&", "%%") /** - * 参数字符串 + * js脚本结束字串 + * "" */ - private val STARTSTRURL = arrayOf(",{",) + const val splitListEndJS = "" - private val regexPattern = Pattern.compile("\\$\\d{1,2}") - private val putPattern = Pattern.compile("@put:(\\{[^}]+?\\})", Pattern.CASE_INSENSITIVE) - private val getPattern = Pattern.compile("@get:\\{([^}]+?)\\}", Pattern.CASE_INSENSITIVE) - private val evalPattern = Pattern.compile("\\{\\{[\\w\\W]*?\\}\\}", Pattern.CASE_INSENSITIVE) + /** + *内嵌js结束字串 + * "}}" + */ + const val splitListEndInnerJS = "}}" - val ENDSTR= mapOf( - "" to "", - "{{" to "}}", - ) + /** + * 内嵌规则结束字串 + * "]}" + */ + const val splitListEndInnerRule = "]}" /** - * 规则起始字符串 + * '[', ']', '(', ')','{','}' */ - private val STARTSTR = arrayOf("@js:","","","##","@@","@" - ,"{{@", "{{","}}" - ,"}" - , "{@", "{/", "{$", "{?" - , "{class" - , "{id" - , "{tag" - , "{text" - , "{children" - ,"/","$","@xpath:","@json:","@css:" - ,"||", "&&", "%%" - ,"@get:{","@put:{" - ) + val splitListPublic = charArrayOf('[', ']', '(', ')', '{', '}') /** * '*',"/","//",":","::","@","|","@xpath:" diff --git a/app/src/main/java/io/legado/app/service/help/ReadBook.kt b/app/src/main/java/io/legado/app/service/help/ReadBook.kt index 62ab815c0..56a8f4bb7 100644 --- a/app/src/main/java/io/legado/app/service/help/ReadBook.kt +++ b/app/src/main/java/io/legado/app/service/help/ReadBook.kt @@ -415,22 +415,30 @@ object ReadBook { else -> chapter.title } val contents = contentProcessor!!.getContent(book, chapter.title, content) - val textChapter = ChapterProvider.getTextChapter(book, chapter,contents,chapterSize) - - val offset = chapter.index - durChapterIndex - if (upContent) callBack?.upContent(offset ,resetPageOffset) - when (offset) { - 0 -> { - curTextChapter = textChapter + when (chapter.index) { + durChapterIndex -> { + curTextChapter = + ChapterProvider.getTextChapter( + book, chapter, contents, chapterSize + ) + if (upContent) callBack?.upContent(resetPageOffset = resetPageOffset) callBack?.upView() curPageChanged() callBack?.contentLoadFinish() } - - 1 -> { - prevTextChapter = textChapter + durChapterIndex - 1 -> { + prevTextChapter = + ChapterProvider.getTextChapter( + book, chapter, contents, chapterSize + ) + if (upContent) callBack?.upContent(-1, resetPageOffset) } - 1 -> { - nextTextChapter = textChapter + durChapterIndex + 1 -> { + nextTextChapter = + ChapterProvider.getTextChapter( + book, chapter, contents, chapterSize + ) + if (upContent) callBack?.upContent(1, resetPageOffset) } } } diff --git a/app/src/main/java/io/legado/app/ui/book/read/page/provider/ChapterProvider.kt b/app/src/main/java/io/legado/app/ui/book/read/page/provider/ChapterProvider.kt index ca3643caa..814c8736d 100644 --- a/app/src/main/java/io/legado/app/ui/book/read/page/provider/ChapterProvider.kt +++ b/app/src/main/java/io/legado/app/ui/book/read/page/provider/ChapterProvider.kt @@ -115,7 +115,15 @@ object ChapterProvider { content.replace(AppPattern.imgPattern.toRegex(), "\n\$0\n") .split("\n").forEach { text -> if (text.isNotBlank()) { - if (!text.startsWith(" + durY = setTypeImage( + book, bookChapter, src, + durY, textPages, book.getImageStyle() + ) + } + } else { val isTitle = index == 0 val textPaint = if (isTitle) titlePaint else contentPaint if (!(isTitle && ReadBookConfig.titleMode == 2)) { @@ -124,11 +132,6 @@ object ChapterProvider { stringBuilder, isTitle, textPaint ) } - } else { //图片 - durY = setTypeImage( - book, bookChapter, text.substring(10, text.length-2), - durY, textPages, book.getImageStyle() - ) } } } @@ -147,10 +150,7 @@ object ChapterProvider { return TextChapter( bookChapter.index, bookChapter.title, - bookChapter.getAbsoluteURL().run{ - val pos = indexOf(',') - if(pos == -1) this else substring(0,pos) - }, + bookChapter.getAbsoluteURL().split(AnalyzeUrl.splitUrlRegex)[0], textPages, chapterSize ) } diff --git a/app/src/main/java/io/legado/app/utils/HtmlFormatter.kt b/app/src/main/java/io/legado/app/utils/HtmlFormatter.kt index e5ab0972d..129d5018c 100644 --- a/app/src/main/java/io/legado/app/utils/HtmlFormatter.kt +++ b/app/src/main/java/io/legado/app/utils/HtmlFormatter.kt @@ -1,7 +1,8 @@ package io.legado.app.utils +import io.legado.app.constant.AppPattern +import io.legado.app.model.analyzeRule.AnalyzeUrl import java.net.URL -import java.util.regex.Pattern object HtmlFormatter { private val wrapHtmlRegex = "]*>".toRegex() @@ -13,7 +14,7 @@ object HtmlFormatter { return html.replace(wrapHtmlRegex, "\n") .replace(otherRegex, "") .replace("\\s*\\n+\\s*".toRegex(), "\n  ") - .replace("^[\\n\\s]*".toRegex(), "  ") + .replace("^[\\n\\s]+".toRegex(), "  ") .replace("[\\n\\s]+$".toRegex(), "") } @@ -21,64 +22,22 @@ object HtmlFormatter { fun formatKeepImg(html: String?, redirectUrl: URL?): String { html ?: return "" - val keepImgHtml = html.replace(wrapHtmlRegex, "\n") - .replace(notImgHtmlRegex, "") - .replace("[\\n\\s]+\$|^[\\n\\s]*".toRegex(), "") - .replace("\\s*\\n+\\s*".toRegex(), "\n") - - val sb = StringBuffer("  ") //前置缩减 - val hasDataType:Boolean //是否有数据属性 - - //图片有data-开头的数据属性时优先用数据属性作为src,没有数据属性时匹配src - val imgPattern = Pattern.compile( - if(keepImgHtml.matches("]*data-".toRegex())) { - hasDataType = true - "]*data-[^=]*= *\"([^\"])\"[^>]*>" - } - else { - hasDataType = false - "]*src *= *\"([^\"{]+(?:\\{(?:[^{}]|\\{[^{}]*\\})*\\})?)\"[^>]*>" - }, Pattern.CASE_INSENSITIVE - ) - - val matcher = imgPattern.matcher(keepImgHtml) + val keepImgHtml = formatKeepImg(html) + val sb = StringBuffer() + val matcher = AppPattern.imgPattern.matcher(keepImgHtml) var appendPos = 0 - - if(matcher.find()){ - if(hasDataType || matcher.group(1)!!.indexOf(',') == -1) { //图片无参 - - do{ - sb.append(keepImgHtml.substring(appendPos, matcher.start()).replace("\n","\n  ")) //非图片部分换行缩减 - sb.append( "" ) - appendPos = matcher.end() - }while (matcher.find()) - - }else{ //图片有参 - - do{ - val url = matcher.group(1)!! - val pos = url.indexOf(',') - sb.append(keepImgHtml.substring(appendPos, matcher.start()).replace("\n","\n  ")) //非图片部分换行缩减 - sb.append( - "" - ) - appendPos = matcher.end() - }while(matcher.find()) - + while (matcher.find()) { + val urlArray = matcher.group(1)!!.split(AnalyzeUrl.splitUrlRegex) + var url = NetworkUtils.getAbsoluteURL(redirectUrl, urlArray[0]) + if (urlArray.size > 1) { + url = "$url,${urlArray[1]}" } + sb.append(keepImgHtml.substring(appendPos, matcher.start())) + sb.append("") + appendPos = matcher.end() } - if (appendPos < keepImgHtml.length) { - sb.append(keepImgHtml.substring(appendPos, keepImgHtml.length).replace("\n","\n  ")) //非图片部分换行缩减 + sb.append(keepImgHtml.substring(appendPos, keepImgHtml.length)) } return sb.toString() }