Revert "修复与优化"

pull/1122/head
kunfei 3 years ago committed by GitHub
parent 3c3be26874
commit f36d3a8949
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 8
      app/src/main/java/io/legado/app/constant/AppPattern.kt
  2. 14
      app/src/main/java/io/legado/app/data/entities/BookChapter.kt
  3. 158
      app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeRule.kt
  4. 113
      app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeUrl.kt
  5. 347
      app/src/main/java/io/legado/app/model/analyzeRule/RuleAnalyzer.kt
  6. 30
      app/src/main/java/io/legado/app/service/help/ReadBook.kt
  7. 20
      app/src/main/java/io/legado/app/ui/book/read/page/provider/ChapterProvider.kt
  8. 71
      app/src/main/java/io/legado/app/utils/HtmlFormatter.kt

@ -5,11 +5,11 @@ import java.util.regex.Pattern
@Suppress("RegExpRedundantEscape") @Suppress("RegExpRedundantEscape")
object AppPattern { object AppPattern {
val JS_PATTERN: Pattern = val JS_PATTERN: Pattern =
Pattern.compile("<js>([\\w\\W]+?)</js>|@js:([\\w\\W]*)", Pattern.CASE_INSENSITIVE) Pattern.compile("(<js>[\\w\\W]*?</js>|@js:[\\w\\W]*$)", Pattern.CASE_INSENSITIVE)
val EXP_PATTERN: Pattern = Pattern.compile("\\{\\{([\\w\\W]*?)\\}\\}") val EXP_PATTERN: Pattern = Pattern.compile("\\{\\{([\\w\\W]*?)\\}\\}")
//图片有data-开头的数据属性时优先用数据属性作为src,没有数据属性时才匹配src
//匹配格式化后的图片格式 val imgPattern: Pattern =
val imgPattern: Pattern = Pattern.compile("<img src=\"([^>]+)\">") Pattern.compile("<img(?:(?![^>]*data-)[^>]*src|[^>]*data-)[^=]*= *\"([^\"{]+(?:\\{(?:[^{}]|\\{[^{}]*\\})*\\})?)\"[^>]*>", Pattern.CASE_INSENSITIVE)
val nameRegex = Regex("\\s+作\\s*者.*|\\s+\\S+\\s+著") val nameRegex = Regex("\\s+作\\s*者.*|\\s+\\S+\\s+著")
val authorRegex = Regex("^.*?作\\s*者[::\\s]*|\\s+著") val authorRegex = Regex("^.*?作\\s*者[::\\s]*|\\s+著")

@ -5,6 +5,7 @@ import androidx.room.Entity
import androidx.room.ForeignKey import androidx.room.ForeignKey
import androidx.room.Ignore import androidx.room.Ignore
import androidx.room.Index import androidx.room.Index
import io.legado.app.model.analyzeRule.AnalyzeUrl
import io.legado.app.utils.GSON import io.legado.app.utils.GSON
import io.legado.app.utils.MD5Utils import io.legado.app.utils.MD5Utils
import io.legado.app.utils.NetworkUtils import io.legado.app.utils.NetworkUtils
@ -12,6 +13,7 @@ import io.legado.app.utils.fromJsonObject
import kotlinx.parcelize.IgnoredOnParcel import kotlinx.parcelize.IgnoredOnParcel
import kotlinx.parcelize.Parcelize import kotlinx.parcelize.Parcelize
@Parcelize @Parcelize
@Entity( @Entity(
tableName = "chapters", tableName = "chapters",
@ -62,12 +64,12 @@ data class BookChapter(
} }
fun getAbsoluteURL(): String { fun getAbsoluteURL(): String {
val pos = url.indexOf(',') val urlArray = url.split(AnalyzeUrl.splitUrlRegex)
return if(pos == -1) NetworkUtils.getAbsoluteURL(baseUrl,url) var absoluteUrl = NetworkUtils.getAbsoluteURL(baseUrl, urlArray[0])
else NetworkUtils.getAbsoluteURL( if (urlArray.size > 1) {
baseUrl, absoluteUrl = "$absoluteUrl,${urlArray[1]}"
url.substring(0, pos) }
) + url.substring(pos) return absoluteUrl
} }
fun getFileName(): String = String.format("%05d-%s.nb", index, MD5Utils.md5Encode16(title)) fun getFileName(): String = String.format("%05d-%s.nb", index, MD5Utils.md5Encode16(title))

@ -25,9 +25,7 @@ import kotlin.collections.HashMap
@Keep @Keep
@Suppress("unused", "RegExpRedundantEscape") @Suppress("unused", "RegExpRedundantEscape")
class AnalyzeRule(val ruleData: RuleDataInterface) : JsExtensions { class AnalyzeRule(val ruleData: RuleDataInterface) : JsExtensions {
var book: BaseBook? = null
var book: BaseBook? = if (ruleData is BaseBook) ruleData else null
var chapter: BookChapter? = null var chapter: BookChapter? = null
var nextChapterUrl: String? = null var nextChapterUrl: String? = null
var content: Any? = null var content: Any? = null
@ -44,11 +42,18 @@ class AnalyzeRule(val ruleData: RuleDataInterface) : JsExtensions {
private var objectChangedJS = false private var objectChangedJS = false
private var objectChangedJP = false private var objectChangedJP = false
// When the supplied rule data is itself a book, remember it as the current book;
// any other kind of rule data leaves `book` untouched.
init {
    (ruleData as? BaseBook)?.let { book = it }
}
@JvmOverloads @JvmOverloads
fun setContent(content: Any?, baseUrl: String? = null): AnalyzeRule { fun setContent(content: Any?, baseUrl: String? = null): AnalyzeRule {
if (content == null) throw AssertionError("内容不可空(Content cannot be null)") if (content == null) throw AssertionError("Content cannot be null")
this.content = content this.content = content
setBaseUrl(baseUrl) setBaseUrl(baseUrl)
isJSON = content.toString().isJson()
objectChangedXP = true objectChangedXP = true
objectChangedJS = true objectChangedJS = true
objectChangedJP = true objectChangedJP = true
@ -64,8 +69,7 @@ class AnalyzeRule(val ruleData: RuleDataInterface) : JsExtensions {
fun setRedirectUrl(url: String): URL? { fun setRedirectUrl(url: String): URL? {
kotlin.runCatching { kotlin.runCatching {
val pos = url.indexOf(',') redirectUrl = URL(url.split(AnalyzeUrl.splitUrlRegex, 1)[0])
redirectUrl = URL( if(pos == -1) url else url.substring(0,pos))
} }
return redirectUrl return redirectUrl
} }
@ -356,72 +360,58 @@ class AnalyzeRule(val ruleData: RuleDataInterface) : JsExtensions {
/** /**
* 分解规则生成规则列表 * 分解规则生成规则列表
*/ */
fun splitSourceRule(ruleStr: String?): List<SourceRule> { fun splitSourceRule(ruleStr: String?, mode: Mode = Mode.Default): List<SourceRule> {
if (ruleStr.isNullOrEmpty()) return ArrayList<SourceRule>() var vRuleStr = ruleStr
val ruleList = ArrayList<SourceRule>() val ruleList = ArrayList<SourceRule>()
if (vRuleStr.isNullOrEmpty()) return ruleList
//检测Mode //检测Mode
var mMode: Mode = Mode.Default var mMode: Mode = mode
fun mode(ruleStr0:String)=when { when {
ruleStr0.startsWith("@@") -> { vRuleStr.startsWith("@@") -> {
mMode = Mode.Default vRuleStr = vRuleStr.substring(2)
ruleStr0.substring(2)
}
ruleStr0.startsWith("@XPath:", true) -> {
mMode = Mode.XPath
ruleStr0.substring(7)
} }
ruleStr0.startsWith("/") -> {//XPath特征很明显,无需配置单独的识别标头 vRuleStr.startsWith("@XPath:", true) -> {
mMode = Mode.XPath mMode = Mode.XPath
ruleStr0 vRuleStr = vRuleStr.substring(7)
} }
ruleStr0.startsWith("@Json:", true) -> { vRuleStr.startsWith("@Json:", true) -> {
mMode = Mode.Json mMode = Mode.Json
ruleStr0.substring(6) vRuleStr = vRuleStr.substring(6)
} }
ruleStr0.startsWith(":") -> { //:与伪类选择器冲突,改成?更合理 vRuleStr.startsWith(":") -> {
mMode = Mode.Regex mMode = Mode.Regex
isRegex = true isRegex = true
ruleStr0.substring(1) vRuleStr = vRuleStr.substring(1)
}
( ruleStr0[1] == '.' || ruleStr0[1] == '[') && ruleStr0[0] == '$' || content.toString().isJson() -> {
mMode = Mode.Json
ruleStr0
}
else -> {
mMode = Mode.Default
ruleStr0
} }
isRegex -> mMode = Mode.Regex
isJSON -> mMode = Mode.Json
} }
//拆分为规则列表 //拆分为规则列表
var start = 0 var start = 0
var tmp: String var tmp: String
val jsMatcher = JS_PATTERN.matcher(ruleStr) val jsMatcher = JS_PATTERN.matcher(vRuleStr)
while (jsMatcher.find()) { while (jsMatcher.find()) {
if (jsMatcher.start() > start) { if (jsMatcher.start() > start) {
tmp = ruleStr.substring(start, jsMatcher.start()).trim { it <= ' ' } tmp = vRuleStr.substring(start, jsMatcher.start()).trim { it <= ' ' }
if (tmp.isNotEmpty()) { if (!TextUtils.isEmpty(tmp)) {
ruleList.add(SourceRule(mode(tmp), mMode)) ruleList.add(SourceRule(tmp, mMode))
} }
} }
ruleList.add(SourceRule(jsMatcher.group(2)?:jsMatcher.group(1), Mode.Js)) ruleList.add(SourceRule(jsMatcher.group(), Mode.Js))
start = jsMatcher.end() start = jsMatcher.end()
} }
if (vRuleStr.length > start) {
if (ruleStr.length > start){ tmp = vRuleStr.substring(start).trim { it <= ' ' }
tmp = ruleStr.substring(start).trim { it <= ' ' } if (!TextUtils.isEmpty(tmp)) {
if (tmp.isNotEmpty()) { ruleList.add(SourceRule(tmp, mMode))
ruleList.add(SourceRule(mode(tmp), mMode))
} }
} }
return ruleList return ruleList
} }
/** /**
* 规则类 * 规则类
*/ */
inner class SourceRule internal constructor(ruleStr: String, mainMode: Mode = Mode.Default) { inner class SourceRule internal constructor(ruleStr: String, mainMode: Mode = Mode.Default) {
internal var mode: Mode internal var mode: Mode
internal var rule: String internal var rule: String
@ -437,44 +427,60 @@ class AnalyzeRule(val ruleData: RuleDataInterface) : JsExtensions {
init { init {
this.mode = mainMode this.mode = mainMode
//分离put if (mode == Mode.Js) {
rule = splitPutRule(ruleStr, putMap) rule = if (ruleStr.startsWith("<js>")) {
//@get,{{ }}, 拆分 ruleStr.substring(4, ruleStr.lastIndexOf("<"))
var start = 0 } else {
var tmp: String ruleStr.substring(4)
val evalMatcher = evalPattern.matcher(rule)
if(evalMatcher.find()){
var modeX = mode == Mode.Js || mode == Mode.Regex
if (evalMatcher.start() > 0 ) {
tmp = rule.substring(0, evalMatcher.start())
modeX = modeX || tmp.contains("##")
splitRegex(tmp)
} }
if(!modeX)mode = Mode.Regex } else {
tmp = evalMatcher.group()
when { when {
tmp.startsWith("@get:", true) -> { ruleStr.startsWith("@CSS:", true) -> {
ruleType.add(getRuleType) mode = Mode.Default
ruleParam.add(tmp.substring(6, tmp.lastIndex)) rule = ruleStr
} }
tmp.startsWith("{{") -> { ruleStr.startsWith("@@") -> {
ruleType.add(jsRuleType) mode = Mode.Default
ruleParam.add(tmp.substring(2, tmp.length - 2)) rule = ruleStr.substring(2)
} }
else -> { ruleStr.startsWith("@XPath:", true) -> {
splitRegex(tmp) mode = Mode.XPath
rule = ruleStr.substring(7)
} }
ruleStr.startsWith("//") -> {//XPath特征很明显,无需配置单独的识别标头
mode = Mode.XPath
rule = ruleStr
} }
ruleStr.startsWith("@Json:", true) -> {
start = evalMatcher.end() mode = Mode.Json
rule = ruleStr.substring(6)
}
ruleStr.startsWith("$.") -> {
mode = Mode.Json
rule = ruleStr
}
else -> rule = ruleStr
}
}
//分离put
rule = splitPutRule(rule, putMap)
//@get,{{ }}, 拆分
var start = 0
var tmp: String
val evalMatcher = evalPattern.matcher(rule)
while (evalMatcher.find()) { while (evalMatcher.find()) {
if (evalMatcher.start() > start) { if (evalMatcher.start() > start) {
tmp = rule.substring(start, evalMatcher.start()) tmp = rule.substring(start, evalMatcher.start())
if (mode != Mode.Js && mode != Mode.Regex
&& start == 0 && !tmp.contains("##")
) {
mode = Mode.Regex
}
splitRegex(tmp) splitRegex(tmp)
} else if (mode != Mode.Js && mode != Mode.Regex
&& evalMatcher.start() == 0
) {
mode = Mode.Regex
} }
tmp = evalMatcher.group() tmp = evalMatcher.group()
when { when {
@ -492,8 +498,6 @@ class AnalyzeRule(val ruleData: RuleDataInterface) : JsExtensions {
} }
start = evalMatcher.end() start = evalMatcher.end()
} }
}
if (rule.length > start) { if (rule.length > start) {
tmp = rule.substring(start) tmp = rule.substring(start)
splitRegex(tmp) splitRegex(tmp)
@ -508,12 +512,10 @@ class AnalyzeRule(val ruleData: RuleDataInterface) : JsExtensions {
var tmp: String var tmp: String
val ruleStrArray = ruleStr.split("##") val ruleStrArray = ruleStr.split("##")
val regexMatcher = regexPattern.matcher(ruleStrArray[0]) val regexMatcher = regexPattern.matcher(ruleStrArray[0])
while (regexMatcher.find()) {
if(regexMatcher.find()) {
if (mode != Mode.Js && mode != Mode.Regex) { if (mode != Mode.Js && mode != Mode.Regex) {
mode = Mode.Regex mode = Mode.Regex
} }
do{
if (regexMatcher.start() > start) { if (regexMatcher.start() > start) {
tmp = ruleStr.substring(start, regexMatcher.start()) tmp = ruleStr.substring(start, regexMatcher.start())
ruleType.add(defaultRuleType) ruleType.add(defaultRuleType)
@ -523,7 +525,6 @@ class AnalyzeRule(val ruleData: RuleDataInterface) : JsExtensions {
ruleType.add(tmp.substring(1).toInt()) ruleType.add(tmp.substring(1).toInt())
ruleParam.add(tmp) ruleParam.add(tmp)
start = regexMatcher.end() start = regexMatcher.end()
}while (regexMatcher.find())
} }
if (ruleStr.length > start) { if (ruleStr.length > start) {
tmp = ruleStr.substring(start) tmp = ruleStr.substring(start)
@ -682,6 +683,7 @@ class AnalyzeRule(val ruleData: RuleDataInterface) : JsExtensions {
companion object { companion object {
private val putPattern = Pattern.compile("@put:(\\{[^}]+?\\})", Pattern.CASE_INSENSITIVE) private val putPattern = Pattern.compile("@put:(\\{[^}]+?\\})", Pattern.CASE_INSENSITIVE)
private val getPattern = Pattern.compile("@get:\\{([^}]+?)\\}", Pattern.CASE_INSENSITIVE)
private val evalPattern = private val evalPattern =
Pattern.compile("@get:\\{[^}]+?\\}|\\{\\{[\\w\\W]*?\\}\\}", Pattern.CASE_INSENSITIVE) Pattern.compile("@get:\\{[^}]+?\\}|\\{\\{[\\w\\W]*?\\}\\}", Pattern.CASE_INSENSITIVE)
private val regexPattern = Pattern.compile("\\$\\d{1,2}") private val regexPattern = Pattern.compile("\\$\\d{1,2}")

@ -1,11 +1,13 @@
package io.legado.app.model.analyzeRule package io.legado.app.model.analyzeRule
import android.annotation.SuppressLint import android.annotation.SuppressLint
import android.text.TextUtils
import androidx.annotation.Keep import androidx.annotation.Keep
import com.bumptech.glide.load.model.GlideUrl import com.bumptech.glide.load.model.GlideUrl
import com.bumptech.glide.load.model.LazyHeaders import com.bumptech.glide.load.model.LazyHeaders
import io.legado.app.constant.AppConst.SCRIPT_ENGINE import io.legado.app.constant.AppConst.SCRIPT_ENGINE
import io.legado.app.constant.AppConst.UA_NAME import io.legado.app.constant.AppConst.UA_NAME
import io.legado.app.constant.AppPattern.EXP_PATTERN
import io.legado.app.constant.AppPattern.JS_PATTERN import io.legado.app.constant.AppPattern.JS_PATTERN
import io.legado.app.data.entities.BaseBook import io.legado.app.data.entities.BaseBook
import io.legado.app.data.entities.BookChapter import io.legado.app.data.entities.BookChapter
@ -39,6 +41,7 @@ class AnalyzeUrl(
headerMapF: Map<String, String>? = null headerMapF: Map<String, String>? = null
) : JsExtensions { ) : JsExtensions {
companion object { companion object {
val splitUrlRegex = Regex(",\\s*(?=\\{)")
private val pagePattern = Pattern.compile("<(.*?)>") private val pagePattern = Pattern.compile("<(.*?)>")
} }
@ -55,8 +58,7 @@ class AnalyzeUrl(
private var retry: Int = 0 private var retry: Int = 0
init { init {
val pos = baseUrl.indexOf(',') baseUrl = baseUrl.split(splitUrlRegex, 1)[0]
if(pos != -1)baseUrl = baseUrl.substring(0,pos)
headerMapF?.let { headerMapF?.let {
headerMap.putAll(it) headerMap.putAll(it)
if (it.containsKey("proxy")) { if (it.containsKey("proxy")) {
@ -72,24 +74,39 @@ class AnalyzeUrl(
} }
private fun analyzeJs() { private fun analyzeJs() {
val ruleList = arrayListOf<String>()
var start = 0 var start = 0
var tmp: String var tmp: String
val jsMatcher = JS_PATTERN.matcher(ruleUrl) val jsMatcher = JS_PATTERN.matcher(ruleUrl)
while (jsMatcher.find()) { while (jsMatcher.find()) {
if (jsMatcher.start() > start) { if (jsMatcher.start() > start) {
tmp = tmp =
ruleUrl.substring(start, jsMatcher.start()).trim { it <= ' ' } ruleUrl.substring(start, jsMatcher.start()).replace("\n", "").trim { it <= ' ' }
if (tmp.isNotEmpty()) { if (!TextUtils.isEmpty(tmp)) {
ruleUrl = tmp.replace("@result", ruleUrl) ruleList.add(tmp)
} }
} }
ruleUrl = evalJS(jsMatcher.group(2)?:jsMatcher.group(1), ruleUrl) as String ruleList.add(jsMatcher.group())
start = jsMatcher.end() start = jsMatcher.end()
} }
if (ruleUrl.length > start) { if (ruleUrl.length > start) {
tmp = ruleUrl.substring(start).trim { it <= ' ' } tmp = ruleUrl.substring(start).replace("\n", "").trim { it <= ' ' }
if (tmp.isNotEmpty()) { if (!TextUtils.isEmpty(tmp)) {
ruleUrl = tmp.replace("@result", ruleUrl) ruleList.add(tmp)
}
}
for (rule in ruleList) {
var ruleStr = rule
when {
ruleStr.startsWith("<js>") -> {
ruleStr = ruleStr.substring(4, ruleStr.lastIndexOf("<"))
ruleUrl = evalJS(ruleStr, ruleUrl) as String
}
ruleStr.startsWith("@js", true) -> {
ruleStr = ruleStr.substring(4)
ruleUrl = evalJS(ruleStr, ruleUrl) as String
}
else -> ruleUrl = ruleStr.replace("@result", ruleUrl)
} }
} }
} }
@ -97,12 +114,23 @@ class AnalyzeUrl(
/** /**
* 替换关键字,页数,JS * 替换关键字,页数,JS
*/ */
private fun replaceKeyPageJs() { //先替换内嵌规则再替换页数规则,避免内嵌规则中存在大于小于号时,规则被切错 private fun replaceKeyPageJs() {
//page
page?.let {
val matcher = pagePattern.matcher(ruleUrl)
while (matcher.find()) {
val pages = matcher.group(1)!!.split(",")
ruleUrl = if (page <= pages.size) {
ruleUrl.replace(matcher.group(), pages[page - 1].trim { it <= ' ' })
} else {
ruleUrl.replace(matcher.group(), pages.last().trim { it <= ' ' })
}
}
}
//js //js
if (ruleUrl.contains("{{") && ruleUrl.contains("}}")) { if (ruleUrl.contains("{{") && ruleUrl.contains("}}")) {
var jsEval: Any
val analyze = RuleAnalyzer(ruleUrl) //创建解析 val sb = StringBuffer()
val bindings = SimpleBindings() val bindings = SimpleBindings()
bindings["java"] = this bindings["java"] = this
bindings["cookie"] = CookieStore bindings["cookie"] = CookieStore
@ -113,28 +141,21 @@ class AnalyzeUrl(
bindings["speakText"] = speakText bindings["speakText"] = speakText
bindings["speakSpeed"] = speakSpeed bindings["speakSpeed"] = speakSpeed
bindings["book"] = book bindings["book"] = book
val expMatcher = EXP_PATTERN.matcher(ruleUrl)
//替换所有内嵌{{js}} while (expMatcher.find()) {
val url = analyze.innerRule("{{","}}"){ jsEval = expMatcher.group(1)?.let {
when(val jsEval = SCRIPT_ENGINE.eval(it, bindings)){ SCRIPT_ENGINE.eval(it, bindings)
is String -> jsEval } ?: ""
jsEval is Double && jsEval % 1.0 == 0.0 -> String.format("%.0f", jsEval) if (jsEval is String) {
else -> jsEval.toString() expMatcher.appendReplacement(sb, jsEval)
} } else if (jsEval is Double && jsEval % 1.0 == 0.0) {
} expMatcher.appendReplacement(sb, String.format("%.0f", jsEval))
if(url.isNotEmpty())ruleUrl = url
}
//page
page?.let {
val matcher = pagePattern.matcher(ruleUrl)
while (matcher.find()) {
val pages = matcher.group(1)!!.split(",")
ruleUrl = if (page < pages.size) { //pages[pages.size - 1]等同于pages.last()
ruleUrl.replace(matcher.group(), pages[page - 1].trim { it <= ' ' })
} else { } else {
ruleUrl.replace(matcher.group(), pages.last().trim { it <= ' ' }) expMatcher.appendReplacement(sb, jsEval.toString())
} }
} }
expMatcher.appendTail(sb)
ruleUrl = sb.toString()
} }
} }
@ -142,20 +163,15 @@ class AnalyzeUrl(
* 处理URL * 处理URL
*/ */
private fun initUrl() { private fun initUrl() {
var urlArray = ruleUrl.split(splitUrlRegex, 2)
var pos = ruleUrl.indexOf(',') url = NetworkUtils.getAbsoluteURL(baseUrl, urlArray[0])
urlHasQuery = urlArray[0]
urlHasQuery = if(pos == -1) ruleUrl else ruleUrl.substring(0,pos)
url = NetworkUtils.getAbsoluteURL(baseUrl,urlHasQuery )
NetworkUtils.getBaseUrl(url)?.let { NetworkUtils.getBaseUrl(url)?.let {
baseUrl = it baseUrl = it
} }
if (urlArray.size > 1) {
if(pos != -1 ) { val option = GSON.fromJsonObject<UrlOption>(urlArray[1])
GSON.fromJsonObject<UrlOption>(ruleUrl.substring(pos + 1).trim{ it < '!'})?.let { option -> option?.let { _ ->
option.method?.let { option.method?.let {
if (it.equals("POST", true)) method = RequestMethod.POST if (it.equals("POST", true)) method = RequestMethod.POST
} }
@ -185,17 +201,16 @@ class AnalyzeUrl(
retry = option.retry retry = option.retry
} }
} }
headerMap[UA_NAME] ?: let { headerMap[UA_NAME] ?: let {
headerMap[UA_NAME] = AppConfig.userAgent headerMap[UA_NAME] = AppConfig.userAgent
} }
when (method) { when (method) {
RequestMethod.GET -> { RequestMethod.GET -> {
if (!useWebView) { if (!useWebView) {
pos = url.indexOf('?') urlArray = url.split("?")
if(pos != -1) { url = urlArray[0]
analyzeFields(url.substring(pos + 1)) if (urlArray.size > 1) {
url = url.substring(0,pos) analyzeFields(urlArray[1])
} }
} }
} }
@ -218,7 +233,7 @@ class AnalyzeUrl(
for (query in queryS) { for (query in queryS) {
val queryM = query.splitNotBlank("=") val queryM = query.splitNotBlank("=")
val value = if (queryM.size > 1) queryM[1] else "" val value = if (queryM.size > 1) queryM[1] else ""
if (charset.isNullOrEmpty()) { if (TextUtils.isEmpty(charset)) {
if (NetworkUtils.hasUrlEncoded(value)) { if (NetworkUtils.hasUrlEncoded(value)) {
fieldMap[queryM[0]] = value fieldMap[queryM[0]] = value
} else { } else {

@ -1,9 +1,5 @@
package io.legado.app.model.analyzeRule package io.legado.app.model.analyzeRule
import io.legado.app.utils.isJson
import java.util.ArrayList
import java.util.regex.Pattern
//通用的规则切分处理 //通用的规则切分处理
class RuleAnalyzer(data: String, code: Boolean = false) { class RuleAnalyzer(data: String, code: Boolean = false) {
@ -13,10 +9,12 @@ class RuleAnalyzer(data: String, code: Boolean = false) {
private var start = 0 //当前处理字段的开始 private var start = 0 //当前处理字段的开始
private var startX = 0 //当前规则的开始 private var startX = 0 //当前规则的开始
private var rule = ArrayList<String>() //分割出的规则列表 private var rule = arrayOf<String>() //分割出的规则列表
private var step: Int = 0 //分割字符的长度 private var step: Int = 0 //分割字符的长度
var elementsType = "" //当前分割字符串 var elementsType = "" //当前分割字符串
var innerType = true //是否为内嵌{{}}
//设置平衡组函数,json或JavaScript时设置成chompCodeBalanced,否则为chompRuleBalanced
val chompBalanced = if (code) ::chompCodeBalanced else ::chompRuleBalanced
fun trim() { // 修剪当前规则之前的"@"或者空白符 fun trim() { // 修剪当前规则之前的"@"或者空白符
if(queue[pos] == '@' || queue[pos] < '!') { //在while里重复设置start和startX会拖慢执行速度,所以先来个判断是否存在需要修剪的字段,最后再一次性设置start和startX if(queue[pos] == '@' || queue[pos] < '!') { //在while里重复设置start和startX会拖慢执行速度,所以先来个判断是否存在需要修剪的字段,最后再一次性设置start和startX
@ -34,8 +32,8 @@ class RuleAnalyzer(data: String, code: Boolean = false) {
} }
/** /**
* 从剩余字串中拉出一个字符串直到但不包括匹配序列 * 从剩余字串中拉出一个字符串直到但不包括匹配序列或剩余字串用完
* @param seq 查找的字符串 **区分大小写** * @param seq 分隔字符 **区分大小写**
* @return 是否找到相应字段 * @return 是否找到相应字段
*/ */
fun consumeTo(seq: String): Boolean { fun consumeTo(seq: String): Boolean {
@ -68,31 +66,6 @@ class RuleAnalyzer(data: String, code: Boolean = false) {
pos++ //逐个试探 pos++ //逐个试探
} }
return false
}
/**
* 从剩余字串中拉出一个字符串直到但不包括匹配序列匹配参数列表中一项即为匹配或剩余字串用完
* @param seq 匹配字符串序列
* @return 成功返回true并设置间隔,失败则直接返回false
*/
fun chompToAny(vararg seq: String): Boolean {
var pos = pos //声明新变量记录匹配位置,不更改类本身的位置
while (pos != queue.length) {
for (s in seq) {
if (queue.regionMatches(pos, s, 0, s.length)) {
rule += queue.substring(this.pos, pos)
pos += s.length //跳过分隔符
ruleTypeList += s //追加类型到列表
start = this.pos
this.pos = pos //匹配成功, 同步处理位置到类
return true //匹配就返回 true
}
}
pos++ //逐个试探
}
return false return false
} }
@ -117,6 +90,75 @@ class RuleAnalyzer(data: String, code: Boolean = false) {
return -1 return -1
} }
/**
 * Scans forward from the current [pos] over one balanced group of JavaScript text.
 *
 * Nesting characters are ignored while the scan is inside a quoted string
 * (single quote, double quote or backtick), a regex literal, a line comment or a
 * block comment, so JavaScript that is syntactically valid does not have to avoid
 * any of the rule keywords. A backslash skips the character that follows it.
 *
 * The scan never reads past the end of [queue]: input that ends right after a
 * backslash, after a `/`, or after a `*` inside a block comment simply ends the scan.
 *
 * NOTE(review): character classes inside a regex literal are not tracked, so a `/`
 * inside `[...]` ends the regex early. The previous `[`/`]` counting branches could
 * never run (they were nested under a `c == '/'` test) and have been removed.
 *
 * @param f classifies a character: `true` opens one nesting level, `false` closes
 * one, `null` means the character does not affect nesting. Defaults to `{` / `}`.
 * @return `true` (and [pos] moved to the end of the scanned text) when the nesting
 * depth is not positive once the scan stops; `false` with [pos] unchanged when the
 * input ran out while still nested.
 */
fun chompJsBalanced(
    f: ((Char) -> Boolean?) = {
        when (it) {
            '{' -> true // opens one nesting level
            '}' -> false // closes one nesting level
            else -> null
        }
    }
): Boolean {
    var pos = pos // scratch position; the property is only updated on success
    var depth = 0 // current nesting depth
    var inSingleQuote = false // inside '...'
    var inDoubleQuote = false // inside "..."
    var inOtherQuote = false // inside `...` (template literal)
    var regex = false // inside a regex literal
    var commit = false // inside a line comment
    var commits = false // inside a block comment

    do {
        if (pos >= queue.length) break
        var c = queue[pos++]
        if (c != '\\') { // not an escape character
            val plain = !commits && !commit && !regex
            if (c == '\'' && plain && !inDoubleQuote && !inOtherQuote) {
                inSingleQuote = !inSingleQuote // syntactic single quote
            } else if (c == '"' && plain && !inSingleQuote && !inOtherQuote) {
                inDoubleQuote = !inDoubleQuote // syntactic double quote
            } else if (c == '`' && plain && !inSingleQuote && !inDoubleQuote) {
                inOtherQuote = !inOtherQuote // syntactic backtick
            } else if (c == '/' && plain && !inSingleQuote && !inDoubleQuote && !inOtherQuote) {
                // Start of a comment or of a regex literal: decided by the next character.
                if (pos >= queue.length) break // nothing follows the slash
                c = queue[pos++]
                when (c) {
                    '/' -> commit = true // line comment starts
                    '*' -> commits = true // block comment starts
                    else -> regex = true // regex literal starts
                }
            } else if (commits && c == '*') {
                // Possible end of a block comment.
                if (pos >= queue.length) break // nothing follows the star
                c = queue[pos++]
                if (c == '/') commits = false
            } else if (regex && c == '/') {
                regex = false // regex literal ends
            } else if (c == '\n') {
                commit = false // a line comment ends with the line
            }

            // Still inside a string, regex or comment: this character cannot change the depth.
            if (commits || commit || regex || inSingleQuote || inDoubleQuote || inOtherQuote) continue

            val fn = f(c) ?: continue
            if (fn) depth++ else depth-- // open or close one level
        } else if (pos < queue.length) {
            pos++ // skip the escaped character
        }
    } while (depth > 0) // keep going until the group is closed

    return if (depth > 0) false else {
        this.pos = pos // commit the position
        true
    }
}
/** /**
* 拉出一个非内嵌代码平衡组存在转义文本 * 拉出一个非内嵌代码平衡组存在转义文本
*/ */
@ -194,7 +236,7 @@ class RuleAnalyzer(data: String, code: Boolean = false) {
* 不用正则,不到最后不切片也不用中间变量存储,只在序列中标记当前查找字段的开头结尾,到返回时才切片,高效快速准确切割规则 * 不用正则,不到最后不切片也不用中间变量存储,只在序列中标记当前查找字段的开头结尾,到返回时才切片,高效快速准确切割规则
* 解决jsonPath自带的"&&""||"与阅读的规则冲突,以及规则正则或字符串中包含"&&""||""%%""@"导致的冲突 * 解决jsonPath自带的"&&""||"与阅读的规则冲突,以及规则正则或字符串中包含"&&""||""%%""@"导致的冲突
*/ */
tailrec fun splitRule(vararg split: String): ArrayList<String> { //首段匹配,elementsType为空 tailrec fun splitRule(vararg split: String): Array<String> { //首段匹配,elementsType为空
if (split.size == 1) { if (split.size == 1) {
elementsType = split[0] //设置分割字串 elementsType = split[0] //设置分割字串
@ -218,7 +260,7 @@ class RuleAnalyzer(data: String, code: Boolean = false) {
if (st == -1) { if (st == -1) {
rule = arrayListOf(queue.substring(startX, end)) //压入分隔的首段规则到数组 rule = arrayOf(queue.substring(startX, end)) //压入分隔的首段规则到数组
elementsType = queue.substring(end, end + step) //设置组合类型 elementsType = queue.substring(end, end + step) //设置组合类型
pos = end + step //跳过分隔符 pos = end + step //跳过分隔符
@ -235,7 +277,7 @@ class RuleAnalyzer(data: String, code: Boolean = false) {
if (st > end) { //先匹配到st1pos,表明分隔字串不在选择器中,将选择器前分隔字串分隔的字段依次压入数组 if (st > end) { //先匹配到st1pos,表明分隔字串不在选择器中,将选择器前分隔字串分隔的字段依次压入数组
rule = arrayListOf(queue.substring(startX, end)) //压入分隔的首段规则到数组 rule = arrayOf(queue.substring(startX, end)) //压入分隔的首段规则到数组
elementsType = queue.substring(end, end + step) //设置组合类型 elementsType = queue.substring(end, end + step) //设置组合类型
pos = end + step //跳过分隔符 pos = end + step //跳过分隔符
@ -269,7 +311,7 @@ class RuleAnalyzer(data: String, code: Boolean = false) {
} }
@JvmName("splitRuleNext") @JvmName("splitRuleNext")
private tailrec fun splitRule(): ArrayList<String> { //二段匹配被调用,elementsType非空(已在首段赋值),直接按elementsType查找,比首段采用的方式更快 private tailrec fun splitRule(): Array<String> { //二段匹配被调用,elementsType非空(已在首段赋值),直接按elementsType查找,比首段采用的方式更快
val end = pos //记录分隔位置 val end = pos //记录分隔位置
pos = start //重回开始,启动另一种查找 pos = start //重回开始,启动另一种查找
@ -294,7 +336,7 @@ class RuleAnalyzer(data: String, code: Boolean = false) {
if (st > end) { //先匹配到st1pos,表明分隔字串不在选择器中,将选择器前分隔字串分隔的字段依次压入数组 if (st > end) { //先匹配到st1pos,表明分隔字串不在选择器中,将选择器前分隔字串分隔的字段依次压入数组
rule += arrayListOf(queue.substring(startX, end)) //压入分隔的首段规则到数组 rule += arrayOf(queue.substring(startX, end)) //压入分隔的首段规则到数组
pos = end + step //跳过分隔符 pos = end + step //跳过分隔符
while (consumeTo(elementsType) && pos < st) { //循环切分规则压入数组 while (consumeTo(elementsType) && pos < st) { //循环切分规则压入数组
@ -331,9 +373,9 @@ class RuleAnalyzer(data: String, code: Boolean = false) {
/** /**
* 替换内嵌规则 * 替换内嵌规则
* @param inner 起始标志,{$. * @param inner 起始标志,{$. {{
* @param startStep 不属于规则部分的前置字符长度{$.{不属于规则的组成部分故startStep为1 * @param startStep 不属于规则部分的前置字符长度{$.{不属于规则的组成部分故startStep为1
* @param endStep 不属于规则部分的后置字符长度 * @param endStep 不属于规则部分的后置字符长度}}长度为2
* @param fr 查找到内嵌规则时用于解析的函数 * @param fr 查找到内嵌规则时用于解析的函数
* *
* */ * */
@ -343,13 +385,14 @@ class RuleAnalyzer(data: String, code: Boolean = false) {
endStep: Int = 1, endStep: Int = 1,
fr: (String) -> String? fr: (String) -> String?
): String { ): String {
val st = StringBuilder() val st = StringBuilder()
while (consumeTo(inner)) { //拉取成功返回true,ruleAnalyzes里的字符序列索引变量pos后移相应位置,否则返回false,且isEmpty为true while (consumeTo(inner)) { //拉取成功返回true,ruleAnalyzes里的字符序列索引变量pos后移相应位置,否则返回false,且isEmpty为true
val posPre = pos //记录consumeTo匹配位置 val posPre = pos //记录consumeTo匹配位置
if (chompCodeBalanced('{', '}')) { if (chompCodeBalanced('{', '}')) {
val frv = fr(queue.substring(posPre + startStep, pos - endStep)) val frv = fr(queue.substring(posPre + startStep, pos - endStep))
if (!frv.isNullOrEmpty()) { if (frv != null) {
st.append(queue.substring(startX, posPre) + frv) //压入内嵌规则前的内容,及内嵌规则解析得到的字符串 st.append(queue.substring(startX, posPre) + frv) //压入内嵌规则前的内容,及内嵌规则解析得到的字符串
startX = pos //记录下次规则起点 startX = pos //记录下次规则起点
continue //获取内容成功,继续选择下个内嵌规则 continue //获取内容成功,继续选择下个内嵌规则
@ -363,206 +406,64 @@ class RuleAnalyzer(data: String, code: Boolean = false) {
}.toString() }.toString()
} }
companion object {
/** /**
* 替换内嵌规则 * 转义字符
* @param fr 查找到内嵌规则时用于解析的函数 */
* private const val ESC = '\\'
* */
/**
 * Replaces every `startStr ... endStr` section of the input with the text produced by [fr].
 *
 * Each section's inner text (delimiters excluded) is handed to [fr]; the delimiters and
 * the inner text are replaced by whatever [fr] returns. A `null` result is written out
 * as the text "null". A start marker with no matching end marker is left in place.
 *
 * @param startStr opening delimiter of an embedded section
 * @param endStr closing delimiter of an embedded section
 * @param fr resolver called with the text between the delimiters
 * @return the rewritten string, or the original input when `startX` is still 0 afterwards
 */
fun innerRule(
    startStr: String,
    endStr: String,
    fr: (String) -> String?
): String {
    val out = StringBuilder()
    while (consumeTo(startStr)) { // pos now sits on the opening delimiter
        val openAt = pos
        pos = openAt + startStr.length // step over the opening delimiter
        val bodyStart = pos
        if (!consumeTo(endStr)) continue // no closing delimiter: look for another opener
        val resolved = fr(queue.substring(bodyStart, pos))
        // text before the section, then the resolved replacement
        out.append(queue.substring(startX, openAt)).append(resolved)
        pos += endStr.length // step over the closing delimiter
        startX = pos // the next literal run starts here
    }
    if (startX == 0) return queue
    out.append(queue.substring(startX))
    return out.toString()
}
val ruleTypeList = ArrayList<String>()
//设置平衡组函数,json或JavaScript时设置成chompCodeBalanced,否则为chompRuleBalanced
val chompBalanced = if (code) ::chompCodeBalanced else ::chompRuleBalanced
/**
 * Tags for the kinds of rule: XPath, Json, Default, Js and Regex.
 * NOTE(review): presumably mirrors the modes AnalyzeRule assigns from rule prefixes
 * such as "@XPath:" or "@Json:" — confirm against the callers.
 */
enum class Mode {
XPath, Json, Default, Js, Regex
}
/** /**
* 不用正则,不到最后不切片也不用中间变量存储,只在序列中标记当前查找字段的开头结尾,到返回时才切片,高效快速准确切割规则 * 阅读共有分隔字串起始部分
* 解决jsonPath自带的"&&""||"与阅读的规则冲突,以及规则正则或字符串中包含"&&""||""%%""@"导致的冲突 * "##","@@","{{","{[","<js>", "@js:"
*/ */
tailrec fun splitAnyRule(): ArrayList<String> { //首段匹配,elementsType为空 val splitList = arrayOf("##", "@@", "{{", "{[", "<js>", "@js:")
if (!consumeToAny(* STARTSTR)) { //未找到分隔符
rule += queue.substring(startX)
return rule
}
val end = pos //记录分隔位置 /**
pos = start //重回开始,启动另一种查找 * 发现名称-链接分隔字串
* "::"
do { */
val st = findToAny('[', '(') //查找筛选器位置 const val splitListFaXian = "::"
if (st == -1) {
rule += arrayOf(queue.substring(startX, end)) //压入分隔的首段规则到数组
ruleTypeList += queue.substring(end, end + step) //追加类型到类型列表
pos = end + step //跳过分隔符
while (!chompToAny(elementsType)) { //循环切分规则压入数组
rule += queue.substring(pos) //将剩余字段压入数组末尾
return rule
}
}
if (st > end) { //先匹配到st1pos,表明分隔字串不在选择器中,将选择器前分隔字串分隔的字段依次压入数组
rule += arrayOf(queue.substring(startX, end)) //压入分隔的首段规则到数组
ruleTypeList += queue.substring(end, end + step) //设置组合类型
pos = end + step //跳过分隔符
while (!chompToAny(elementsType) && pos >= st) { //循环切分规则压入数组
if (pos > st) {
startX = start
} else { //执行到此,证明后面再无分隔字符
rule += queue.substring(pos) //将剩余字段压入数组末尾
return rule
}
}
}
pos = st //位置回到筛选器处
val next = if (queue[pos] == '[') ']' else ')' //平衡组末尾字符
if (!chompBalanced(queue[pos], next)){
ruleTypeList.clear()
rule.clear()
consumeToAny("<js>","@js:")
rule += queue.substring(0,pos)
ruleTypeList += queue.substring(pos, pos + 4) //设置组合类型
}
} while (end > pos)
start = pos //设置开始查找筛选器位置的起始位置
return splitAnyRule() //递归调用首段匹配
}
// Flags assigned by setContent according to the content type it is given.
// Set from isJson() on the content when the type is none of the named ones.
var isJSON = false
// Set for type "url".
var isUrl = false
// Set for type "urlList".
var isUrlList = false
// Set for type "mulu".
var isMulu = false
// Set when "mulu" content starts with '-'.
var isreverse = false
// Set when "mulu" content starts with '?'.
var isAllInOne= false
// Set for type "find".
var isFind = false
// Receives the text that precedes "::" in "find" content.
private val findName = ArrayList<String>()
// NOTE(review): the properties below are not used in the visible part of this file;
// the names suggest regex-replacement settings, @put values and parsed rule
// parameters/types — confirm against their users.
var replaceRegex = ""
var replacement = ""
var replaceFirst = false
val putMap = HashMap<String, String>()
private val ruleParam = ArrayList<String>()
private val ruleType = ArrayList<Int>()
// Rule-type codes. NOTE(review): presumably stored in ruleType to mark @get, js and plain segments — confirm.
private val getRuleType = -2
private val jsRuleType = -1
private val defaultRuleType = 0
/**
 * Loads [cont] as the text to analyze and raises the flags that belong to [type].
 *
 * - "mulu": sets `isMulu`; a leading '-' additionally sets `isreverse`, a leading '?'
 *   sets `isAllInOne`, and in both cases the marker character is skipped.
 *   Empty content carries no marker and no longer throws.
 * - "find": sets `isFind`; the text before the first "::" is recorded in `findName` and
 *   the position moves just past the "::". When there is no "::" nothing is recorded
 *   and the position is left alone (previously this threw from `substring`).
 * - "url" / "urlList": set `isUrl` / `isUrlList`.
 * - anything else: `isJSON` is set from `isJson()` on the content.
 *
 * @param cont the text to analyze
 * @param type content type selector, see the list above
 * @return this analyzer, for chaining
 */
@JvmOverloads
fun setContent(cont: String, type: String = ""): RuleAnalyzer {
    queue = cont
    when (type) {
        "mulu" -> {
            // firstOrNull() instead of [0]: empty content simply has no marker
            when (queue.firstOrNull()) {
                '-' -> { // reversed table of contents
                    isreverse = true
                    startX++
                    pos++
                }
                '?' -> { // all-in-one mode
                    isAllInOne = true
                    startX++
                    pos++
                }
                else -> {
                }
            }
            isMulu = true
        }
        "find" -> {
            val separatorAt = queue.indexOf("::")
            if (separatorAt >= 0) { // only split when the name separator is present
                findName.add(queue.substring(startX, separatorAt))
                pos = separatorAt + 2 // continue right after "::"
            }
            isFind = true
        }
        "url" -> {
            isUrl = true
        }
        "urlList" -> {
            isUrlList = true
        }
        else -> {
            isJSON = queue.toString().isJson()
        }
    }
    return this
}
companion object {
/** /**
* 转义字符 * 目录专有起始字符
* "-"
*/ */
private const val ESC = '\\' const val splitListMulu = "-"
/**
* 结果为元素列表的 all in one 模式起始字符
* "+"
*/
const val splitListTongYi = "+"
val validKeys = arrayOf("class", "id", "tag", "text", "children") /**
* 结果为元素列表的项的同规则组合结构
* "||","&&","%%"
*/
val splitListReSplit = arrayOf("||", "&&", "%%")
/** /**
* 参数字符串 * js脚本结束字串
* "</js>"
*/ */
private val STARTSTRURL = arrayOf(",{",) const val splitListEndJS = "</js>"
private val regexPattern = Pattern.compile("\\$\\d{1,2}") /**
private val putPattern = Pattern.compile("@put:(\\{[^}]+?\\})", Pattern.CASE_INSENSITIVE) *内嵌js结束字串
private val getPattern = Pattern.compile("@get:\\{([^}]+?)\\}", Pattern.CASE_INSENSITIVE) * "}}"
private val evalPattern = Pattern.compile("\\{\\{[\\w\\W]*?\\}\\}", Pattern.CASE_INSENSITIVE) */
const val splitListEndInnerJS = "}}"
val ENDSTR= mapOf( /**
"<js>" to "</js>", * 内嵌规则结束字串
"{{" to "}}", * "]}"
) */
const val splitListEndInnerRule = "]}"
/** /**
* 规则起始字符串 * '[', ']', '(', ')','{','}'
*/ */
private val STARTSTR = arrayOf("@js:","<js>","</js>","##","@@","@" val splitListPublic = charArrayOf('[', ']', '(', ')', '{', '}')
,"{{@", "{{","}}"
,"}"
, "{@", "{/", "{$", "{?"
, "{class"
, "{id"
, "{tag"
, "{text"
, "{children"
,"/","$","@xpath:","@json:","@css:"
,"||", "&&", "%%"
,"@get:{","@put:{"
)
/** /**
* '*',"/","//",":","::","@","|","@xpath:" * '*',"/","//",":","::","@","|","@xpath:"

@ -415,22 +415,30 @@ object ReadBook {
else -> chapter.title else -> chapter.title
} }
val contents = contentProcessor!!.getContent(book, chapter.title, content) val contents = contentProcessor!!.getContent(book, chapter.title, content)
val textChapter = ChapterProvider.getTextChapter(book, chapter,contents,chapterSize) when (chapter.index) {
durChapterIndex -> {
val offset = chapter.index - durChapterIndex curTextChapter =
if (upContent) callBack?.upContent(offset ,resetPageOffset) ChapterProvider.getTextChapter(
when (offset) { book, chapter, contents, chapterSize
0 -> { )
curTextChapter = textChapter if (upContent) callBack?.upContent(resetPageOffset = resetPageOffset)
callBack?.upView() callBack?.upView()
curPageChanged() curPageChanged()
callBack?.contentLoadFinish() callBack?.contentLoadFinish()
} }
- 1 -> { durChapterIndex - 1 -> {
prevTextChapter = textChapter prevTextChapter =
ChapterProvider.getTextChapter(
book, chapter, contents, chapterSize
)
if (upContent) callBack?.upContent(-1, resetPageOffset)
} }
1 -> { durChapterIndex + 1 -> {
nextTextChapter = textChapter nextTextChapter =
ChapterProvider.getTextChapter(
book, chapter, contents, chapterSize
)
if (upContent) callBack?.upContent(1, resetPageOffset)
} }
} }
} }

@ -115,7 +115,15 @@ object ChapterProvider {
content.replace(AppPattern.imgPattern.toRegex(), "\n\$0\n") content.replace(AppPattern.imgPattern.toRegex(), "\n\$0\n")
.split("\n").forEach { text -> .split("\n").forEach { text ->
if (text.isNotBlank()) { if (text.isNotBlank()) {
if (!text.startsWith("<img src=\"")) { //非图片 val matcher = AppPattern.imgPattern.matcher(text)
if (matcher.find()) {
matcher.group(1)?.let { src ->
durY = setTypeImage(
book, bookChapter, src,
durY, textPages, book.getImageStyle()
)
}
} else {
val isTitle = index == 0 val isTitle = index == 0
val textPaint = if (isTitle) titlePaint else contentPaint val textPaint = if (isTitle) titlePaint else contentPaint
if (!(isTitle && ReadBookConfig.titleMode == 2)) { if (!(isTitle && ReadBookConfig.titleMode == 2)) {
@ -124,11 +132,6 @@ object ChapterProvider {
stringBuilder, isTitle, textPaint stringBuilder, isTitle, textPaint
) )
} }
} else { //图片
durY = setTypeImage(
book, bookChapter, text.substring(10, text.length-2),
durY, textPages, book.getImageStyle()
)
} }
} }
} }
@ -147,10 +150,7 @@ object ChapterProvider {
return TextChapter( return TextChapter(
bookChapter.index, bookChapter.title, bookChapter.index, bookChapter.title,
bookChapter.getAbsoluteURL().run{ bookChapter.getAbsoluteURL().split(AnalyzeUrl.splitUrlRegex)[0],
val pos = indexOf(',')
if(pos == -1) this else substring(0,pos)
},
textPages, chapterSize textPages, chapterSize
) )
} }

@ -1,7 +1,8 @@
package io.legado.app.utils package io.legado.app.utils
import io.legado.app.constant.AppPattern
import io.legado.app.model.analyzeRule.AnalyzeUrl
import java.net.URL import java.net.URL
import java.util.regex.Pattern
object HtmlFormatter { object HtmlFormatter {
private val wrapHtmlRegex = "</?(?:div|p|br|hr|h\\d|article|dd|dl)[^>]*>".toRegex() private val wrapHtmlRegex = "</?(?:div|p|br|hr|h\\d|article|dd|dl)[^>]*>".toRegex()
@ -13,7 +14,7 @@ object HtmlFormatter {
return html.replace(wrapHtmlRegex, "\n") return html.replace(wrapHtmlRegex, "\n")
.replace(otherRegex, "") .replace(otherRegex, "")
.replace("\\s*\\n+\\s*".toRegex(), "\n  ") .replace("\\s*\\n+\\s*".toRegex(), "\n  ")
.replace("^[\\n\\s]*".toRegex(), "  ") .replace("^[\\n\\s]+".toRegex(), "  ")
.replace("[\\n\\s]+$".toRegex(), "") .replace("[\\n\\s]+$".toRegex(), "")
} }
@ -21,64 +22,22 @@ object HtmlFormatter {
fun formatKeepImg(html: String?, redirectUrl: URL?): String { fun formatKeepImg(html: String?, redirectUrl: URL?): String {
html ?: return "" html ?: return ""
val keepImgHtml = html.replace(wrapHtmlRegex, "\n") val keepImgHtml = formatKeepImg(html)
.replace(notImgHtmlRegex, "") val sb = StringBuffer()
.replace("[\\n\\s]+\$|^[\\n\\s]*".toRegex(), "") val matcher = AppPattern.imgPattern.matcher(keepImgHtml)
.replace("\\s*\\n+\\s*".toRegex(), "\n")
val sb = StringBuffer("  ") //前置缩减
val hasDataType:Boolean //是否有数据属性
//图片有data-开头的数据属性时优先用数据属性作为src,没有数据属性时匹配src
val imgPattern = Pattern.compile(
if(keepImgHtml.matches("<img[^>]*data-".toRegex())) {
hasDataType = true
"<img[^>]*data-[^=]*= *\"([^\"])\"[^>]*>"
}
else {
hasDataType = false
"<img[^>]*src *= *\"([^\"{]+(?:\\{(?:[^{}]|\\{[^{}]*\\})*\\})?)\"[^>]*>"
}, Pattern.CASE_INSENSITIVE
)
val matcher = imgPattern.matcher(keepImgHtml)
var appendPos = 0 var appendPos = 0
while (matcher.find()) {
if(matcher.find()){ val urlArray = matcher.group(1)!!.split(AnalyzeUrl.splitUrlRegex)
if(hasDataType || matcher.group(1)!!.indexOf(',') == -1) { //图片无参 var url = NetworkUtils.getAbsoluteURL(redirectUrl, urlArray[0])
if (urlArray.size > 1) {
do{ url = "$url,${urlArray[1]}"
sb.append(keepImgHtml.substring(appendPos, matcher.start()).replace("\n","\n  ")) //非图片部分换行缩减
sb.append( "<img src=\"${
NetworkUtils.getAbsoluteURL(redirectUrl,matcher.group(1)!!)
}\">" )
appendPos = matcher.end()
}while (matcher.find())
}else{ //图片有参
do{
val url = matcher.group(1)!!
val pos = url.indexOf(',')
sb.append(keepImgHtml.substring(appendPos, matcher.start()).replace("\n","\n  ")) //非图片部分换行缩减
sb.append(
"<img src=\"${
NetworkUtils.getAbsoluteURL(
redirectUrl,
url.substring(0, pos)
)
},${
url.substring(pos)
}\">"
)
appendPos = matcher.end()
}while(matcher.find())
} }
sb.append(keepImgHtml.substring(appendPos, matcher.start()))
sb.append("<img src=\"$url\" >")
appendPos = matcher.end()
} }
if (appendPos < keepImgHtml.length) { if (appendPos < keepImgHtml.length) {
sb.append(keepImgHtml.substring(appendPos, keepImgHtml.length).replace("\n","\n  ")) //非图片部分换行缩减 sb.append(keepImgHtml.substring(appendPos, keepImgHtml.length))
} }
return sb.toString() return sb.toString()
} }

Loading…
Cancel
Save