优化链接分割规则,避免 ,{c参数} 的字符串中也存在 ,{ 时规则被切错

修复<str0,str2,...{{js}}>这种页数列表写法中,js部分内含 < 或 > 就会切割错误的问题

简化图片格式化操作
pull/1114/head
bushixuanqi 3 years ago
parent 4a4208c031
commit c06df68404
  1. 5
      app/src/main/java/io/legado/app/constant/AppPattern.kt
  2. 13
      app/src/main/java/io/legado/app/data/entities/BookChapter.kt
  3. 12
      app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeRule.kt
  4. 86
      app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeUrl.kt
  5. 121
      app/src/main/java/io/legado/app/model/analyzeRule/RuleAnalyzer.kt
  6. 3
      app/src/main/java/io/legado/app/ui/book/read/page/provider/ChapterProvider.kt
  7. 34
      app/src/main/java/io/legado/app/utils/HtmlFormatter.kt

@ -7,9 +7,10 @@ object AppPattern {
val JS_PATTERN: Pattern = val JS_PATTERN: Pattern =
Pattern.compile("(<js>[\\w\\W]*?</js>|@js:[\\w\\W]*$)", Pattern.CASE_INSENSITIVE) Pattern.compile("(<js>[\\w\\W]*?</js>|@js:[\\w\\W]*$)", Pattern.CASE_INSENSITIVE)
val EXP_PATTERN: Pattern = Pattern.compile("\\{\\{([\\w\\W]*?)\\}\\}") val EXP_PATTERN: Pattern = Pattern.compile("\\{\\{([\\w\\W]*?)\\}\\}")
//图片有data-开头的数据属性时优先用数据属性作为src,没有数据属性时才匹配src
//非格式化时,不需要原来那么复杂的正则表达式
val imgPattern: Pattern = val imgPattern: Pattern =
Pattern.compile("<img(?:(?![^>]*data-)[^>]*src|[^>]*data-)[^=]*= *\"([^\"{]+(?:\\{(?:[^{}]|\\{[^{}]*\\})*\\})?)\"[^>]*>", Pattern.CASE_INSENSITIVE) Pattern.compile("<img[^>]*src *= *\"([^\"{]+(?:\\{(?:[^{}]|\\{[^{}]*\\})*\\})?)\"[^>]*>", Pattern.CASE_INSENSITIVE)
val nameRegex = Regex("\\s+作\\s*者.*|\\s+\\S+\\s+著") val nameRegex = Regex("\\s+作\\s*者.*|\\s+\\S+\\s+著")
val authorRegex = Regex("^.*?作\\s*者[::\\s]*|\\s+著") val authorRegex = Regex("^.*?作\\s*者[::\\s]*|\\s+著")

@ -8,7 +8,6 @@ import androidx.room.Index
import io.legado.app.model.analyzeRule.AnalyzeUrl import io.legado.app.model.analyzeRule.AnalyzeUrl
import io.legado.app.utils.GSON import io.legado.app.utils.GSON
import io.legado.app.utils.MD5Utils import io.legado.app.utils.MD5Utils
import io.legado.app.utils.NetworkUtils
import io.legado.app.utils.fromJsonObject import io.legado.app.utils.fromJsonObject
import kotlinx.parcelize.IgnoredOnParcel import kotlinx.parcelize.IgnoredOnParcel
import kotlinx.parcelize.Parcelize import kotlinx.parcelize.Parcelize
@ -63,14 +62,10 @@ data class BookChapter(
return false return false
} }
fun getAbsoluteURL(): String { fun getAbsoluteURL() = if(url.indexOf(',') != -1) {
val urlArray = url.split(AnalyzeUrl.splitUrlRegex) val absoluteUrl = url.split(AnalyzeUrl.splitUrlRegex, 1)[0]
var absoluteUrl = NetworkUtils.getAbsoluteURL(baseUrl, urlArray[0]) "${absoluteUrl},${url.substring(absoluteUrl.length)}"
if (urlArray.size > 1) { } else url
absoluteUrl = "$absoluteUrl,${urlArray[1]}"
}
return absoluteUrl
}
fun getFileName(): String = String.format("%05d-%s.nb", index, MD5Utils.md5Encode16(title)) fun getFileName(): String = String.format("%05d-%s.nb", index, MD5Utils.md5Encode16(title))

@ -25,7 +25,9 @@ import kotlin.collections.HashMap
@Keep @Keep
@Suppress("unused", "RegExpRedundantEscape") @Suppress("unused", "RegExpRedundantEscape")
class AnalyzeRule(val ruleData: RuleDataInterface) : JsExtensions { class AnalyzeRule(val ruleData: RuleDataInterface) : JsExtensions {
var book: BaseBook? = null
var book: BaseBook? = if (ruleData is BaseBook) ruleData else null
var chapter: BookChapter? = null var chapter: BookChapter? = null
var nextChapterUrl: String? = null var nextChapterUrl: String? = null
var content: Any? = null var content: Any? = null
@ -42,15 +44,9 @@ class AnalyzeRule(val ruleData: RuleDataInterface) : JsExtensions {
private var objectChangedJS = false private var objectChangedJS = false
private var objectChangedJP = false private var objectChangedJP = false
init {
if (ruleData is BaseBook) {
book = ruleData
}
}
@JvmOverloads @JvmOverloads
fun setContent(content: Any?, baseUrl: String? = null): AnalyzeRule { fun setContent(content: Any?, baseUrl: String? = null): AnalyzeRule {
if (content == null) throw AssertionError("Content cannot be null") if (content == null) throw AssertionError("内容不可空(Content cannot be null)")
this.content = content this.content = content
setBaseUrl(baseUrl) setBaseUrl(baseUrl)
isJSON = content.toString().isJson() isJSON = content.toString().isJson()

@ -1,13 +1,11 @@
package io.legado.app.model.analyzeRule package io.legado.app.model.analyzeRule
import android.annotation.SuppressLint import android.annotation.SuppressLint
import android.text.TextUtils
import androidx.annotation.Keep import androidx.annotation.Keep
import com.bumptech.glide.load.model.GlideUrl import com.bumptech.glide.load.model.GlideUrl
import com.bumptech.glide.load.model.LazyHeaders import com.bumptech.glide.load.model.LazyHeaders
import io.legado.app.constant.AppConst.SCRIPT_ENGINE import io.legado.app.constant.AppConst.SCRIPT_ENGINE
import io.legado.app.constant.AppConst.UA_NAME import io.legado.app.constant.AppConst.UA_NAME
import io.legado.app.constant.AppPattern.EXP_PATTERN
import io.legado.app.constant.AppPattern.JS_PATTERN import io.legado.app.constant.AppPattern.JS_PATTERN
import io.legado.app.data.entities.BaseBook import io.legado.app.data.entities.BaseBook
import io.legado.app.data.entities.BookChapter import io.legado.app.data.entities.BookChapter
@ -41,7 +39,7 @@ class AnalyzeUrl(
headerMapF: Map<String, String>? = null headerMapF: Map<String, String>? = null
) : JsExtensions { ) : JsExtensions {
companion object { companion object {
val splitUrlRegex = Regex(",\\s*(?=\\{)") val splitUrlRegex = Regex("\\s*,\\s*(?=\\{)")
private val pagePattern = Pattern.compile("<(.*?)>") private val pagePattern = Pattern.compile("<(.*?)>")
} }
@ -82,7 +80,7 @@ class AnalyzeUrl(
if (jsMatcher.start() > start) { if (jsMatcher.start() > start) {
tmp = tmp =
ruleUrl.substring(start, jsMatcher.start()).replace("\n", "").trim { it <= ' ' } ruleUrl.substring(start, jsMatcher.start()).replace("\n", "").trim { it <= ' ' }
if (!TextUtils.isEmpty(tmp)) { if (tmp.isNotEmpty()) {
ruleList.add(tmp) ruleList.add(tmp)
} }
} }
@ -91,7 +89,7 @@ class AnalyzeUrl(
} }
if (ruleUrl.length > start) { if (ruleUrl.length > start) {
tmp = ruleUrl.substring(start).replace("\n", "").trim { it <= ' ' } tmp = ruleUrl.substring(start).replace("\n", "").trim { it <= ' ' }
if (!TextUtils.isEmpty(tmp)) { if (tmp.isNotEmpty()) {
ruleList.add(tmp) ruleList.add(tmp)
} }
} }
@ -114,23 +112,12 @@ class AnalyzeUrl(
/** /**
* 替换关键字,页数,JS * 替换关键字,页数,JS
*/ */
private fun replaceKeyPageJs() { private fun replaceKeyPageJs() { //先替换内嵌规则再替换页数规则,避免内嵌规则中存在大于小于号时,规则被切错
//page
page?.let {
val matcher = pagePattern.matcher(ruleUrl)
while (matcher.find()) {
val pages = matcher.group(1)!!.split(",")
ruleUrl = if (page <= pages.size) {
ruleUrl.replace(matcher.group(), pages[page - 1].trim { it <= ' ' })
} else {
ruleUrl.replace(matcher.group(), pages.last().trim { it <= ' ' })
}
}
}
//js //js
if (ruleUrl.contains("{{") && ruleUrl.contains("}}")) { if (ruleUrl.contains("{{") && ruleUrl.contains("}}")) {
var jsEval: Any
val sb = StringBuffer() val analyze = RuleAnalyzer(ruleUrl) //创建解析
val bindings = SimpleBindings() val bindings = SimpleBindings()
bindings["java"] = this bindings["java"] = this
bindings["cookie"] = CookieStore bindings["cookie"] = CookieStore
@ -141,21 +128,28 @@ class AnalyzeUrl(
bindings["speakText"] = speakText bindings["speakText"] = speakText
bindings["speakSpeed"] = speakSpeed bindings["speakSpeed"] = speakSpeed
bindings["book"] = book bindings["book"] = book
val expMatcher = EXP_PATTERN.matcher(ruleUrl)
while (expMatcher.find()) { //替换所有内嵌{{js}}
jsEval = expMatcher.group(1)?.let { val url = analyze.innerRule("{{",2,2){
SCRIPT_ENGINE.eval(it, bindings) when(val jsEval = SCRIPT_ENGINE.eval(it, bindings)){
} ?: "" is String -> jsEval
if (jsEval is String) { jsEval is Double && jsEval % 1.0 == 0.0 -> String.format("%.0f", jsEval)
expMatcher.appendReplacement(sb, jsEval) else -> jsEval.toString()
} else if (jsEval is Double && jsEval % 1.0 == 0.0) { }
expMatcher.appendReplacement(sb, String.format("%.0f", jsEval)) }
if(url.isNotEmpty())ruleUrl = url
}
//page
page?.let {
val matcher = pagePattern.matcher(ruleUrl)
while (matcher.find()) {
val pages = matcher.group(1)!!.split(",")
ruleUrl = if (page < pages.size) { //pages[pages.size - 1]等同于pages.last()
ruleUrl.replace(matcher.group(), pages[page - 1].trim { it <= ' ' })
} else { } else {
expMatcher.appendReplacement(sb, jsEval.toString()) ruleUrl.replace(matcher.group(), pages.last().trim { it <= ' ' })
} }
} }
expMatcher.appendTail(sb)
ruleUrl = sb.toString()
} }
} }
@ -163,15 +157,20 @@ class AnalyzeUrl(
* 处理URL * 处理URL
*/ */
private fun initUrl() { private fun initUrl() {
var urlArray = ruleUrl.split(splitUrlRegex, 2)
url = NetworkUtils.getAbsoluteURL(baseUrl, urlArray[0]) val hasQuery = ruleUrl.indexOf(',') != -1
urlHasQuery = urlArray[0]
urlHasQuery = if(hasQuery) ruleUrl.split(splitUrlRegex, 1)[0] else ruleUrl
url = NetworkUtils.getAbsoluteURL(baseUrl,urlHasQuery )
NetworkUtils.getBaseUrl(url)?.let { NetworkUtils.getBaseUrl(url)?.let {
baseUrl = it baseUrl = it
} }
if (urlArray.size > 1) {
val option = GSON.fromJsonObject<UrlOption>(urlArray[1]) if(hasQuery) {
option?.let { _ -> GSON.fromJsonObject<UrlOption>(ruleUrl.substring(urlHasQuery.length))?.let { option ->
option.method?.let { option.method?.let {
if (it.equals("POST", true)) method = RequestMethod.POST if (it.equals("POST", true)) method = RequestMethod.POST
} }
@ -201,16 +200,17 @@ class AnalyzeUrl(
retry = option.retry retry = option.retry
} }
} }
headerMap[UA_NAME] ?: let { headerMap[UA_NAME] ?: let {
headerMap[UA_NAME] = AppConfig.userAgent headerMap[UA_NAME] = AppConfig.userAgent
} }
when (method) { when (method) {
RequestMethod.GET -> { RequestMethod.GET -> {
if (!useWebView) { if (!useWebView) {
urlArray = url.split("?") val pos = url.indexOf('?')
url = urlArray[0] if(pos != -1) {
if (urlArray.size > 1) { analyzeFields(url.substring(pos + 1))
analyzeFields(urlArray[1]) url = url.substring(0,pos)
} }
} }
} }
@ -233,7 +233,7 @@ class AnalyzeUrl(
for (query in queryS) { for (query in queryS) {
val queryM = query.splitNotBlank("=") val queryM = query.splitNotBlank("=")
val value = if (queryM.size > 1) queryM[1] else "" val value = if (queryM.size > 1) queryM[1] else ""
if (TextUtils.isEmpty(charset)) { if (charset.isNullOrEmpty()) {
if (NetworkUtils.hasUrlEncoded(value)) { if (NetworkUtils.hasUrlEncoded(value)) {
fieldMap[queryM[0]] = value fieldMap[queryM[0]] = value
} else { } else {

@ -12,6 +12,7 @@ class RuleAnalyzer(data: String, code: Boolean = false) {
private var rule = arrayOf<String>() //分割出的规则列表 private var rule = arrayOf<String>() //分割出的规则列表
private var step: Int = 0 //分割字符的长度 private var step: Int = 0 //分割字符的长度
var elementsType = "" //当前分割字符串 var elementsType = "" //当前分割字符串
var innerType = true //是否为内嵌{{}}
//设置平衡组函数,json或JavaScript时设置成chompCodeBalanced,否则为chompRuleBalanced //设置平衡组函数,json或JavaScript时设置成chompCodeBalanced,否则为chompRuleBalanced
val chompBalanced = if (code) ::chompCodeBalanced else ::chompRuleBalanced val chompBalanced = if (code) ::chompCodeBalanced else ::chompRuleBalanced
@ -31,9 +32,14 @@ class RuleAnalyzer(data: String, code: Boolean = false) {
startX = 0 startX = 0
} }
//返回剩余字段
fun Remained(): String {
return queue.substring(pos)
}
/** /**
* 从剩余字串中拉出一个字符串直到但不包括匹配序列或剩余字串用完 * 从剩余字串中拉出一个字符串直到但不包括匹配序列
* @param seq 分隔字符 **区分大小写** * @param seq 查找的字符串 **区分大小写**
* @return 是否找到相应字段 * @return 是否找到相应字段
*/ */
fun consumeTo(seq: String): Boolean { fun consumeTo(seq: String): Boolean {
@ -45,6 +51,20 @@ class RuleAnalyzer(data: String, code: Boolean = false) {
} else false } else false
} }
/**
* 从剩余字串中拉出一个字符串直到但不包括匹配序列
* @param seq 查找的字符串 **区分大小写**
* @return 返回查找的字符串之前的匹配字段
*/
fun consumeToString(seq: String): String {
start = pos //将处理到的位置设置为规则起点
val offset = queue.indexOf(seq, pos)
return if (offset != -1) {
pos = offset
queue.substring(start, offset)
} else ""
}
/** /**
* 从剩余字串中拉出一个字符串直到但不包括匹配序列匹配参数列表中一项即为匹配或剩余字串用完 * 从剩余字串中拉出一个字符串直到但不包括匹配序列匹配参数列表中一项即为匹配或剩余字串用完
* @param seq 匹配字符串序列 * @param seq 匹配字符串序列
@ -91,50 +111,61 @@ class RuleAnalyzer(data: String, code: Boolean = false) {
} }
//其中js只要符合语法,就不用避开任何阅读关键字,自由发挥 //其中js只要符合语法,就不用避开任何阅读关键字,自由发挥
fun chompJsBalanced( fun chompJsBalanced(innerType:Boolean = true,startPos:Int = pos): String {
f: ((Char) -> Boolean?) = {
when (it) { var pos = startPos //声明变量记录临时处理位置
'{' -> true //开始嵌套一层
'}' -> false //闭合一层嵌套
else -> null
}
}
): Boolean {
var pos = pos //声明变量记录临时处理位置
var depth = 0 //嵌套深度
var bracketsDepth = 0 //[]嵌套深度 var bracketsDepth = 0 //[]嵌套深度
var inSingleQuote = false //单引号 var inSingleQuote = false //单引号
var inDoubleQuote = false //双引号 var inDoubleQuote = false //双引号
var inOtherQuote = false //js原始字串分隔字符 var inOtherQuote = false //js原始字串分隔字符
var regex = false //正则 var inRegex = false //正则
var commit = false //单行注释 var inCommit = false //单行注释
var commits = false //多行注释 var inCommits = false //多行注释
val start:String
val end:String
val endChar:Char
if(innerType){
start = "{{"
end = "}}"
endChar = '}'
}else{
start = "<js>"
end = "</js>"
endChar = '<'
}
pos += start.length //跳过起始字符串
do { do {
if (pos == queue.length) break if (pos == queue.length) break
var c = queue[pos++] var c = queue[pos++]
if (c != '\\') { //非转义字符 if (c != '\\') { //非转义字符
if (c == '\'' && !commits && !commit && !regex && !inDoubleQuote && !inOtherQuote) inSingleQuote = if (c == '\'' && !inCommits && !inCommit && !inRegex && !inDoubleQuote && !inOtherQuote) inSingleQuote =
!inSingleQuote //匹配具有语法功能的单引号 !inSingleQuote //匹配具有语法功能的单引号
else if (c == '"' && !commits && !commit && !regex && !inSingleQuote && !inOtherQuote) inDoubleQuote = else if (c == '"' && !inCommits && !inCommit && !inRegex && !inSingleQuote && !inOtherQuote) inDoubleQuote =
!inDoubleQuote //匹配具有语法功能的双引号 !inDoubleQuote //匹配具有语法功能的双引号
else if (c == '`' && !commits && !commit && !regex && !inSingleQuote && !inDoubleQuote) inOtherQuote = else if (c == '`' && !inCommits && !inCommit && !inRegex && !inSingleQuote && !inDoubleQuote) inOtherQuote =
!inOtherQuote //匹配具有语法功能的'`' !inOtherQuote //匹配具有语法功能的'`'
else if (c == '/' && !commits && !commit && !regex && !inSingleQuote && !inDoubleQuote && !inOtherQuote) { //匹配注释或正则起点 else if (c == '/' && !inCommits && !inCommit && !inRegex && !inSingleQuote && !inDoubleQuote && !inOtherQuote) { //匹配注释或正则起点
c = queue[pos++] c = queue[pos++]
when (c) { when (c) {
'/' -> commit = true //匹配单行注释起点 '/' -> inCommit = true //匹配单行注释起点
'*' -> commits = true //匹配多行注释起点 '*' -> inCommits = true //匹配多行注释起点
else -> regex = true //匹配正则起点 else -> inRegex = true //匹配正则起点
} }
} else if (commits && c == '*') { //匹配多行注释终点 } else if (inCommits) { //匹配多行注释终点
c = queue[pos++]
if (c == '/') commits = false pos = queue.indexOf("*/", pos) //跳过多行注释
} else if (regex && c == '/') { //正则的终点或[]平衡 if(pos == -1)break //没有终点,语法出错,跳出
continue
} else if (inRegex) { //正则的终点或[]平衡
when (c) { when (c) {
'/' -> regex = false//匹配正则终点 '/' -> inRegex = false//匹配正则终点
//为了保证当open为( 且 close 为 )时,正则中[(]或[)]的合法性。故对[]这对在任何规则中都平衡的成对符号做匹配。 //为了保证当open为( 且 close 为 )时,正则中[(]或[)]的合法性。故对[]这对在任何规则中都平衡的成对符号做匹配。
// 注:正则里[(]、[)]、[{]、[}]都是合法的,所以只有[]必须平衡。 // 注:正则里[(]、[)]、[{]、[}]都是合法的,所以只有[]必须平衡。
@ -142,21 +173,20 @@ class RuleAnalyzer(data: String, code: Boolean = false) {
']' -> bracketsDepth-- //闭合一层嵌套[] ']' -> bracketsDepth-- //闭合一层嵌套[]
} }
} else if (c == '\n') commit = false } else if (c == '\n') inCommit = false //单行注释终点
if (commits || commit || regex || inSingleQuote || inDoubleQuote || inOtherQuote) continue //语法单元未匹配结束,直接进入下个循环
val fn = f(c) ?: continue if (inCommits || inCommit || inRegex || inSingleQuote || inDoubleQuote || inOtherQuote) continue //语法单元未匹配结束,直接进入下个循环
if (fn) depth++ else depth-- //嵌套或者闭合
if( c == endChar && queue.regionMatches(pos, end, 0, end.length)) {
this.pos = pos
return queue.substring(startPos + start.length, pos - end.length) //匹配到终点,返回结果
}
} else pos++ } else pos++
} while (depth > 0 || bracketsDepth > 0) //拉出全部符合js语法的字段 } while (bracketsDepth > 0) //拉出全部符合js语法的字段
return ""
return if (depth > 0 || bracketsDepth > 0) false else {
this.pos = pos //同步位置
true
}
} }
/** /**
@ -392,7 +422,7 @@ class RuleAnalyzer(data: String, code: Boolean = false) {
val posPre = pos //记录consumeTo匹配位置 val posPre = pos //记录consumeTo匹配位置
if (chompCodeBalanced('{', '}')) { if (chompCodeBalanced('{', '}')) {
val frv = fr(queue.substring(posPre + startStep, pos - endStep)) val frv = fr(queue.substring(posPre + startStep, pos - endStep))
if (frv != null) { if (!frv.isNullOrEmpty()) {
st.append(queue.substring(startX, posPre) + frv) //压入内嵌规则前的内容,及内嵌规则解析得到的字符串 st.append(queue.substring(startX, posPre) + frv) //压入内嵌规则前的内容,及内嵌规则解析得到的字符串
startX = pos //记录下次规则起点 startX = pos //记录下次规则起点
continue //获取内容成功,继续选择下个内嵌规则 continue //获取内容成功,继续选择下个内嵌规则
@ -412,11 +442,16 @@ class RuleAnalyzer(data: String, code: Boolean = false) {
*/ */
private const val ESC = '\\' private const val ESC = '\\'
/**
* "<js>"
*/
private const val JSSTART = "<js>"
/** /**
* 阅读共有分隔字串起始部分 * 阅读共有分隔字串起始部分
* "##","@@","{{","{[","<js>", "@js:" * "##","@@","{{","{[","<js>", "@js:"
*/ */
val splitList = arrayOf("##", "@@", "{{", "{[", "<js>", "@js:") val splitList2 = arrayOf("##", "@@", "{{", "{[")
/** /**
* 发现名称-链接分隔字串 * 发现名称-链接分隔字串

@ -12,7 +12,6 @@ import io.legado.app.data.entities.Book
import io.legado.app.data.entities.BookChapter import io.legado.app.data.entities.BookChapter
import io.legado.app.help.AppConfig import io.legado.app.help.AppConfig
import io.legado.app.help.ReadBookConfig import io.legado.app.help.ReadBookConfig
import io.legado.app.model.analyzeRule.AnalyzeUrl
import io.legado.app.ui.book.read.page.entities.TextChapter import io.legado.app.ui.book.read.page.entities.TextChapter
import io.legado.app.ui.book.read.page.entities.TextChar import io.legado.app.ui.book.read.page.entities.TextChar
import io.legado.app.ui.book.read.page.entities.TextLine import io.legado.app.ui.book.read.page.entities.TextLine
@ -150,7 +149,7 @@ object ChapterProvider {
return TextChapter( return TextChapter(
bookChapter.index, bookChapter.title, bookChapter.index, bookChapter.title,
bookChapter.getAbsoluteURL().split(AnalyzeUrl.splitUrlRegex)[0], bookChapter.getAbsoluteURL().split(',',limit = 1)[0], //bookChapter.getAbsoluteURL已经处理过,直接按','就行
textPages, chapterSize textPages, chapterSize
) )
} }

@ -1,8 +1,9 @@
package io.legado.app.utils package io.legado.app.utils
import io.legado.app.constant.AppPattern import io.legado.app.constant.AppPattern.imgPattern
import io.legado.app.model.analyzeRule.AnalyzeUrl import io.legado.app.model.analyzeRule.AnalyzeUrl
import java.net.URL import java.net.URL
import java.util.regex.Pattern
object HtmlFormatter { object HtmlFormatter {
private val wrapHtmlRegex = "</?(?:div|p|br|hr|h\\d|article|dd|dl)[^>]*>".toRegex() private val wrapHtmlRegex = "</?(?:div|p|br|hr|h\\d|article|dd|dl)[^>]*>".toRegex()
@ -14,7 +15,7 @@ object HtmlFormatter {
return html.replace(wrapHtmlRegex, "\n") return html.replace(wrapHtmlRegex, "\n")
.replace(otherRegex, "") .replace(otherRegex, "")
.replace("\\s*\\n+\\s*".toRegex(), "\n  ") .replace("\\s*\\n+\\s*".toRegex(), "\n  ")
.replace("^[\\n\\s]+".toRegex(), "  ") .replace("^[\\n\\s]*".toRegex(), "  ")
.replace("[\\n\\s]+$".toRegex(), "") .replace("[\\n\\s]+$".toRegex(), "")
} }
@ -24,17 +25,32 @@ object HtmlFormatter {
html ?: return "" html ?: return ""
val keepImgHtml = formatKeepImg(html) val keepImgHtml = formatKeepImg(html)
val sb = StringBuffer() val sb = StringBuffer()
val matcher = AppPattern.imgPattern.matcher(keepImgHtml)
//图片有data-开头的数据属性时优先用数据属性作为src,没有数据属性时才匹配src
val hasData = keepImgHtml.matches("<img[^>]*data-".toRegex())
val imgPatternX = if(hasData) Pattern.compile("<img[^>]*data-[^=]*= *\"([^\"])\"[^>]*>", Pattern.CASE_INSENSITIVE) else imgPattern
val matcher = imgPatternX.matcher(keepImgHtml)
var appendPos = 0 var appendPos = 0
while (matcher.find()) { while (matcher.find()) {
val urlArray = matcher.group(1)!!.split(AnalyzeUrl.splitUrlRegex)
var url = NetworkUtils.getAbsoluteURL(redirectUrl, urlArray[0]) var url = matcher.group(1)!!
if (urlArray.size > 1) { val param:String
url = "$url,${urlArray[1]}"
} url = NetworkUtils.getAbsoluteURL(redirectUrl, if(url.indexOf(',') != -1) {
val absoluteUrl = url.split(AnalyzeUrl.splitUrlRegex, 1)[0]
param = url.substring(absoluteUrl.length)
absoluteUrl
} else {
param = ""
url
})
sb.append(keepImgHtml.substring(appendPos, matcher.start())) sb.append(keepImgHtml.substring(appendPos, matcher.start()))
sb.append("<img src=\"$url\" >") sb.append("<img src=\"${url+param}\" >")
appendPos = matcher.end() appendPos = matcher.end()
} }
if (appendPos < keepImgHtml.length) { if (appendPos < keepImgHtml.length) {
sb.append(keepImgHtml.substring(appendPos, keepImgHtml.length)) sb.append(keepImgHtml.substring(appendPos, keepImgHtml.length))

Loading…
Cancel
Save