优化链接分割规则,避免附加参数中存在 ,{ 时导致规则切错

修复<str0,str2,...{{js}}>这种页数列表写法中,js部分内含 < 或 > 会切割错误的问题

优化格式化图片标签操作和匹配图片标签操作,加快图片处理速度。

链接规则中,{{}}里面语法无限制,正则、字符串、注释中可以存在除了“<js>”和“@js:”之外的任意字符串而不被切错
pull/1118/head
bushixuanqi 3 years ago
parent 31be81239b
commit 5c89c0bc27
  1. 4
      app/src/main/java/io/legado/app/constant/AppPattern.kt
  2. 2
      app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeUrl.kt
  3. 58
      app/src/main/java/io/legado/app/model/analyzeRule/RuleAnalyzer.kt
  4. 5
      app/src/main/java/io/legado/app/ui/book/read/page/provider/ChapterProvider.kt
  5. 59
      app/src/main/java/io/legado/app/utils/HtmlFormatter.kt

@ -8,8 +8,8 @@ object AppPattern {
Pattern.compile("(<js>[\\w\\W]*?</js>|@js:[\\w\\W]*$)", Pattern.CASE_INSENSITIVE)
val EXP_PATTERN: Pattern = Pattern.compile("\\{\\{([\\w\\W]*?)\\}\\}")
//匹配格式化后的图片
val imgPattern: Pattern = Pattern.compile("<img src=\"([^\"{]+(?:\\{(?:[^{}]|\\{[^{}]*\\})*\\})?)\">")
//匹配格式化后的图片格式
val imgPattern: Pattern = Pattern.compile("<img src=\"([^>]+)\">")
val nameRegex = Regex("\\s+作\\s*者.*|\\s+\\S+\\s+著")
val authorRegex = Regex("^.*?作\\s*者[::\\s]*|\\s+著")

@ -130,7 +130,7 @@ class AnalyzeUrl(
bindings["book"] = book
//替换所有内嵌{{js}}
val url = analyze.innerRule("{{",2,2){
val url = analyze.innerJsRule{
when(val jsEval = SCRIPT_ENGINE.eval(it, bindings)){
is String -> jsEval
jsEval is Double && jsEval % 1.0 == 0.0 -> String.format("%.0f", jsEval)

@ -32,11 +32,6 @@ class RuleAnalyzer(data: String, code: Boolean = false) {
startX = 0
}
/** Returns the part of the rule text that has not been consumed yet (from `pos` to the end of `queue`). */
fun Remained(): String = queue.substring(pos)
/**
* 从剩余字串中拉出一个字符串直到但不包括匹配序列
* @param seq 查找的字符串 **区分大小写**
@ -51,20 +46,6 @@ class RuleAnalyzer(data: String, code: Boolean = false) {
} else false
}
/**
 * Pulls text out of the remaining input up to, but not including, the first occurrence of [seq].
 *
 * The current position is always recorded in `start` first. When [seq] is found, `pos` is moved
 * onto the match; when it is not found, `pos` is left where it was.
 *
 * @param seq the string to search for (**case-sensitive**)
 * @return the text between the previous position and the match, or "" when [seq] does not occur
 */
fun consumeToString(seq: String): String {
    val from = pos
    start = from // the position reached so far becomes the start of the rule
    val hit = queue.indexOf(seq, from)
    if (hit == -1) return ""
    pos = hit
    return queue.substring(from, hit)
}
/**
* 从剩余字串中拉出一个字符串直到但不包括匹配序列匹配参数列表中一项即为匹配或剩余字串用完
* @param seq 匹配字符串序列
@ -111,7 +92,7 @@ class RuleAnalyzer(data: String, code: Boolean = false) {
}
//其中js只要符合语法,就不用避开任何阅读关键字,自由发挥
fun chompJsBalanced(innerType:Boolean = true,startPos:Int = pos): String {
fun chompJsBalanced(innerType:Boolean = true,startPos:Int = pos): Boolean {
var pos = startPos //声明变量记录临时处理位置
var bracketsDepth = 0 //[]嵌套深度
@ -179,13 +160,13 @@ class RuleAnalyzer(data: String, code: Boolean = false) {
if( c == endChar && queue.regionMatches(pos, end, 0, end.length)) {
this.pos = pos
return queue.substring(startPos + start.length, pos - end.length) //匹配到终点,返回结果
return true
}
} else pos++
} while (bracketsDepth > 0) //拉出全部符合js语法的字段
return ""
return false
}
@ -403,9 +384,9 @@ class RuleAnalyzer(data: String, code: Boolean = false) {
/**
* 替换内嵌规则
* @param inner 起始标志,{$. {{
* @param inner 起始标志,{$.
* @param startStep 不属于规则部分的前置字符长度{$.{不属于规则的组成部分故startStep为1
* @param endStep 不属于规则部分的后置字符长度}}长度为2
* @param endStep 不属于规则部分的后置字符长度
* @param fr 查找到内嵌规则时用于解析的函数
*
* */
@ -415,7 +396,6 @@ class RuleAnalyzer(data: String, code: Boolean = false) {
endStep: Int = 1,
fr: (String) -> String?
): String {
val st = StringBuilder()
while (consumeTo(inner)) { //拉取成功返回true,ruleAnalyzes里的字符序列索引变量pos后移相应位置,否则返回false,且isEmpty为true
@ -436,6 +416,34 @@ class RuleAnalyzer(data: String, code: Boolean = false) {
}.toString()
}
/**
 * Replaces the embedded `{{js}}` rules found in the rule text.
 *
 * The text is scanned for `{{`; each candidate is handed to `chompJsBalanced()` to locate the
 * matching end, and the text between the two-character delimiters is passed to [fr]. A non-empty
 * result replaces the whole `{{...}}` span. A null or empty result leaves the span in place as
 * literal text.
 *
 * @param fr function used to resolve an embedded rule once one is found; receives the text
 * between `{{` and `}}` and returns its replacement (null or "" means "do not replace")
 * @return the rule text with all successful replacements applied, or "" when `startX` is still 0
 * after scanning (i.e. nothing was replaced)
 */
fun innerJsRule(
    fr: (String) -> String?
): String {
    val st = StringBuilder()
    // Per the original note: consumeTo returns true and moves pos onto the match when "{{" is found
    while (consumeTo("{{")) {
        val posPre = pos // position of the "{{" that consumeTo matched
        if (chompJsBalanced()) {
            // pos is expected to sit just behind the closing "}}" here, hence the -2
            val frv = fr(queue.substring(posPre + 2, pos - 2))
            if (!frv.isNullOrEmpty()) {
                // emit the text preceding the embedded rule, then the resolved value
                st.append(queue.substring(startX, posPre)).append(frv)
                startX = pos // start of the next literal segment
            }
            // Balanced but unresolved: pos is already past this expression, so scanning simply
            // resumes there. Skipping two more characters (as before) could jump over the "{{"
            // of a directly adjacent expression.
        } else {
            pos += 2 // unbalanced: this "{{" is plain text, step over it and keep searching
        }
    }
    return if (startX == 0) "" else st.append(queue.substring(startX)).toString()
}
companion object {
/**
* 转义字符

@ -12,6 +12,7 @@ import io.legado.app.data.entities.Book
import io.legado.app.data.entities.BookChapter
import io.legado.app.help.AppConfig
import io.legado.app.help.ReadBookConfig
import io.legado.app.model.analyzeRule.AnalyzeUrl
import io.legado.app.ui.book.read.page.entities.TextChapter
import io.legado.app.ui.book.read.page.entities.TextChar
import io.legado.app.ui.book.read.page.entities.TextLine
@ -110,7 +111,7 @@ object ChapterProvider {
isTitle, textPaint, srcList
)
}
} else {
} else if (book.getImageStyle() != Book.imgStyleText) {
content.replace(AppPattern.imgPattern.toRegex(), "\n\$0\n")
.split("\n").forEach { text ->
if (text.isNotBlank()) {
@ -125,7 +126,7 @@ object ChapterProvider {
}
} else { //图片
durY = setTypeImage(
book, bookChapter, text,
book, bookChapter, text.substring(10, text.length-2),
durY, textPages, book.getImageStyle()
)
}

@ -21,61 +21,64 @@ object HtmlFormatter {
fun formatKeepImg(html: String?, redirectUrl: URL?): String {
html ?: return ""
val keepImgHtml = formatKeepImg(html)
val sb = StringBuffer()
val keepImgHtml = html.replace(wrapHtmlRegex, "\n")
.replace(notImgHtmlRegex, "")
.replace("[\\n\\s]+\$|^[\\n\\s]*".toRegex(), "")
.replace("\\s*\\n+\\s*".toRegex(), "\n")
val sb = StringBuffer("  ") //前置缩减
val hasDataType:Boolean //是否有数据属性
//图片有data-开头的数据属性时优先用数据属性作为src,没有数据属性时匹配src
val imgPattern = Pattern.compile(
if(keepImgHtml.matches("  <img[^>]*data-".toRegex())) "<img[^>]*data-[^=]*= *\"([^\"])\"[^>]*>"
else "  <img[^>]*src *= *\"([^\"{]+(?:\\{(?:[^{}]|\\{[^{}]*\\})*\\})?)\"[^>]*>", Pattern.CASE_INSENSITIVE
if(keepImgHtml.matches("<img[^>]*data-".toRegex())) {
hasDataType = true
"<img[^>]*data-[^=]*= *\"([^\"])\"[^>]*>"
}
else {
hasDataType = false
"<img[^>]*src *= *\"([^\"{]+(?:\\{(?:[^{}]|\\{[^{}]*\\})*\\})?)\"[^>]*>"
}, Pattern.CASE_INSENSITIVE
)
val matcher = imgPattern.matcher(keepImgHtml)
var appendPos = 0
if(matcher.find()){
var url = matcher.group(1)!!
var pos = url.indexOf(',')
sb.append(keepImgHtml.substring(appendPos, matcher.start()))
sb.append(
"<img src=\"${
if (pos == -1) url else NetworkUtils.getAbsoluteURL(
redirectUrl,
url.substring(0, pos)
) + url.substring(pos)
}\">"
)
appendPos = matcher.end()
if(pos == -1) {
while (matcher.find()) {
sb.append(keepImgHtml.substring(appendPos, matcher.start()))
if(hasDataType || matcher.group(1)!!.indexOf(',') == -1) { //图片无参
do{
sb.append(keepImgHtml.substring(appendPos, matcher.start()).replace("\n","\n  ")) //非图片部分换行缩减
sb.append( "<img src=\"${
NetworkUtils.getAbsoluteURL(redirectUrl,matcher.group(1)!!)
}\">" )
appendPos = matcher.end()
}
}else{
while (matcher.find()) {
url = matcher.group(1)!!
pos = url.indexOf(',')
sb.append(keepImgHtml.substring(appendPos, matcher.start()))
}while (matcher.find())
}else{ //图片有参
do{
val url = matcher.group(1)!!
val pos = url.indexOf(',')
sb.append(keepImgHtml.substring(appendPos, matcher.start()).replace("\n","\n  ")) //非图片部分换行缩减
sb.append(
"<img src=\"${
NetworkUtils.getAbsoluteURL(
redirectUrl,
url.substring(0, pos)
)
}${
},${
url.substring(pos)
}\">"
)
appendPos = matcher.end()
}
}while(matcher.find())
}
}
if (appendPos < keepImgHtml.length) {
sb.append(keepImgHtml.substring(appendPos, keepImgHtml.length))
sb.append(keepImgHtml.substring(appendPos, keepImgHtml.length).replace("\n","\n  ")) //非图片部分换行缩减
}
return sb.toString()
}

Loading…
Cancel
Save