Merge pull request #1122 from gedoor/revert-1121-master

Revert "修复与优化"
pull/1125/head
kunfei 3 years ago committed by GitHub
commit c8bb52d0c6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 8
      app/src/main/java/io/legado/app/constant/AppPattern.kt
  2. 16
      app/src/main/java/io/legado/app/data/entities/BookChapter.kt
  3. 162
      app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeRule.kt
  4. 113
      app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeUrl.kt
  5. 347
      app/src/main/java/io/legado/app/model/analyzeRule/RuleAnalyzer.kt
  6. 30
      app/src/main/java/io/legado/app/service/help/ReadBook.kt
  7. 20
      app/src/main/java/io/legado/app/ui/book/read/page/provider/ChapterProvider.kt
  8. 71
      app/src/main/java/io/legado/app/utils/HtmlFormatter.kt

@ -5,11 +5,11 @@ import java.util.regex.Pattern
@Suppress("RegExpRedundantEscape")
object AppPattern {
val JS_PATTERN: Pattern =
Pattern.compile("<js>([\\w\\W]+?)</js>|@js:([\\w\\W]*)", Pattern.CASE_INSENSITIVE)
Pattern.compile("(<js>[\\w\\W]*?</js>|@js:[\\w\\W]*$)", Pattern.CASE_INSENSITIVE)
val EXP_PATTERN: Pattern = Pattern.compile("\\{\\{([\\w\\W]*?)\\}\\}")
//匹配格式化后的图片格式
val imgPattern: Pattern = Pattern.compile("<img src=\"([^>]+)\">")
//图片有data-开头的数据属性时优先用数据属性作为src,没有数据属性时才匹配src
val imgPattern: Pattern =
Pattern.compile("<img(?:(?![^>]*data-)[^>]*src|[^>]*data-)[^=]*= *\"([^\"{]+(?:\\{(?:[^{}]|\\{[^{}]*\\})*\\})?)\"[^>]*>", Pattern.CASE_INSENSITIVE)
val nameRegex = Regex("\\s+作\\s*者.*|\\s+\\S+\\s+著")
val authorRegex = Regex("^.*?作\\s*者[::\\s]*|\\s+著")

@ -5,6 +5,7 @@ import androidx.room.Entity
import androidx.room.ForeignKey
import androidx.room.Ignore
import androidx.room.Index
import io.legado.app.model.analyzeRule.AnalyzeUrl
import io.legado.app.utils.GSON
import io.legado.app.utils.MD5Utils
import io.legado.app.utils.NetworkUtils
@ -12,6 +13,7 @@ import io.legado.app.utils.fromJsonObject
import kotlinx.parcelize.IgnoredOnParcel
import kotlinx.parcelize.Parcelize
@Parcelize
@Entity(
tableName = "chapters",
@ -61,13 +63,13 @@ data class BookChapter(
return false
}
fun getAbsoluteURL():String{
val pos = url.indexOf(',')
return if(pos == -1) NetworkUtils.getAbsoluteURL(baseUrl,url)
else NetworkUtils.getAbsoluteURL(
baseUrl,
url.substring(0, pos)
) + url.substring(pos)
/**
 * Resolves this chapter's [url] against [baseUrl].
 *
 * The stored url may carry an option part after a `,{` separator (see
 * `AnalyzeUrl.splitUrlRegex`: a comma, optional whitespace, then `{`). Only the part
 * before that separator is made absolute; the option part is re-attached after a
 * single comma.
 *
 * @return the absolute URL, followed by `,` and the option part when one is present.
 */
fun getAbsoluteURL(): String {
    // limit = 2: only the FIRST ",{" separates the URL from its options. Without a
    // limit, an option body that itself contains ",{" (e.g. an array of objects) was
    // split further and everything after the second separator was dropped.
    val urlParts = url.split(AnalyzeUrl.splitUrlRegex, 2)
    val absoluteUrl = NetworkUtils.getAbsoluteURL(baseUrl, urlParts[0])
    return if (urlParts.size > 1) "$absoluteUrl,${urlParts[1]}" else absoluteUrl
}
/**
 * Builds the name string for this chapter: [index] zero-padded to at least five digits,
 * a dash, the value returned by `MD5Utils.md5Encode16(title)`, and the `.nb` suffix.
 */
fun getFileName(): String = String.format("%05d-%s.nb", index, MD5Utils.md5Encode16(title))

@ -25,9 +25,7 @@ import kotlin.collections.HashMap
@Keep
@Suppress("unused", "RegExpRedundantEscape")
class AnalyzeRule(val ruleData: RuleDataInterface) : JsExtensions {
var book: BaseBook? = if (ruleData is BaseBook) ruleData else null
var book: BaseBook? = null
var chapter: BookChapter? = null
var nextChapterUrl: String? = null
var content: Any? = null
@ -44,11 +42,18 @@ class AnalyzeRule(val ruleData: RuleDataInterface) : JsExtensions {
private var objectChangedJS = false
private var objectChangedJP = false
init {
if (ruleData is BaseBook) {
book = ruleData
}
}
@JvmOverloads
fun setContent(content: Any?, baseUrl: String? = null): AnalyzeRule {
if (content == null) throw AssertionError("内容不可空(Content cannot be null)")
if (content == null) throw AssertionError("Content cannot be null")
this.content = content
setBaseUrl(baseUrl)
isJSON = content.toString().isJson()
objectChangedXP = true
objectChangedJS = true
objectChangedJP = true
@ -64,8 +69,7 @@ class AnalyzeRule(val ruleData: RuleDataInterface) : JsExtensions {
/**
 * Parses [url] and stores it as the redirect URL.
 *
 * Anything from the first `,{` separator onwards (see `AnalyzeUrl.splitUrlRegex`) is
 * stripped before parsing.
 *
 * @param url the URL text, optionally followed by `,{...}` options.
 * @return the current `redirectUrl`; it is left unchanged when [url] cannot be parsed.
 */
fun setRedirectUrl(url: String): URL? {
    // Best effort: a malformed URL must not abort rule analysis, so the failure is
    // deliberately swallowed and the previous value is kept.
    kotlin.runCatching {
        // limit = 2 so the text is actually cut at the first separator; a limit of 1
        // makes split() return the whole string as a single element, i.e. no stripping.
        redirectUrl = URL(url.split(AnalyzeUrl.splitUrlRegex, 2)[0])
    }
    return redirectUrl
}
@ -356,72 +360,58 @@ class AnalyzeRule(val ruleData: RuleDataInterface) : JsExtensions {
/**
* 分解规则生成规则列表
*/
fun splitSourceRule(ruleStr: String?): List<SourceRule> {
if (ruleStr.isNullOrEmpty()) return ArrayList<SourceRule>()
fun splitSourceRule(ruleStr: String?, mode: Mode = Mode.Default): List<SourceRule> {
var vRuleStr = ruleStr
val ruleList = ArrayList<SourceRule>()
if (vRuleStr.isNullOrEmpty()) return ruleList
//检测Mode
var mMode: Mode = Mode.Default
fun mode(ruleStr0:String)=when {
ruleStr0.startsWith("@@") -> {
mMode = Mode.Default
ruleStr0.substring(2)
}
ruleStr0.startsWith("@XPath:", true) -> {
mMode = Mode.XPath
ruleStr0.substring(7)
var mMode: Mode = mode
when {
vRuleStr.startsWith("@@") -> {
vRuleStr = vRuleStr.substring(2)
}
ruleStr0.startsWith("/") -> {//XPath特征很明显,无需配置单独的识别标头
vRuleStr.startsWith("@XPath:", true) -> {
mMode = Mode.XPath
ruleStr0
vRuleStr = vRuleStr.substring(7)
}
ruleStr0.startsWith("@Json:", true) -> {
vRuleStr.startsWith("@Json:", true) -> {
mMode = Mode.Json
ruleStr0.substring(6)
vRuleStr = vRuleStr.substring(6)
}
ruleStr0.startsWith(":") -> { //:与伪类选择器冲突,改成?更合理
vRuleStr.startsWith(":") -> {
mMode = Mode.Regex
isRegex = true
ruleStr0.substring(1)
}
( ruleStr0[1] == '.' || ruleStr0[1] == '[') && ruleStr0[0] == '$' || content.toString().isJson() -> {
mMode = Mode.Json
ruleStr0
}
else -> {
mMode = Mode.Default
ruleStr0
vRuleStr = vRuleStr.substring(1)
}
isRegex -> mMode = Mode.Regex
isJSON -> mMode = Mode.Json
}
//拆分为规则列表
var start = 0
var tmp: String
val jsMatcher = JS_PATTERN.matcher(ruleStr)
while (jsMatcher.find()){
val jsMatcher = JS_PATTERN.matcher(vRuleStr)
while (jsMatcher.find()) {
if (jsMatcher.start() > start) {
tmp = ruleStr.substring(start, jsMatcher.start()).trim { it <= ' ' }
if (tmp.isNotEmpty()) {
ruleList.add(SourceRule(mode(tmp), mMode))
tmp = vRuleStr.substring(start, jsMatcher.start()).trim { it <= ' ' }
if (!TextUtils.isEmpty(tmp)) {
ruleList.add(SourceRule(tmp, mMode))
}
}
ruleList.add(SourceRule(jsMatcher.group(2)?:jsMatcher.group(1), Mode.Js))
ruleList.add(SourceRule(jsMatcher.group(), Mode.Js))
start = jsMatcher.end()
}
if (ruleStr.length > start){
tmp = ruleStr.substring(start).trim { it <= ' ' }
if (tmp.isNotEmpty()) {
ruleList.add(SourceRule(mode(tmp), mMode))
if (vRuleStr.length > start) {
tmp = vRuleStr.substring(start).trim { it <= ' ' }
if (!TextUtils.isEmpty(tmp)) {
ruleList.add(SourceRule(tmp, mMode))
}
}
return ruleList
}
/**
* 规则类
*/
inner class SourceRule internal constructor(ruleStr: String, mainMode: Mode = Mode.Default) {
internal var mode: Mode
internal var rule: String
@ -437,44 +427,60 @@ class AnalyzeRule(val ruleData: RuleDataInterface) : JsExtensions {
init {
this.mode = mainMode
//分离put
rule = splitPutRule(ruleStr, putMap)
//@get,{{ }}, 拆分
var start = 0
var tmp: String
val evalMatcher = evalPattern.matcher(rule)
if(evalMatcher.find()){
var modeX = mode == Mode.Js || mode == Mode.Regex
if (evalMatcher.start() > 0 ) {
tmp = rule.substring(0, evalMatcher.start())
modeX = modeX || tmp.contains("##")
splitRegex(tmp)
if (mode == Mode.Js) {
rule = if (ruleStr.startsWith("<js>")) {
ruleStr.substring(4, ruleStr.lastIndexOf("<"))
} else {
ruleStr.substring(4)
}
if(!modeX)mode = Mode.Regex
tmp = evalMatcher.group()
} else {
when {
tmp.startsWith("@get:", true) -> {
ruleType.add(getRuleType)
ruleParam.add(tmp.substring(6, tmp.lastIndex))
ruleStr.startsWith("@CSS:", true) -> {
mode = Mode.Default
rule = ruleStr
}
tmp.startsWith("{{") -> {
ruleType.add(jsRuleType)
ruleParam.add(tmp.substring(2, tmp.length - 2))
ruleStr.startsWith("@@") -> {
mode = Mode.Default
rule = ruleStr.substring(2)
}
else -> {
splitRegex(tmp)
ruleStr.startsWith("@XPath:", true) -> {
mode = Mode.XPath
rule = ruleStr.substring(7)
}
ruleStr.startsWith("//") -> {//XPath特征很明显,无需配置单独的识别标头
mode = Mode.XPath
rule = ruleStr
}
start = evalMatcher.end()
while (evalMatcher.find()){
ruleStr.startsWith("@Json:", true) -> {
mode = Mode.Json
rule = ruleStr.substring(6)
}
ruleStr.startsWith("$.") -> {
mode = Mode.Json
rule = ruleStr
}
else -> rule = ruleStr
}
}
//分离put
rule = splitPutRule(rule, putMap)
//@get,{{ }}, 拆分
var start = 0
var tmp: String
val evalMatcher = evalPattern.matcher(rule)
while (evalMatcher.find()) {
if (evalMatcher.start() > start) {
tmp = rule.substring(start, evalMatcher.start())
if (mode != Mode.Js && mode != Mode.Regex
&& start == 0 && !tmp.contains("##")
) {
mode = Mode.Regex
}
splitRegex(tmp)
} else if (mode != Mode.Js && mode != Mode.Regex
&& evalMatcher.start() == 0
) {
mode = Mode.Regex
}
tmp = evalMatcher.group()
when {
@ -492,8 +498,6 @@ class AnalyzeRule(val ruleData: RuleDataInterface) : JsExtensions {
}
start = evalMatcher.end()
}
}
if (rule.length > start) {
tmp = rule.substring(start)
splitRegex(tmp)
@ -508,12 +512,10 @@ class AnalyzeRule(val ruleData: RuleDataInterface) : JsExtensions {
var tmp: String
val ruleStrArray = ruleStr.split("##")
val regexMatcher = regexPattern.matcher(ruleStrArray[0])
if(regexMatcher.find()) {
while (regexMatcher.find()) {
if (mode != Mode.Js && mode != Mode.Regex) {
mode = Mode.Regex
}
do{
if (regexMatcher.start() > start) {
tmp = ruleStr.substring(start, regexMatcher.start())
ruleType.add(defaultRuleType)
@ -523,7 +525,6 @@ class AnalyzeRule(val ruleData: RuleDataInterface) : JsExtensions {
ruleType.add(tmp.substring(1).toInt())
ruleParam.add(tmp)
start = regexMatcher.end()
}while (regexMatcher.find())
}
if (ruleStr.length > start) {
tmp = ruleStr.substring(start)
@ -682,6 +683,7 @@ class AnalyzeRule(val ruleData: RuleDataInterface) : JsExtensions {
companion object {
private val putPattern = Pattern.compile("@put:(\\{[^}]+?\\})", Pattern.CASE_INSENSITIVE)
private val getPattern = Pattern.compile("@get:\\{([^}]+?)\\}", Pattern.CASE_INSENSITIVE)
private val evalPattern =
Pattern.compile("@get:\\{[^}]+?\\}|\\{\\{[\\w\\W]*?\\}\\}", Pattern.CASE_INSENSITIVE)
private val regexPattern = Pattern.compile("\\$\\d{1,2}")

@ -1,11 +1,13 @@
package io.legado.app.model.analyzeRule
import android.annotation.SuppressLint
import android.text.TextUtils
import androidx.annotation.Keep
import com.bumptech.glide.load.model.GlideUrl
import com.bumptech.glide.load.model.LazyHeaders
import io.legado.app.constant.AppConst.SCRIPT_ENGINE
import io.legado.app.constant.AppConst.UA_NAME
import io.legado.app.constant.AppPattern.EXP_PATTERN
import io.legado.app.constant.AppPattern.JS_PATTERN
import io.legado.app.data.entities.BaseBook
import io.legado.app.data.entities.BookChapter
@ -39,6 +41,7 @@ class AnalyzeUrl(
headerMapF: Map<String, String>? = null
) : JsExtensions {
companion object {
val splitUrlRegex = Regex(",\\s*(?=\\{)")
private val pagePattern = Pattern.compile("<(.*?)>")
}
@ -55,8 +58,7 @@ class AnalyzeUrl(
private var retry: Int = 0
init {
val pos = baseUrl.indexOf(',')
if(pos != -1)baseUrl = baseUrl.substring(0,pos)
baseUrl = baseUrl.split(splitUrlRegex, 1)[0]
headerMapF?.let {
headerMap.putAll(it)
if (it.containsKey("proxy")) {
@ -72,24 +74,39 @@ class AnalyzeUrl(
}
private fun analyzeJs() {
val ruleList = arrayListOf<String>()
var start = 0
var tmp: String
val jsMatcher = JS_PATTERN.matcher(ruleUrl)
while (jsMatcher.find()) {
if (jsMatcher.start() > start) {
tmp =
ruleUrl.substring(start, jsMatcher.start()).trim { it <= ' ' }
if (tmp.isNotEmpty()) {
ruleUrl = tmp.replace("@result", ruleUrl)
ruleUrl.substring(start, jsMatcher.start()).replace("\n", "").trim { it <= ' ' }
if (!TextUtils.isEmpty(tmp)) {
ruleList.add(tmp)
}
}
ruleUrl = evalJS(jsMatcher.group(2)?:jsMatcher.group(1), ruleUrl) as String
ruleList.add(jsMatcher.group())
start = jsMatcher.end()
}
if (ruleUrl.length > start) {
tmp = ruleUrl.substring(start).trim { it <= ' ' }
if (tmp.isNotEmpty()) {
ruleUrl = tmp.replace("@result", ruleUrl)
tmp = ruleUrl.substring(start).replace("\n", "").trim { it <= ' ' }
if (!TextUtils.isEmpty(tmp)) {
ruleList.add(tmp)
}
}
for (rule in ruleList) {
var ruleStr = rule
when {
ruleStr.startsWith("<js>") -> {
ruleStr = ruleStr.substring(4, ruleStr.lastIndexOf("<"))
ruleUrl = evalJS(ruleStr, ruleUrl) as String
}
ruleStr.startsWith("@js", true) -> {
ruleStr = ruleStr.substring(4)
ruleUrl = evalJS(ruleStr, ruleUrl) as String
}
else -> ruleUrl = ruleStr.replace("@result", ruleUrl)
}
}
}
@ -97,12 +114,23 @@ class AnalyzeUrl(
/**
* 替换关键字,页数,JS
*/
private fun replaceKeyPageJs() { //先替换内嵌规则再替换页数规则,避免内嵌规则中存在大于小于号时,规则被切错
private fun replaceKeyPageJs() {
//page
page?.let {
val matcher = pagePattern.matcher(ruleUrl)
while (matcher.find()) {
val pages = matcher.group(1)!!.split(",")
ruleUrl = if (page <= pages.size) {
ruleUrl.replace(matcher.group(), pages[page - 1].trim { it <= ' ' })
} else {
ruleUrl.replace(matcher.group(), pages.last().trim { it <= ' ' })
}
}
}
//js
if (ruleUrl.contains("{{") && ruleUrl.contains("}}")) {
val analyze = RuleAnalyzer(ruleUrl) //创建解析
var jsEval: Any
val sb = StringBuffer()
val bindings = SimpleBindings()
bindings["java"] = this
bindings["cookie"] = CookieStore
@ -113,28 +141,21 @@ class AnalyzeUrl(
bindings["speakText"] = speakText
bindings["speakSpeed"] = speakSpeed
bindings["book"] = book
//替换所有内嵌{{js}}
val url = analyze.innerRule("{{","}}"){
when(val jsEval = SCRIPT_ENGINE.eval(it, bindings)){
is String -> jsEval
jsEval is Double && jsEval % 1.0 == 0.0 -> String.format("%.0f", jsEval)
else -> jsEval.toString()
}
}
if(url.isNotEmpty())ruleUrl = url
}
//page
page?.let {
val matcher = pagePattern.matcher(ruleUrl)
while (matcher.find()) {
val pages = matcher.group(1)!!.split(",")
ruleUrl = if (page < pages.size) { //pages[pages.size - 1]等同于pages.last()
ruleUrl.replace(matcher.group(), pages[page - 1].trim { it <= ' ' })
val expMatcher = EXP_PATTERN.matcher(ruleUrl)
while (expMatcher.find()) {
jsEval = expMatcher.group(1)?.let {
SCRIPT_ENGINE.eval(it, bindings)
} ?: ""
if (jsEval is String) {
expMatcher.appendReplacement(sb, jsEval)
} else if (jsEval is Double && jsEval % 1.0 == 0.0) {
expMatcher.appendReplacement(sb, String.format("%.0f", jsEval))
} else {
ruleUrl.replace(matcher.group(), pages.last().trim { it <= ' ' })
expMatcher.appendReplacement(sb, jsEval.toString())
}
}
expMatcher.appendTail(sb)
ruleUrl = sb.toString()
}
}
@ -142,20 +163,15 @@ class AnalyzeUrl(
* 处理URL
*/
private fun initUrl() {
var pos = ruleUrl.indexOf(',')
urlHasQuery = if(pos == -1) ruleUrl else ruleUrl.substring(0,pos)
url = NetworkUtils.getAbsoluteURL(baseUrl,urlHasQuery )
var urlArray = ruleUrl.split(splitUrlRegex, 2)
url = NetworkUtils.getAbsoluteURL(baseUrl, urlArray[0])
urlHasQuery = urlArray[0]
NetworkUtils.getBaseUrl(url)?.let {
baseUrl = it
}
if(pos != -1 ) {
GSON.fromJsonObject<UrlOption>(ruleUrl.substring(pos + 1).trim{ it < '!'})?.let { option ->
if (urlArray.size > 1) {
val option = GSON.fromJsonObject<UrlOption>(urlArray[1])
option?.let { _ ->
option.method?.let {
if (it.equals("POST", true)) method = RequestMethod.POST
}
@ -185,17 +201,16 @@ class AnalyzeUrl(
retry = option.retry
}
}
headerMap[UA_NAME] ?: let {
headerMap[UA_NAME] = AppConfig.userAgent
}
when (method) {
RequestMethod.GET -> {
if (!useWebView) {
pos = url.indexOf('?')
if(pos != -1) {
analyzeFields(url.substring(pos + 1))
url = url.substring(0,pos)
urlArray = url.split("?")
url = urlArray[0]
if (urlArray.size > 1) {
analyzeFields(urlArray[1])
}
}
}
@ -218,7 +233,7 @@ class AnalyzeUrl(
for (query in queryS) {
val queryM = query.splitNotBlank("=")
val value = if (queryM.size > 1) queryM[1] else ""
if (charset.isNullOrEmpty()) {
if (TextUtils.isEmpty(charset)) {
if (NetworkUtils.hasUrlEncoded(value)) {
fieldMap[queryM[0]] = value
} else {

@ -1,9 +1,5 @@
package io.legado.app.model.analyzeRule
import io.legado.app.utils.isJson
import java.util.ArrayList
import java.util.regex.Pattern
//通用的规则切分处理
class RuleAnalyzer(data: String, code: Boolean = false) {
@ -13,10 +9,12 @@ class RuleAnalyzer(data: String, code: Boolean = false) {
private var start = 0 //当前处理字段的开始
private var startX = 0 //当前规则的开始
private var rule = ArrayList<String>() //分割出的规则列表
private var rule = arrayOf<String>() //分割出的规则列表
private var step: Int = 0 //分割字符的长度
var elementsType = "" //当前分割字符串
var innerType = true //是否为内嵌{{}}
//设置平衡组函数,json或JavaScript时设置成chompCodeBalanced,否则为chompRuleBalanced
val chompBalanced = if (code) ::chompCodeBalanced else ::chompRuleBalanced
fun trim() { // 修剪当前规则之前的"@"或者空白符
if(queue[pos] == '@' || queue[pos] < '!') { //在while里重复设置start和startX会拖慢执行速度,所以先来个判断是否存在需要修剪的字段,最后再一次性设置start和startX
@ -34,8 +32,8 @@ class RuleAnalyzer(data: String, code: Boolean = false) {
}
/**
* 从剩余字串中拉出一个字符串直到但不包括匹配序列
* @param seq 查找的字符串 **区分大小写**
* 从剩余字串中拉出一个字符串直到但不包括匹配序列或剩余字串用完
* @param seq 分隔字符 **区分大小写**
* @return 是否找到相应字段
*/
fun consumeTo(seq: String): Boolean {
@ -68,31 +66,6 @@ class RuleAnalyzer(data: String, code: Boolean = false) {
pos++ //逐个试探
}
return false
}
/**
* 从剩余字串中拉出一个字符串直到但不包括匹配序列匹配参数列表中一项即为匹配或剩余字串用完
* @param seq 匹配字符串序列
* @return 成功返回true并设置间隔失败则直接返回fasle
*/
fun chompToAny(vararg seq: String): Boolean {
var pos = pos //声明新变量记录匹配位置,不更改类本身的位置
while (pos != queue.length) {
for (s in seq) {
if (queue.regionMatches(pos, s, 0, s.length)) {
rule += queue.substring(this.pos, pos)
pos += s.length //跳过分隔符
ruleTypeList += s //追加类型到列表
start = this.pos
this.pos = pos //匹配成功, 同步处理位置到类
return true //匹配就返回 true
}
}
pos++ //逐个试探
}
return false
}
@ -117,6 +90,75 @@ class RuleAnalyzer(data: String, code: Boolean = false) {
return -1
}
/**
 * Consumes one balanced group of JavaScript from [queue], starting at the current [pos].
 *
 * The JavaScript only has to be syntactically valid; it does not need to avoid any
 * rule-syntax keywords, because characters inside string literals, template literals,
 * regex literals and comments are not counted towards nesting.
 *
 * NOTE(review): a `/` outside literals and comments that is not followed by `/` or `*`
 * is always taken as the start of a regex literal, so a division operator is misread.
 * Confirm that callers never pass such code.
 *
 * @param f classifies a character found outside literals and comments: `true` opens a
 *   nesting level, `false` closes one, `null` is ignored. Defaults to `{` / `}`.
 * @return `true` (and [pos] is advanced past the group) once the nesting depth is zero
 *   or below; `false` (and [pos] is left untouched) when the input ends while a level
 *   is still open.
 */
fun chompJsBalanced(
    f: ((Char) -> Boolean?) = {
        when (it) {
            '{' -> true // opens one nesting level
            '}' -> false // closes one nesting level
            else -> null
        }
    }
): Boolean {
    var pos = pos // scratch cursor; copied back to this.pos only on success
    val length = queue.length
    var depth = 0 // nesting depth as reported by f
    var inSingleQuote = false
    var inDoubleQuote = false
    var inTemplate = false // inside a `...` literal
    var inRegex = false
    var inRegexClass = false // inside [...] of a regex, where '/' does not end the literal
    var inLineComment = false
    var inBlockComment = false
    do {
        // >= rather than ==: never index past the end of the input.
        if (pos >= length) break
        var c = queue[pos++]
        if (c == '\\' && !inLineComment && !inBlockComment) {
            // Escape: skip the escaped character, if there is one. Backslashes inside
            // comments are plain text, so they must not swallow a newline or a "*/".
            if (pos < length) pos++
        } else {
            when {
                inLineComment -> if (c == '\n') inLineComment = false
                // Peek instead of consuming, so the second '*' of "**/" can still
                // pair with the '/' that follows it.
                inBlockComment -> if (c == '*' && pos < length && queue[pos] == '/') {
                    c = queue[pos++]
                    inBlockComment = false
                }
                inSingleQuote -> if (c == '\'') inSingleQuote = false
                inDoubleQuote -> if (c == '"') inDoubleQuote = false
                inTemplate -> if (c == '`') inTemplate = false
                inRegex -> when {
                    inRegexClass -> if (c == ']') inRegexClass = false
                    c == '[' -> inRegexClass = true
                    c == '/' -> inRegex = false
                }
                c == '\'' -> inSingleQuote = true
                c == '"' -> inDoubleQuote = true
                c == '`' -> inTemplate = true
                c == '/' -> {
                    // Peek at the next character; it is consumed only when it belongs
                    // to the comment opener. A regex body is scanned normally so that
                    // a leading backslash keeps its escape meaning.
                    val next = if (pos < length) queue[pos] else null
                    when (next) {
                        '/' -> {
                            pos++
                            inLineComment = true
                        }
                        '*' -> {
                            pos++
                            inBlockComment = true
                        }
                        else -> inRegex = true
                    }
                }
            }
            val insideUnit = inLineComment || inBlockComment || inRegex ||
                    inSingleQuote || inDoubleQuote || inTemplate
            if (!insideUnit) {
                val opens = f(c)
                if (opens != null) {
                    if (opens) depth++ else depth--
                }
            }
        }
    } while (depth > 0) // keep pulling until every opened level is closed
    return if (depth > 0) false else {
        this.pos = pos // commit the scratch cursor
        true
    }
}
/**
* 拉出一个非内嵌代码平衡组存在转义文本
*/
@ -194,7 +236,7 @@ class RuleAnalyzer(data: String, code: Boolean = false) {
* 不用正则,不到最后不切片也不用中间变量存储,只在序列中标记当前查找字段的开头结尾,到返回时才切片,高效快速准确切割规则
* 解决jsonPath自带的"&&""||"与阅读的规则冲突,以及规则正则或字符串中包含"&&""||""%%""@"导致的冲突
*/
tailrec fun splitRule(vararg split: String): ArrayList<String> { //首段匹配,elementsType为空
tailrec fun splitRule(vararg split: String): Array<String> { //首段匹配,elementsType为空
if (split.size == 1) {
elementsType = split[0] //设置分割字串
@ -218,7 +260,7 @@ class RuleAnalyzer(data: String, code: Boolean = false) {
if (st == -1) {
rule = arrayListOf(queue.substring(startX, end)) //压入分隔的首段规则到数组
rule = arrayOf(queue.substring(startX, end)) //压入分隔的首段规则到数组
elementsType = queue.substring(end, end + step) //设置组合类型
pos = end + step //跳过分隔符
@ -235,7 +277,7 @@ class RuleAnalyzer(data: String, code: Boolean = false) {
if (st > end) { //先匹配到st1pos,表明分隔字串不在选择器中,将选择器前分隔字串分隔的字段依次压入数组
rule = arrayListOf(queue.substring(startX, end)) //压入分隔的首段规则到数组
rule = arrayOf(queue.substring(startX, end)) //压入分隔的首段规则到数组
elementsType = queue.substring(end, end + step) //设置组合类型
pos = end + step //跳过分隔符
@ -269,7 +311,7 @@ class RuleAnalyzer(data: String, code: Boolean = false) {
}
@JvmName("splitRuleNext")
private tailrec fun splitRule(): ArrayList<String> { //二段匹配被调用,elementsType非空(已在首段赋值),直接按elementsType查找,比首段采用的方式更快
private tailrec fun splitRule(): Array<String> { //二段匹配被调用,elementsType非空(已在首段赋值),直接按elementsType查找,比首段采用的方式更快
val end = pos //记录分隔位置
pos = start //重回开始,启动另一种查找
@ -294,7 +336,7 @@ class RuleAnalyzer(data: String, code: Boolean = false) {
if (st > end) { //先匹配到st1pos,表明分隔字串不在选择器中,将选择器前分隔字串分隔的字段依次压入数组
rule += arrayListOf(queue.substring(startX, end)) //压入分隔的首段规则到数组
rule += arrayOf(queue.substring(startX, end)) //压入分隔的首段规则到数组
pos = end + step //跳过分隔符
while (consumeTo(elementsType) && pos < st) { //循环切分规则压入数组
@ -331,9 +373,9 @@ class RuleAnalyzer(data: String, code: Boolean = false) {
/**
* 替换内嵌规则
* @param inner 起始标志,{$.
* @param inner 起始标志,{$. {{
* @param startStep 不属于规则部分的前置字符长度{$.{不属于规则的组成部分故startStep为1
* @param endStep 不属于规则部分的后置字符长度
* @param endStep 不属于规则部分的后置字符长度}}长度为2
* @param fr 查找到内嵌规则时用于解析的函数
*
* */
@ -343,13 +385,14 @@ class RuleAnalyzer(data: String, code: Boolean = false) {
endStep: Int = 1,
fr: (String) -> String?
): String {
val st = StringBuilder()
while (consumeTo(inner)) { //拉取成功返回true,ruleAnalyzes里的字符序列索引变量pos后移相应位置,否则返回false,且isEmpty为true
val posPre = pos //记录consumeTo匹配位置
if (chompCodeBalanced('{', '}')) {
val frv = fr(queue.substring(posPre + startStep, pos - endStep))
if (!frv.isNullOrEmpty()) {
if (frv != null) {
st.append(queue.substring(startX, posPre) + frv) //压入内嵌规则前的内容,及内嵌规则解析得到的字符串
startX = pos //记录下次规则起点
continue //获取内容成功,继续选择下个内嵌规则
@ -363,206 +406,64 @@ class RuleAnalyzer(data: String, code: Boolean = false) {
}.toString()
}
companion object {
/**
* 替换内嵌规则
* @param fr 查找到内嵌规则时用于解析的函数
*
* */
fun innerRule(
startStr:String,
endStr:String,
fr: (String) -> String?
): String {
val st = StringBuilder()
while (consumeTo(startStr)) { //拉取成功返回true,ruleAnalyzes里的字符序列索引变量pos后移相应位置,否则返回false,且isEmpty为true
pos += startStr.length //跳过开始字符串
val posPre = pos //记录consumeTo匹配位置
if (consumeTo(endStr)) {
val frv = fr(queue.substring(posPre, pos))
st.append(queue.substring(startX, posPre - startStr.length) + frv) //压入内嵌规则前的内容,及内嵌规则解析得到的字符串
pos += endStr.length //跳过结束字符串
startX = pos //记录下次规则起点
}
}
return if(startX == 0) queue else st.apply {
append(queue.substring(startX))
}.toString()
}
* 转义字符
*/
private const val ESC = '\\'
val ruleTypeList = ArrayList<String>()
//设置平衡组函数,json或JavaScript时设置成chompCodeBalanced,否则为chompRuleBalanced
val chompBalanced = if (code) ::chompCodeBalanced else ::chompRuleBalanced
enum class Mode {
XPath, Json, Default, Js, Regex
}
/**
* 不用正则,不到最后不切片也不用中间变量存储,只在序列中标记当前查找字段的开头结尾,到返回时才切片,高效快速准确切割规则
* 解决jsonPath自带的"&&""||"与阅读的规则冲突,以及规则正则或字符串中包含"&&""||""%%""@"导致的冲突
* 阅读共有分隔字串起始部分
* "##","@@","{{","{[","<js>", "@js:"
*/
tailrec fun splitAnyRule(): ArrayList<String> { //首段匹配,elementsType为空
if (!consumeToAny(* STARTSTR)) { //未找到分隔符
rule += queue.substring(startX)
return rule
}
val splitList = arrayOf("##", "@@", "{{", "{[", "<js>", "@js:")
val end = pos //记录分隔位置
pos = start //重回开始,启动另一种查找
do {
val st = findToAny('[', '(') //查找筛选器位置
if (st == -1) {
rule += arrayOf(queue.substring(startX, end)) //压入分隔的首段规则到数组
ruleTypeList += queue.substring(end, end + step) //追加类型到类型列表
pos = end + step //跳过分隔符
while (!chompToAny(elementsType)) { //循环切分规则压入数组
rule += queue.substring(pos) //将剩余字段压入数组末尾
return rule
}
}
if (st > end) { //先匹配到st1pos,表明分隔字串不在选择器中,将选择器前分隔字串分隔的字段依次压入数组
rule += arrayOf(queue.substring(startX, end)) //压入分隔的首段规则到数组
ruleTypeList += queue.substring(end, end + step) //设置组合类型
pos = end + step //跳过分隔符
while (!chompToAny(elementsType) && pos >= st) { //循环切分规则压入数组
if (pos > st) {
startX = start
} else { //执行到此,证明后面再无分隔字符
rule += queue.substring(pos) //将剩余字段压入数组末尾
return rule
}
}
}
pos = st //位置回到筛选器处
val next = if (queue[pos] == '[') ']' else ')' //平衡组末尾字符
if (!chompBalanced(queue[pos], next)){
ruleTypeList.clear()
rule.clear()
consumeToAny("<js>","@js:")
rule += queue.substring(0,pos)
ruleTypeList += queue.substring(pos, pos + 4) //设置组合类型
}
} while (end > pos)
start = pos //设置开始查找筛选器位置的起始位置
return splitAnyRule() //递归调用首段匹配
}
var isJSON = false
var isUrl = false
var isUrlList = false
var isMulu = false
var isreverse = false
var isAllInOne= false
var isFind = false
private val findName = ArrayList<String>()
var replaceRegex = ""
var replacement = ""
var replaceFirst = false
val putMap = HashMap<String, String>()
private val ruleParam = ArrayList<String>()
private val ruleType = ArrayList<Int>()
private val getRuleType = -2
private val jsRuleType = -1
private val defaultRuleType = 0
@JvmOverloads
fun setContent(cont: String,type:String = ""): RuleAnalyzer {
queue = cont
when(type){
"mulu" -> {
if(queue[0] =='-'){ //目录反转
isreverse = true
startX++
pos++
}else if(queue[0] =='?'){ //AllInOne
isAllInOne = true
startX++
pos++
}
isMulu = true
}
"find" -> {
pos = queue.indexOf("::")
findName.add(queue.substring(startX,pos))
pos+=2
isFind = true
}
"url" -> {
isUrl = true
}
"urlList" -> {
isUrlList = true
}
else -> {
isJSON = queue.toString().isJson()
}
}
return this
}
companion object {
/**
* 发现名称-链接分隔字串
* "::"
*/
const val splitListFaXian = "::"
/**
* 转义字符
* 目录专有起始字符
* "-"
*/
private const val ESC = '\\'
const val splitListMulu = "-"
/**
* 结果为元素列表的 all in one 模式起始字符
* "+"
*/
const val splitListTongYi = "+"
val validKeys = arrayOf("class", "id", "tag", "text", "children")
/**
* 结果为元素列表的项的同规则组合结构
* "||","&&","%%"
*/
val splitListReSplit = arrayOf("||", "&&", "%%")
/**
* 参数字符串
* js脚本结束字串
* "</js>"
*/
private val STARTSTRURL = arrayOf(",{",)
const val splitListEndJS = "</js>"
private val regexPattern = Pattern.compile("\\$\\d{1,2}")
private val putPattern = Pattern.compile("@put:(\\{[^}]+?\\})", Pattern.CASE_INSENSITIVE)
private val getPattern = Pattern.compile("@get:\\{([^}]+?)\\}", Pattern.CASE_INSENSITIVE)
private val evalPattern = Pattern.compile("\\{\\{[\\w\\W]*?\\}\\}", Pattern.CASE_INSENSITIVE)
/**
*内嵌js结束字串
* "}}"
*/
const val splitListEndInnerJS = "}}"
val ENDSTR= mapOf(
"<js>" to "</js>",
"{{" to "}}",
)
/**
* 内嵌规则结束字串
* "]}"
*/
const val splitListEndInnerRule = "]}"
/**
* 规则起始字符串
* '[', ']', '(', ')','{','}'
*/
private val STARTSTR = arrayOf("@js:","<js>","</js>","##","@@","@"
,"{{@", "{{","}}"
,"}"
, "{@", "{/", "{$", "{?"
, "{class"
, "{id"
, "{tag"
, "{text"
, "{children"
,"/","$","@xpath:","@json:","@css:"
,"||", "&&", "%%"
,"@get:{","@put:{"
)
val splitListPublic = charArrayOf('[', ']', '(', ')', '{', '}')
/**
* '*',"/","//",":","::","@","|","@xpath:"

@ -415,22 +415,30 @@ object ReadBook {
else -> chapter.title
}
val contents = contentProcessor!!.getContent(book, chapter.title, content)
val textChapter = ChapterProvider.getTextChapter(book, chapter,contents,chapterSize)
val offset = chapter.index - durChapterIndex
if (upContent) callBack?.upContent(offset ,resetPageOffset)
when (offset) {
0 -> {
curTextChapter = textChapter
when (chapter.index) {
durChapterIndex -> {
curTextChapter =
ChapterProvider.getTextChapter(
book, chapter, contents, chapterSize
)
if (upContent) callBack?.upContent(resetPageOffset = resetPageOffset)
callBack?.upView()
curPageChanged()
callBack?.contentLoadFinish()
}
- 1 -> {
prevTextChapter = textChapter
durChapterIndex - 1 -> {
prevTextChapter =
ChapterProvider.getTextChapter(
book, chapter, contents, chapterSize
)
if (upContent) callBack?.upContent(-1, resetPageOffset)
}
1 -> {
nextTextChapter = textChapter
durChapterIndex + 1 -> {
nextTextChapter =
ChapterProvider.getTextChapter(
book, chapter, contents, chapterSize
)
if (upContent) callBack?.upContent(1, resetPageOffset)
}
}
}

@ -115,7 +115,15 @@ object ChapterProvider {
content.replace(AppPattern.imgPattern.toRegex(), "\n\$0\n")
.split("\n").forEach { text ->
if (text.isNotBlank()) {
if (!text.startsWith("<img src=\"")) { //非图片
val matcher = AppPattern.imgPattern.matcher(text)
if (matcher.find()) {
matcher.group(1)?.let { src ->
durY = setTypeImage(
book, bookChapter, src,
durY, textPages, book.getImageStyle()
)
}
} else {
val isTitle = index == 0
val textPaint = if (isTitle) titlePaint else contentPaint
if (!(isTitle && ReadBookConfig.titleMode == 2)) {
@ -124,11 +132,6 @@ object ChapterProvider {
stringBuilder, isTitle, textPaint
)
}
} else { //图片
durY = setTypeImage(
book, bookChapter, text.substring(10, text.length-2),
durY, textPages, book.getImageStyle()
)
}
}
}
@ -147,10 +150,7 @@ object ChapterProvider {
return TextChapter(
bookChapter.index, bookChapter.title,
bookChapter.getAbsoluteURL().run{
val pos = indexOf(',')
if(pos == -1) this else substring(0,pos)
},
bookChapter.getAbsoluteURL().split(AnalyzeUrl.splitUrlRegex)[0],
textPages, chapterSize
)
}

@ -1,7 +1,8 @@
package io.legado.app.utils
import io.legado.app.constant.AppPattern
import io.legado.app.model.analyzeRule.AnalyzeUrl
import java.net.URL
import java.util.regex.Pattern
object HtmlFormatter {
private val wrapHtmlRegex = "</?(?:div|p|br|hr|h\\d|article|dd|dl)[^>]*>".toRegex()
@ -13,7 +14,7 @@ object HtmlFormatter {
return html.replace(wrapHtmlRegex, "\n")
.replace(otherRegex, "")
.replace("\\s*\\n+\\s*".toRegex(), "\n  ")
.replace("^[\\n\\s]*".toRegex(), "  ")
.replace("^[\\n\\s]+".toRegex(), "  ")
.replace("[\\n\\s]+$".toRegex(), "")
}
@ -21,64 +22,22 @@ object HtmlFormatter {
fun formatKeepImg(html: String?, redirectUrl: URL?): String {
html ?: return ""
val keepImgHtml = html.replace(wrapHtmlRegex, "\n")
.replace(notImgHtmlRegex, "")
.replace("[\\n\\s]+\$|^[\\n\\s]*".toRegex(), "")
.replace("\\s*\\n+\\s*".toRegex(), "\n")
val sb = StringBuffer("  ") //前置缩减
val hasDataType:Boolean //是否有数据属性
//图片有data-开头的数据属性时优先用数据属性作为src,没有数据属性时匹配src
val imgPattern = Pattern.compile(
if(keepImgHtml.matches("<img[^>]*data-".toRegex())) {
hasDataType = true
"<img[^>]*data-[^=]*= *\"([^\"])\"[^>]*>"
}
else {
hasDataType = false
"<img[^>]*src *= *\"([^\"{]+(?:\\{(?:[^{}]|\\{[^{}]*\\})*\\})?)\"[^>]*>"
}, Pattern.CASE_INSENSITIVE
)
val matcher = imgPattern.matcher(keepImgHtml)
val keepImgHtml = formatKeepImg(html)
val sb = StringBuffer()
val matcher = AppPattern.imgPattern.matcher(keepImgHtml)
var appendPos = 0
if(matcher.find()){
if(hasDataType || matcher.group(1)!!.indexOf(',') == -1) { //图片无参
do{
sb.append(keepImgHtml.substring(appendPos, matcher.start()).replace("\n","\n  ")) //非图片部分换行缩减
sb.append( "<img src=\"${
NetworkUtils.getAbsoluteURL(redirectUrl,matcher.group(1)!!)
}\">" )
appendPos = matcher.end()
}while (matcher.find())
}else{ //图片有参
do{
val url = matcher.group(1)!!
val pos = url.indexOf(',')
sb.append(keepImgHtml.substring(appendPos, matcher.start()).replace("\n","\n  ")) //非图片部分换行缩减
sb.append(
"<img src=\"${
NetworkUtils.getAbsoluteURL(
redirectUrl,
url.substring(0, pos)
)
},${
url.substring(pos)
}\">"
)
appendPos = matcher.end()
}while(matcher.find())
while (matcher.find()) {
val urlArray = matcher.group(1)!!.split(AnalyzeUrl.splitUrlRegex)
var url = NetworkUtils.getAbsoluteURL(redirectUrl, urlArray[0])
if (urlArray.size > 1) {
url = "$url,${urlArray[1]}"
}
sb.append(keepImgHtml.substring(appendPos, matcher.start()))
sb.append("<img src=\"$url\" >")
appendPos = matcher.end()
}
if (appendPos < keepImgHtml.length) {
sb.append(keepImgHtml.substring(appendPos, keepImgHtml.length).replace("\n","\n  ")) //非图片部分换行缩减
sb.append(keepImgHtml.substring(appendPos, keepImgHtml.length))
}
return sb.toString()
}

Loading…
Cancel
Save