diff --git a/app/src/main/java/io/legado/app/constant/AppPattern.kt b/app/src/main/java/io/legado/app/constant/AppPattern.kt
index b664fee7c..450347735 100644
--- a/app/src/main/java/io/legado/app/constant/AppPattern.kt
+++ b/app/src/main/java/io/legado/app/constant/AppPattern.kt
@@ -5,14 +5,14 @@ import java.util.regex.Pattern
@Suppress("RegExpRedundantEscape")
object AppPattern {
val JS_PATTERN: Pattern =
- Pattern.compile("([\\w\\W]+?)|@js:([\\w\\W]*)", Pattern.CASE_INSENSITIVE)
+ Pattern.compile("([\\w\\W]*?|@js:[\\w\\W]*$)", Pattern.CASE_INSENSITIVE)
val EXP_PATTERN: Pattern = Pattern.compile("\\{\\{([\\w\\W]*?)\\}\\}")
-
- //匹配格式化后的图片格式
- val imgPattern: Pattern = Pattern.compile("]+)\">")
+ // When an image tag carries a data-* attribute, prefer it as the image source; match src only when no data-* attribute exists
+ val imgPattern: Pattern =
+ Pattern.compile("]*data-)[^>]*src|[^>]*data-)[^=]*= *\"([^\"{]+(?:\\{(?:[^{}]|\\{[^{}]*\\})*\\})?)\"[^>]*>", Pattern.CASE_INSENSITIVE)
val nameRegex = Regex("\\s+作\\s*者.*|\\s+\\S+\\s+著")
val authorRegex = Regex("^.*?作\\s*者[::\\s]*|\\s+著")
val fileNameRegex = Regex("[\\\\/:*?\"<>|.]")
val splitGroupRegex = Regex("[,;,;]")
-}
\ No newline at end of file
+}
diff --git a/app/src/main/java/io/legado/app/data/entities/BookChapter.kt b/app/src/main/java/io/legado/app/data/entities/BookChapter.kt
index 67bba2e5d..bdf56417c 100644
--- a/app/src/main/java/io/legado/app/data/entities/BookChapter.kt
+++ b/app/src/main/java/io/legado/app/data/entities/BookChapter.kt
@@ -5,6 +5,7 @@ import androidx.room.Entity
import androidx.room.ForeignKey
import androidx.room.Ignore
import androidx.room.Index
+import io.legado.app.model.analyzeRule.AnalyzeUrl
import io.legado.app.utils.GSON
import io.legado.app.utils.MD5Utils
import io.legado.app.utils.NetworkUtils
@@ -12,32 +13,33 @@ import io.legado.app.utils.fromJsonObject
import kotlinx.parcelize.IgnoredOnParcel
import kotlinx.parcelize.Parcelize
+
@Parcelize
@Entity(
- tableName = "chapters",
- primaryKeys = ["url", "bookUrl"],
- indices = [(Index(value = ["bookUrl"], unique = false)),
- (Index(value = ["bookUrl", "index"], unique = true))],
- foreignKeys = [(ForeignKey(
- entity = Book::class,
- parentColumns = ["bookUrl"],
- childColumns = ["bookUrl"],
- onDelete = ForeignKey.CASCADE
- ))]
+ tableName = "chapters",
+ primaryKeys = ["url", "bookUrl"],
+ indices = [(Index(value = ["bookUrl"], unique = false)),
+ (Index(value = ["bookUrl", "index"], unique = true))],
+ foreignKeys = [(ForeignKey(
+ entity = Book::class,
+ parentColumns = ["bookUrl"],
+ childColumns = ["bookUrl"],
+ onDelete = ForeignKey.CASCADE
+ ))]
) // 删除书籍时自动删除章节
data class BookChapter(
- var url: String = "", // 章节地址
- var title: String = "", // 章节标题
- var baseUrl: String = "", //用来拼接相对url
- var bookUrl: String = "", // 书籍地址
- var index: Int = 0, // 章节序号
- var resourceUrl: String? = null, // 音频真实URL
- var tag: String? = null, //
- var start: Long? = null, // 章节起始位置
- var end: Long? = null, // 章节终止位置
- var startFragmentId: String? = null, //EPUB书籍当前章节的fragmentId
- var endFragmentId: String? = null, //EPUB书籍下一章节的fragmentId
- var variable: String? = null //变量
+ var url: String = "", // 章节地址
+ var title: String = "", // 章节标题
+ var baseUrl: String = "", //用来拼接相对url
+ var bookUrl: String = "", // 书籍地址
+ var index: Int = 0, // 章节序号
+ var resourceUrl: String? = null, // 音频真实URL
+ var tag: String? = null, //
+ var start: Long? = null, // 章节起始位置
+ var end: Long? = null, // 章节终止位置
+ var startFragmentId: String? = null, //EPUB书籍当前章节的fragmentId
+ var endFragmentId: String? = null, //EPUB书籍下一章节的fragmentId
+ var variable: String? = null //变量
) : Parcelable {
@delegate:Transient
@@ -61,13 +63,13 @@ data class BookChapter(
return false
}
- fun getAbsoluteURL():String{
- val pos = url.indexOf(',')
- return if(pos == -1) NetworkUtils.getAbsoluteURL(baseUrl,url)
- else NetworkUtils.getAbsoluteURL(
- baseUrl,
- url.substring(0, pos)
- ) + url.substring(pos)
+ /** Absolute chapter URL: the address part is passed through [NetworkUtils.getAbsoluteURL]
+  *  with [baseUrl]; a ",{...}" option suffix, if present, is re-appended after a single comma. */
+ fun getAbsoluteURL(): String {
+     // limit = 2: only the first ",{" separates address and options; later ones are option content.
+     val urlArray = url.split(AnalyzeUrl.splitUrlRegex, 2)
+     val absoluteUrl = NetworkUtils.getAbsoluteURL(baseUrl, urlArray[0])
+     return if (urlArray.size > 1) "$absoluteUrl,${urlArray[1]}" else absoluteUrl
}
fun getFileName(): String = String.format("%05d-%s.nb", index, MD5Utils.md5Encode16(title))
diff --git a/app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeRule.kt b/app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeRule.kt
index 3efbd2f02..57b3d25a1 100644
--- a/app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeRule.kt
+++ b/app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeRule.kt
@@ -25,9 +25,7 @@ import kotlin.collections.HashMap
@Keep
@Suppress("unused", "RegExpRedundantEscape")
class AnalyzeRule(val ruleData: RuleDataInterface) : JsExtensions {
-
- var book: BaseBook? = if (ruleData is BaseBook) ruleData else null
-
+ var book: BaseBook? = null
var chapter: BookChapter? = null
var nextChapterUrl: String? = null
var content: Any? = null
@@ -44,11 +42,18 @@ class AnalyzeRule(val ruleData: RuleDataInterface) : JsExtensions {
private var objectChangedJS = false
private var objectChangedJP = false
+ init {
+     // When the rule data is itself a book it doubles as `book`;
+     // the safe cast yields null otherwise, i.e. the property's initial value.
+     book = ruleData as? BaseBook
+ }
+
@JvmOverloads
fun setContent(content: Any?, baseUrl: String? = null): AnalyzeRule {
- if (content == null) throw AssertionError("内容不可空(Content cannot be null)")
+ if (content == null) throw AssertionError("Content cannot be null")
this.content = content
setBaseUrl(baseUrl)
+ isJSON = content.toString().isJson()
objectChangedXP = true
objectChangedJS = true
objectChangedJP = true
@@ -64,8 +69,7 @@ class AnalyzeRule(val ruleData: RuleDataInterface) : JsExtensions {
fun setRedirectUrl(url: String): URL? {
kotlin.runCatching {
- val pos = url.indexOf(',')
- redirectUrl = URL( if(pos == -1) url else url.substring(0,pos))
+ redirectUrl = URL(url.split(AnalyzeUrl.splitUrlRegex, 2)[0]) // limit 2 cuts off a ",{options}" suffix; a limit of 1 returns the input unsplit
}
return redirectUrl
}
@@ -192,9 +196,9 @@ class AnalyzeRule(val ruleData: RuleDataInterface) : JsExtensions {
@JvmOverloads
fun getString(
- ruleList: List,
- isUrl: Boolean = false,
- value: String? = null
+ ruleList: List,
+ isUrl: Boolean = false,
+ value: String? = null
): String {
var result: Any? = value
val content = this.content
@@ -258,8 +262,8 @@ class AnalyzeRule(val ruleData: RuleDataInterface) : JsExtensions {
result?.let {
result = when (sourceRule.mode) {
Mode.Regex -> AnalyzeByRegex.getElement(
- result.toString(),
- sourceRule.rule.splitNotBlank("&&")
+ result.toString(),
+ sourceRule.rule.splitNotBlank("&&")
)
Mode.Js -> evalJS(sourceRule.rule, it)
Mode.Json -> getAnalyzeByJSonPath(it).getObject(sourceRule.rule)
@@ -289,8 +293,8 @@ class AnalyzeRule(val ruleData: RuleDataInterface) : JsExtensions {
result?.let {
result = when (sourceRule.mode) {
Mode.Regex -> AnalyzeByRegex.getElements(
- result.toString(),
- sourceRule.rule.splitNotBlank("&&")
+ result.toString(),
+ sourceRule.rule.splitNotBlank("&&")
)
Mode.Js -> evalJS(sourceRule.rule, result)
Mode.Json -> getAnalyzeByJSonPath(it).getList(sourceRule.rule)
@@ -356,72 +360,58 @@ class AnalyzeRule(val ruleData: RuleDataInterface) : JsExtensions {
/**
* 分解规则生成规则列表
*/
- fun splitSourceRule(ruleStr: String?): List {
- if (ruleStr.isNullOrEmpty()) return ArrayList()
+ fun splitSourceRule(ruleStr: String?, mode: Mode = Mode.Default): List {
+ var vRuleStr = ruleStr
val ruleList = ArrayList()
+ if (vRuleStr.isNullOrEmpty()) return ruleList
//检测Mode
- var mMode: Mode = Mode.Default
- fun mode(ruleStr0:String)=when {
- ruleStr0.startsWith("@@") -> {
- mMode = Mode.Default
- ruleStr0.substring(2)
+ var mMode: Mode = mode
+ when {
+ vRuleStr.startsWith("@@") -> {
+ vRuleStr = vRuleStr.substring(2)
}
- ruleStr0.startsWith("@XPath:", true) -> {
+ vRuleStr.startsWith("@XPath:", true) -> {
mMode = Mode.XPath
- ruleStr0.substring(7)
+ vRuleStr = vRuleStr.substring(7)
}
- ruleStr0.startsWith("/") -> {//XPath特征很明显,无需配置单独的识别标头
- mMode = Mode.XPath
- ruleStr0
- }
- ruleStr0.startsWith("@Json:", true) -> {
+ vRuleStr.startsWith("@Json:", true) -> {
mMode = Mode.Json
- ruleStr0.substring(6)
+ vRuleStr = vRuleStr.substring(6)
}
- ruleStr0.startsWith(":") -> { //:与伪类选择器冲突,改成?更合理
+ vRuleStr.startsWith(":") -> {
mMode = Mode.Regex
isRegex = true
- ruleStr0.substring(1)
- }
- ( ruleStr0[1] == '.' || ruleStr0[1] == '[') && ruleStr0[0] == '$' || content.toString().isJson() -> {
- mMode = Mode.Json
- ruleStr0
- }
- else -> {
- mMode = Mode.Default
- ruleStr0
+ vRuleStr = vRuleStr.substring(1)
}
+ isRegex -> mMode = Mode.Regex
+ isJSON -> mMode = Mode.Json
}
//拆分为规则列表
var start = 0
var tmp: String
- val jsMatcher = JS_PATTERN.matcher(ruleStr)
-
- while (jsMatcher.find()){
+ val jsMatcher = JS_PATTERN.matcher(vRuleStr)
+ while (jsMatcher.find()) {
if (jsMatcher.start() > start) {
- tmp = ruleStr.substring(start, jsMatcher.start()).trim { it <= ' ' }
- if (tmp.isNotEmpty()) {
- ruleList.add(SourceRule(mode(tmp), mMode))
+ tmp = vRuleStr.substring(start, jsMatcher.start()).trim { it <= ' ' }
+ if (!TextUtils.isEmpty(tmp)) {
+ ruleList.add(SourceRule(tmp, mMode))
}
}
- ruleList.add(SourceRule(jsMatcher.group(2)?:jsMatcher.group(1), Mode.Js))
+ ruleList.add(SourceRule(jsMatcher.group(), Mode.Js))
start = jsMatcher.end()
}
-
- if (ruleStr.length > start){
- tmp = ruleStr.substring(start).trim { it <= ' ' }
- if (tmp.isNotEmpty()) {
- ruleList.add(SourceRule(mode(tmp), mMode))
+ if (vRuleStr.length > start) {
+ tmp = vRuleStr.substring(start).trim { it <= ' ' }
+ if (!TextUtils.isEmpty(tmp)) {
+ ruleList.add(SourceRule(tmp, mMode))
}
}
-
return ruleList
}
/**
* 规则类
*/
-
inner class SourceRule internal constructor(ruleStr: String, mainMode: Mode = Mode.Default) {
internal var mode: Mode
internal var rule: String
@@ -437,24 +427,62 @@ class AnalyzeRule(val ruleData: RuleDataInterface) : JsExtensions {
init {
this.mode = mainMode
+ if (mode == Mode.Js) {
+ rule = if (ruleStr.startsWith("")) {
+ ruleStr.substring(4, ruleStr.lastIndexOf("<"))
+ } else {
+ ruleStr.substring(4)
+ }
+ } else {
+ when {
+ ruleStr.startsWith("@CSS:", true) -> {
+ mode = Mode.Default
+ rule = ruleStr
+ }
+ ruleStr.startsWith("@@") -> {
+ mode = Mode.Default
+ rule = ruleStr.substring(2)
+ }
+ ruleStr.startsWith("@XPath:", true) -> {
+ mode = Mode.XPath
+ rule = ruleStr.substring(7)
+ }
+ ruleStr.startsWith("//") -> {//XPath特征很明显,无需配置单独的识别标头
+ mode = Mode.XPath
+ rule = ruleStr
+ }
+ ruleStr.startsWith("@Json:", true) -> {
+ mode = Mode.Json
+ rule = ruleStr.substring(6)
+ }
+ ruleStr.startsWith("$.") -> {
+ mode = Mode.Json
+ rule = ruleStr
+ }
+ else -> rule = ruleStr
+ }
+ }
//分离put
- rule = splitPutRule(ruleStr, putMap)
+ rule = splitPutRule(rule, putMap)
//@get,{{ }}, 拆分
var start = 0
var tmp: String
val evalMatcher = evalPattern.matcher(rule)
-
- if(evalMatcher.find()){
-
- var modeX = mode == Mode.Js || mode == Mode.Regex
- if (evalMatcher.start() > 0 ) {
- tmp = rule.substring(0, evalMatcher.start())
- modeX = modeX || tmp.contains("##")
+ while (evalMatcher.find()) {
+ if (evalMatcher.start() > start) {
+ tmp = rule.substring(start, evalMatcher.start())
+ if (mode != Mode.Js && mode != Mode.Regex
+ && start == 0 && !tmp.contains("##")
+ ) {
+ mode = Mode.Regex
+ }
splitRegex(tmp)
+ } else if (mode != Mode.Js && mode != Mode.Regex
+ && evalMatcher.start() == 0
+ ) {
+ mode = Mode.Regex
}
- if(!modeX)mode = Mode.Regex
tmp = evalMatcher.group()
-
when {
tmp.startsWith("@get:", true) -> {
ruleType.add(getRuleType)
@@ -468,32 +496,8 @@ class AnalyzeRule(val ruleData: RuleDataInterface) : JsExtensions {
splitRegex(tmp)
}
}
-
start = evalMatcher.end()
-
- while (evalMatcher.find()){
- if (evalMatcher.start() > start) {
- tmp = rule.substring(start, evalMatcher.start())
- splitRegex(tmp)
- }
- tmp = evalMatcher.group()
- when {
- tmp.startsWith("@get:", true) -> {
- ruleType.add(getRuleType)
- ruleParam.add(tmp.substring(6, tmp.lastIndex))
- }
- tmp.startsWith("{{") -> {
- ruleType.add(jsRuleType)
- ruleParam.add(tmp.substring(2, tmp.length - 2))
- }
- else -> {
- splitRegex(tmp)
- }
- }
- start = evalMatcher.end()
- }
}
-
if (rule.length > start) {
tmp = rule.substring(start)
splitRegex(tmp)
@@ -508,22 +512,19 @@ class AnalyzeRule(val ruleData: RuleDataInterface) : JsExtensions {
var tmp: String
val ruleStrArray = ruleStr.split("##")
val regexMatcher = regexPattern.matcher(ruleStrArray[0])
-
- if(regexMatcher.find()) {
+ while (regexMatcher.find()) {
if (mode != Mode.Js && mode != Mode.Regex) {
mode = Mode.Regex
}
- do{
- if (regexMatcher.start() > start) {
- tmp = ruleStr.substring(start, regexMatcher.start())
- ruleType.add(defaultRuleType)
- ruleParam.add(tmp)
- }
- tmp = regexMatcher.group()
- ruleType.add(tmp.substring(1).toInt())
+ if (regexMatcher.start() > start) {
+ tmp = ruleStr.substring(start, regexMatcher.start())
+ ruleType.add(defaultRuleType)
ruleParam.add(tmp)
- start = regexMatcher.end()
- }while (regexMatcher.find())
+ }
+ tmp = regexMatcher.group()
+ ruleType.add(tmp.substring(1).toInt())
+ ruleParam.add(tmp)
+ start = regexMatcher.end()
}
if (ruleStr.length > start) {
tmp = ruleStr.substring(start)
@@ -566,8 +567,8 @@ class AnalyzeRule(val ruleData: RuleDataInterface) : JsExtensions {
jsEval == null -> Unit
jsEval is String -> infoVal.insert(0, jsEval)
jsEval is Double && jsEval % 1.0 == 0.0 -> infoVal.insert(
- 0,
- String.format("%.0f", jsEval)
+ 0,
+ String.format("%.0f", jsEval)
)
else -> infoVal.insert(0, jsEval.toString())
}
@@ -614,8 +615,8 @@ class AnalyzeRule(val ruleData: RuleDataInterface) : JsExtensions {
fun put(key: String, value: String): String {
chapter?.putVariable(key, value)
- ?: book?.putVariable(key, value)
- ?: ruleData.putVariable(key, value)
+ ?: book?.putVariable(key, value)
+ ?: ruleData.putVariable(key, value)
return value
}
@@ -629,9 +630,9 @@ class AnalyzeRule(val ruleData: RuleDataInterface) : JsExtensions {
}
}
return chapter?.variableMap?.get(key)
- ?: book?.variableMap?.get(key)
- ?: ruleData.variableMap[key]
- ?: ""
+ ?: book?.variableMap?.get(key)
+ ?: ruleData.variableMap[key]
+ ?: ""
}
/**
@@ -682,8 +683,9 @@ class AnalyzeRule(val ruleData: RuleDataInterface) : JsExtensions {
companion object {
private val putPattern = Pattern.compile("@put:(\\{[^}]+?\\})", Pattern.CASE_INSENSITIVE)
+ private val getPattern = Pattern.compile("@get:\\{([^}]+?)\\}", Pattern.CASE_INSENSITIVE)
private val evalPattern =
- Pattern.compile("@get:\\{[^}]+?\\}|\\{\\{[\\w\\W]*?\\}\\}", Pattern.CASE_INSENSITIVE)
+ Pattern.compile("@get:\\{[^}]+?\\}|\\{\\{[\\w\\W]*?\\}\\}", Pattern.CASE_INSENSITIVE)
private val regexPattern = Pattern.compile("\\$\\d{1,2}")
private val titleNumPattern = Pattern.compile("(第)(.+?)(章)")
}
diff --git a/app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeUrl.kt b/app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeUrl.kt
index 919f970e9..73e90e782 100644
--- a/app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeUrl.kt
+++ b/app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeUrl.kt
@@ -1,11 +1,13 @@
package io.legado.app.model.analyzeRule
import android.annotation.SuppressLint
+import android.text.TextUtils
import androidx.annotation.Keep
import com.bumptech.glide.load.model.GlideUrl
import com.bumptech.glide.load.model.LazyHeaders
import io.legado.app.constant.AppConst.SCRIPT_ENGINE
import io.legado.app.constant.AppConst.UA_NAME
+import io.legado.app.constant.AppPattern.EXP_PATTERN
import io.legado.app.constant.AppPattern.JS_PATTERN
import io.legado.app.data.entities.BaseBook
import io.legado.app.data.entities.BookChapter
@@ -39,6 +41,7 @@ class AnalyzeUrl(
headerMapF: Map? = null
) : JsExtensions {
companion object {
+ val splitUrlRegex = Regex(",\\s*(?=\\{)")
private val pagePattern = Pattern.compile("<(.*?)>")
}
@@ -55,8 +58,7 @@ class AnalyzeUrl(
private var retry: Int = 0
init {
- val pos = baseUrl.indexOf(',')
- if(pos != -1)baseUrl = baseUrl.substring(0,pos)
+ baseUrl = baseUrl.split(splitUrlRegex, 2)[0] // limit 2 drops a ",{options}" suffix; a limit of 1 returns the input unsplit
headerMapF?.let {
headerMap.putAll(it)
if (it.containsKey("proxy")) {
@@ -72,24 +74,39 @@ class AnalyzeUrl(
}
private fun analyzeJs() {
+ val ruleList = arrayListOf()
var start = 0
var tmp: String
val jsMatcher = JS_PATTERN.matcher(ruleUrl)
while (jsMatcher.find()) {
if (jsMatcher.start() > start) {
tmp =
- ruleUrl.substring(start, jsMatcher.start()).trim { it <= ' ' }
- if (tmp.isNotEmpty()) {
- ruleUrl = tmp.replace("@result", ruleUrl)
+ ruleUrl.substring(start, jsMatcher.start()).replace("\n", "").trim { it <= ' ' }
+ if (!TextUtils.isEmpty(tmp)) {
+ ruleList.add(tmp)
}
}
- ruleUrl = evalJS(jsMatcher.group(2)?:jsMatcher.group(1), ruleUrl) as String
+ ruleList.add(jsMatcher.group())
start = jsMatcher.end()
}
if (ruleUrl.length > start) {
- tmp = ruleUrl.substring(start).trim { it <= ' ' }
- if (tmp.isNotEmpty()) {
- ruleUrl = tmp.replace("@result", ruleUrl)
+ tmp = ruleUrl.substring(start).replace("\n", "").trim { it <= ' ' }
+ if (!TextUtils.isEmpty(tmp)) {
+ ruleList.add(tmp)
+ }
+ }
+ for (rule in ruleList) {
+ var ruleStr = rule
+ when {
+ ruleStr.startsWith("") -> {
+ ruleStr = ruleStr.substring(4, ruleStr.lastIndexOf("<"))
+ ruleUrl = evalJS(ruleStr, ruleUrl) as String
+ }
+ ruleStr.startsWith("@js", true) -> {
+ ruleStr = ruleStr.substring(4)
+ ruleUrl = evalJS(ruleStr, ruleUrl) as String
+ }
+ else -> ruleUrl = ruleStr.replace("@result", ruleUrl)
}
}
}
@@ -97,12 +114,23 @@ class AnalyzeUrl(
/**
* 替换关键字,页数,JS
*/
- private fun replaceKeyPageJs() { //先替换内嵌规则再替换页数规则,避免内嵌规则中存在大于小于号时,规则被切错
+ private fun replaceKeyPageJs() {
+ //page
+ page?.let {
+ val matcher = pagePattern.matcher(ruleUrl)
+ while (matcher.find()) {
+ val pages = matcher.group(1)!!.split(",")
+ ruleUrl = if (page <= pages.size) {
+ ruleUrl.replace(matcher.group(), pages[page - 1].trim { it <= ' ' })
+ } else {
+ ruleUrl.replace(matcher.group(), pages.last().trim { it <= ' ' })
+ }
+ }
+ }
//js
if (ruleUrl.contains("{{") && ruleUrl.contains("}}")) {
-
- val analyze = RuleAnalyzer(ruleUrl) //创建解析
-
+ var jsEval: Any
+ val sb = StringBuffer()
val bindings = SimpleBindings()
bindings["java"] = this
bindings["cookie"] = CookieStore
@@ -113,28 +141,21 @@ class AnalyzeUrl(
bindings["speakText"] = speakText
bindings["speakSpeed"] = speakSpeed
bindings["book"] = book
-
- //替换所有内嵌{{js}}
- val url = analyze.innerRule("{{","}}"){
- when(val jsEval = SCRIPT_ENGINE.eval(it, bindings)){
- is String -> jsEval
- jsEval is Double && jsEval % 1.0 == 0.0 -> String.format("%.0f", jsEval)
- else -> jsEval.toString()
- }
- }
- if(url.isNotEmpty())ruleUrl = url
- }
- //page
- page?.let {
- val matcher = pagePattern.matcher(ruleUrl)
- while (matcher.find()) {
- val pages = matcher.group(1)!!.split(",")
- ruleUrl = if (page < pages.size) { //pages[pages.size - 1]等同于pages.last()
- ruleUrl.replace(matcher.group(), pages[page - 1].trim { it <= ' ' })
+ val expMatcher = EXP_PATTERN.matcher(ruleUrl)
+ while (expMatcher.find()) {
+     jsEval = expMatcher.group(1)?.let {
+         SCRIPT_ENGINE.eval(it, bindings)
+     } ?: ""
+     // Whole-number doubles are rendered without a fractional part.
+     val text = if (jsEval is Double && jsEval % 1.0 == 0.0) {
+         String.format("%.0f", jsEval)
+     } else {
+         jsEval.toString()
+     }
+     // Quote the text: a bare '$' or '\' would be parsed as a group reference or escape.
+     expMatcher.appendReplacement(sb, java.util.regex.Matcher.quoteReplacement(text))
+ }
+ expMatcher.appendTail(sb)
+ ruleUrl = sb.toString()
}
}
@@ -142,20 +163,15 @@ class AnalyzeUrl(
* 处理URL
*/
private fun initUrl() {
-
- var pos = ruleUrl.indexOf(',')
-
- urlHasQuery = if(pos == -1) ruleUrl else ruleUrl.substring(0,pos)
-
- url = NetworkUtils.getAbsoluteURL(baseUrl,urlHasQuery )
-
+ var urlArray = ruleUrl.split(splitUrlRegex, 2)
+ url = NetworkUtils.getAbsoluteURL(baseUrl, urlArray[0])
+ urlHasQuery = urlArray[0]
NetworkUtils.getBaseUrl(url)?.let {
baseUrl = it
}
-
- if(pos != -1 ) {
- GSON.fromJsonObject(ruleUrl.substring(pos + 1).trim{ it < '!'})?.let { option ->
-
+ if (urlArray.size > 1) {
+ val option = GSON.fromJsonObject(urlArray[1])
+ option?.let { _ ->
option.method?.let {
if (it.equals("POST", true)) method = RequestMethod.POST
}
@@ -185,17 +201,16 @@ class AnalyzeUrl(
retry = option.retry
}
}
-
headerMap[UA_NAME] ?: let {
headerMap[UA_NAME] = AppConfig.userAgent
}
when (method) {
RequestMethod.GET -> {
if (!useWebView) {
- pos = url.indexOf('?')
- if(pos != -1) {
- analyzeFields(url.substring(pos + 1))
- url = url.substring(0,pos)
+ urlArray = url.split("?", limit = 2) // split at the first '?' only, so a '?' inside the query is kept
+ url = urlArray[0]
+ if (urlArray.size > 1) {
+ analyzeFields(urlArray[1])
}
}
}
@@ -218,7 +233,7 @@ class AnalyzeUrl(
for (query in queryS) {
val queryM = query.splitNotBlank("=")
val value = if (queryM.size > 1) queryM[1] else ""
- if (charset.isNullOrEmpty()) {
+ if (TextUtils.isEmpty(charset)) {
if (NetworkUtils.hasUrlEncoded(value)) {
fieldMap[queryM[0]] = value
} else {
diff --git a/app/src/main/java/io/legado/app/model/analyzeRule/RuleAnalyzer.kt b/app/src/main/java/io/legado/app/model/analyzeRule/RuleAnalyzer.kt
index 050625dfc..2b6462827 100644
--- a/app/src/main/java/io/legado/app/model/analyzeRule/RuleAnalyzer.kt
+++ b/app/src/main/java/io/legado/app/model/analyzeRule/RuleAnalyzer.kt
@@ -1,9 +1,5 @@
package io.legado.app.model.analyzeRule
-import io.legado.app.utils.isJson
-import java.util.ArrayList
-import java.util.regex.Pattern
-
//通用的规则切分处理
class RuleAnalyzer(data: String, code: Boolean = false) {
@@ -13,10 +9,12 @@ class RuleAnalyzer(data: String, code: Boolean = false) {
private var start = 0 //当前处理字段的开始
private var startX = 0 //当前规则的开始
- private var rule = ArrayList() //分割出的规则列表
+ private var rule = arrayOf() //分割出的规则列表
private var step: Int = 0 //分割字符的长度
var elementsType = "" //当前分割字符串
- var innerType = true //是否为内嵌{{}}
+
+ // Balanced-group function: chompCodeBalanced for JSON or JavaScript input, chompRuleBalanced otherwise
+ val chompBalanced = if (code) ::chompCodeBalanced else ::chompRuleBalanced
fun trim() { // 修剪当前规则之前的"@"或者空白符
if(queue[pos] == '@' || queue[pos] < '!') { //在while里重复设置start和startX会拖慢执行速度,所以先来个判断是否存在需要修剪的字段,最后再一次性设置start和startX
@@ -34,8 +32,8 @@ class RuleAnalyzer(data: String, code: Boolean = false) {
}
/**
- * 从剩余字串中拉出一个字符串,直到但不包括匹配序列
- * @param seq 查找的字符串 **区分大小写**
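+ * Pulls a string off the remaining input, up to but not including the matching sequence, or until the input is used up.
+ * @param seq delimiter string, **case-sensitive**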
* @return 是否找到相应字段。
*/
fun consumeTo(seq: String): Boolean {
@@ -68,31 +66,6 @@ class RuleAnalyzer(data: String, code: Boolean = false) {
pos++ //逐个试探
}
- return false
- }
- /**
- * 从剩余字串中拉出一个字符串,直到但不包括匹配序列(匹配参数列表中一项即为匹配),或剩余字串用完。
- * @param seq 匹配字符串序列
- * @return 成功返回true并设置间隔,失败则直接返回fasle
- */
- fun chompToAny(vararg seq: String): Boolean {
- var pos = pos //声明新变量记录匹配位置,不更改类本身的位置
-
- while (pos != queue.length) {
-
- for (s in seq) {
- if (queue.regionMatches(pos, s, 0, s.length)) {
- rule += queue.substring(this.pos, pos)
- pos += s.length //跳过分隔符
- ruleTypeList += s //追加类型到列表
- start = this.pos
- this.pos = pos //匹配成功, 同步处理位置到类
- return true //匹配就返回 true
- }
- }
-
- pos++ //逐个试探
- }
return false
}
@@ -117,6 +90,75 @@ class RuleAnalyzer(data: String, code: Boolean = false) {
return -1
}
+ /**
+  * Consumes one balanced JavaScript group (by default `{ ... }`) starting at the cursor.
+  * String/template literals, comments and regex literals are skipped, so delimiters inside
+  * them never change the nesting depth and the script may contain any rule keyword.
+  * NOTE(review): a '/' not followed by '/' or '*' always starts a regex literal here, so a
+  * division operator is misread as one — confirm callers can live with that.
+  * @param f classifies a character: `true` opens a level, `false` closes one, `null` is neutral
+  * @return `false` (cursor untouched) if the input ends with a level still open, else `true`
+  * with the cursor advanced past everything consumed
+  */
+ fun chompJsBalanced(
+     f: ((Char) -> Boolean?) = {
+         when (it) {
+             '{' -> true // opens a nesting level
+             '}' -> false // closes a nesting level
+             else -> null
+         }
+     }
+ ): Boolean {
+     val length = queue.length
+     var pos = pos // scratch cursor; the class cursor is only written on success
+     var depth = 0 // nesting depth as reported by f
+     var inCharClass = false // inside [...] of a regex literal, where '/' does not end it
+     var inSingleQuote = false
+     var inDoubleQuote = false
+     var inOtherQuote = false // template string delimited by '`'
+     var regex = false // inside a regex literal
+     var commit = false // inside a line comment
+     var commits = false // inside a block comment
+     do {
+         if (pos >= length) break // input exhausted
+         val c = queue[pos++]
+         if (c == '\\' && !commit && !commits) {
+             if (pos < length) pos++ // skip the escaped character without running past the end
+         } else when {
+             commit -> if (c == '\n') commit = false
+             commits -> if (c == '*' && pos < length && queue[pos] == '/') { // peek: "**/" closes too
+                 pos++
+                 commits = false
+             }
+             regex -> when (c) {
+                 '[' -> inCharClass = true // '[' nested in a class is literal, so a flag suffices
+                 ']' -> inCharClass = false
+                 '/' -> if (!inCharClass) regex = false
+             }
+             inSingleQuote -> if (c == '\'') inSingleQuote = false
+             inDoubleQuote -> if (c == '"') inDoubleQuote = false
+             inOtherQuote -> if (c == '`') inOtherQuote = false
+             c == '\'' -> inSingleQuote = true
+             c == '"' -> inDoubleQuote = true
+             c == '`' -> inOtherQuote = true
+             c == '/' -> when (if (pos < length) queue[pos] else null) { // peek, never read past the end
+                 '/' -> { commit = true; pos++ }
+                 '*' -> { commits = true; pos++ }
+                 else -> regex = true // next character stays unread: it may be '[' or an escape
+             }
+             else -> when (f(c)) {
+                 true -> depth++
+                 false -> depth--
+                 null -> Unit
+             }
+         }
+     } while (depth > 0) // until the outermost level is closed
+     return if (depth > 0) false else {
+         this.pos = pos // publish the cursor
+         true
+     }
+ }
+
/**
* 拉出一个非内嵌代码平衡组,存在转义文本
*/
@@ -194,7 +236,7 @@ class RuleAnalyzer(data: String, code: Boolean = false) {
* 不用正则,不到最后不切片也不用中间变量存储,只在序列中标记当前查找字段的开头结尾,到返回时才切片,高效快速准确切割规则
* 解决jsonPath自带的"&&"和"||"与阅读的规则冲突,以及规则正则或字符串中包含"&&"、"||"、"%%"、"@"导致的冲突
*/
- tailrec fun splitRule(vararg split: String): ArrayList { //首段匹配,elementsType为空
+ tailrec fun splitRule(vararg split: String): Array { //首段匹配,elementsType为空
if (split.size == 1) {
elementsType = split[0] //设置分割字串
@@ -218,7 +260,7 @@ class RuleAnalyzer(data: String, code: Boolean = false) {
if (st == -1) {
- rule = arrayListOf(queue.substring(startX, end)) //压入分隔的首段规则到数组
+ rule = arrayOf(queue.substring(startX, end)) //压入分隔的首段规则到数组
elementsType = queue.substring(end, end + step) //设置组合类型
pos = end + step //跳过分隔符
@@ -235,7 +277,7 @@ class RuleAnalyzer(data: String, code: Boolean = false) {
if (st > end) { //先匹配到st1pos,表明分隔字串不在选择器中,将选择器前分隔字串分隔的字段依次压入数组
- rule = arrayListOf(queue.substring(startX, end)) //压入分隔的首段规则到数组
+ rule = arrayOf(queue.substring(startX, end)) //压入分隔的首段规则到数组
elementsType = queue.substring(end, end + step) //设置组合类型
pos = end + step //跳过分隔符
@@ -269,7 +311,7 @@ class RuleAnalyzer(data: String, code: Boolean = false) {
}
@JvmName("splitRuleNext")
- private tailrec fun splitRule(): ArrayList { //二段匹配被调用,elementsType非空(已在首段赋值),直接按elementsType查找,比首段采用的方式更快
+ private tailrec fun splitRule(): Array { //二段匹配被调用,elementsType非空(已在首段赋值),直接按elementsType查找,比首段采用的方式更快
val end = pos //记录分隔位置
pos = start //重回开始,启动另一种查找
@@ -294,7 +336,7 @@ class RuleAnalyzer(data: String, code: Boolean = false) {
if (st > end) { //先匹配到st1pos,表明分隔字串不在选择器中,将选择器前分隔字串分隔的字段依次压入数组
- rule += arrayListOf(queue.substring(startX, end)) //压入分隔的首段规则到数组
+ rule += arrayOf(queue.substring(startX, end)) //压入分隔的首段规则到数组
pos = end + step //跳过分隔符
while (consumeTo(elementsType) && pos < st) { //循环切分规则压入数组
@@ -331,9 +373,9 @@ class RuleAnalyzer(data: String, code: Boolean = false) {
/**
* 替换内嵌规则
- * @param inner 起始标志,如{$.
+ * @param inner start marker, e.g. {$. or {{
* @param startStep 不属于规则部分的前置字符长度,如{$.中{不属于规则的组成部分,故startStep为1
+ * @param endStep length of the trailing characters that are not part of the rule, e.g. 2 for }}
+ * @param endStep 不属于规则部分的后置字符长度,如}}长度为2
* @param fr 查找到内嵌规则时,用于解析的函数
*
* */
@@ -343,13 +385,14 @@ class RuleAnalyzer(data: String, code: Boolean = false) {
endStep: Int = 1,
fr: (String) -> String?
): String {
+
val st = StringBuilder()
while (consumeTo(inner)) { //拉取成功返回true,ruleAnalyzes里的字符序列索引变量pos后移相应位置,否则返回false,且isEmpty为true
val posPre = pos //记录consumeTo匹配位置
if (chompCodeBalanced('{', '}')) {
val frv = fr(queue.substring(posPre + startStep, pos - endStep))
- if (!frv.isNullOrEmpty()) {
+ if (frv != null) {
st.append(queue.substring(startX, posPre) + frv) //压入内嵌规则前的内容,及内嵌规则解析得到的字符串
startX = pos //记录下次规则起点
continue //获取内容成功,继续选择下个内嵌规则
@@ -363,206 +406,64 @@ class RuleAnalyzer(data: String, code: Boolean = false) {
}.toString()
}
- /**
- * 替换内嵌规则
- * @param fr 查找到内嵌规则时,用于解析的函数
- *
- * */
- fun innerRule(
- startStr:String,
- endStr:String,
- fr: (String) -> String?
- ): String {
-
- val st = StringBuilder()
- while (consumeTo(startStr)) { //拉取成功返回true,ruleAnalyzes里的字符序列索引变量pos后移相应位置,否则返回false,且isEmpty为true
- pos += startStr.length //跳过开始字符串
- val posPre = pos //记录consumeTo匹配位置
- if (consumeTo(endStr)) {
- val frv = fr(queue.substring(posPre, pos))
- st.append(queue.substring(startX, posPre - startStr.length) + frv) //压入内嵌规则前的内容,及内嵌规则解析得到的字符串
- pos += endStr.length //跳过结束字符串
- startX = pos //记录下次规则起点
- }
- }
-
- return if(startX == 0) queue else st.apply {
- append(queue.substring(startX))
- }.toString()
- }
-
- val ruleTypeList = ArrayList()
- //设置平衡组函数,json或JavaScript时设置成chompCodeBalanced,否则为chompRuleBalanced
- val chompBalanced = if (code) ::chompCodeBalanced else ::chompRuleBalanced
- enum class Mode {
- XPath, Json, Default, Js, Regex
- }
- /**
- * 不用正则,不到最后不切片也不用中间变量存储,只在序列中标记当前查找字段的开头结尾,到返回时才切片,高效快速准确切割规则
- * 解决jsonPath自带的"&&"和"||"与阅读的规则冲突,以及规则正则或字符串中包含"&&"、"||"、"%%"、"@"导致的冲突
- */
- tailrec fun splitAnyRule(): ArrayList { //首段匹配,elementsType为空
-
- if (!consumeToAny(* STARTSTR)) { //未找到分隔符
- rule += queue.substring(startX)
- return rule
- }
-
- val end = pos //记录分隔位置
- pos = start //重回开始,启动另一种查找
-
- do {
- val st = findToAny('[', '(') //查找筛选器位置
-
- if (st == -1) {
-
- rule += arrayOf(queue.substring(startX, end)) //压入分隔的首段规则到数组
-
- ruleTypeList += queue.substring(end, end + step) //追加类型到类型列表
- pos = end + step //跳过分隔符
-
- while (!chompToAny(elementsType)) { //循环切分规则压入数组
- rule += queue.substring(pos) //将剩余字段压入数组末尾
- return rule
- }
- }
-
- if (st > end) { //先匹配到st1pos,表明分隔字串不在选择器中,将选择器前分隔字串分隔的字段依次压入数组
-
- rule += arrayOf(queue.substring(startX, end)) //压入分隔的首段规则到数组
-
- ruleTypeList += queue.substring(end, end + step) //设置组合类型
- pos = end + step //跳过分隔符
-
- while (!chompToAny(elementsType) && pos >= st) { //循环切分规则压入数组
- if (pos > st) {
- startX = start
- } else { //执行到此,证明后面再无分隔字符
- rule += queue.substring(pos) //将剩余字段压入数组末尾
- return rule
- }
- }
- }
-
- pos = st //位置回到筛选器处
- val next = if (queue[pos] == '[') ']' else ')' //平衡组末尾字符
-
- if (!chompBalanced(queue[pos], next)){
- ruleTypeList.clear()
- rule.clear()
- consumeToAny("","@js:")
- rule += queue.substring(0,pos)
- ruleTypeList += queue.substring(pos, pos + 4) //设置组合类型
- }
-
- } while (end > pos)
-
- start = pos //设置开始查找筛选器位置的起始位置
-
- return splitAnyRule() //递归调用首段匹配
- }
-
- var isJSON = false
-
- var isUrl = false
- var isUrlList = false
-
- var isMulu = false
- var isreverse = false
- var isAllInOne= false
-
- var isFind = false
- private val findName = ArrayList()
-
- var replaceRegex = ""
- var replacement = ""
- var replaceFirst = false
- val putMap = HashMap()
- private val ruleParam = ArrayList()
- private val ruleType = ArrayList()
- private val getRuleType = -2
- private val jsRuleType = -1
- private val defaultRuleType = 0
-
- @JvmOverloads
- fun setContent(cont: String,type:String = ""): RuleAnalyzer {
- queue = cont
- when(type){
- "mulu" -> {
- if(queue[0] =='-'){ //目录反转
- isreverse = true
- startX++
- pos++
- }else if(queue[0] =='?'){ //AllInOne
- isAllInOne = true
- startX++
- pos++
- }
- isMulu = true
- }
- "find" -> {
- pos = queue.indexOf("::")
- findName.add(queue.substring(startX,pos))
- pos+=2
- isFind = true
- }
- "url" -> {
-
- isUrl = true
- }
- "urlList" -> {
-
- isUrlList = true
- }
- else -> {
- isJSON = queue.toString().isJson()
- }
- }
-
- return this
- }
-
companion object {
-
/**
* 转义字符
*/
private const val ESC = '\\'
+ /**
+ * 阅读共有分隔字串起始部分
+ * "##","@@","{{","{[","", "@js:"
+ */
+ val splitList = arrayOf("##", "@@", "{{", "{[", "", "@js:")
+
+ /**
+ * Separator between name and link in a discovery entry
+ * "::"
+ */
+ const val splitListFaXian = "::"
+
+ /**
+ * Leading character specific to table-of-contents rules
+ * "-"
+ */
+ const val splitListMulu = "-"
+
+ /**
+ * Leading character of the all-in-one mode, whose result is a list of elements
+ * "+"
+ */
+ const val splitListTongYi = "+"
- val validKeys = arrayOf("class", "id", "tag", "text", "children")
+ /**
+ * Operators that combine same-kind rules whose results are lists of elements
+ * "||","&&","%%"
+ */
+ val splitListReSplit = arrayOf("||", "&&", "%%")
/**
- * 参数字符串
+ * js脚本结束字串
+ * ""
*/
- private val STARTSTRURL = arrayOf(",{",)
+ const val splitListEndJS = ""
- private val regexPattern = Pattern.compile("\\$\\d{1,2}")
- private val putPattern = Pattern.compile("@put:(\\{[^}]+?\\})", Pattern.CASE_INSENSITIVE)
- private val getPattern = Pattern.compile("@get:\\{([^}]+?)\\}", Pattern.CASE_INSENSITIVE)
- private val evalPattern = Pattern.compile("\\{\\{[\\w\\W]*?\\}\\}", Pattern.CASE_INSENSITIVE)
+ /**
+ * End marker of inline JS
+ * "}}"
+ */
+ const val splitListEndInnerJS = "}}"
- val ENDSTR= mapOf(
- "" to "",
- "{{" to "}}",
- )
+ /**
+ * End marker of an inline rule
+ * "]}"
+ */
+ const val splitListEndInnerRule = "]}"
/**
- * 规则起始字符串
+ * '[', ']', '(', ')','{','}'
*/
- private val STARTSTR = arrayOf("@js:","","","##","@@","@"
- ,"{{@", "{{","}}"
- ,"}"
- , "{@", "{/", "{$", "{?"
- , "{class"
- , "{id"
- , "{tag"
- , "{text"
- , "{children"
- ,"/","$","@xpath:","@json:","@css:"
- ,"||", "&&", "%%"
- ,"@get:{","@put:{"
- )
+ val splitListPublic = charArrayOf('[', ']', '(', ')', '{', '}')
/**
* '*',"/","//",":","::","@","|","@xpath:"
diff --git a/app/src/main/java/io/legado/app/service/help/ReadBook.kt b/app/src/main/java/io/legado/app/service/help/ReadBook.kt
index 62ab815c0..56a8f4bb7 100644
--- a/app/src/main/java/io/legado/app/service/help/ReadBook.kt
+++ b/app/src/main/java/io/legado/app/service/help/ReadBook.kt
@@ -415,22 +415,30 @@ object ReadBook {
else -> chapter.title
}
val contents = contentProcessor!!.getContent(book, chapter.title, content)
- val textChapter = ChapterProvider.getTextChapter(book, chapter,contents,chapterSize)
-
- val offset = chapter.index - durChapterIndex
- if (upContent) callBack?.upContent(offset ,resetPageOffset)
- when (offset) {
- 0 -> {
- curTextChapter = textChapter
+ when (chapter.index) {
+ durChapterIndex -> {
+ curTextChapter =
+ ChapterProvider.getTextChapter(
+ book, chapter, contents, chapterSize
+ )
+ if (upContent) callBack?.upContent(resetPageOffset = resetPageOffset)
callBack?.upView()
curPageChanged()
callBack?.contentLoadFinish()
}
- - 1 -> {
- prevTextChapter = textChapter
+ durChapterIndex - 1 -> {
+ prevTextChapter =
+ ChapterProvider.getTextChapter(
+ book, chapter, contents, chapterSize
+ )
+ if (upContent) callBack?.upContent(-1, resetPageOffset)
}
- 1 -> {
- nextTextChapter = textChapter
+ durChapterIndex + 1 -> {
+ nextTextChapter =
+ ChapterProvider.getTextChapter(
+ book, chapter, contents, chapterSize
+ )
+ if (upContent) callBack?.upContent(1, resetPageOffset)
}
}
}
diff --git a/app/src/main/java/io/legado/app/ui/book/read/page/provider/ChapterProvider.kt b/app/src/main/java/io/legado/app/ui/book/read/page/provider/ChapterProvider.kt
index ca3643caa..814c8736d 100644
--- a/app/src/main/java/io/legado/app/ui/book/read/page/provider/ChapterProvider.kt
+++ b/app/src/main/java/io/legado/app/ui/book/read/page/provider/ChapterProvider.kt
@@ -115,7 +115,15 @@ object ChapterProvider {
content.replace(AppPattern.imgPattern.toRegex(), "\n\$0\n")
.split("\n").forEach { text ->
if (text.isNotBlank()) {
- if (!text.startsWith("
+ durY = setTypeImage(
+ book, bookChapter, src,
+ durY, textPages, book.getImageStyle()
+ )
+ }
+ } else {
val isTitle = index == 0
val textPaint = if (isTitle) titlePaint else contentPaint
if (!(isTitle && ReadBookConfig.titleMode == 2)) {
@@ -124,11 +132,6 @@ object ChapterProvider {
stringBuilder, isTitle, textPaint
)
}
- } else { //图片
- durY = setTypeImage(
- book, bookChapter, text.substring(10, text.length-2),
- durY, textPages, book.getImageStyle()
- )
}
}
}
@@ -147,10 +150,7 @@ object ChapterProvider {
return TextChapter(
bookChapter.index, bookChapter.title,
- bookChapter.getAbsoluteURL().run{
- val pos = indexOf(',')
- if(pos == -1) this else substring(0,pos)
- },
+ bookChapter.getAbsoluteURL().split(AnalyzeUrl.splitUrlRegex)[0],
textPages, chapterSize
)
}
diff --git a/app/src/main/java/io/legado/app/utils/HtmlFormatter.kt b/app/src/main/java/io/legado/app/utils/HtmlFormatter.kt
index e5ab0972d..129d5018c 100644
--- a/app/src/main/java/io/legado/app/utils/HtmlFormatter.kt
+++ b/app/src/main/java/io/legado/app/utils/HtmlFormatter.kt
@@ -1,7 +1,8 @@
package io.legado.app.utils
+import io.legado.app.constant.AppPattern
+import io.legado.app.model.analyzeRule.AnalyzeUrl
import java.net.URL
-import java.util.regex.Pattern
object HtmlFormatter {
private val wrapHtmlRegex = "?(?:div|p|br|hr|h\\d|article|dd|dl)[^>]*>".toRegex()
@@ -13,7 +14,7 @@ object HtmlFormatter {
return html.replace(wrapHtmlRegex, "\n")
.replace(otherRegex, "")
.replace("\\s*\\n+\\s*".toRegex(), "\n ")
- .replace("^[\\n\\s]*".toRegex(), " ")
+ .replace("^[\\n\\s]+".toRegex(), " ")
.replace("[\\n\\s]+$".toRegex(), "")
}
@@ -21,64 +22,22 @@ object HtmlFormatter {
fun formatKeepImg(html: String?, redirectUrl: URL?): String {
html ?: return ""
- val keepImgHtml = html.replace(wrapHtmlRegex, "\n")
- .replace(notImgHtmlRegex, "")
- .replace("[\\n\\s]+\$|^[\\n\\s]*".toRegex(), "")
- .replace("\\s*\\n+\\s*".toRegex(), "\n")
-
- val sb = StringBuffer(" ") //前置缩减
- val hasDataType:Boolean //是否有数据属性
-
- //图片有data-开头的数据属性时优先用数据属性作为src,没有数据属性时匹配src
- val imgPattern = Pattern.compile(
- if(keepImgHtml.matches("]*data-".toRegex())) {
- hasDataType = true
- "]*data-[^=]*= *\"([^\"])\"[^>]*>"
- }
- else {
- hasDataType = false
- "]*src *= *\"([^\"{]+(?:\\{(?:[^{}]|\\{[^{}]*\\})*\\})?)\"[^>]*>"
- }, Pattern.CASE_INSENSITIVE
- )
-
- val matcher = imgPattern.matcher(keepImgHtml)
+ val keepImgHtml = formatKeepImg(html) // single-argument overload, defined outside this hunk; presumably does the HTML cleanup removed above - TODO confirm
+ val sb = StringBuffer() // accumulates the rewritten output
+ val matcher = AppPattern.imgPattern.matcher(keepImgHtml) // shared image pattern replaces the per-call Pattern.compile removed above
var appendPos = 0
-
- if(matcher.find()){
- if(hasDataType || matcher.group(1)!!.indexOf(',') == -1) { //图片无参
-
- do{
- sb.append(keepImgHtml.substring(appendPos, matcher.start()).replace("\n","\n ")) //非图片部分换行缩减
- sb.append( "" )
- appendPos = matcher.end()
- }while (matcher.find())
-
- }else{ //图片有参
-
- do{
- val url = matcher.group(1)!!
- val pos = url.indexOf(',')
- sb.append(keepImgHtml.substring(appendPos, matcher.start()).replace("\n","\n ")) //非图片部分换行缩减
- sb.append(
- ""
- )
- appendPos = matcher.end()
- }while(matcher.find())
-
+ while (matcher.find()) { // one iteration per image match
+ val urlArray = matcher.group(1)!!.split(AnalyzeUrl.splitUrlRegex) // split capture group 1 into the URL part and any trailing parts
+ var url = NetworkUtils.getAbsoluteURL(redirectUrl, urlArray[0]) // resolve the first part against redirectUrl
+ if (urlArray.size > 1) {
+ url = "$url,${urlArray[1]}" // NOTE(review): only the second part is re-attached; parts beyond index 1 are dropped
}
+ sb.append(keepImgHtml.substring(appendPos, matcher.start())) // copy the text between the previous match and this one
+ sb.append("") // NOTE(review): appends an empty literal as shown here and `url` is otherwise unused - the tag text may have been lost in extraction; confirm
+ appendPos = matcher.end() // continue after the matched image
}
-
if (appendPos < keepImgHtml.length) {
- sb.append(keepImgHtml.substring(appendPos, keepImgHtml.length).replace("\n","\n ")) //非图片部分换行缩减
+ sb.append(keepImgHtml.substring(appendPos, keepImgHtml.length)) // copy the remaining tail after the last match
}
return sb.toString()
}