From c24eb1b807b5a377bc5882fe7f85f0699169be44 Mon Sep 17 00:00:00 2001
From: bushixuanqi <57338301+bushixuanqi@users.noreply.github.com>
Date: Sun, 13 Jun 2021 20:16:59 +0800
Subject: [PATCH] Update HtmlFormatter.kt
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
简化网页格式化程序,并修复格式化图片时只格式化了第一张图片的问题。
---
.../java/io/legado/app/utils/HtmlFormatter.kt | 56 +++++++++----------
1 file changed, 25 insertions(+), 31 deletions(-)
diff --git a/app/src/main/java/io/legado/app/utils/HtmlFormatter.kt b/app/src/main/java/io/legado/app/utils/HtmlFormatter.kt
index 5a69f373e..626bbf70c 100644
--- a/app/src/main/java/io/legado/app/utils/HtmlFormatter.kt
+++ b/app/src/main/java/io/legado/app/utils/HtmlFormatter.kt
@@ -2,55 +2,49 @@ package io.legado.app.utils
import io.legado.app.model.analyzeRule.AnalyzeUrl
import java.net.URL
-import java.util.regex.Pattern
+import io.legado.app.constant.AppPattern
object HtmlFormatter {
private val wrapHtmlRegex = "?(?:div|p|br|hr|h\\d|article|dd|dl)[^>]*>".toRegex()
private val notImgHtmlRegex = "?(?!img)[a-zA-Z]+(?=[ >])[^<>]*>".toRegex()
private val otherHtmlRegex = "?[a-zA-Z]+(?=[ >])[^<>]*>".toRegex()
- private val imgPattern = Pattern.compile("]*src=.*?\"(.*?(?:,\\{.*\\})?)\".*?>")
- fun format(html: String?): String {
+ fun format(html: String?, otherRegex: Regex = otherHtmlRegex): String {
html ?: return ""
return html.replace(wrapHtmlRegex, "\n")
- .replace(otherHtmlRegex, "")
- .replace("\\s*\\n+\\s*".toRegex(), "\n ")
+ .replace(otherRegex, "")
.replace("^[\\n\\s]+".toRegex(), " ")
.replace("[\\n\\s]+$".toRegex(), "")
- }
-
- fun formatKeepImg(html: String?): String {
- html ?: return ""
- return html.replace(wrapHtmlRegex, "\n")
- .replace(notImgHtmlRegex, "")
.replace("\\s*\\n+\\s*".toRegex(), "\n ")
- .replace("^[\\n\\s]+".toRegex(), " ")
- .replace("[\\n\\s]+$".toRegex(), "")
}
+ fun formatKeepImg(html: String?) = format(html,notImgHtmlRegex)
+
fun formatKeepImg(html: String?, redirectUrl: URL?): String {
html ?: return ""
+ var formatHtml = formatKeepImg(html)
val sb = StringBuffer()
- val matcher = imgPattern.matcher(html)
var appendPos = 0
- while (matcher.find()) {
- val urlArray = matcher.group(1)!!.split(AnalyzeUrl.splitUrlRegex)
- var url = NetworkUtils.getAbsoluteURL(redirectUrl, urlArray[0])
- if (urlArray.size > 1) {
- url = "$url,${urlArray[1]}"
+ while (appendPos < formatHtml.length) {
+ val matcher = AppPattern.imgPattern.matcher(formatHtml)
+ if(matcher.find()) {
+ val urlArray = matcher.group(1)!!.split(AnalyzeUrl.splitUrlRegex)
+ var url = NetworkUtils.getAbsoluteURL(redirectUrl, urlArray[0])
+ if (urlArray.size > 1) {
+ url = "$url,${urlArray[1]}"
+ }
+ sb.append(formatHtml.substring(appendPos, matcher.start()))
+ sb.append("")
+ appendPos = matcher.end()
+ formatHtml = formatHtml.substring(appendPos, formatHtml.length)
+ appendPos = 0
+ } else {
+ sb.append(formatHtml)
+ appendPos = formatHtml.length
}
- sb.append(html.substring(appendPos, matcher.start()))
- sb.append("")
- appendPos = matcher.end()
- }
- if (appendPos < html.length) {
- sb.append(html.substring(appendPos, html.length))
}
- return sb.replace(wrapHtmlRegex, "\n")
- .replace(notImgHtmlRegex, "")
- .replace("\\s*\\n+\\s*".toRegex(), "\n ")
- .replace("^[\\n\\s]+".toRegex(), " ")
- .replace("[\\n\\s]+$".toRegex(), "")
+
+ return sb.toString()
}
-}
\ No newline at end of file
+}