Update HtmlFormatter.kt

简化网页格式化程序,并修复格式化图片时只格式化了第一张图片的问题。
pull/1058/head
bushixuanqi 3 years ago committed by GitHub
parent da3fb420f2
commit c24eb1b807
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 40
      app/src/main/java/io/legado/app/utils/HtmlFormatter.kt

@@ -2,55 +2,49 @@ package io.legado.app.utils
import io.legado.app.model.analyzeRule.AnalyzeUrl import io.legado.app.model.analyzeRule.AnalyzeUrl
import java.net.URL import java.net.URL
import java.util.regex.Pattern import io.legado.app.constant.AppPattern
object HtmlFormatter { object HtmlFormatter {
private val wrapHtmlRegex = "</?(?:div|p|br|hr|h\\d|article|dd|dl)[^>]*>".toRegex() private val wrapHtmlRegex = "</?(?:div|p|br|hr|h\\d|article|dd|dl)[^>]*>".toRegex()
private val notImgHtmlRegex = "</?(?!img)[a-zA-Z]+(?=[ >])[^<>]*>".toRegex() private val notImgHtmlRegex = "</?(?!img)[a-zA-Z]+(?=[ >])[^<>]*>".toRegex()
private val otherHtmlRegex = "</?[a-zA-Z]+(?=[ >])[^<>]*>".toRegex() private val otherHtmlRegex = "</?[a-zA-Z]+(?=[ >])[^<>]*>".toRegex()
private val imgPattern = Pattern.compile("<img [^>]*src=.*?\"(.*?(?:,\\{.*\\})?)\".*?>")
fun format(html: String?): String { fun format(html: String?, otherRegex: Regex = otherHtmlRegex): String {
html ?: return "" html ?: return ""
return html.replace(wrapHtmlRegex, "\n") return html.replace(wrapHtmlRegex, "\n")
.replace(otherHtmlRegex, "") .replace(otherRegex, "")
.replace("\\s*\\n+\\s*".toRegex(), "\n  ")
.replace("^[\\n\\s]+".toRegex(), "  ") .replace("^[\\n\\s]+".toRegex(), "  ")
.replace("[\\n\\s]+$".toRegex(), "") .replace("[\\n\\s]+$".toRegex(), "")
}
fun formatKeepImg(html: String?): String {
html ?: return ""
return html.replace(wrapHtmlRegex, "\n")
.replace(notImgHtmlRegex, "")
.replace("\\s*\\n+\\s*".toRegex(), "\n  ") .replace("\\s*\\n+\\s*".toRegex(), "\n  ")
.replace("^[\\n\\s]+".toRegex(), "  ")
.replace("[\\n\\s]+$".toRegex(), "")
} }
fun formatKeepImg(html: String?) = format(html,notImgHtmlRegex)
fun formatKeepImg(html: String?, redirectUrl: URL?): String { fun formatKeepImg(html: String?, redirectUrl: URL?): String {
html ?: return "" html ?: return ""
var formatHtml = formatKeepImg(html)
val sb = StringBuffer() val sb = StringBuffer()
val matcher = imgPattern.matcher(html)
var appendPos = 0 var appendPos = 0
while (matcher.find()) { while (appendPos < formatHtml.length) {
val matcher = AppPattern.imgPattern.matcher(formatHtml)
if(matcher.find()) {
val urlArray = matcher.group(1)!!.split(AnalyzeUrl.splitUrlRegex) val urlArray = matcher.group(1)!!.split(AnalyzeUrl.splitUrlRegex)
var url = NetworkUtils.getAbsoluteURL(redirectUrl, urlArray[0]) var url = NetworkUtils.getAbsoluteURL(redirectUrl, urlArray[0])
if (urlArray.size > 1) { if (urlArray.size > 1) {
url = "$url,${urlArray[1]}" url = "$url,${urlArray[1]}"
} }
sb.append(html.substring(appendPos, matcher.start())) sb.append(formatHtml.substring(appendPos, matcher.start()))
sb.append("<img src=\"$url\" >") sb.append("<img src=\"$url\" >")
appendPos = matcher.end() appendPos = matcher.end()
formatHtml = formatHtml.substring(appendPos, formatHtml.length)
appendPos = 0
} else {
sb.append(formatHtml)
appendPos = formatHtml.length
} }
if (appendPos < html.length) {
sb.append(html.substring(appendPos, html.length))
} }
return sb.replace(wrapHtmlRegex, "\n")
.replace(notImgHtmlRegex, "") return sb.toString()
.replace("\\s*\\n+\\s*".toRegex(), "\n  ")
.replace("^[\\n\\s]+".toRegex(), "  ")
.replace("[\\n\\s]+$".toRegex(), "")
} }
} }
Loading…
Cancel
Save