格式化html时去除 &nbsp; 等

record2021
gedoor 3 years ago
parent 240cbf0eb1
commit d8adac1af1
  1. 8
      app/src/main/java/io/legado/app/utils/HtmlFormatter.kt

@ -6,6 +6,9 @@ import java.util.regex.Pattern
@Suppress("RegExpRedundantEscape")
object HtmlFormatter {
private val nbspRegex = "( )+".toRegex()
private val espRegex = "( | )".toRegex()
private val noPrintRegex = "( |‌|‍)".toRegex()
private val wrapHtmlRegex = "</?(?:div|p|br|hr|h\\d|article|dd|dl)[^>]*>".toRegex()
private val commentRegex = "<!--[^>]*-->".toRegex() //注释
private val notImgHtmlRegex = "</?(?!img)[a-zA-Z]+(?=[ >])[^<>]*>".toRegex()
@ -17,7 +20,10 @@ object HtmlFormatter {
fun format(html: String?, otherRegex: Regex = otherHtmlRegex): String {
html ?: return ""
return html.replace(wrapHtmlRegex, "\n")
return html.replace(nbspRegex, " ")
.replace(espRegex, " ")
.replace(noPrintRegex, "")
.replace(wrapHtmlRegex, "\n")
.replace(commentRegex, "")
.replace(otherRegex, "")
.replace("\\s*\\n+\\s*".toRegex(), "\n  ")

Loading…
Cancel
Save