// =============================================================================
// Reconstructed from a flattened `git format-patch` mail. The extraction had
// collapsed all line breaks and dropped angle-bracketed tokens (generic type
// arguments, the "<js>" / "</js>" literals, "&nbsp;"); they are restored below.
//
//   commit   1f9acf5ae4742098467a39efdcfd26cd784c57fd
//   From:    kunfei
//   Date:    Fri, 28 Jun 2019 16:26:24 +0800
//   Subject: [PATCH] up
//
//   app/src/main/java/io/legado/app/constant/AppConst.kt             (modified)
//   app/src/main/java/io/legado/app/constant/Pattern.kt              (new file)
//   app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeUrl.kt  (new file)
//   app/src/main/java/io/legado/app/utils/NetworkUtils.kt            (new file)
//   app/src/main/java/io/legado/app/utils/StringExtensions.kt        (modified)
// =============================================================================


// -----------------------------------------------------------------------------
// app/src/main/java/io/legado/app/constant/AppConst.kt
// (modified file, index 863f9e27a..0d98ed75e; only the lines visible in the
//  patch hunks are reproduced, the patch adds the import and SCRIPT_ENGINE)
// -----------------------------------------------------------------------------
package io.legado.app.constant

import io.legado.app.App
import io.legado.app.R
import javax.script.ScriptEngineManager

object AppConst {
    const val channelIdDownload = "channel_download"

    // ... declarations between the two patch hunks are not part of the patch ...

    const val APP_TAG = "Legado"
    const val RC_IMPORT_YUEDU_DATA = 100

    // Shared script engine used to evaluate JS fragments embedded in rules.
    // getEngineByName returns null when no engine is registered under "rhino",
    // so this value is effectively nullable.
    val SCRIPT_ENGINE = ScriptEngineManager().getEngineByName("rhino")

    val NOT_AVAILABLE = App.INSTANCE.getString(R.string.not_available)
}


// -----------------------------------------------------------------------------
// app/src/main/java/io/legado/app/constant/Pattern.kt   (new file)
// -----------------------------------------------------------------------------
package io.legado.app.constant

import java.util.regex.Pattern

/**
 * Pre-compiled regular expressions shared by the rule analyzers.
 */
object Pattern {
    /**
     * Matches one JS step of a rule: either `<js>...</js>` (non-greedy) or `@js:` followed by
     * everything up to the end of the input. Matching is case-insensitive.
     */
    val JS_PATTERN = Pattern.compile("(<js>[\\w\\W]*?</js>|@js:[\\w\\W]*$)", Pattern.CASE_INSENSITIVE)

    /** Matches an inline `{{expression}}`; group 1 is the text between the double braces. */
    val EXP_PATTERN = Pattern.compile("\\{\\{([\\w\\W]*?)\\}\\}")
}


// -----------------------------------------------------------------------------
// app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeUrl.kt   (new file)
// -----------------------------------------------------------------------------
package io.legado.app.model.analyzeRule

import android.annotation.SuppressLint
import android.text.TextUtils
import androidx.annotation.Keep
import io.legado.app.constant.AppConst.SCRIPT_ENGINE
import io.legado.app.constant.Pattern.EXP_PATTERN
import io.legado.app.constant.Pattern.JS_PATTERN
import io.legado.app.utils.NetworkUtils
import java.net.URLEncoder
import java.util.*
import java.util.regex.Matcher
import java.util.regex.Pattern
import javax.script.SimpleBindings


/**
 * Created by GKF on 2018/1/24.
 * Parses a search URL rule into a concrete request description.
 *
 * The rule is processed in this order:
 *  1. caller supplied headers are copied into the header map;
 *  2. the literal `searchKey` is replaced by [key] (when not blank);
 *  3. an optional `|char=<charset>` suffix is split off and remembered;
 *  4. `{{expr}}` fragments are evaluated as JS (bindings: `baseUrl`, `searchPage`, `searchKey`);
 *  5. `{a,b,c}` page alternatives and the `searchPage`, `searchPage-1`, `searchPage+1`
 *     placeholders are substituted;
 *  6. the remaining rule is split into steps: `<js>...</js>` and `@js:...` steps are evaluated
 *     with the running value bound to `result`; any other step replaces `@result` in its text;
 *  7. `url@body` selects [UrlMode.POST], `url?query` selects [UrlMode.GET], otherwise
 *     [UrlMode.DEFAULT]; body/query parameters are parsed into the query map.
 *
 * @param ruleUrl    the raw URL rule
 * @param key        search keyword, or null
 * @param page       page number, or null when paging does not apply
 * @param headerMapF request headers supplied by the caller, or null
 * @param baseUrl    base used to resolve a relative rule, or null/empty for none
 * @throws Exception when a page greater than 1 is requested but the rule has no `searchPage`
 *                   placeholder, when a JS fragment fails, or when no http(s) host can be
 *                   derived from the resolved URL
 */
@Keep
class AnalyzeUrl @SuppressLint("DefaultLocale")
@Throws(Exception::class)
constructor(ruleUrl: String, key: String?, page: Int?, headerMapF: Map<String, String>?, baseUrl: String?) {
    private var baseUrl: String? = null
    var url: String? = null
        private set
    var host: String? = null
        private set
    var path: String? = null
        private set
    var queryStr: String? = null
        private set
    private val queryMap = LinkedHashMap<String, String>()
    private val headerMap = HashMap<String, String>()
    private var charCode: String? = null
    var urlMode = UrlMode.DEFAULT
        private set

    /**
     * The query map rendered as `name=value&name=value` bytes (platform default charset).
     * An empty query map yields an empty array (the previous implementation threw in that case).
     */
    val postData: ByteArray
        get() = queryMap.entries
            .joinToString("&") { (name, value) -> "$name=$value" }
            .toByteArray()

    @Throws(Exception::class)
    constructor(urlRule: String) : this(urlRule, null, null, null, null)

    @Throws(Exception::class)
    constructor(urlRule: String, headerMapF: Map<String, String>, baseUrl: String) : this(
        urlRule,
        null,
        null,
        headerMapF,
        baseUrl
    )

    init {
        // Remember the base URL so generateUrlPath can resolve relative rules against it.
        // Previously the field was never assigned, which made every construction fail.
        if (!TextUtils.isEmpty(baseUrl)) {
            // NOTE(review): the pending port also stripped an embedded header spec here:
//            this.baseUrl = headerPattern.matcher(baseUrl).replaceAll("")
            this.baseUrl = baseUrl
        }
        // Parse headers
        var resolved = analyzeHeader(ruleUrl, headerMapF)
        // Substitute the search keyword
        if (!key.isNullOrBlank()) {
            resolved = resolved.replace("searchKey", key)
        }
        // Split off the charset option
        resolved = splitCharCode(resolved)
        // A page beyond the first needs a page placeholder in the rule
        if (page != null && page > 1 && !resolved.contains("searchPage")) {
            throw Exception("没有下一页")
        }
        // Evaluate inline {{js}} fragments
        resolved = replaceJs(resolved, baseUrl, page, key)
        // Substitute the page number
        resolved = analyzePage(resolved, page)
        // Run the rule steps in order, feeding each step the previous result
        for (rule in splitRule(resolved)) {
            resolved = when {
                // JS_PATTERN is case-insensitive, so the prefix test must be as well.
                rule.startsWith("<js>", ignoreCase = true) ->
                    evalJS(rule.substring(4, rule.lastIndexOf("<")), resolved).toString()
                // "@js:" steps are captured by JS_PATTERN too; "@js:" is 4 characters long.
                rule.startsWith("@js:", ignoreCase = true) ->
                    evalJS(rule.substring(4), resolved).toString()
                else -> rule.replace("@result", resolved)
            }
        }
        // Split off POST parameters. limit = 2 keeps any further '@' inside the body
        // instead of silently discarding what follows it.
        var parts = resolved.split("@", limit = 2).dropLastWhile { it.isEmpty() }
        if (parts.size > 1) {
            urlMode = UrlMode.POST
        } else {
            // Split off GET parameters (same reasoning for limit = 2)
            parts = parts.firstOrNull().orEmpty().split("?", limit = 2).dropLastWhile { it.isEmpty() }
            if (parts.size > 1) {
                urlMode = UrlMode.GET
            }
        }
        generateUrlPath(parts.firstOrNull().orEmpty())
        if (urlMode != UrlMode.DEFAULT) {
            analyzeQuery(parts[1])
        }
    }

    /**
     * Collects request headers: copies the caller supplied [headerMapF] into the header map
     * and returns [ruleUrl] unchanged.
     */
    private fun analyzeHeader(ruleUrl: String, headerMapF: Map<String, String>?): String {
        headerMapF?.let { headerMap.putAll(it) }
        // TODO: headers embedded in the rule itself are not parsed yet. The pending port,
        // which needs `headerPattern` and a Gson `fromJson` helper, is kept for reference:
//        val matcher = headerPattern.matcher(ruleUrl)
//        if (matcher.find()) {
//            var find = matcher.group(0)
//            ruleUrl = ruleUrl.replace(find, "")
//            find = find.substring(8)
//            try {
//                val map = Gson().fromJson<Map<String, String>>(find)
//                headerMap.putAll(map)
//            } catch (ignored: Exception) {
//            }
//        }
        return ruleUrl
    }

    /**
     * Splits the charset option off a rule of the form `url|name=value&name=value`.
     * A `char=<value>` entry with a non-empty value is stored in [charCode]; other entries
     * are ignored.
     *
     * @return the part before the first `|`, or an empty string when there is none
     */
    private fun splitCharCode(rule: String): String {
        val sections = rule.split("|").dropLastWhile { it.isEmpty() }
        sections.getOrNull(1)?.split("&")?.forEach { option ->
            val pair = option.split("=").dropLastWhile { it.isEmpty() }
            // size check: "char=" without a value used to index past the end
            if (pair.size > 1 && pair[0] == "char") {
                charCode = pair[1]
            }
        }
        return sections.firstOrNull().orEmpty()
    }

    /**
     * Substitutes the page number into the rule.
     *
     * Every `{a,b,c}` group is replaced by its [searchPage]-th entry (1-based, trimmed); a
     * page outside the list is clamped to the first/last entry and an empty group is left
     * untouched. Afterwards `searchPage-1`, `searchPage+1` and `searchPage` are replaced by
     * the corresponding numbers. Returns [ruleUrl] unchanged when [searchPage] is null.
     */
    private fun analyzePage(ruleUrl: String, searchPage: Int?): String {
        if (searchPage == null) return ruleUrl
        var result = ruleUrl
        val matcher = pagePattern.matcher(ruleUrl)
        while (matcher.find()) {
            val pages = matcher.group(1).split(",").dropLastWhile { it.isEmpty() }
            if (pages.isEmpty()) continue
            val chosen = pages[(searchPage - 1).coerceIn(0, pages.lastIndex)]
            result = result.replace(matcher.group(), chosen.trim { it <= ' ' })
        }
        // Order matters: the longer placeholders must be replaced before plain "searchPage".
        return result.replace("searchPage-1", (searchPage - 1).toString())
            .replace("searchPage+1", (searchPage + 1).toString())
            .replace("searchPage", searchPage.toString())
    }

    /**
     * Replaces every `{{expr}}` in [ruleUrl] by the result of evaluating `expr` with the
     * shared script engine. `baseUrl`, `searchPage` and `searchKey` are available to the
     * script. Whole-number doubles are rendered without a fractional part.
     *
     * @throws Exception when the script engine fails to evaluate an expression
     */
    @Throws(Exception::class)
    private fun replaceJs(ruleUrl: String, baseUrl: String?, searchPage: Int?, searchKey: String?): String {
        if (!(ruleUrl.contains("{{") && ruleUrl.contains("}}"))) return ruleUrl
        val bindings = SimpleBindings().apply {
            this["baseUrl"] = baseUrl
            this["searchPage"] = searchPage
            this["searchKey"] = searchKey
        }
        val sb = StringBuffer(ruleUrl.length)
        val expMatcher = EXP_PATTERN.matcher(ruleUrl)
        while (expMatcher.find()) {
            val jsEval: Any? = SCRIPT_ENGINE.eval(expMatcher.group(1), bindings)
            val text = when {
                jsEval is String -> jsEval
                // Locale.ROOT keeps the digits ASCII regardless of the device locale.
                jsEval is Double && jsEval % 1.0 == 0.0 -> String.format(Locale.ROOT, "%.0f", jsEval)
                else -> jsEval.toString()
            }
            // quoteReplacement: '$' and '\' in the script output must be inserted literally,
            // appendReplacement would otherwise treat them as group references / escapes.
            expMatcher.appendReplacement(sb, Matcher.quoteReplacement(text))
        }
        expMatcher.appendTail(sb)
        return sb.toString()
    }

    /**
     * Parses `name=value&name=value` into the query map and stores the raw string in
     * [queryStr]. Values are URL-encoded with [charCode] when it is set, otherwise with
     * UTF-8 unless [NetworkUtils.hasUrlEncoded] reports the value as already encoded.
     * Empty segments (as in `a=1&&b=2`) are skipped and a value keeps any `=` it contains.
     *
     * @throws Exception when [charCode] names an unsupported charset
     */
    @Throws(Exception::class)
    private fun analyzeQuery(allQuery: String) {
        queryStr = allQuery
        val charset = charCode
        for (query in allQuery.split("&")) {
            if (query.isEmpty()) continue
            val name = query.substringBefore("=")
            val value = query.substringAfter("=", "")
            when {
                charset.isNullOrEmpty() ->
                    queryMap[name] =
                        if (NetworkUtils.hasUrlEncoded(value)) value else URLEncoder.encode(value, "UTF-8")
                charset == "escape" -> {
                    // NOTE(review): the "escape" encoder is not ported yet, so the parameter
                    // is currently left out of the query map.
//                    queryMap[name] = StringUtils.escape(value)
                }
                else -> queryMap[name] = URLEncoder.encode(value, charset)
            }
        }
    }

    /**
     * Splits a rule into ordered steps: each [JS_PATTERN] match becomes one step, and the
     * text between matches (newlines removed, trimmed) becomes a step when it is not empty.
     */
    private fun splitRule(ruleStr: String): List<String> {
        val ruleList = ArrayList<String>()

        // Adds a plain-text segment unless it is empty after clean-up.
        fun addPlain(segment: String) {
            val cleaned = segment.replace("\n", "").trim { it <= ' ' }
            if (cleaned.isNotEmpty()) {
                ruleList.add(cleaned)
            }
        }

        val jsMatcher = JS_PATTERN.matcher(ruleStr)
        var start = 0
        while (jsMatcher.find()) {
            if (jsMatcher.start() > start) {
                addPlain(ruleStr.substring(start, jsMatcher.start()))
            }
            ruleList.add(jsMatcher.group())
            start = jsMatcher.end()
        }
        if (ruleStr.length > start) {
            addPlain(ruleStr.substring(start))
        }
        return ruleList
    }

    /**
     * Resolves [ruleUrl] against the stored base URL (when there is one) and splits the
     * result into [url], [host] (scheme and authority) and [path] (the remainder).
     *
     * @throws IllegalArgumentException when the resolved URL has no http(s) host
     */
    private fun generateUrlPath(ruleUrl: String) {
        // getAbsoluteURL returns ruleUrl unchanged for an empty base.
        val absoluteUrl = NetworkUtils.getAbsoluteURL(baseUrl.orEmpty(), ruleUrl)
        val hostUrl = NetworkUtils.getBaseUrl(absoluteUrl)
            ?: throw IllegalArgumentException("Cannot determine the host of url: $absoluteUrl")
        url = absoluteUrl
        host = hostUrl
        path = absoluteUrl.substring(hostUrl.length)
    }

    /**
     * Evaluates [jsStr] with the shared script engine; [result] is exposed to the script as
     * the variable `result`.
     *
     * @throws Exception when evaluation fails or the script yields no value
     */
    @Throws(Exception::class)
    private fun evalJS(jsStr: String, result: Any): Any {
        val bindings = SimpleBindings()
        bindings["result"] = result
        return checkNotNull(SCRIPT_ENGINE.eval(jsStr, bindings)) { "JS rule returned no value" }
    }

    /** Returns the parsed (and encoded) query/body parameters in rule order. */
    fun getQueryMap(): Map<String, String> {
        return queryMap
    }

    /** Returns the request headers collected for this URL. */
    fun getHeaderMap(): Map<String, String> {
        return headerMap
    }

    enum class UrlMode {
        GET, POST, DEFAULT
    }

    companion object {
        // A single-brace group such as {a,b,c}; group 1 is the comma separated list.
        private val pagePattern = Pattern.compile("\\{(.*?)\\}")
    }
}


// -----------------------------------------------------------------------------
// app/src/main/java/io/legado/app/utils/NetworkUtils.kt   (new file)
// -----------------------------------------------------------------------------
package io.legado.app.utils

import android.text.TextUtils
import java.net.URL
import java.util.*

object NetworkUtils {

    // Characters that hasUrlEncoded accepts as-is: ASCII letters and digits plus
    // + - _ . $ : ( ) ! * @ & # , [ ]
    private val notNeedEncoding: BitSet by lazy {
        BitSet(256).apply {
            // BitSet.set(from, to) treats `to` as exclusive, hence the + 1.
            set('a'.toInt(), 'z'.toInt() + 1)
            set('A'.toInt(), 'Z'.toInt() + 1)
            set('0'.toInt(), '9'.toInt() + 1)
            for (c in charArrayOf('+', '-', '_', '.', '$', ':', '(', ')', '!', '*', '@', '&', '#', ',', '[', ']')) {
                set(c.toInt())
            }
        }
    }

    /**
     * Heuristically decides whether [str] is already URL-encoded.
     *
     * Returns true when every character is either in the accepted set (ASCII letters and
     * digits plus `+ - _ . $ : ( ) ! * @ & # , [ ]`) or part of a `%XX` escape with two
     * hexadecimal digits. Any other character means the string still needs encoding.
     * An empty string is reported as encoded.
     */
    fun hasUrlEncoded(str: String): Boolean {
        var i = 0
        while (i < str.length) {
            val c = str[i]
            when {
                notNeedEncoding.get(c.toInt()) -> i++
                // '%' must be followed by two hex digits to count as an escape
                c == '%' && i + 2 < str.length &&
                        isDigit16Char(str[i + 1]) && isDigit16Char(str[i + 2]) -> i += 3
                // anything else definitely needs encoding
                else -> return false
            }
        }
        return true
    }

    /**
     * Returns true when [c] is a hexadecimal digit. Lowercase a-f is accepted as well,
     * because percent-escapes are case-insensitive.
     */
    private fun isDigit16Char(c: Char): Boolean {
        return c in '0'..'9' || c in 'A'..'F' || c in 'a'..'f'
    }

    /**
     * Resolves [relativePath] against [baseURL].
     *
     * Returns [relativePath] unchanged when [baseURL] is empty or when either value cannot
     * be parsed as a URL (the failure is printed, not rethrown).
     */
    fun getAbsoluteURL(baseURL: String, relativePath: String): String {
        if (TextUtils.isEmpty(baseURL)) return relativePath
        return try {
            URL(URL(baseURL), relativePath).toString()
        } catch (e: Exception) {
            // best effort: fall back to the unresolved value
            e.printStackTrace()
            relativePath
        }
    }

    /**
     * Returns the scheme-and-host prefix of [url] (everything before the first `/` that
     * follows `://`), the whole [url] when it has no such `/`, or null when [url] is null
     * or does not start with "http".
     */
    fun getBaseUrl(url: String?): String? {
        if (url == null || !url.startsWith("http")) return null
        // Start looking right after "://". The former fixed offset of 9 skipped the first
        // path slash of short hosts such as "http://a/b". The offset is only kept as the
        // fallback for input without "://".
        val schemeEnd = url.indexOf("://")
        val searchFrom = if (schemeEnd == -1) 9 else schemeEnd + 3
        val index = url.indexOf("/", searchFrom)
        return if (index == -1) url else url.substring(0, index)
    }

}


// -----------------------------------------------------------------------------
// app/src/main/java/io/legado/app/utils/StringExtensions.kt
// (modified file, index 2706e05dd..2abbf078d; only the lines visible in the
//  patch hunks are reproduced, the patch adds the TextUtils import, isJson and
//  htmlFormat)
// -----------------------------------------------------------------------------
package io.legado.app.utils

import android.text.TextUtils

// import org.apache.commons.text.StringEscapeUtils

fun String?.safeTrim() = if (this.isNullOrBlank()) null else this.trim()

fun String.isAbsUrl() = this.startsWith("http://", true) || this.startsWith("https://", true)

/**
 * Cheap structural check: true when the trimmed string is wrapped in `{...}` or `[...]`.
 * The content in between is not validated.
 */
fun String.isJson(): Boolean {
    val str = this.trim()
    return (str.startsWith("{") && str.endsWith("}")) ||
            (str.startsWith("[") && str.endsWith("]"))
}

/**
 * Converts an HTML fragment to plain text paragraphs; an empty string stays empty.
 * The indent is two U+3000 (ideographic space) characters.
 */
fun String.htmlFormat(): String = if (TextUtils.isEmpty(this)) "" else
    this.replace("(?i)<(br[\\s/]*|/*p.*?|/*div.*?)>".toRegex(), "\n") // turn br/p/div tags into line breaks
        .replace("<[script>]*.*?>|&nbsp;".toRegex(), "") // drop remaining tags and the &nbsp; entity
        .replace("\\s*\\n+\\s*".toRegex(), "\n\u3000\u3000") // collapse blank lines and indent each paragraph
        .replace("^[\\n\\s]+".toRegex(), "\u3000\u3000") // replace leading blank lines by the indent
        .replace("[\\n\\s]+$".toRegex(), "") // strip trailing blank lines

fun String.splitNotBlank(delim: String) = if (!this.contains(delim)) sequenceOf(this) else
    this.split(delim).asSequence().map { it.trim() }.filterNot { it.isBlank() }