Merge pull request #1106 from bushixuanqi/master

整合优化修复
pull/1108/head
kunfei 3 years ago committed by GitHub
commit 8d8b1e0f39
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 288
      app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeByJSoup.kt
  2. 231
      app/src/main/java/io/legado/app/model/analyzeRule/RuleAnalyzer.kt

@ -137,11 +137,10 @@ class AnalyzeByJSoup(doc: Any) {
} }
} else { } else {
for (ruleStr in ruleStrS) { for (ruleStr in ruleStrS) {
//将原getElementsSingle函数调用的函数的部分代码内联过来,方便简化getElementsSingle函数
val rsRule = RuleAnalyzer(ruleStr) val rsRule = RuleAnalyzer(ruleStr)
if( rsRule.peek() =='@' || rsRule.peek() < '!' ) rsRule.advance() // 修剪当前规则之前的"@"或者空白符 rsRule.trim() // 修剪当前规则之前的"@"或者空白符
val rs = rsRule.splitRule("@") val rs = rsRule.splitRule("@")
@ -157,7 +156,7 @@ class AnalyzeByJSoup(doc: Any) {
el.addAll(es) el.addAll(es)
} }
el el
}else getElementsSingle(temp,ruleStr) }else ElementsSingle().getElementsSingle(temp,ruleStr)
elementsList.add(el) elementsList.add(el)
if (el.size > 0 && ruleAnalyzes.elementsType == "||") { if (el.size > 0 && ruleAnalyzes.elementsType == "||") {
@ -183,6 +182,131 @@ class AnalyzeByJSoup(doc: Any) {
return elements return elements
} }
/**
 * Evaluates a complete rule string, starting from `element`, and returns the extracted strings.
 *
 * The rule is split on "@". Every segment except the last narrows the working element set;
 * the last segment is passed to [getResultLast] to read content from whatever is left.
 *
 * @param ruleStr the rule to evaluate
 * @return the extracted strings, or null when [ruleStr] is empty or no element remains
 */
private fun getResultList(ruleStr: String): List<String>? {
    if (ruleStr.isEmpty()) return null

    var current = Elements()
    current.add(element)

    val analyzer = RuleAnalyzer(ruleStr)
    analyzer.trim() // drop leading '@' / whitespace before splitting
    val segments = analyzer.splitRule("@")
    val lastIndex = segments.size - 1

    var index = 0
    while (index < lastIndex) {
        val narrowed = Elements()
        // A new ElementsSingle per lookup: findIndexSet stores its parse result on the instance.
        current.forEach { node ->
            narrowed.addAll(ElementsSingle().getElementsSingle(node, segments[index]))
        }
        current.clear()
        current = narrowed
        index++
    }

    if (current.isEmpty()) return null
    return getResultLast(current, segments[lastIndex])
}
/**
 * Extracts the final content from [elements] according to the last segment of a rule.
 *
 * Supported values of [lastRule]:
 * - "text": the text of each element
 * - "textNodes": each element's non-empty text nodes, trimmed and joined with "\n"
 * - "ownText": each element's own text
 * - "html": the outer HTML of the set after `script` and `style` matches are removed
 * - "all": the outer HTML of the set, unmodified
 * - anything else: treated as an attribute name; distinct, non-empty values are collected
 *
 * Any exception is caught and printed; whatever was collected up to that point is returned.
 *
 * @param elements the elements to read from
 * @param lastRule the final rule segment (a keyword above or an attribute name)
 * @return the collected strings, possibly empty
 */
private fun getResultLast(elements: Elements, lastRule: String): List<String> {
    val textS = ArrayList<String>()
    try {
        when (lastRule) {
            "text" -> elements.mapTo(textS) { it.text() }
            "textNodes" -> for (element in elements) {
                val tn = element.textNodes()
                    .map { node -> node.text().trim { it <= ' ' } }
                    .filter { it.isNotEmpty() }
                textS.add(join("\n", tn))
            }
            "ownText" -> elements.mapTo(textS) { it.ownText() }
            "html" -> {
                // Strip scripts and styles before serialising; this modifies the selected nodes.
                elements.select("script").remove()
                elements.select("style").remove()
                textS.add(elements.outerHtml())
            }
            "all" -> textS.add(elements.outerHtml())
            else -> for (element in elements) {
                val url = element.attr(lastRule)
                // Skip this element only. The previous `break` discarded every later element
                // as soon as one lacked the attribute or repeated an earlier value.
                if (url.isEmpty() || textS.contains(url)) continue
                textS.add(url)
            }
        }
    } catch (e: Exception) {
        // Best effort: report the failure and return what has been collected so far.
        e.printStackTrace()
    }
    return textS
}
data class ElementsSingle(var split:Char = '.',
var beforeRule:String = "",
val indexDefault:MutableList<Int> = mutableListOf(),
val indexs:MutableList<Any> = mutableListOf()){
/**
 * Selects elements under [temp] using a single rule segment.
 *
 * [findIndexSet] first splits [rule] into a selector part (stored in `beforeRule`) and an
 * optional index list (with the filter mode stored in `split`). The selector is resolved
 * against [temp], then the index list is applied: '!' excludes the indexed elements,
 * '.' keeps only the indexed elements, and any other mode leaves the selection untouched.
 *
 * @param temp the element to search in
 * @param rule one rule segment, e.g. a `class.`/`tag.`/`id.`/`text.` form or a jsoup selector
 * @return the matching elements after index filtering
 */
fun getElementsSingle(temp: Element, rule: String): Elements {
    var elements = Elements()
    findIndexSet(rule) // fills beforeRule / split / index lists on this instance

    val parts = beforeRule.split(".")
    val candidates: Elements = if (beforeRule.isEmpty()) {
        // A bare index list acts on the direct children, same as "children".
        temp.children()
    } else {
        when (parts[0]) {
            "children" -> temp.children()
            "class" -> temp.getElementsByClass(parts[1])
            "tag" -> temp.getElementsByTag(parts[1])
            "id" -> Collector.collect(Evaluator.Id(parts[1]), temp)
            "text" -> temp.getElementsContainingOwnText(parts[1])
            else -> temp.select(beforeRule)
        }
    }
    elements.addAll(candidates)

    // Per the original note, getIndexs resolves negative and out-of-range indexes
    // against the element count and returns the usable positions.
    val positions = getIndexs(elements.size)
    when (split) {
        '!' -> {
            // Exclude: blank out the indexed slots, then drop the blanks.
            for (position in positions) elements[position] = null
            elements.removeAll(listOf(null))
        }
        '.' -> {
            // Select: keep only the indexed elements, in index-set order.
            val chosen = Elements()
            for (position in positions) chosen.add(elements[position])
            elements = chosen
        }
    }
    return elements
}
/** /**
* 1.支持阅读原有写法':'分隔索引!.表示筛选方式索引可为负数 * 1.支持阅读原有写法':'分隔索引!.表示筛选方式索引可为负数
* *
@ -201,10 +325,8 @@ class AnalyzeByJSoup(doc: Any) {
* 特殊用法 tag.div[-1:0] 可在任意地方让列表反向 * 特殊用法 tag.div[-1:0] 可在任意地方让列表反向
* *
* */ * */
fun findIndexSet( rule:String ): ElementsSingle {
fun findIndexSet( rule:String ): IndexSet {
val indexSet = IndexSet()
val rus = rule.trim{ it <= ' '} val rus = rule.trim{ it <= ' '}
var len = rus.length var len = rus.length
@ -241,25 +363,26 @@ class AnalyzeByJSoup(doc: Any) {
if(curInt == null) break //是jsoup选择器而非索引列表,跳出 if(curInt == null) break //是jsoup选择器而非索引列表,跳出
indexSet.indexs.add(curInt) indexs.add(curInt)
} }
else{ else{
//列表最后压入的是区间右端,若列表有两位则最先压入的是间隔 //列表最后压入的是区间右端,若列表有两位则最先压入的是间隔
indexSet.indexs.add( Triple(curInt, curList.last(), if(curList.size == 2) curList.first() else 1) ) indexs.add( Triple(curInt, curList.last(), if(curList.size == 2) curList.first() else 1) )
curList.clear() //重置临时列表,避免影响到下个区间的处理 curList.clear() //重置临时列表,避免影响到下个区间的处理
} }
if(rl == '!'){ if(rl == '!'){
indexSet.split='!' split='!'
do{ rl = rus[--len] } while (len > 0 && rl == ' ')//跳过所有空格 do{ rl = rus[--len] } while (len > 0 && rl == ' ')//跳过所有空格
} }
if(rl == '[') return indexSet.apply { if(rl == '[') {
beforeRule = rus.substring(0, len) beforeRule = rus.substring(0, len) //遇到索引边界,返回结果
} //遇到索引边界,返回结果 return this
}
if(rl != ',') break //非索引结构,跳出 if(rl != ',') break //非索引结构,跳出
@ -281,11 +404,12 @@ class AnalyzeByJSoup(doc: Any) {
if(rl == '!' || rl == '.' || rl == ':') { //分隔符或起始符 if(rl == '!' || rl == '.' || rl == ':') { //分隔符或起始符
indexSet.indexDefault.add(if (curMinus) -l.toInt() else l.toInt()) // 当前数字追加到列表 indexDefault.add(if (curMinus) -l.toInt() else l.toInt()) // 当前数字追加到列表
if (rl != ':') return indexSet.apply { //rl == '!' || rl == '.' if (rl != ':'){ //rl == '!' || rl == '.'
split = rl split = rl
beforeRule = rus.substring(0, len) beforeRule = rus.substring(0, len)
return this
} }
}else break //非索引结构,跳出循环 }else break //非索引结构,跳出循环
@ -296,142 +420,12 @@ class AnalyzeByJSoup(doc: Any) {
} }
return indexSet.apply{
split = ' ' split = ' '
beforeRule = rus } //非索引格式 beforeRule = rus
} return this //非索引格式
/**
 * Selects elements under [temp] using a single rule segment.
 *
 * [findIndexSet] separates the selector text from the optional index list. The selector is
 * resolved against [temp], then the index list either excludes ('!') or selects ('.') the
 * indexed elements; any other filter type leaves the selection as it is.
 *
 * Note carried over from the original: in jsoup the current node takes part in selection,
 * so `tag.div` and `tag.div@tag.div` give the same result. A ">" separator behaved exactly
 * like "@" here and could be mistaken for a child selector, so it is no longer supported.
 *
 * @param temp the element to search in
 * @param rule one rule segment
 * @return the matching elements after index filtering
 */
private fun getElementsSingle(temp: Element, rule: String): Elements {
    var elements = Elements()

    val parsed = findIndexSet(rule)
    val filterType = parsed.split      // filter mode
    val selector = parsed.beforeRule   // rule text without the index part

    val parts = selector.split(".")
    val candidates: Elements = if (selector.isEmpty()) {
        // A bare index list acts on the direct children, same as "children".
        temp.children()
    } else {
        when (parts[0]) {
            "children" -> temp.children()
            "class" -> temp.getElementsByClass(parts[1])
            "tag" -> temp.getElementsByTag(parts[1])
            "id" -> Collector.collect(Evaluator.Id(parts[1]), temp)
            "text" -> temp.getElementsContainingOwnText(parts[1])
            else -> temp.select(selector)
        }
    }
    elements.addAll(candidates)

    // Per the original note, getIndexs resolves negative and out-of-range indexes
    // against the element count and returns the usable positions.
    val positions = parsed.getIndexs(elements.size)
    when (filterType) {
        '!' -> {
            // Exclude: blank out the indexed slots, then drop the blanks.
            for (position in positions) elements[position] = null
            elements.removeAll(listOf(null))
        }
        '.' -> {
            // Select: keep only the indexed elements.
            val chosen = Elements()
            for (position in positions) chosen.add(elements[position])
            elements = chosen
        }
    }
    return elements
}
/**
 * Evaluates a complete rule string, starting from `element`, and returns the extracted strings.
 *
 * Leading '@' and whitespace are skipped, the rest is split on "@", every segment but the
 * last narrows the element set, and the last segment is passed to [getResultLast].
 *
 * @param ruleStr the rule to evaluate
 * @return the extracted strings, or null when [ruleStr] is empty or no element remains
 */
private fun getResultList(ruleStr: String): List<String>? {
    if (ruleStr.isEmpty()) return null

    var current = Elements()
    current.add(element)

    val analyzer = RuleAnalyzer(ruleStr)
    // Skip leading '@' characters and anything below '!' (whitespace / control characters).
    while (true) {
        val head = analyzer.peek()
        if (head != '@' && head >= '!') break
        analyzer.advance()
    }

    val segments = analyzer.splitRule("@")
    val lastIndex = segments.size - 1

    var index = 0
    while (index < lastIndex) {
        val narrowed = Elements()
        current.forEach { node -> narrowed.addAll(getElementsSingle(node, segments[index])) }
        current.clear()
        current = narrowed
        index++
    }

    if (current.isEmpty()) return null
    return getResultLast(current, segments[lastIndex])
}
/**
 * Extracts the final content from [elements] according to the last segment of a rule.
 *
 * Supported values of [lastRule]:
 * - "text": the text of each element
 * - "textNodes": each element's non-empty text nodes, trimmed and joined with "\n"
 * - "ownText": each element's own text
 * - "html": the outer HTML of the set after `script` and `style` matches are removed
 * - "all": the outer HTML of the set, unmodified
 * - anything else: treated as an attribute name; distinct, non-empty values are collected
 *
 * Any exception is caught and printed; whatever was collected up to that point is returned.
 *
 * @param elements the elements to read from
 * @param lastRule the final rule segment (a keyword above or an attribute name)
 * @return the collected strings, possibly empty
 */
private fun getResultLast(elements: Elements, lastRule: String): List<String> {
    val textS = ArrayList<String>()
    try {
        when (lastRule) {
            "text" -> for (element in elements) {
                textS.add(element.text())
            }
            "textNodes" -> for (element in elements) {
                val tn = arrayListOf<String>()
                for (item in element.textNodes()) {
                    val temp = item.text().trim { it <= ' ' }
                    if (temp.isNotEmpty()) {
                        tn.add(temp)
                    }
                }
                textS.add(join("\n", tn))
            }
            "ownText" -> for (element in elements) {
                textS.add(element.ownText())
            }
            "html" -> {
                // Strip scripts and styles before serialising; this modifies the selected nodes.
                elements.select("script").remove()
                elements.select("style").remove()
                textS.add(elements.outerHtml())
            }
            "all" -> textS.add(elements.outerHtml())
            else -> for (element in elements) {
                val url = element.attr(lastRule)
                // Skip this element only. The previous `break` discarded every later element
                // as soon as one lacked the attribute or repeated an earlier value.
                if (url.isEmpty() || textS.contains(url)) continue
                textS.add(url)
            }
        }
    } catch (e: Exception) {
        // Best effort: report the failure and return what has been collected so far.
        e.printStackTrace()
    }
    return textS
}
data class IndexSet(var split:Char = '.',
var beforeRule:String = "",
val indexDefault:MutableList<Int> = mutableListOf(),
val indexs:MutableList<Any> = mutableListOf()){
fun getIndexs(len:Int): MutableSet<Int> { private fun getIndexs(len:Int): MutableSet<Int> {
val indexSet = mutableSetOf<Int>() val indexSet = mutableSetOf<Int>()

@ -6,15 +6,22 @@ class RuleAnalyzer(data: String) {
private var queue: String = data //被处理字符串 private var queue: String = data //被处理字符串
private var pos = 0 //处理到的位置 private var pos = 0 //处理到的位置
private var rule = arrayOf<String>()
private var start = 0 //每次处理字段的开始 private var start = 0 //每次处理字段的开始
private var startX = 0 //规则的开始
private var step:Int = 0 //分割字符的长度 private var step:Int = 0 //分割字符的长度
var elementsType = "" var elementsType = ""
//当前平衡字段 //当前平衡字段
fun currBalancedString( stepStart:Int = 1 , stepEnd:Int = 1): String { //stepStart平衡字符的起始分隔字串长度,stepEnd平衡字符的结束分隔字串长度 fun currBalancedString( stepStart:Int = 1 , stepEnd:Int = 1): String { //stepStart平衡字符的起始分隔字串长度,stepEnd平衡字符的结束分隔字串长度
return queue.substring(start+stepStart,pos-stepEnd) //当前平衡字段 return queue.substring(startX+stepStart,pos-stepEnd) //当前平衡字段
}
/**
 * Advances `pos` past any leading '@' characters and characters below '!'
 * (whitespace and control characters) in front of the current rule.
 *
 * Stops at the end of `queue`, so an empty rule or one consisting only of
 * '@'/whitespace leaves `pos == queue.length` instead of throwing
 * StringIndexOutOfBoundsException.
 */
fun trim() {
    while (pos < queue.length && (queue[pos] == '@' || queue[pos] < '!')) pos++
}
//将pos重置为0,方便复用 //将pos重置为0,方便复用
@ -34,21 +41,6 @@ class RuleAnalyzer(data: String) {
return queue.substring(start) return queue.substring(start)
} }
/**
 * Moves `pos` backwards.
 *
 * @param num how many characters to step back; 0 (the default) resets `pos` to `start`
 */
fun back(num: Int = 0) {
    pos = when (num) {
        0 -> start      // rewind to the start of the current field
        else -> pos - num
    }
}
/**
 * Moves `pos` forwards.
 *
 * @param num how many characters to skip (default 1); no bounds check is performed
 */
fun advance(num: Int = 1) {
    pos = pos + num
}
/** /**
* 是否已无剩余字符? * 是否已无剩余字符?
* @return 若剩余字串中已无字符则返回true * @return 若剩余字串中已无字符则返回true
@ -56,14 +48,6 @@ class RuleAnalyzer(data: String) {
val isEmpty: Boolean val isEmpty: Boolean
get() = queue.length - pos == 0 //是否处理到最后 get() = queue.length - pos == 0 //是否处理到最后
/**
 * Returns the character at `pos` without consuming it.
 *
 * @return the current character, or the NUL character (code 0) when [isEmpty] is true
 */
fun peek(): Char {
    if (isEmpty) return 0.toChar()
    return queue[pos]
}
/** /**
* 消耗剩余字串中一个字符 * 消耗剩余字串中一个字符
* @return 返回剩余字串中的下个字符 * @return 返回剩余字串中的下个字符
@ -109,9 +93,8 @@ class RuleAnalyzer(data: String) {
while (pos != queue.length) { while (pos != queue.length) {
for (s in seq) { for (s in seq) {
if (matches(s)) { if (queue.regionMatches(pos, s, 0, s.length)) {
step = s.length //间隔数 step = s.length //间隔数
start = this.pos //匹配成功, 设置规则下次起始位置
this.pos = pos //匹配成功, 同步处理位置到类 this.pos = pos //匹配成功, 同步处理位置到类
return true //匹配就返回 true return true //匹配就返回 true
} }
@ -163,7 +146,7 @@ class RuleAnalyzer(data: String) {
var commits = false //多行注释 var commits = false //多行注释
do { do {
if (isEmpty) break if (pos == queue.length) break
var c = queue[pos++] var c = queue[pos++]
if (c != '\\') { //非转义字符 if (c != '\\') { //非转义字符
if (c == '\'' && !commits && !commit && !regex && !inDoubleQuote && !inOtherQuote) inSingleQuote = !inSingleQuote //匹配具有语法功能的单引号 if (c == '\'' && !commits && !commit && !regex && !inDoubleQuote && !inOtherQuote) inSingleQuote = !inSingleQuote //匹配具有语法功能的单引号
@ -192,22 +175,15 @@ class RuleAnalyzer(data: String) {
']' -> bracketsDepth-- //闭合一层嵌套[] ']' -> bracketsDepth-- //闭合一层嵌套[]
} }
} }else if(c == '\n') commit = false
if (commits || commit || regex || inSingleQuote || inDoubleQuote || inOtherQuote) continue //语法单元未匹配结束,直接进入下个循环 if (commits || commit || regex || inSingleQuote || inDoubleQuote || inOtherQuote) continue //语法单元未匹配结束,直接进入下个循环
val fn = f(c) ?: continue val fn = f(c) ?: continue
if (fn) depth++ else depth-- //嵌套或者闭合 if (fn) depth++ else depth-- //嵌套或者闭合
}else { //转义字符 }else pos++
var next = queue[pos++] //拉出被转义字符
if(commit && next == 'n') commit = false //匹配单行注释终点。当前为\,下个为n,表示换行
else if (!commits && !commit && next == '\\') {
queue[pos++] //当前为\,下个为\,双重转义中"\\"表示转义字符本身,根据if条件"\\"字串不在注释中,则只能在字串或正则中
next = queue[pos++] //拉出下个字符,因为在双重转义的字串或正则中,类似于 \\/ 这样的结构才是转义结构
if(next == '\\')queue[pos++] //若为转义字符则继续拉出,因为双重转义中转义字符成对存在,即 \\\\
}
}
} while (depth > 0 || bracketsDepth >0) //拉出全部符合js语法的字段 } while (depth > 0 || bracketsDepth >0) //拉出全部符合js语法的字段
return if(depth > 0 || bracketsDepth > 0) false else { return if(depth > 0 || bracketsDepth > 0) false else {
@ -217,9 +193,9 @@ class RuleAnalyzer(data: String) {
} }
/** /**
* 在双重转义字串中拉出一个规则平衡组 * 拉出一个代码平衡组存在转义文本没有实体字符通常以{}作为模块
*/ */
fun chompRuleBalanced(open: Char = '[', close: Char = ']',f: ((Char) ->Boolean?)? = null ): Boolean { fun chompCodeBalanced(open: Char = '{', close: Char = '}'): Boolean {
var pos = pos //声明临时变量记录匹配位置,匹配成功后才同步到类的pos var pos = pos //声明临时变量记录匹配位置,匹配成功后才同步到类的pos
@ -230,7 +206,7 @@ class RuleAnalyzer(data: String) {
var inDoubleQuote = false //双引号 var inDoubleQuote = false //双引号
do { do {
if (isEmpty) break if (pos == queue.length) break
val c = queue[pos++] val c = queue[pos++]
if (c != ESC) { //非转义字符 if (c != ESC) { //非转义字符
if (c == '\'' && !inDoubleQuote) inSingleQuote = !inSingleQuote //匹配具有语法功能的单引号 if (c == '\'' && !inDoubleQuote) inSingleQuote = !inSingleQuote //匹配具有语法功能的单引号
@ -238,24 +214,50 @@ class RuleAnalyzer(data: String) {
if (inSingleQuote || inDoubleQuote) continue //语法单元未匹配结束,直接进入下个循环 if (inSingleQuote || inDoubleQuote) continue //语法单元未匹配结束,直接进入下个循环
if ( c == open )depth++ //开始嵌套一层 if ( c == '[' )depth++ //开始嵌套一层
else if ( c== close) depth-- //闭合一层嵌套 else if ( c== ']') depth-- //闭合一层嵌套
else if(depth == 0 && f != null) { //处于默认嵌套中的非默认字符不需要平衡,仅depth为0时默认嵌套全部闭合,此字符才进行嵌套 else if(depth == 0 ) {
val fn = f(c) ?: continue //处于默认嵌套中的非默认字符不需要平衡,仅depth为0时默认嵌套全部闭合,此字符才进行嵌套
if (fn) otherDepth++ else otherDepth-- if(c == open)otherDepth++
else if(c == close)otherDepth--
} }
}else { //转义字符 }else pos++
var next = queue[pos++] //拉出被转义字符,匹配\/、\"、\'等
if (next == ESC) {
queue[pos++] //当前为\,下个为\,双重转义中"\\"表示转义字符本身,根据语法特征当前字段在字串或正则中
next = queue[pos++] //拉出下个字符,因为在双重转义的字串或正则中,类似于 \\/ 这样的结构才是转义结构
if(next == ESC)queue[pos++] //若为转义字符则继续拉出,因为双重转义中转义字符成对存在,即 \\\\
}
}
} while (depth > 0 || otherDepth > 0) //拉出一个平衡字串 } while (depth > 0 || otherDepth > 0) //拉出一个平衡字串
return if(depth > 0 || otherDepth > 0) false else { return if(depth > 0 || otherDepth > 0) false else {
startX = this.pos //内嵌规则起始
this.pos = pos //同步位置
true
}
}
/**
* 拉出一个规则平衡组没有转义文本有实体字符通常以[]作为选择器
*/
fun chompRuleBalanced(open: Char = '[', close: Char = ']'): Boolean {
var pos = pos //声明临时变量记录匹配位置,匹配成功后才同步到类的pos
var depth = 0 //嵌套深度
var inSingleQuote = false //单引号
var inDoubleQuote = false //双引号
do {
if (pos == queue.length) break
val c = queue[pos++]
if (c == '\'' && !inDoubleQuote) inSingleQuote = !inSingleQuote //匹配具有语法功能的单引号
else if (c == '"' && !inSingleQuote) inDoubleQuote = !inDoubleQuote //匹配具有语法功能的双引号
if (inSingleQuote || inDoubleQuote) continue //语法单元未匹配结束,直接进入下个循环
if ( c == open )depth++ //开始嵌套一层
else if ( c== close) depth-- //闭合一层嵌套
} while (depth > 0 ) //拉出一个平衡字串
return if(depth > 0) false else {
this.pos = pos //同步位置 this.pos = pos //同步位置
true true
} }
@ -269,20 +271,32 @@ class RuleAnalyzer(data: String) {
if(split.size == 1) { if(split.size == 1) {
elementsType = split[0] //设置分割字串 elementsType = split[0] //设置分割字串
return if(!consumeTo(elementsType)) {
rule += queue.substring(startX)
rule
}else {
step = elementsType.length //设置分隔符长度 step = elementsType.length //设置分隔符长度
return splitRule(arrayOf()) //仅一个分隔字串时,直接二段解析更快 splitRule()
}else if (!consumeToAny(* split)) return arrayOf(queue) //未找到分隔符 } //递归匹配
}else if (!consumeToAny(* split)) { //未找到分隔符
rule += queue.substring(startX)
return rule
}
val st = findToAny( '[','(' ) //查找筛选器 val end = pos //记录分隔位置
pos = start //重回开始,启动另一种查找
do{
val st = findToAny('[', '(') //查找筛选器位置
if (st == -1) { if (st == -1) {
var rule = arrayOf(queue.substring(0, pos)) //压入分隔的首段规则到数组 rule = arrayOf(queue.substring(startX, end)) //压入分隔的首段规则到数组
elementsType = queue.substring(pos, pos + step) //设置组合类型 elementsType = queue.substring(end, end + step) //设置组合类型
pos += step //跳过分隔符 pos = end + step //跳过分隔符
while (consumeToAny(* split)) { //循环切分规则压入数组 while (consumeTo(elementsType)) { //循环切分规则压入数组
rule += queue.substring(start, pos) rule += queue.substring(start, pos)
pos += step //跳过分隔符 pos += step //跳过分隔符
} }
@ -292,73 +306,107 @@ class RuleAnalyzer(data: String) {
return rule return rule
} }
val rule = if(st >pos ){ //先匹配到st1pos,表明分隔字串不在选择器中,将选择器前分隔字串分隔的字段依次压入数组 if (st > end) { //先匹配到st1pos,表明分隔字串不在选择器中,将选择器前分隔字串分隔的字段依次压入数组
var rule = arrayOf(queue.substring(0, pos)) //压入分隔的首段规则到数组 rule = arrayOf(queue.substring(startX, end)) //压入分隔的首段规则到数组
elementsType = queue.substring(pos, pos + step) //设置组合类型 elementsType = queue.substring(end, end + step) //设置组合类型
pos += step //跳过分隔符 pos = end + step //跳过分隔符
while (consumeToAny( * split ) && pos < st ) { //循环切分规则压入数组 while (consumeTo(elementsType) && pos < st) { //循环切分规则压入数组
rule += queue.substring(start, pos) rule += queue.substring(start, pos)
pos += step //跳过分隔符 pos += step //跳过分隔符
} }
return if(pos > st) {
startX = start
splitRule() //首段已匹配,但当前段匹配未完成,调用二段匹配
}
else { //执行到此,证明后面再无分隔字符
rule += queue.substring(pos) //将剩余字段压入数组末尾
rule rule
}
}else null }
pos = st //位置推移到筛选器处 pos = st //位置推移到筛选器处
val next = if (queue[pos] == '[') ']' else ')' //平衡组末尾字符 val next = if (queue[pos] == '[') ']' else ')' //平衡组末尾字符
return if (rule == null) { //rule为空,首段未匹配完成 if (!chompRuleBalanced(queue[pos], next)) throw Error(
queue.substring(
0,
start
) + "后未平衡"
) //拉出一个筛选器,不平衡则报错
if(!chompRuleBalanced(queue[pos],next)) throw Error(queue.substring(0, start)+"后未平衡") //拉出一个筛选器,不平衡则报错 }while( end > pos )
splitRule(* split) //递归调用首段匹配
} else { start = pos //设置开始查找筛选器位置的起始位置
if(!chompRuleBalanced(queue[pos],next)) throw Error(queue.substring(0, start)+"后未平衡") //拉出一个筛选器,不平衡则报错
splitRule(rule) //首段已匹配,但当前段匹配未完成,调用二段匹配
}
return splitRule(* split) //递归调用首段匹配
} }
@JvmName("splitRuleNext") @JvmName("splitRuleNext")
private tailrec fun splitRule(rules:Array<String>): Array<String>{ //二段匹配被调用,elementsType非空(已在首段赋值),直接按elementsType查找,比首段采用的方式更快 private tailrec fun splitRule(): Array<String>{ //二段匹配被调用,elementsType非空(已在首段赋值),直接按elementsType查找,比首段采用的方式更快
if (!consumeTo(elementsType,false)) return rules + queue.substring(start) //此处consumeTo(...)开始位置不是规则的开始位置,start沿用上次设置 val end = pos //记录分隔位置
pos = start //重回开始,启动另一种查找
val st = findToAny( '[','(' ) //查找筛选器 do{
val st = findToAny('[', '(') //查找筛选器位置
if (st == -1) { if (st == -1) {
var rule = rules + queue.substring(start, pos) //压入本次分隔的首段规则到数组
pos += step //跳过分隔符 rule += arrayOf(queue.substring(startX, end)) //压入分隔的首段规则到数组
pos = end + step //跳过分隔符
while (consumeTo(elementsType)) { //循环切分规则压入数组 while (consumeTo(elementsType)) { //循环切分规则压入数组
rule += queue.substring(start, pos) rule += queue.substring(start, pos)
pos += step //跳过分隔符 pos += step //跳过分隔符
} }
rule += queue.substring(pos) //将剩余字段压入数组末尾 rule += queue.substring(pos) //将剩余字段压入数组末尾
return rule return rule
} }
val rule = if(st > pos ){//先匹配到st1pos,表明分隔字串不在选择器中,将选择器前分隔字串分隔的字段依次压入数组 if (st > end) { //先匹配到st1pos,表明分隔字串不在选择器中,将选择器前分隔字串分隔的字段依次压入数组
var rule = rules + queue.substring(start, pos) //压入本次分隔的首段规则到数组
pos += step //跳过分隔符 rule += arrayOf(queue.substring(startX, end)) //压入分隔的首段规则到数组
pos = end + step //跳过分隔符
while (consumeTo(elementsType) && pos < st) { //循环切分规则压入数组 while (consumeTo(elementsType) && pos < st) { //循环切分规则压入数组
rule += queue.substring(start, pos) rule += queue.substring(start, pos)
pos += step //跳过分隔符 pos += step //跳过分隔符
} }
return if(pos > st) {
startX = start
splitRule() //首段已匹配,但当前段匹配未完成,调用二段匹配
}
else { //执行到此,证明后面再无分隔字符
rule += queue.substring(pos) //将剩余字段压入数组末尾
rule rule
}else rules }
}
pos = st //位置推移到筛选器处 pos = st //位置推移到筛选器处
val next = if (queue[pos] == '[') ']' else ')' //平衡组末尾字符 val next = if (queue[pos] == '[') ']' else ')' //平衡组末尾字符
if(!chompRuleBalanced(queue[pos],next)) throw Error(queue.substring(0, start)+"后未平衡") //拉出一个筛选器,不平衡时返回true,表示未平衡 if (!chompRuleBalanced(queue[pos], next)) throw Error(
queue.substring(
0,
start
) + "后未平衡"
) //拉出一个筛选器,不平衡则报错
}while( end > pos )
return splitRule(rule) //递归匹配 start = pos //设置开始查找筛选器位置的起始位置
return if(!consumeTo(elementsType)) {
rule += queue.substring(startX)
rule
}else splitRule() //递归匹配
} }
@ -379,16 +427,9 @@ class RuleAnalyzer(data: String) {
while (!isEmpty && consumeTo(inner)) { //拉取成功返回true,ruleAnalyzes里的字符序列索引变量pos后移相应位置,否则返回false,且isEmpty为true while (!isEmpty && consumeTo(inner)) { //拉取成功返回true,ruleAnalyzes里的字符序列索引变量pos后移相应位置,否则返回false,且isEmpty为true
val posPre = pos //记录上次结束位置 val posPre = pos //记录上次结束位置
if (chompRuleBalanced {//拉出一个以[]为默认嵌套、以{}为补充嵌套的平衡字段 if (chompCodeBalanced()) {
when (it) {
'{' -> true
'}' -> false
else -> null
}
}) {
val frv= fr(currBalancedString(startStep,endStep)) val frv= fr(currBalancedString(startStep,endStep))
if(frv != null) { if(frv != null) {
st.append(queue.substring(start,posPre)+frv) //压入内嵌规则前的内容,及内嵌规则解析得到的字符串 st.append(queue.substring(start,posPre)+frv) //压入内嵌规则前的内容,及内嵌规则解析得到的字符串
continue //获取内容成功,继续选择下个内嵌规则 continue //获取内容成功,继续选择下个内嵌规则

Loading…
Cancel
Save