|
|
@ -17,22 +17,8 @@ object ContentHelp { |
|
|
|
*/ |
|
|
|
*/ |
|
|
|
fun reSegment(content: String, chapterName: String): String { |
|
|
|
fun reSegment(content: String, chapterName: String): String { |
|
|
|
var content1 = content |
|
|
|
var content1 = content |
|
|
|
val content2: String |
|
|
|
val dict = makeDict(content1) |
|
|
|
val chapterNameLength = chapterName.trim { it <= ' ' }.length |
|
|
|
var p = content1 |
|
|
|
content2 = if (chapterNameLength > 1) { |
|
|
|
|
|
|
|
val regexp = |
|
|
|
|
|
|
|
chapterName.trim { it <= ' ' }.replace("\\s+".toRegex(), "(\\\\s*)") |
|
|
|
|
|
|
|
// 质量较低的页面,章节内可能重复出现章节标题 |
|
|
|
|
|
|
|
if (chapterNameLength > 5) content1.replace(regexp.toRegex(), "") |
|
|
|
|
|
|
|
.trim { it <= ' ' } else content1.replaceFirst( |
|
|
|
|
|
|
|
"^\\s*" + regexp.toRegex(), |
|
|
|
|
|
|
|
"" |
|
|
|
|
|
|
|
).trim { it <= ' ' } |
|
|
|
|
|
|
|
} else { |
|
|
|
|
|
|
|
content1 |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
val dict = makeDict(content2) |
|
|
|
|
|
|
|
var p = content2 |
|
|
|
|
|
|
|
.replace(""".toRegex(), "“") |
|
|
|
.replace(""".toRegex(), "“") |
|
|
|
.replace("[::]['\"‘”“]+".toRegex(), ":“") |
|
|
|
.replace("[::]['\"‘”“]+".toRegex(), ":“") |
|
|
|
.replace("[\"”“]+[\\s]*[\"”“][\\s\"”“]*".toRegex(), "”\n“") |
|
|
|
.replace("[\"”“]+[\\s]*[\"”“][\\s\"”“]*".toRegex(), "”\n“") |
|
|
|