pull/1511/head
gedoor 3 years ago
parent d562bdf7a7
commit e6ccd50656
  1. 300
      app/src/main/java/io/legado/app/model/localBook/TextFile.kt

@ -51,10 +51,12 @@ class TextFile(private val book: Book) {
} else {
book.tocUrl.toPattern(Pattern.MULTILINE)
}
return analyze(rulePattern)
return rulePattern?.let {
analyze(rulePattern)
} ?: analyze()
}
private fun analyze(pattern: Pattern?): ArrayList<BookChapter> {
private fun analyze(pattern: Pattern): ArrayList<BookChapter> {
val toc = arrayListOf<BookChapter>()
LocalBook.getBookInputStream(book).use { bis ->
var blockContent: String
@ -76,142 +78,188 @@ class TextFile(private val book: Book) {
.also { length = it } > 0
) {
blockPos++
//如果存在Chapter
if (pattern != null) {
var end = bufferStart + length
for (i in bufferStart + length - 1 downTo 0) {
if (buffer[i] == blank) {
end = i
break
}
}
//将数据转换成String, 不能超过length
blockContent = String(buffer, 0, end, charset)
buffer.copyInto(buffer, 0, end, bufferStart + length)
bufferStart = bufferStart + length - end
length = end
// Offset into blockContent up to which text has already been consumed
var seekPos = 0
//进行正则匹配
val matcher: Matcher = pattern.matcher(blockContent)
//如果存在相应章节
while (matcher.find()) { //获取匹配到的字符在字符串中的起始位置
val chapterStart = matcher.start()
//获取章节内容
val chapterContent = blockContent.substring(seekPos, chapterStart)
val chapterLength = chapterContent.toByteArray(charset).size
val lastStart = toc.lastOrNull()?.start ?: 0
if (curOffset + chapterLength - lastStart > 50000) {
bis.close()
//移除不匹配的规则
tocRules.removeFirstOrNull()
return analyze(tocRules.firstOrNull()?.rule?.toPattern(Pattern.MULTILINE))
}
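// If seekPos == 0 && chapterStart != 0, this block begins with text that precedes the first match.
// That text is either the preface (no chapters recorded yet) or the remainder of the previous chapter.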
if (seekPos == 0 && chapterStart != 0) { //获取当前章节的内容
if (toc.isEmpty()) { //如果当前没有章节,那么就是序章
//加入简介
if (StringUtils.trim(chapterContent).isNotEmpty()) {
val qyChapter = BookChapter()
qyChapter.title = "前言"
qyChapter.start = 0
qyChapter.end = chapterLength.toLong()
toc.add(qyChapter)
}
//创建当前章节
val curChapter = BookChapter()
curChapter.title = matcher.group()
curChapter.start = chapterLength.toLong()
toc.add(curChapter)
} else { //否则就block分割之后,上一个章节的剩余内容
//获取上一章节
val lastChapter = toc.last()
//将当前段落添加上一章去
lastChapter.end =
lastChapter.end!! + chapterLength.toLong()
//创建当前章节
val curChapter = BookChapter()
curChapter.title = matcher.group()
curChapter.start = lastChapter.end
toc.add(curChapter)
}
} else {
if (toc.isNotEmpty()) { //获取章节内容
//获取上一章节
val lastChapter = toc.last()
lastChapter.end =
lastChapter.start!! + chapterContent.toByteArray(charset).size.toLong()
//创建当前章节
val curChapter = BookChapter()
curChapter.title = matcher.group()
curChapter.start = lastChapter.end
toc.add(curChapter)
} else { //如果章节不存在则创建章节
val curChapter = BookChapter()
curChapter.title = matcher.group()
curChapter.start = 0
curChapter.end = 0
toc.add(curChapter)
}
}
//设置指针偏移
seekPos += chapterContent.length
var end = bufferStart + length
for (i in bufferStart + length - 1 downTo 0) {
if (buffer[i] == blank) {
end = i
break
}
if (seekPos == 0 && length > 50000) {
}
//将数据转换成String, 不能超过length
blockContent = String(buffer, 0, end, charset)
buffer.copyInto(buffer, 0, end, bufferStart + length)
bufferStart = bufferStart + length - end
length = end
// Offset into blockContent up to which text has already been consumed
var seekPos = 0
//进行正则匹配
val matcher: Matcher = pattern.matcher(blockContent)
//如果存在相应章节
while (matcher.find()) { //获取匹配到的字符在字符串中的起始位置
val chapterStart = matcher.start()
//获取章节内容
val chapterContent = blockContent.substring(seekPos, chapterStart)
val chapterLength = chapterContent.toByteArray(charset).size
val lastStart = toc.lastOrNull()?.start ?: 0
if (curOffset + chapterLength - lastStart > 50000) {
bis.close()
//移除不匹配的规则
tocRules.remove(tocRules.removeFirstOrNull())
return analyze(tocRules.firstOrNull()?.rule?.toPattern(Pattern.MULTILINE))
tocRules.removeFirstOrNull()
return tocRules.firstOrNull()?.let {
analyze(it.rule.toPattern(Pattern.MULTILINE))
} ?: analyze()
}
} else { //进行本地虚拟分章
bufferStart = 0
//章节在buffer的偏移量
var chapterOffset = 0
//当前剩余可分配的长度
var strLength = length
//分章的位置
var chapterPos = 0
while (strLength > 0) {
++chapterPos
//是否长度超过一章
if (strLength > maxLengthWithNoToc) { //在buffer中一章的终止点
var end = length
//寻找换行符作为终止点
for (i in chapterOffset + maxLengthWithNoToc until length) {
if (buffer[i] == blank) {
end = i
break
}
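// If seekPos == 0 && chapterStart != 0, this block begins with text that precedes the first match.
// That text is either the preface (no chapters recorded yet) or the remainder of the previous chapter.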
if (seekPos == 0 && chapterStart != 0) { //获取当前章节的内容
if (toc.isEmpty()) { //如果当前没有章节,那么就是序章
//加入简介
if (StringUtils.trim(chapterContent).isNotEmpty()) {
val qyChapter = BookChapter()
qyChapter.title = "前言"
qyChapter.start = 0
qyChapter.end = chapterLength.toLong()
toc.add(qyChapter)
}
val chapter = BookChapter()
chapter.title = "${blockPos}章($chapterPos)"
chapter.start = curOffset + chapterOffset + 1
chapter.end = curOffset + end
toc.add(chapter)
//减去已经被分配的长度
strLength -= (end - chapterOffset)
//设置偏移的位置
chapterOffset = end
} else {
val chapter = BookChapter()
chapter.title = "" + blockPos + "" + "(" + chapterPos + ")"
chapter.start = curOffset + chapterOffset + 1
chapter.end = curOffset + length
toc.add(chapter)
strLength = 0
//创建当前章节
val curChapter = BookChapter()
curChapter.title = matcher.group()
curChapter.start = chapterLength.toLong()
toc.add(curChapter)
} else { //否则就block分割之后,上一个章节的剩余内容
//获取上一章节
val lastChapter = toc.last()
//将当前段落添加上一章去
lastChapter.end =
lastChapter.end!! + chapterLength.toLong()
//创建当前章节
val curChapter = BookChapter()
curChapter.title = matcher.group()
curChapter.start = lastChapter.end
toc.add(curChapter)
}
} else {
if (toc.isNotEmpty()) { //获取章节内容
//获取上一章节
val lastChapter = toc.last()
lastChapter.end =
lastChapter.start!! + chapterContent.toByteArray(charset).size.toLong()
//创建当前章节
val curChapter = BookChapter()
curChapter.title = matcher.group()
curChapter.start = lastChapter.end
toc.add(curChapter)
} else { //如果章节不存在则创建章节
val curChapter = BookChapter()
curChapter.title = matcher.group()
curChapter.start = 0
curChapter.end = 0
toc.add(curChapter)
}
}
//设置指针偏移
seekPos += chapterContent.length
}
if (seekPos == 0 && length > 50000) {
bis.close()
//移除不匹配的规则
tocRules.remove(tocRules.removeFirstOrNull())
return tocRules.firstOrNull()?.let {
analyze(it.rule.toPattern(Pattern.MULTILINE))
} ?: analyze()
}
//block的偏移点
curOffset += length.toLong()
if (pattern != null) {
//设置上一章的结尾
val lastChapter = toc.last()
lastChapter.end = curOffset
//设置上一章的结尾
val lastChapter = toc.last()
lastChapter.end = curOffset
//当添加的block太多的时候,执行GC
if (blockPos % 15 == 0) {
System.gc()
System.runFinalization()
}
}
}
for (i in toc.indices) {
val bean = toc[i]
bean.index = i
bean.bookUrl = book.bookUrl
bean.url = (MD5Utils.md5Encode16(book.originName + i + bean.title))
}
book.latestChapterTitle = toc.last().title
book.totalChapterNum = toc.size
System.gc()
System.runFinalization()
book.tocUrl = pattern.pattern()
book.save()
return toc
}
private fun analyze(): ArrayList<BookChapter> {
val toc = arrayListOf<BookChapter>()
LocalBook.getBookInputStream(book).use { bis ->
//加载章节
var curOffset: Long = 0
//block的个数
var blockPos = 0
//读取的长度
var length: Int
val buffer = ByteArray(bufferSize)
var bufferStart = 3
bis.read(buffer, 0, 3)
if (Utf8BomUtils.hasBom(buffer)) {
bufferStart = 0
curOffset = 3
}
//获取文件中的数据到buffer,直到没有数据为止
while (bis.read(buffer, bufferStart, bufferSize - bufferStart)
.also { length = it } > 0
) {
bufferStart = 0
blockPos++
//章节在buffer的偏移量
var chapterOffset = 0
//当前剩余可分配的长度
var strLength = length
//分章的位置
var chapterPos = 0
while (strLength > 0) {
++chapterPos
//是否长度超过一章
if (strLength > maxLengthWithNoToc) { //在buffer中一章的终止点
var end = length
//寻找换行符作为终止点
for (i in chapterOffset + maxLengthWithNoToc until length) {
if (buffer[i] == blank) {
end = i
break
}
}
val chapter = BookChapter()
chapter.title = "${blockPos}章($chapterPos)"
chapter.start = curOffset + chapterOffset + 1
chapter.end = curOffset + end
toc.add(chapter)
//减去已经被分配的长度
strLength -= (end - chapterOffset)
//设置偏移的位置
chapterOffset = end
} else {
val chapter = BookChapter()
chapter.title = "" + blockPos + "" + "(" + chapterPos + ")"
chapter.start = curOffset + chapterOffset + 1
chapter.end = curOffset + length
toc.add(chapter)
strLength = 0
}
}
//block的偏移点
curOffset += length.toLong()
//当添加的block太多的时候,执行GC
if (blockPos % 15 == 0) {
@ -231,7 +279,7 @@ class TextFile(private val book: Book) {
System.gc()
System.runFinalization()
book.tocUrl = pattern?.pattern() ?: ""
book.tocUrl = ""
book.save()
return toc
}

Loading…
Cancel
Save