diff --git a/app/src/main/java/io/legado/app/model/localBook/TextFile.kt b/app/src/main/java/io/legado/app/model/localBook/TextFile.kt index 6c16312f3..ddf8462c9 100644 --- a/app/src/main/java/io/legado/app/model/localBook/TextFile.kt +++ b/app/src/main/java/io/legado/app/model/localBook/TextFile.kt @@ -51,10 +51,12 @@ class TextFile(private val book: Book) { } else { book.tocUrl.toPattern(Pattern.MULTILINE) } - return analyze(rulePattern) + return rulePattern?.let { + analyze(rulePattern) + } ?: analyze() } - private fun analyze(pattern: Pattern?): ArrayList { + private fun analyze(pattern: Pattern): ArrayList { val toc = arrayListOf() LocalBook.getBookInputStream(book).use { bis -> var blockContent: String @@ -76,142 +78,188 @@ class TextFile(private val book: Book) { .also { length = it } > 0 ) { blockPos++ - //如果存在Chapter - if (pattern != null) { - var end = bufferStart + length - for (i in bufferStart + length - 1 downTo 0) { - if (buffer[i] == blank) { - end = i - break - } - } - //将数据转换成String, 不能超过length - blockContent = String(buffer, 0, end, charset) - buffer.copyInto(buffer, 0, end, bufferStart + length) - bufferStart = bufferStart + length - end - length = end - //当前Block下使过的String的指针 - var seekPos = 0 - //进行正则匹配 - val matcher: Matcher = pattern.matcher(blockContent) - //如果存在相应章节 - while (matcher.find()) { //获取匹配到的字符在字符串中的起始位置 - val chapterStart = matcher.start() - //获取章节内容 - val chapterContent = blockContent.substring(seekPos, chapterStart) - val chapterLength = chapterContent.toByteArray(charset).size - val lastStart = toc.lastOrNull()?.start ?: 0 - if (curOffset + chapterLength - lastStart > 50000) { - bis.close() - //移除不匹配的规则 - tocRules.removeFirstOrNull() - return analyze(tocRules.firstOrNull()?.rule?.toPattern(Pattern.MULTILINE)) - } - //如果 seekPos == 0 && nextChapterPos != 0 表示当前block处前面有一段内容 - //第一种情况一定是序章 第二种情况是上一个章节的内容 - if (seekPos == 0 && chapterStart != 0) { //获取当前章节的内容 - if (toc.isEmpty()) { //如果当前没有章节,那么就是序章 - //加入简介 - if (StringUtils.trim(chapterContent).isNotEmpty()) { - val qyChapter = BookChapter() - qyChapter.title = "前言" - qyChapter.start = 0 - qyChapter.end = chapterLength.toLong() - toc.add(qyChapter) - } - //创建当前章节 - val curChapter = BookChapter() - curChapter.title = matcher.group() - curChapter.start = chapterLength.toLong() - toc.add(curChapter) - } else { //否则就block分割之后,上一个章节的剩余内容 - //获取上一章节 - val lastChapter = toc.last() - //将当前段落添加上一章去 - lastChapter.end = - lastChapter.end!! + chapterLength.toLong() - //创建当前章节 - val curChapter = BookChapter() - curChapter.title = matcher.group() - curChapter.start = lastChapter.end - toc.add(curChapter) - } - } else { - if (toc.isNotEmpty()) { //获取章节内容 - //获取上一章节 - val lastChapter = toc.last() - lastChapter.end = - lastChapter.start!! + chapterContent.toByteArray(charset).size.toLong() - //创建当前章节 - val curChapter = BookChapter() - curChapter.title = matcher.group() - curChapter.start = lastChapter.end - toc.add(curChapter) - } else { //如果章节不存在则创建章节 - val curChapter = BookChapter() - curChapter.title = matcher.group() - curChapter.start = 0 - curChapter.end = 0 - toc.add(curChapter) - } - } - //设置指针偏移 - seekPos += chapterContent.length + var end = bufferStart + length + for (i in bufferStart + length - 1 downTo 0) { + if (buffer[i] == blank) { + end = i + break } - if (seekPos == 0 && length > 50000) { + } + //将数据转换成String, 不能超过length + blockContent = String(buffer, 0, end, charset) + buffer.copyInto(buffer, 0, end, bufferStart + length) + bufferStart = bufferStart + length - end + length = end + //当前Block下使过的String的指针 + var seekPos = 0 + //进行正则匹配 + val matcher: Matcher = pattern.matcher(blockContent) + //如果存在相应章节 + while (matcher.find()) { //获取匹配到的字符在字符串中的起始位置 + val chapterStart = matcher.start() + //获取章节内容 + val chapterContent = blockContent.substring(seekPos, chapterStart) + val chapterLength = chapterContent.toByteArray(charset).size + val lastStart = toc.lastOrNull()?.start ?: 0 + if (curOffset + chapterLength - lastStart > 50000) { bis.close() //移除不匹配的规则 - tocRules.remove(tocRules.removeFirstOrNull()) - return analyze(tocRules.firstOrNull()?.rule?.toPattern(Pattern.MULTILINE)) + tocRules.removeFirstOrNull() + return tocRules.firstOrNull()?.let { + analyze(it.rule.toPattern(Pattern.MULTILINE)) + } ?: analyze() } - } else { //进行本地虚拟分章 - bufferStart = 0 - //章节在buffer的偏移量 - var chapterOffset = 0 - //当前剩余可分配的长度 - var strLength = length - //分章的位置 - var chapterPos = 0 - while (strLength > 0) { - ++chapterPos - //是否长度超过一章 - if (strLength > maxLengthWithNoToc) { //在buffer中一章的终止点 - var end = length - //寻找换行符作为终止点 - for (i in chapterOffset + maxLengthWithNoToc until length) { - if (buffer[i] == blank) { - end = i - break - } + //如果 seekPos == 0 && nextChapterPos != 0 表示当前block处前面有一段内容 + //第一种情况一定是序章 第二种情况是上一个章节的内容 + if (seekPos == 0 && chapterStart != 0) { //获取当前章节的内容 + if (toc.isEmpty()) { //如果当前没有章节,那么就是序章 + //加入简介 + if (StringUtils.trim(chapterContent).isNotEmpty()) { + val qyChapter = BookChapter() + qyChapter.title = "前言" + qyChapter.start = 0 + qyChapter.end = chapterLength.toLong() + toc.add(qyChapter) } - val chapter = BookChapter() - chapter.title = "第${blockPos}章($chapterPos)" - chapter.start = curOffset + chapterOffset + 1 - chapter.end = curOffset + end - toc.add(chapter) - //减去已经被分配的长度 - strLength -= (end - chapterOffset) - //设置偏移的位置 - chapterOffset = end - } else { - val chapter = BookChapter() - chapter.title = "第" + blockPos + "章" + "(" + chapterPos + ")" - chapter.start = curOffset + chapterOffset + 1 - chapter.end = curOffset + length - toc.add(chapter) - strLength = 0 + //创建当前章节 + val curChapter = BookChapter() + curChapter.title = matcher.group() + curChapter.start = chapterLength.toLong() + toc.add(curChapter) + } else { //否则就block分割之后,上一个章节的剩余内容 + //获取上一章节 + val lastChapter = toc.last() + //将当前段落添加上一章去 + lastChapter.end = + lastChapter.end!! + chapterLength.toLong() + //创建当前章节 + val curChapter = BookChapter() + curChapter.title = matcher.group() + curChapter.start = lastChapter.end + toc.add(curChapter) + } + } else { + if (toc.isNotEmpty()) { //获取章节内容 + //获取上一章节 + val lastChapter = toc.last() + lastChapter.end = + lastChapter.start!! + chapterContent.toByteArray(charset).size.toLong() + //创建当前章节 + val curChapter = BookChapter() + curChapter.title = matcher.group() + curChapter.start = lastChapter.end + toc.add(curChapter) + } else { //如果章节不存在则创建章节 + val curChapter = BookChapter() + curChapter.title = matcher.group() + curChapter.start = 0 + curChapter.end = 0 + toc.add(curChapter) } } + //设置指针偏移 + seekPos += chapterContent.length + } + if (seekPos == 0 && length > 50000) { + bis.close() + //移除不匹配的规则 + tocRules.remove(tocRules.removeFirstOrNull()) + return tocRules.firstOrNull()?.let { + analyze(it.rule.toPattern(Pattern.MULTILINE)) + } ?: analyze() } //block的偏移点 curOffset += length.toLong() - if (pattern != null) { - //设置上一章的结尾 - val lastChapter = toc.last() - lastChapter.end = curOffset + //设置上一章的结尾 + val lastChapter = toc.last() + lastChapter.end = curOffset + + //当添加的block太多的时候,执行GC + if (blockPos % 15 == 0) { + System.gc() + System.runFinalization() } + } + } + for (i in toc.indices) { + val bean = toc[i] + bean.index = i + bean.bookUrl = book.bookUrl + bean.url = (MD5Utils.md5Encode16(book.originName + i + bean.title)) + } + book.latestChapterTitle = toc.last().title + book.totalChapterNum = toc.size + + System.gc() + System.runFinalization() + book.tocUrl = pattern.pattern() + book.save() + return toc + } + + private fun analyze(): ArrayList { + val toc = arrayListOf() + LocalBook.getBookInputStream(book).use { bis -> + //加载章节 + var curOffset: Long = 0 + //block的个数 + var blockPos = 0 + //读取的长度 + var length: Int + val buffer = ByteArray(bufferSize) + var bufferStart = 3 + bis.read(buffer, 0, 3) + if (Utf8BomUtils.hasBom(buffer)) { + bufferStart = 0 + curOffset = 3 + } + //获取文件中的数据到buffer,直到没有数据为止 + while (bis.read(buffer, bufferStart, bufferSize - bufferStart) + .also { length = it } > 0 + ) { + bufferStart = 0 + blockPos++ + //章节在buffer的偏移量 + var chapterOffset = 0 + //当前剩余可分配的长度 + var strLength = length + //分章的位置 + var chapterPos = 0 + while (strLength > 0) { + ++chapterPos + //是否长度超过一章 + if (strLength > maxLengthWithNoToc) { //在buffer中一章的终止点 + var end = length + //寻找换行符作为终止点 + for (i in chapterOffset + maxLengthWithNoToc until length) { + if (buffer[i] == blank) { + end = i + break + } + } + val chapter = BookChapter() + chapter.title = "第${blockPos}章($chapterPos)" + chapter.start = curOffset + chapterOffset + 1 + chapter.end = curOffset + end + toc.add(chapter) + //减去已经被分配的长度 + strLength -= (end - chapterOffset) + //设置偏移的位置 + chapterOffset = end + } else { + val chapter = BookChapter() + chapter.title = "第" + blockPos + "章" + "(" + chapterPos + ")" + chapter.start = curOffset + chapterOffset + 1 + chapter.end = curOffset + length + toc.add(chapter) + strLength = 0 + } + } + + //block的偏移点 + curOffset += length.toLong() //当添加的block太多的时候,执行GC if (blockPos % 15 == 0) { @@ -231,7 +279,7 @@ class TextFile(private val book: Book) { System.gc() System.runFinalization() - book.tocUrl = pattern?.pattern() ?: "" + book.tocUrl = "" book.save() return toc }