From 8c22b09cc24b09bac586f6296e15ed47e79c414f Mon Sep 17 00:00:00 2001 From: kunfei Date: Sun, 2 Feb 2020 14:37:00 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BC=98=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../app/model/localBook/AnalyzeTxtFile.kt | 177 +++++++++++++++++- 1 file changed, 168 insertions(+), 9 deletions(-) diff --git a/app/src/main/java/io/legado/app/model/localBook/AnalyzeTxtFile.kt b/app/src/main/java/io/legado/app/model/localBook/AnalyzeTxtFile.kt index 61693d8f6..85c83c514 100644 --- a/app/src/main/java/io/legado/app/model/localBook/AnalyzeTxtFile.kt +++ b/app/src/main/java/io/legado/app/model/localBook/AnalyzeTxtFile.kt @@ -9,9 +9,17 @@ import io.legado.app.data.entities.TxtTocRule import io.legado.app.utils.* import java.io.File import java.io.RandomAccessFile +import java.nio.charset.Charset +import java.util.regex.Matcher +import java.util.regex.Pattern object AnalyzeTxtFile { private const val folderName = "bookTxt" + private const val BLANK: Byte = 0x0a + //默认从文件中获取数据的长度 + private const val BUFFER_SIZE = 512 * 1024 + //没有标题的时候,每个章节的最大长度 + private const val MAX_LENGTH_WITH_NO_CHAPTER = 10 * 1024 private val cacheFolder: File by lazy { val rootFile = App.INSTANCE.getExternalFilesDir(null) ?: App.INSTANCE.externalCacheDir @@ -19,7 +27,7 @@ object AnalyzeTxtFile { FileUtils.createFileIfNotExist(rootFile, subDirs = *arrayOf(folderName)) } - fun analyze(context: Context, book: Book) { + fun analyze(context: Context, book: Book): ArrayList { val uri = Uri.parse(book.bookUrl) val bookFile = FileUtils.getFile(cacheFolder, book.originName, subDirs = *arrayOf()) if (!bookFile.exists()) { @@ -28,26 +36,177 @@ object AnalyzeTxtFile { bookFile.writeBytes(it) } } - book.charset = EncodingDetect.getEncode(bookFile) + val charset = charset(EncodingDetect.getEncode(bookFile)) + book.charset = charset.name() val toc = arrayListOf() //获取文件流 val bookStream = RandomAccessFile(bookFile, "r") - val tocRule = getTocRule(bookStream) + val rulePattern = getTocRule(bookStream, charset) + //加载章节 + val buffer = ByteArray(BUFFER_SIZE) + //获取到的块起始点,在文件中的位置 + var curOffset: Long = 0 + //block的个数 + var blockPos = 0 + //读取的长度 + var length: Int + var allLength = 0 + //获取文件中的数据到buffer,直到没有数据为止 + while (bookStream.read(buffer, 0, buffer.size).also { length = it } > 0) { + ++blockPos + //如果存在Chapter + if (rulePattern != null) { //将数据转换成String + var blockContent = String(buffer, 0, length, charset) + val lastN = blockContent.lastIndexOf("\n") + if (lastN != 0) { + blockContent = blockContent.substring(0, lastN) + length = blockContent.toByteArray(charset).size + allLength += length + bookStream.seek(allLength.toLong()) + } + //当前Block下使过的String的指针 + var seekPos = 0 + //进行正则匹配 + val matcher: Matcher = rulePattern.matcher(blockContent) + //如果存在相应章节 + while (matcher.find()) { //获取匹配到的字符在字符串中的起始位置 + val chapterStart = matcher.start() + //如果 seekPos == 0 && nextChapterPos != 0 表示当前block处前面有一段内容 + //第一种情况一定是序章 第二种情况可能是上一个章节的内容 + if (seekPos == 0 && chapterStart != 0) { //获取当前章节的内容 + val chapterContent = blockContent.substring(seekPos, chapterStart) + //设置指针偏移 + seekPos += chapterContent.length + if (toc.size == 0) { //如果当前没有章节,那么就是序章 + //加入简介 + book.intro = chapterContent + //创建当前章节 + val curChapter = BookChapter() + curChapter.title = matcher.group() + curChapter.start = chapterContent.toByteArray(charset).size.toLong() + toc.add(curChapter) + } else { //否则就block分割之后,上一个章节的剩余内容 + //获取上一章节 + val lastChapter = toc.last() + //将当前段落添加上一章去 + lastChapter.end = + lastChapter.end!! + chapterContent.toByteArray(charset).size + //创建当前章节 + val curChapter = BookChapter() + curChapter.title = matcher.group() + curChapter.start = lastChapter.end + toc.add(curChapter) + } + } else { //是否存在章节 + if (toc.size != 0) { //获取章节内容 + val chapterContent = blockContent.substring(seekPos, matcher.start()) + seekPos += chapterContent.length + //获取上一章节 + val lastChapter = toc.last() + lastChapter.end = + lastChapter.start!! + chapterContent.toByteArray(charset).size + //创建当前章节 + val curChapter = BookChapter() + curChapter.title = matcher.group() + curChapter.start = lastChapter.end + toc.add(curChapter) + } else { //如果章节不存在则创建章节 + val curChapter = BookChapter() + curChapter.title = matcher.group() + curChapter.start = 0L + curChapter.end = 0L + toc.add(curChapter) + } + } + } + } else { //进行本地虚拟分章 + //章节在buffer的偏移量 + var chapterOffset = 0 + //当前剩余可分配的长度 + var strLength = length + //分章的位置 + var chapterPos = 0 + while (strLength > 0) { + ++chapterPos + //是否长度超过一章 + if (strLength > MAX_LENGTH_WITH_NO_CHAPTER) { //在buffer中一章的终止点 + var end = length + //寻找换行符作为终止点 + for (i in chapterOffset + MAX_LENGTH_WITH_NO_CHAPTER until length) { + if (buffer[i] == BLANK) { + end = i + break + } + } + val chapter = BookChapter() + chapter.title = "第${blockPos}章($chapterPos)" + chapter.start = curOffset + chapterOffset + 1 + chapter.end = curOffset + end + toc.add(chapter) + //减去已经被分配的长度 + strLength -= (end - chapterOffset) + //设置偏移的位置 + chapterOffset = end + } else { + val chapter = BookChapter() + chapter.title = "第" + blockPos + "章" + "(" + chapterPos + ")" + chapter.start = curOffset + chapterOffset + 1 + chapter.end = curOffset + length + toc.add(chapter) + strLength = 0 + } + } + } + + //block的偏移点 + curOffset += length.toLong() + + if (rulePattern != null) { //设置上一章的结尾 + val lastChapter = toc.last() + lastChapter.end = curOffset + } + + //当添加的block太多的时候,执行GC + //当添加的block太多的时候,执行GC + if (blockPos % 15 == 0) { + System.gc() + System.runFinalization() + } + } + + for (i in toc.indices) { + val bean = toc[i] + bean.index = i + bean.bookUrl = book.bookUrl + bean.url = (MD5Utils.md5Encode16(book.originName + i + bean.title) ?: "") + } + bookStream.close() + + System.gc() + System.runFinalization() + return toc } - private fun getTocRule(bookStream: RandomAccessFile): String? { + private fun getTocRule(bookStream: RandomAccessFile, charset: Charset): Pattern? { val tocRules = getTocRules() - var tocRule: String? = null + var rulePattern: Pattern? = null //首先获取128k的数据 - val buffer = ByteArray(10240) + val buffer = ByteArray(BUFFER_SIZE / 4) val length = bookStream.read(buffer, 0, buffer.size) - for (str in tocRules) { - + val content = String(buffer, 0, length, charset) + for (tocRule in tocRules) { + val pattern = Pattern.compile(tocRule.rule, Pattern.MULTILINE) + val matcher = pattern.matcher(content) + if (matcher.find()) { + rulePattern = pattern + break + } } - return tocRule + bookStream.seek(0) + return rulePattern } private fun getTocRules(): List {