From 434f4238f5e31bcf633a8ac49f58048b4684b757 Mon Sep 17 00:00:00 2001 From: gedoor Date: Sat, 1 Jan 2022 10:41:56 +0800 Subject: [PATCH] =?UTF-8?q?=E6=9C=AC=E5=9C=B0=E4=B9=A6=E7=B1=8D=E6=97=A0?= =?UTF-8?q?=E6=9D=83=E9=99=90=E5=88=99=E4=BF=9D=E5=AD=98=E5=88=B0=E8=87=AA?= =?UTF-8?q?=E5=B7=B1=E9=80=89=E5=AE=9A=E7=9A=84=E6=96=87=E4=BB=B6=E5=A4=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../io/legado/app/model/localBook/TextFile.kt | 382 +++++++++--------- 1 file changed, 190 insertions(+), 192 deletions(-) diff --git a/app/src/main/java/io/legado/app/model/localBook/TextFile.kt b/app/src/main/java/io/legado/app/model/localBook/TextFile.kt index 781ab95ea..8ebc1a79f 100644 --- a/app/src/main/java/io/legado/app/model/localBook/TextFile.kt +++ b/app/src/main/java/io/legado/app/model/localBook/TextFile.kt @@ -1,8 +1,6 @@ package io.legado.app.model.localBook import android.net.Uri -import android.system.Os -import android.system.OsConstants.SEEK_SET import io.legado.app.data.appDb import io.legado.app.data.entities.Book import io.legado.app.data.entities.BookChapter @@ -11,8 +9,10 @@ import io.legado.app.help.DefaultData import io.legado.app.model.localBook.LocalBook.cacheFolder import io.legado.app.utils.* import splitties.init.appCtx -import java.io.* -import java.nio.ByteBuffer +import java.io.File +import java.io.FileInputStream +import java.io.FileNotFoundException +import java.io.FileOutputStream import java.nio.charset.Charset import java.util.regex.Matcher import java.util.regex.Pattern @@ -24,198 +24,198 @@ class TextFile(private val book: Book) { @Throws(FileNotFoundException::class) fun getChapterList(): ArrayList { - return getBookFD(book).let { fd -> - try { - val rulePattern = if (book.tocUrl.isNotEmpty()) { + var rulePattern: Pattern? = null + if (book.charset == null || book.tocUrl.isNotEmpty()) { + getBookInputStream(book).use { bis -> + val buffer = ByteArray(BUFFER_SIZE) + var blockContent: String + bis.read(buffer) + book.charset = EncodingDetect.getEncode(buffer) + charset = book.fileCharset() + blockContent = String(buffer, charset) + rulePattern = if (book.tocUrl.isNotEmpty()) { Pattern.compile(book.tocUrl, Pattern.MULTILINE) } else { tocRules.addAll(getTocRules()) - null - } - analyze(fd, book, rulePattern) - } finally { - if (fd.valid()) { - Os.close(fd) + if (blockContent.isEmpty()) { + bis.read(buffer) + book.charset = EncodingDetect.getEncode(buffer) + blockContent = String(buffer, charset) + } + getTocRule(blockContent)?.let { + Pattern.compile(it.rule, Pattern.MULTILINE) + } } } } + return analyze(rulePattern) } - private fun analyze( - bookFd: FileDescriptor, - book: Book, - pattern: Pattern? - ): ArrayList { + private fun analyze(pattern: Pattern?): ArrayList { val toc = arrayListOf() - var tocRule: TxtTocRule? = null - val buffer = ByteArray(BUFFER_SIZE) - var blockContent = "" - if (book.charset == null) { - Os.lseek(bookFd, 0, SEEK_SET) - val length = Os.read(bookFd, buffer, 0, BUFFER_SIZE) - book.charset = EncodingDetect.getEncode(buffer) - blockContent = String(buffer, 0, length, charset) - charset = book.fileCharset() - } - val rulePattern = pattern ?: let { - if (blockContent.isEmpty()) { - Os.lseek(bookFd, 0, SEEK_SET) - val length = Os.read(bookFd, buffer, 0, BUFFER_SIZE) - blockContent = String(buffer, 0, length, charset) - } - tocRule = getTocRule(blockContent) - tocRule?.let { - Pattern.compile(it.rule, Pattern.MULTILINE) - } - } - //加载章节 - //获取到的块起始点,在文件中的位置 - Os.lseek(bookFd, 0, SEEK_SET) - var curOffset: Long = 0 - //block的个数 - var blockPos = 0 - //读取的长度 - var length: Int - var allLength = 0 - val xx = ByteBuffer.allocate(BUFFER_SIZE) - //获取文件中的数据到buffer,直到没有数据为止 - while (Os.read(bookFd, xx).also { length = it } > 0) { - blockPos++ - //如果存在Chapter - if (rulePattern != null) { - //将数据转换成String, 不能超过length - blockContent = String(xx.array(), 0, length, charset) - val lastN = blockContent.lastIndexOf("\n") - if (lastN > 0) { - blockContent = blockContent.substring(0, lastN) - length = blockContent.toByteArray(charset).size - allLength += length - Os.lseek(bookFd, allLength.toLong(), SEEK_SET) + getBookInputStream(book).use { bis -> + var tocRule: TxtTocRule? = null + val buffer = ByteArray(BUFFER_SIZE) + var blockContent: String + val rulePattern = pattern ?: let { + val length = bis.read(buffer) + bis.skip(-length.toLong()) + blockContent = String(buffer, charset) + tocRule = getTocRule(blockContent) + tocRule?.let { + Pattern.compile(it.rule, Pattern.MULTILINE) } - //当前Block下使过的String的指针 - var seekPos = 0 - //进行正则匹配 - val matcher: Matcher = rulePattern.matcher(blockContent) - //如果存在相应章节 - while (matcher.find()) { //获取匹配到的字符在字符串中的起始位置 - val chapterStart = matcher.start() - //获取章节内容 - val chapterContent = blockContent.substring(seekPos, chapterStart) - val chapterLength = chapterContent.toByteArray(charset).size - val lastStart = toc.lastOrNull()?.start ?: 0 - if (curOffset + chapterLength - lastStart > 50000 && pattern == null) { - //移除不匹配的规则 - tocRules.remove(tocRule) - return analyze(bookFd, book, null) + } + //加载章节 + var curOffset: Long = 0 + //block的个数 + var blockPos = 0 + //读取的长度 + var length: Int + //获取文件中的数据到buffer,直到没有数据为止 + while (bis.read(buffer).also { length = it } > 0) { + blockPos++ + //如果存在Chapter + if (rulePattern != null) { + //将数据转换成String, 不能超过length + blockContent = String(buffer, 0, length, charset) + val lastN = blockContent.lastIndexOf("\n") + if (lastN > 0) { + blockContent = blockContent.substring(0, lastN) + val blockContentSize = blockContent.toByteArray(charset).size + bis.skip(-(length - blockContentSize).toLong()) + length = blockContentSize } - //如果 seekPos == 0 && nextChapterPos != 0 表示当前block处前面有一段内容 - //第一种情况一定是序章 第二种情况是上一个章节的内容 - if (seekPos == 0 && chapterStart != 0) { //获取当前章节的内容 - if (toc.isEmpty()) { //如果当前没有章节,那么就是序章 - //加入简介 - if (StringUtils.trim(chapterContent).isNotEmpty()) { - val qyChapter = BookChapter() - qyChapter.title = "前言" - qyChapter.start = 0 - qyChapter.end = chapterLength.toLong() - toc.add(qyChapter) - } - //创建当前章节 - val curChapter = BookChapter() - curChapter.title = matcher.group() - curChapter.start = chapterLength.toLong() - toc.add(curChapter) - } else { //否则就block分割之后,上一个章节的剩余内容 - //获取上一章节 - val lastChapter = toc.last() - //将当前段落添加上一章去 - lastChapter.end = - lastChapter.end!! + chapterLength.toLong() - //创建当前章节 - val curChapter = BookChapter() - curChapter.title = matcher.group() - curChapter.start = lastChapter.end - toc.add(curChapter) + //当前Block下使过的String的指针 + var seekPos = 0 + //进行正则匹配 + val matcher: Matcher = rulePattern.matcher(blockContent) + //如果存在相应章节 + while (matcher.find()) { //获取匹配到的字符在字符串中的起始位置 + val chapterStart = matcher.start() + //获取章节内容 + val chapterContent = blockContent.substring(seekPos, chapterStart) + val chapterLength = chapterContent.toByteArray(charset).size + val lastStart = toc.lastOrNull()?.start ?: 0 + if (curOffset + chapterLength - lastStart > 50000 && pattern == null) { + //移除不匹配的规则 + tocRules.remove(tocRule) + bis.close() + return analyze(null) } - } else { - if (toc.isNotEmpty()) { //获取章节内容 - //获取上一章节 - val lastChapter = toc.last() - lastChapter.end = - lastChapter.start!! + chapterContent.toByteArray(charset).size.toLong() - //创建当前章节 - val curChapter = BookChapter() - curChapter.title = matcher.group() - curChapter.start = lastChapter.end - toc.add(curChapter) - } else { //如果章节不存在则创建章节 - val curChapter = BookChapter() - curChapter.title = matcher.group() - curChapter.start = 0 - curChapter.end = 0 - toc.add(curChapter) + //如果 seekPos == 0 && nextChapterPos != 0 表示当前block处前面有一段内容 + //第一种情况一定是序章 第二种情况是上一个章节的内容 + if (seekPos == 0 && chapterStart != 0) { //获取当前章节的内容 + if (toc.isEmpty()) { //如果当前没有章节,那么就是序章 + //加入简介 + if (StringUtils.trim(chapterContent).isNotEmpty()) { + val qyChapter = BookChapter() + qyChapter.title = "前言" + qyChapter.start = 0 + qyChapter.end = chapterLength.toLong() + toc.add(qyChapter) + } + //创建当前章节 + val curChapter = BookChapter() + curChapter.title = matcher.group() + curChapter.start = chapterLength.toLong() + toc.add(curChapter) + } else { //否则就block分割之后,上一个章节的剩余内容 + //获取上一章节 + val lastChapter = toc.last() + //将当前段落添加上一章去 + lastChapter.end = + lastChapter.end!! + chapterLength.toLong() + //创建当前章节 + val curChapter = BookChapter() + curChapter.title = matcher.group() + curChapter.start = lastChapter.end + toc.add(curChapter) + } + } else { + if (toc.isNotEmpty()) { //获取章节内容 + //获取上一章节 + val lastChapter = toc.last() + lastChapter.end = + lastChapter.start!! + chapterContent.toByteArray(charset).size.toLong() + //创建当前章节 + val curChapter = BookChapter() + curChapter.title = matcher.group() + curChapter.start = lastChapter.end + toc.add(curChapter) + } else { //如果章节不存在则创建章节 + val curChapter = BookChapter() + curChapter.title = matcher.group() + curChapter.start = 0 + curChapter.end = 0 + toc.add(curChapter) + } } + //设置指针偏移 + seekPos += chapterContent.length } - //设置指针偏移 - seekPos += chapterContent.length - } - if (seekPos == 0 && length > 50000 && pattern == null) { - //移除不匹配的规则 - tocRules.remove(tocRule) - return analyze(bookFd, book, null) - } - } else { //进行本地虚拟分章 - //章节在buffer的偏移量 - var chapterOffset = 0 - //当前剩余可分配的长度 - var strLength = length - //分章的位置 - var chapterPos = 0 - while (strLength > 0) { - ++chapterPos - //是否长度超过一章 - if (strLength > MAX_LENGTH_WITH_NO_CHAPTER) { //在buffer中一章的终止点 - var end = length - //寻找换行符作为终止点 - for (i in chapterOffset + MAX_LENGTH_WITH_NO_CHAPTER until length) { - if (buffer[i] == BLANK) { - end = i - break + if (seekPos == 0 && length > 50000 && pattern == null) { + //移除不匹配的规则 + tocRules.remove(tocRule) + bis.close() + return analyze(null) + } + } else { //进行本地虚拟分章 + //章节在buffer的偏移量 + var chapterOffset = 0 + //当前剩余可分配的长度 + var strLength = length + //分章的位置 + var chapterPos = 0 + while (strLength > 0) { + ++chapterPos + //是否长度超过一章 + if (strLength > MAX_LENGTH_WITH_NO_CHAPTER) { //在buffer中一章的终止点 + var end = length + //寻找换行符作为终止点 + for (i in chapterOffset + MAX_LENGTH_WITH_NO_CHAPTER until length) { + if (buffer[i] == BLANK) { + end = i + break + } } + val chapter = BookChapter() + chapter.title = "第${blockPos}章($chapterPos)" + chapter.start = curOffset + chapterOffset + 1 + chapter.end = curOffset + end + toc.add(chapter) + //减去已经被分配的长度 + strLength -= (end - chapterOffset) + //设置偏移的位置 + chapterOffset = end + } else { + val chapter = BookChapter() + chapter.title = "第" + blockPos + "章" + "(" + chapterPos + ")" + chapter.start = curOffset + chapterOffset + 1 + chapter.end = curOffset + length + toc.add(chapter) + strLength = 0 } - val chapter = BookChapter() - chapter.title = "第${blockPos}章($chapterPos)" - chapter.start = curOffset + chapterOffset + 1 - chapter.end = curOffset + end - toc.add(chapter) - //减去已经被分配的长度 - strLength -= (end - chapterOffset) - //设置偏移的位置 - chapterOffset = end - } else { - val chapter = BookChapter() - chapter.title = "第" + blockPos + "章" + "(" + chapterPos + ")" - chapter.start = curOffset + chapterOffset + 1 - chapter.end = curOffset + length - toc.add(chapter) - strLength = 0 } } - } - //block的偏移点 - curOffset += length.toLong() + //block的偏移点 + curOffset += length.toLong() - if (rulePattern != null) { - //设置上一章的结尾 - val lastChapter = toc.last() - lastChapter.end = curOffset - } + if (rulePattern != null) { + //设置上一章的结尾 + val lastChapter = toc.last() + lastChapter.end = curOffset + } - //当添加的block太多的时候,执行GC - if (blockPos % 15 == 0) { - System.gc() - System.runFinalization() + //当添加的block太多的时候,执行GC + if (blockPos % 15 == 0) { + System.gc() + System.runFinalization() + } + } + tocRule?.let { + book.tocUrl = it.rule } } for (i in toc.indices) { @@ -229,9 +229,7 @@ class TextFile(private val book: Book) { System.gc() System.runFinalization() - tocRule?.let { - book.tocUrl = it.rule - } + return toc } @@ -241,18 +239,22 @@ class TextFile(private val book: Book) { private fun getTocRule(content: String): TxtTocRule? { var txtTocRule: TxtTocRule? = null var maxCs = 0 - for (tocRule in tocRules) { + val removeRules = hashSetOf() + tocRules.forEach { tocRule -> val pattern = Pattern.compile(tocRule.rule, Pattern.MULTILINE) val matcher = pattern.matcher(content) var cs = 0 while (matcher.find()) { cs++ } - if (cs > maxCs) { + if (cs == 0) { + removeRules.add(tocRule) + } else if (cs > maxCs) { maxCs = cs txtTocRule = tocRule } } + tocRules.removeAll(removeRules) return txtTocRule } @@ -274,22 +276,18 @@ class TextFile(private val book: Book) { @Throws(FileNotFoundException::class) fun getContent(book: Book, bookChapter: BookChapter): String { val count = (bookChapter.end!! - bookChapter.start!!).toInt() - val content = ByteArray(count) - getBookFD(book).let { fd -> - try { - Os.lseek(fd, bookChapter.start!!, SEEK_SET) - Os.read(fd, content, 0, count) - } finally { - Os.close(fd) - } + val buffer = ByteArray(count) + getBookInputStream(book).use { bis -> + bis.skip(bookChapter.start!!) + bis.read(buffer) } - return String(content, book.fileCharset()) + return String(buffer, book.fileCharset()) .substringAfter(bookChapter.title) .replace("^[\\n\\s]+".toRegex(), "  ") } @Throws(FileNotFoundException::class) - private fun getBookFD(book: Book): FileDescriptor { + private fun getBookInputStream(book: Book): FileInputStream { if (book.bookUrl.isContentScheme()) { val uri = Uri.parse(book.bookUrl) val bookFile = cacheFolder.getFile(book.name) @@ -302,10 +300,10 @@ class TextFile(private val book: Book) { } } } - return FileInputStream(bookFile).fd + return FileInputStream(bookFile) //return appCtx.contentResolver.openFileDescriptor(uri, "r")!!.fileDescriptor } - return FileInputStream(File(book.bookUrl)).fd + return FileInputStream(File(book.bookUrl)) } private fun getTocRules(): List {