pull/84/head
kunfei 5 years ago
parent 90d10a907d
commit 8c22b09cc2
  1. 177
      app/src/main/java/io/legado/app/model/localBook/AnalyzeTxtFile.kt

@ -9,9 +9,17 @@ import io.legado.app.data.entities.TxtTocRule
import io.legado.app.utils.*
import java.io.File
import java.io.RandomAccessFile
import java.nio.charset.Charset
import java.util.regex.Matcher
import java.util.regex.Pattern
object AnalyzeTxtFile {
// Name of the sub-directory passed to FileUtils.createFileIfNotExist when building cacheFolder.
private const val folderName = "bookTxt"
// Byte value 0x0a (line feed); analyze() searches for it to end a virtual chapter on a line break.
private const val BLANK: Byte = 0x0a
// Default number of bytes read from the file per block (512 KiB).
private const val BUFFER_SIZE = 512 * 1024
// Maximum length (in bytes) of each chapter when the text has no chapter titles (10 KiB).
private const val MAX_LENGTH_WITH_NO_CHAPTER = 10 * 1024
// Lazily computed folder for cached book files: the result of
// FileUtils.createFileIfNotExist(rootFile, folderName).
// NOTE(review): the diff hunk header inside this initializer hides at least one line,
// so the complete fallback chain for rootFile is not visible here — confirm against the full file.
private val cacheFolder: File by lazy {
val rootFile = App.INSTANCE.getExternalFilesDir(null)
?: App.INSTANCE.externalCacheDir
@ -19,7 +27,7 @@ object AnalyzeTxtFile {
FileUtils.createFileIfNotExist(rootFile, subDirs = *arrayOf(folderName))
}
/**
 * Splits the cached txt file of [book] into chapters and returns them as a list.
 *
 * The file is read in blocks of BUFFER_SIZE bytes. When getTocRule finds a matching title
 * pattern, every match in a block starts a new chapter whose start/end are byte offsets;
 * otherwise each block is cut into "virtual" chapters of roughly
 * MAX_LENGTH_WITH_NO_CHAPTER bytes, ending on a line feed where one is found.
 * Also writes book.charset and, when text precedes the first title, book.intro.
 *
 * NOTE(review): this text is a rendered diff. Lines that appear twice with different content
 * (e.g. the two signatures directly below) presumably show the removed line followed by the
 * added line, and hunk headers hide some lines — confirm against the real file.
 *
 * @param context not used in the visible lines; presumably used in the lines hidden by the hunk header
 * @param book the book whose bookUrl/originName locate the file; charset and intro are updated
 * @return the chapters, each with index, bookUrl and url assigned
 */
fun analyze(context: Context, book: Book) {
fun analyze(context: Context, book: Book): ArrayList<BookChapter> {
val uri = Uri.parse(book.bookUrl)
val bookFile = FileUtils.getFile(cacheFolder, book.originName, subDirs = *arrayOf())
if (!bookFile.exists()) {
@ -28,26 +36,177 @@ object AnalyzeTxtFile {
bookFile.writeBytes(it)
}
}
book.charset = EncodingDetect.getEncode(bookFile)
val charset = charset(EncodingDetect.getEncode(bookFile))
book.charset = charset.name()
val toc = arrayListOf<BookChapter>()
// Open the file for random access
// NOTE(review): the stream is only closed by the explicit close() near the end of the
// function; an exception thrown in between would leave it open.
val bookStream = RandomAccessFile(bookFile, "r")
val tocRule = getTocRule(bookStream)
val rulePattern = getTocRule(bookStream, charset)
// Load the chapters
val buffer = ByteArray(BUFFER_SIZE)
// Position in the file at which the current block starts
var curOffset: Long = 0
// Number of blocks read so far
var blockPos = 0
// Number of bytes read into the buffer
var length: Int
// Running total of consumed bytes; used as the seek target after a block is truncated
// NOTE(review): this is an Int used as a file offset — presumably overflows for files over 2 GiB.
var allLength = 0
// Keep reading file data into the buffer until nothing is left
while (bookStream.read(buffer, 0, buffer.size).also { length = it } > 0) {
++blockPos
// If a chapter pattern exists
if (rulePattern != null) { // Convert the data to a String
var blockContent = String(buffer, 0, length, charset)
// Cut the block back to just before its last line feed and re-seek the stream there, so
// the next read starts at that line feed.
// NOTE(review): lastIndexOf returns -1 when the block contains no "\n"; the check below
// only excludes 0, so substring(0, -1) would throw in that case — confirm intent.
val lastN = blockContent.lastIndexOf("\n")
if (lastN != 0) {
blockContent = blockContent.substring(0, lastN)
length = blockContent.toByteArray(charset).size
allLength += length
bookStream.seek(allLength.toLong())
}
// Pointer into the part of this block's String that has already been consumed
var seekPos = 0
// Run the regex match
val matcher: Matcher = rulePattern.matcher(blockContent)
// While there are matching chapter titles
while (matcher.find()) { // Start index of the matched text within the string
val chapterStart = matcher.start()
// seekPos == 0 && nextChapterPos != 0 means there is some text at the front of this block
// In the first case it must be the prologue; in the second it may be content of the previous chapter
if (seekPos == 0 && chapterStart != 0) { // Get the content in front of this chapter title
val chapterContent = blockContent.substring(seekPos, chapterStart)
// Advance the pointer
seekPos += chapterContent.length
if (toc.size == 0) { // No chapter yet, so this text is the prologue
// Store it as the book introduction
book.intro = chapterContent
// Create the current chapter
val curChapter = BookChapter()
curChapter.title = matcher.group()
curChapter.start = chapterContent.toByteArray(charset).size.toLong()
toc.add(curChapter)
} else { // Otherwise it is the rest of the previous chapter, split by the block boundary
// Get the previous chapter
val lastChapter = toc.last()
// Append this text to the previous chapter by extending its end offset
lastChapter.end =
lastChapter.end!! + chapterContent.toByteArray(charset).size
// Create the current chapter
val curChapter = BookChapter()
curChapter.title = matcher.group()
curChapter.start = lastChapter.end
toc.add(curChapter)
}
} else { // Does a chapter already exist?
if (toc.size != 0) { // Get the chapter content
val chapterContent = blockContent.substring(seekPos, matcher.start())
seekPos += chapterContent.length
// Get the previous chapter
val lastChapter = toc.last()
lastChapter.end =
lastChapter.start!! + chapterContent.toByteArray(charset).size
// Create the current chapter
val curChapter = BookChapter()
curChapter.title = matcher.group()
curChapter.start = lastChapter.end
toc.add(curChapter)
} else { // No chapter exists yet, so create the first one
val curChapter = BookChapter()
curChapter.title = matcher.group()
curChapter.start = 0L
curChapter.end = 0L
toc.add(curChapter)
}
}
}
} else { // Split into local virtual chapters
// Offset of the chapter within the buffer
var chapterOffset = 0
// Remaining length that can still be assigned to chapters
var strLength = length
// Ordinal of the virtual chapter within this block
var chapterPos = 0
while (strLength > 0) {
++chapterPos
// Is more than one chapter's worth of data left?
if (strLength > MAX_LENGTH_WITH_NO_CHAPTER) { // End point of one chapter within the buffer
var end = length
// Look for a line feed to use as the end point
for (i in chapterOffset + MAX_LENGTH_WITH_NO_CHAPTER until length) {
if (buffer[i] == BLANK) {
end = i
break
}
}
val chapter = BookChapter()
// NOTE(review): this title format differs from the one built in the else branch below — confirm which is intended.
chapter.title = "${blockPos}章($chapterPos)"
chapter.start = curOffset + chapterOffset + 1
chapter.end = curOffset + end
toc.add(chapter)
// Subtract the length that has been assigned
strLength -= (end - chapterOffset)
// Move the offset to the end point
chapterOffset = end
} else {
val chapter = BookChapter()
chapter.title = "" + blockPos + "" + "(" + chapterPos + ")"
chapter.start = curOffset + chapterOffset + 1
chapter.end = curOffset + length
toc.add(chapter)
strLength = 0
}
}
}
// Advance the block offset
curOffset += length.toLong()
if (rulePattern != null) { // Set the end of the last chapter
val lastChapter = toc.last()
lastChapter.end = curOffset
}
// When many blocks have been added, run GC
// When many blocks have been added, run GC
if (blockPos % 15 == 0) {
System.gc()
System.runFinalization()
}
}
// Assign index, owning book URL and a url built from MD5Utils.md5Encode16 to every chapter
for (i in toc.indices) {
val bean = toc[i]
bean.index = i
bean.bookUrl = book.bookUrl
bean.url = (MD5Utils.md5Encode16(book.originName + i + bean.title) ?: "")
}
bookStream.close()
System.gc()
System.runFinalization()
return toc
}
/**
 * Reads a sample from the current position of [bookStream], decodes it with [charset], and
 * returns the compiled pattern (MULTILINE) of the first rule from getTocRules() that finds a
 * match in the sample, or null when none does. The stream is sought back to 0 before returning.
 *
 * NOTE(review): this text is a rendered diff; the doubled signature, buffer, loop-header and
 * return lines presumably show the removed line followed by the added line — confirm against
 * the real file.
 * NOTE(review): RandomAccessFile.read returns -1 at end of file, so for an empty file `length`
 * would be -1 when passed to the String constructor — confirm whether empty files can reach here.
 */
private fun getTocRule(bookStream: RandomAccessFile): String? {
private fun getTocRule(bookStream: RandomAccessFile, charset: Charset): Pattern? {
val tocRules = getTocRules()
var tocRule: String? = null
var rulePattern: Pattern? = null
// First fetch 128k of data (BUFFER_SIZE / 4; the other buffer line allocates 10240 bytes)
val buffer = ByteArray(10240)
val buffer = ByteArray(BUFFER_SIZE / 4)
val length = bookStream.read(buffer, 0, buffer.size)
for (str in tocRules) {
val content = String(buffer, 0, length, charset)
for (tocRule in tocRules) {
val pattern = Pattern.compile(tocRule.rule, Pattern.MULTILINE)
val matcher = pattern.matcher(content)
if (matcher.find()) {
rulePattern = pattern
break
}
}
return tocRule
// Rewind so the caller reads the file from the beginning
bookStream.seek(0)
return rulePattern
}
private fun getTocRules(): List<TxtTocRule> {

Loading…
Cancel
Save