|
|
|
@ -13,34 +13,43 @@ import java.nio.charset.Charset |
|
|
|
|
import java.util.regex.Matcher |
|
|
|
|
import java.util.regex.Pattern |
|
|
|
|
|
|
|
|
|
object AnalyzeTxtFile { |
|
|
|
|
private const val folderName = "bookTxt" |
|
|
|
|
private const val BLANK: Byte = 0x0a |
|
|
|
|
|
|
|
|
|
// Default length of the data read from the file at a time |
|
|
|
|
private const val BUFFER_SIZE = 512 * 1024 |
|
|
|
|
class AnalyzeTxtFile { |
|
|
|
|
|
|
|
|
|
// Maximum length of each chapter when the text has no chapter titles |
|
|
|
|
private const val MAX_LENGTH_WITH_NO_CHAPTER = 10 * 1024 |
|
|
|
|
/**
 * Folder used for TXT book data, resolved lazily on first access.
 *
 * The parent directory is the app's external files dir; when that is null the
 * external cache dir is tried, and finally the app's cacheDir. The sub-folder
 * named by [folderName] is obtained through FileUtils.createFolderIfNotExist.
 */
val cacheFolder: File by lazy {
    val app = App.INSTANCE
    val baseDir = app.getExternalFilesDir(null)
        ?: app.externalCacheDir
        ?: app.cacheDir
    FileUtils.createFolderIfNotExist(baseDir, subDirs = *arrayOf(folderName))
}
|
|
|
|
// Candidate table-of-contents rules. analyze(context, book) fills this from
// getTocRules() when the book has no tocUrl of its own; the chapter scan removes
// a rule again when it yields a chapter longer than 30000 characters.
private val tocRules = arrayListOf<TxtTocRule>() |
|
|
|
|
// Charset of the book file. Assigned from book.fileCharset() in
// analyze(context, book) and used by getTocRule to decode the sampled bytes.
private lateinit var charset: Charset |
|
|
|
|
|
|
|
|
|
/**
 * Builds the chapter list for a local TXT book.
 *
 * The file encoding is detected with EncodingDetect.getEncode and stored on
 * [book]; the resulting charset is kept in [charset] for later decoding.
 * If the book already carries a rule in `tocUrl`, that rule is compiled and
 * used as-is. Otherwise the rules from getTocRules() are loaded into
 * [tocRules] and the chapter scan selects one itself.
 *
 * @param context passed through to getBookFile to locate the book file.
 * @param book the book to analyze; its `charset` is updated here.
 * @return the chapters found by the chapter scan.
 */
fun analyze(context: Context, book: Book): ArrayList<BookChapter> {
    val bookFile = getBookFile(context, book)
    book.charset = EncodingDetect.getEncode(bookFile)
    charset = book.fileCharset()
    val rulePattern = if (book.tocUrl.isNotEmpty()) {
        Pattern.compile(book.tocUrl, Pattern.MULTILINE)
    } else {
        tocRules.addAll(getTocRules())
        null
    }
    // Open the file stream; `use` closes it once the scan returns or throws,
    // so the file descriptor is not leaked.
    return RandomAccessFile(bookFile, "r").use { bookStream ->
        analyze(bookStream, book, rulePattern)
    }
}
|
|
|
|
|
|
|
|
|
private fun analyze( |
|
|
|
|
bookStream: RandomAccessFile, |
|
|
|
|
book: Book, |
|
|
|
|
pattern: Pattern? |
|
|
|
|
): ArrayList<BookChapter> { |
|
|
|
|
val toc = arrayListOf<BookChapter>() |
|
|
|
|
var tocRule: TxtTocRule? = null |
|
|
|
|
val rulePattern = pattern ?: let { |
|
|
|
|
tocRule = getTocRule(bookStream) |
|
|
|
|
tocRule?.let { |
|
|
|
|
Pattern.compile(it.rule, Pattern.MULTILINE) |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
//加载章节 |
|
|
|
|
val buffer = ByteArray(BUFFER_SIZE) |
|
|
|
|
//获取到的块起始点,在文件中的位置 |
|
|
|
|
bookStream.seek(0) |
|
|
|
|
var curOffset: Long = 0 |
|
|
|
|
//block的个数 |
|
|
|
|
var blockPos = 0 |
|
|
|
@ -70,6 +79,10 @@ object AnalyzeTxtFile { |
|
|
|
|
val chapterStart = matcher.start() |
|
|
|
|
//获取章节内容 |
|
|
|
|
val chapterContent = blockContent.substring(seekPos, chapterStart) |
|
|
|
|
if (chapterContent.length > 30000 && pattern == null) { |
|
|
|
|
tocRules.remove(tocRule) |
|
|
|
|
return analyze(bookStream, book, null) |
|
|
|
|
} |
|
|
|
|
//如果 seekPos == 0 && nextChapterPos != 0 表示当前block处前面有一段内容 |
|
|
|
|
//第一种情况一定是序章 第二种情况可能是上一个章节的内容 |
|
|
|
|
if (seekPos == 0 && chapterStart != 0) { //获取当前章节的内容 |
|
|
|
@ -161,9 +174,46 @@ object AnalyzeTxtFile { |
|
|
|
|
|
|
|
|
|
System.gc() |
|
|
|
|
System.runFinalization() |
|
|
|
|
tocRule?.let { |
|
|
|
|
book.tocUrl = it.rule |
|
|
|
|
} |
|
|
|
|
return toc |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/**
 * Picks the first rule in [tocRules] whose pattern matches a sample of the book.
 *
 * Reads up to BUFFER_SIZE / 4 bytes (128k) from the stream's current position,
 * decodes them with [charset], and tests each rule in list order using
 * MULTILINE matching. The stream position is left after the bytes read.
 *
 * @param bookStream open stream over the book file.
 * @return the first matching rule, or null when no rule matches or the stream
 *         is already at end of file.
 */
private fun getTocRule(bookStream: RandomAccessFile): TxtTocRule? {
    // Start with a 128k sample of the data.
    val buffer = ByteArray(BUFFER_SIZE / 4)
    val length = bookStream.read(buffer, 0, buffer.size)
    // read() reports end of file as -1; building a String with a negative
    // length would throw, so treat it as "no rule found".
    if (length < 0) return null
    val content = String(buffer, 0, length, charset)
    return tocRules.firstOrNull { tocRule ->
        Pattern.compile(tocRule.rule, Pattern.MULTILINE).matcher(content).find()
    }
}
|
|
|
|
|
|
|
|
|
companion object { |
|
|
|
|
private const val folderName = "bookTxt" |
|
|
|
|
private const val BLANK: Byte = 0x0a |
|
|
|
|
|
|
|
|
|
// Default length of the data read from the file at a time |
|
|
|
|
private const val BUFFER_SIZE = 512 * 1024 |
|
|
|
|
|
|
|
|
|
// Maximum length of each chapter when the text has no chapter titles |
|
|
|
|
private const val MAX_LENGTH_WITH_NO_CHAPTER = 10 * 1024 |
|
|
|
|
/**
 * Lazily resolved folder for TXT book data.
 *
 * Uses the first non-null of: the app's external files dir, its external
 * cache dir, its cacheDir. The [folderName] sub-folder under that directory
 * is obtained through FileUtils.createFolderIfNotExist.
 */
val cacheFolder: File by lazy {
    val app = App.INSTANCE
    val parentDir = app.getExternalFilesDir(null)
        ?: app.externalCacheDir
        ?: app.cacheDir
    FileUtils.createFolderIfNotExist(parentDir, subDirs = *arrayOf(folderName))
}
|
|
|
|
|
|
|
|
|
fun getContent(book: Book, bookChapter: BookChapter): String { |
|
|
|
|
val bookFile = getBookFile(App.INSTANCE, book) |
|
|
|
|
//获取文件流 |
|
|
|
@ -190,29 +240,6 @@ object AnalyzeTxtFile { |
|
|
|
|
return File(book.bookUrl) |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/**
 * Resolves the chapter-title pattern for [book].
 *
 * When the book already has a rule in `tocUrl`, that rule is compiled and
 * returned without touching the stream. Otherwise up to BUFFER_SIZE / 4 bytes
 * (128k) are read from the stream's current position, decoded with [charset],
 * and each rule from getTocRules() is tried in order. The first rule that
 * matches is stored in `book.tocUrl` and its compiled pattern returned.
 * After sampling, the stream is rewound to offset 0.
 *
 * @param book the book whose `tocUrl` is read and possibly updated.
 * @param bookStream open stream over the book file.
 * @param charset charset used to decode the sampled bytes.
 * @return the compiled pattern, or null when no rule matches or the stream is
 *         already at end of file.
 */
private fun getTocRule(book: Book, bookStream: RandomAccessFile, charset: Charset): Pattern? {
    if (book.tocUrl.isNotEmpty()) {
        return Pattern.compile(book.tocUrl, Pattern.MULTILINE)
    }
    val tocRules = getTocRules()
    // Start with a 128k sample of the data.
    val buffer = ByteArray(BUFFER_SIZE / 4)
    val length = bookStream.read(buffer, 0, buffer.size)
    // Rewind right away so every exit path leaves the stream at the start.
    bookStream.seek(0)
    // read() reports end of file as -1; building a String with a negative
    // length would throw, so treat it as "no rule found".
    if (length < 0) return null
    val content = String(buffer, 0, length, charset)
    for (tocRule in tocRules) {
        val pattern = Pattern.compile(tocRule.rule, Pattern.MULTILINE)
        if (pattern.matcher(content).find()) {
            book.tocUrl = tocRule.rule
            return pattern
        }
    }
    return null
}
|
|
|
|
|
|
|
|
|
private fun getTocRules(): List<TxtTocRule> { |
|
|
|
|
val rules = App.db.txtTocRule().all |
|
|
|
|
if (rules.isEmpty()) { |
|
|
|
@ -231,3 +258,5 @@ object AnalyzeTxtFile { |
|
|
|
|
return emptyList() |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
} |