|
|
|
@ -4,7 +4,9 @@ import android.graphics.Bitmap |
|
|
|
|
import android.graphics.BitmapFactory |
|
|
|
|
import android.net.Uri |
|
|
|
|
import android.text.TextUtils |
|
|
|
|
import io.legado.app.data.appDb |
|
|
|
|
import io.legado.app.data.entities.BookChapter |
|
|
|
|
import io.legado.app.data.entities.EpubChapter |
|
|
|
|
import io.legado.app.utils.* |
|
|
|
|
import net.sf.jazzlib.ZipEntry |
|
|
|
|
import net.sf.jazzlib.ZipInputStream |
|
|
|
@ -23,7 +25,7 @@ import java.io.InputStream |
|
|
|
|
import java.nio.charset.Charset |
|
|
|
|
import java.util.* |
|
|
|
|
|
|
|
|
|
class EPUBFile(val book: io.legado.app.data.entities.Book) { |
|
|
|
|
class EPUBFile(var book: io.legado.app.data.entities.Book) { |
|
|
|
|
|
|
|
|
|
companion object { |
|
|
|
|
private var eFile: EPUBFile? = null |
|
|
|
@ -34,6 +36,7 @@ class EPUBFile(val book: io.legado.app.data.entities.Book) { |
|
|
|
|
eFile = EPUBFile(book) |
|
|
|
|
return eFile!! |
|
|
|
|
} |
|
|
|
|
eFile?.book = book |
|
|
|
|
return eFile!! |
|
|
|
|
} |
|
|
|
|
|
|
|
|
@ -54,6 +57,11 @@ class EPUBFile(val book: io.legado.app.data.entities.Book) { |
|
|
|
|
): InputStream? { |
|
|
|
|
return getEFile(book).getImage(href) |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/**
 * Fills in the book information for [book].
 *
 * Resolves the [EPUBFile] for [book] through [getEFile] and runs that
 * instance's own `getBookInfo()`, which writes its results onto the book.
 *
 * @param book the book whose information should be read from its EPUB.
 */
@Synchronized
fun getBookInfo(book: io.legado.app.data.entities.Book) {
    val epubFile = getEFile(book)
    epubFile.getBookInfo()
}
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/* EPUB-side book object (the unqualified `Book` type, not io.legado.app.data.entities.Book). It is null when no EPUB is available; the instance-level getBookInfo() reports that case as an import failure. */
private var epubBook: Book? = null |
|
|
|
@ -77,6 +85,7 @@ class EPUBFile(val book: io.legado.app.data.entities.Book) { |
|
|
|
|
) |
|
|
|
|
} |
|
|
|
|
if (!File(book.coverUrl!!).exists()) { |
|
|
|
|
/*部分书籍DRM处理后,封面获取异常,待优化*/ |
|
|
|
|
epubBook!!.coverImage?.inputStream?.use { |
|
|
|
|
val cover = BitmapFactory.decodeStream(it) |
|
|
|
|
val out = FileOutputStream(FileUtils.createFileIfNotExist(book.coverUrl!!)) |
|
|
|
@ -102,10 +111,8 @@ class EPUBFile(val book: io.legado.app.data.entities.Book) { |
|
|
|
|
zipEntry = inZip.nextEntry |
|
|
|
|
if ((zipEntry == null) || zipEntry.isDirectory || zipEntry == ZipEntry("<error>")) continue |
|
|
|
|
val resource = ResourceUtil.createResource(zipEntry, inZip) |
|
|
|
|
if (resource.mediaType == MediatypeService.XHTML) { |
|
|
|
|
resource.inputEncoding = "UTF-8"; |
|
|
|
|
} |
|
|
|
|
if (zipEntry.name.endsWith("opf")) { |
|
|
|
|
if (resource.mediaType == MediatypeService.XHTML) resource.inputEncoding = "UTF-8"; |
|
|
|
|
if (zipEntry.name.endsWith(".opf")) { |
|
|
|
|
/*掌上书苑有很多自制书OPF的nameSpace格式不标准,强制修复成正确的格式*/ |
|
|
|
|
val newS = String(resource.data).replace( |
|
|
|
|
"\\smlns=\"http://www.idpf.org/2007/opf\"".toRegex(), |
|
|
|
@ -123,13 +130,61 @@ class EPUBFile(val book: io.legado.app.data.entities.Book) { |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/**
 * Builds the full text of [chapter].
 *
 * The chapter's own resource is rendered first. Any child entries stored for
 * this book and chapter url (chapters that are spread over several HTML files)
 * are rendered afterwards and appended, separated by a newline.
 *
 * Parts that cannot be rendered are skipped instead of being concatenated as
 * the literal text "null".
 *
 * @param chapter the chapter to load.
 * @return the combined chapter text, or null when no part could be rendered.
 */
private fun getContent(chapter: BookChapter): String? {
    val parts = ArrayList<String>()

    /* Text of the chapter's own resource. */
    getChildChapter(chapter, chapter.url)?.let { parts.add(it) }

    /* If the chapter consists of several HTML files, append their text in order. */
    val childContents = appDb.epubChapterDao.get(book.bookUrl, chapter.url)
    if (childContents != null) {
        for (child in childContents) {
            getChildChapter(chapter, child.href)?.let { parts.add(it) }
        }
    }

    return if (parts.isEmpty()) null else parts.joinToString("\n")
}
|
|
|
|
|
|
|
|
|
/**
 * Renders one HTML resource belonging to [chapter] as cleaned-up HTML text.
 *
 * Steps, in order:
 * 1. Parse the resource addressed by [href] and take its body.
 * 2. When [href] is the chapter's own url, trim the body to the chapter using
 *    its start/end fragment ids.
 * 3. Remove heading and image tags when the book's delete-tag flags ask for it.
 * 4. Remove `script` and `style` elements from the remaining body children.
 * 5. Optionally reduce ruby annotations to their base character, then format.
 *
 * @param chapter chapter the resource belongs to; supplies the fragment ids.
 * @param href href of the resource to render.
 * @return the formatted HTML, or null when no EPUB is loaded.
 */
private fun getChildChapter(chapter: BookChapter, href: String): String? {
    val eBook = epubBook ?: return null
    // NOTE(review): getByHref is dereferenced without a null check, as before — confirm it cannot
    // return null for hrefs that come from the chapter list.
    val body = Jsoup.parse(String(eBook.resources.getByHref(href).data, mCharset)).body()

    if (chapter.url == href) {
        val startFragmentId = chapter.startFragmentId
        val endFragmentId = chapter.endFragmentId
        /* In some books the resource addressed by href holds several chapters, so the fragment ids are needed to cut out the current one. */
        /* Note: this noticeably increases load time; caching the content locally after the first load would avoid repeating it. */
        if (!startFragmentId.isNullOrBlank()) {
            body.getElementById(startFragmentId)?.previousElementSiblings()?.remove()
        }
        if (!endFragmentId.isNullOrBlank() && endFragmentId != startFragmentId) {
            body.getElementById(endFragmentId)?.nextElementSiblings()?.remove()
        }
    }

    /* Optionally strip heading tags from the text; some books repeat the chapter title there (still to be improved). */
    if (book.getDelTag(io.legado.app.data.entities.Book.hTag)) {
        for (heading in listOf("h1", "h2", "h3", "h4", "h5", "h6")) {
            body.getElementsByTag(heading)?.remove()
        }
        // Removal by matching the chapter title against element text stays disabled:
        // body.getElementsMatchingOwnText(chapter.title)?.remove()
    }

    /* Optionally strip img tags; image support still needs improvement. */
    if (book.getDelTag(io.legado.app.data.entities.Book.imgTag)) {
        body.getElementsByTag("img")?.remove()
    }

    val elements = body.children()
    elements.select("script").remove()
    elements.select("style").remove()

    /* Optionally strip ruby tags; annotation support still needs improvement. */
    // The previous early `return elements.outerHtml().htmlFormat()` at this point made the
    // ruby handling below unreachable.
    var html = elements.outerHtml()
    if (book.getDelTag(io.legado.app.data.entities.Book.rubyTag)) {
        html = html.replace("<ruby>\\s?([\\u4e00-\\u9fa5])\\s?.*?</ruby>".toRegex(), "$1")
    }
    return html.htmlFormat()
}
|
|
|
@ -139,11 +194,13 @@ class EPUBFile(val book: io.legado.app.data.entities.Book) { |
|
|
|
|
return epubBook?.resources?.getByHref(abHref)?.inputStream |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
private fun getChapterList(): ArrayList<BookChapter> { |
|
|
|
|
val chapterList = ArrayList<BookChapter>() |
|
|
|
|
epubBook?.let { eBook -> |
|
|
|
|
val metadata = eBook.metadata |
|
|
|
|
book.name = metadata.firstTitle |
|
|
|
|
private fun getBookInfo() { |
|
|
|
|
if (epubBook == null) { |
|
|
|
|
eFile = null |
|
|
|
|
book.intro = "书籍导入异常" |
|
|
|
|
} else { |
|
|
|
|
val metadata = epubBook!!.metadata |
|
|
|
|
book.name = book.originName |
|
|
|
|
if (metadata.authors.size > 0) { |
|
|
|
|
val author = |
|
|
|
|
metadata.authors[0].toString().replace("^, |, $".toRegex(), "") |
|
|
|
@ -152,7 +209,12 @@ class EPUBFile(val book: io.legado.app.data.entities.Book) { |
|
|
|
|
if (metadata.descriptions.size > 0) { |
|
|
|
|
book.intro = Jsoup.parse(metadata.descriptions[0]).text() |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
private fun getChapterList(): ArrayList<BookChapter> { |
|
|
|
|
val chapterList = ArrayList<BookChapter>() |
|
|
|
|
epubBook?.let { eBook -> |
|
|
|
|
val refs = eBook.tableOfContents.tocReferences |
|
|
|
|
if (refs == null || refs.isEmpty()) { |
|
|
|
|
val spineReferences = eBook.spine.spineReferences |
|
|
|
@ -167,7 +229,7 @@ class EPUBFile(val book: io.legado.app.data.entities.Book) { |
|
|
|
|
val doc = |
|
|
|
|
Jsoup.parse(String(resource.data, mCharset)) |
|
|
|
|
val elements = doc.getElementsByTag("title") |
|
|
|
|
if (elements.size > 0) { |
|
|
|
|
if (elements != null && elements.size > 0) { |
|
|
|
|
title = elements[0].text() |
|
|
|
|
} |
|
|
|
|
} catch (e: IOException) { |
|
|
|
@ -187,10 +249,12 @@ class EPUBFile(val book: io.legado.app.data.entities.Book) { |
|
|
|
|
i++ |
|
|
|
|
} |
|
|
|
|
} else { |
|
|
|
|
parseFirstPage(chapterList, refs) |
|
|
|
|
parseMenu(chapterList, refs, 0) |
|
|
|
|
for (i in chapterList.indices) { |
|
|
|
|
chapterList[i].index = i |
|
|
|
|
} |
|
|
|
|
getChildChapter(chapterList) |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
book.latestChapterTitle = chapterList.lastOrNull()?.title |
|
|
|
@ -198,6 +262,82 @@ class EPUBFile(val book: io.legado.app.data.entities.Book) { |
|
|
|
|
return chapterList |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/*获取当前章节的子章节。部分书籍一个章节包含多个html文件,(一些精排书籍,每一章节正文前的标题、标题封面、引言等都会有独立html)*/ |
|
|
|
|
/*需在读取常规章节列表后调用,遍历书籍全内容,根据href检索原不包含在章节内的html归属父章节*/ |
|
|
|
|
/**
 * Rebuilds the stored child-chapter entries for this book.
 *
 * Walks the EPUB contents in order while advancing through [chapterList].
 * A content whose href equals the url of the next expected chapter becomes the
 * current parent. Any other content whose media type contains "htm" is recorded
 * as an [EpubChapter] child of the current parent, once a parent has been seen.
 * The entries previously stored for this book are deleted, and the new ones are
 * inserted when there are any.
 *
 * Does nothing when no EPUB is loaded.
 *
 * @param chapterList the regular chapter list, in reading order.
 */
private fun getChildChapter(chapterList: ArrayList<BookChapter>) {
    val eBook = epubBook ?: return
    val contents = eBook.contents
    val childChapters = ArrayList<EpubChapter>()

    var currentParent: String? = null
    var nextChapter = 0
    for (content in contents.orEmpty()) {
        val startsNextChapter =
            nextChapter < chapterList.size && content.href == chapterList[nextChapter].url
        if (startsNextChapter) {
            currentParent = content.href
            nextChapter++
            continue
        }
        if (!currentParent.isNullOrBlank() && content.mediaType.toString().contains("htm")) {
            val child = EpubChapter()
            child.bookUrl = book.bookUrl
            child.href = content.href
            child.parentHref = currentParent
            childChapters.add(child)
        }
    }

    appDb.epubChapterDao.deleteByName(book.bookUrl)
    if (childChapters.size > 0) {
        appDb.epubChapterDao.insert(*childChapters.toTypedArray())
    }
}
|
|
|
|
|
|
|
|
|
/*获取书籍起始页内容。部分书籍第一章之前存在封面,引言,扉页等内容*/ |
|
|
|
|
/* Title extraction: title styles differ widely between books, so the formatting still needs improvement. */ |
|
|
|
|
/* Running count of chapters appended while the chapter list is built: parseFirstPage resets it to 0, and it is incremented after each chapter is added, so (durIndex - 1) indexes the chapter added just before. */
private var durIndex = 0 |
|
|
|
|
/**
 * Adds the pages that come before the first table-of-contents entry (cover,
 * preface, title page, ...) to [chapterList] as chapters of their own.
 *
 * Walks the book contents in order and stops at the content whose href equals
 * the complete href of the first entry in [refs]. Contents whose media type
 * does not contain "htm" are skipped. A content without a title takes the text
 * of its first `<title>` element, or a fixed placeholder when there is none.
 *
 * Resets [durIndex] to 0 and increments it for every chapter appended. Each new
 * chapter's start fragment id is also stored as the end fragment id of the
 * chapter appended before it.
 *
 * NOTE(review): indexing with `durIndex - 1` assumes [chapterList] is empty on
 * entry — confirm against the caller.
 *
 * @param chapterList list the leading chapters are appended to.
 * @param refs top-level TOC references; nothing is done when null or empty.
 */
private fun parseFirstPage(
    chapterList: ArrayList<BookChapter>,
    refs: List<TOCReference>?
) {
    val eBook = epubBook ?: return
    val contents = eBook.contents ?: return
    // An empty list used to fail on refs[0]; there is no first chapter to stop at, so do nothing.
    if (refs.isNullOrEmpty()) return
    val firstChapterHref = refs[0].completeHref

    durIndex = 0
    for (content in contents) {
        // Skip non-HTML resources. The earlier index-based `while` loop hit `continue` here
        // without advancing its index, so it never terminated on such a resource.
        if (!content.mediaType.toString().contains("htm")) continue
        /* Stop once the href of the first chapter is reached. */
        if (firstChapterHref == content.href) break

        var title = content.title
        if (TextUtils.isEmpty(title)) {
            val titleTags = Jsoup.parse(
                String(eBook.resources.getByHref(content.href).data, mCharset)
            ).getElementsByTag("title")
            title =
                if (titleTags != null && titleTags.size > 0) titleTags[0].text() else "--卷首--"
        }

        val chapter = BookChapter()
        chapter.bookUrl = book.bookUrl
        chapter.title = title
        chapter.url = content.href
        // The fragment id is whatever follows '#' in the href; there is none without a '#'.
        val href = content.href
        chapter.startFragmentId = if ("#" in href) href.substringAfter("#") else null

        if (durIndex > 0) {
            chapterList[durIndex - 1].endFragmentId = chapter.startFragmentId
        }
        chapterList.add(chapter)
        durIndex++
    }
}
|
|
|
|
|
|
|
|
|
private fun parseMenu( |
|
|
|
|
chapterList: ArrayList<BookChapter>, |
|
|
|
|
refs: List<TOCReference>?, |
|
|
|
@ -210,7 +350,13 @@ class EPUBFile(val book: io.legado.app.data.entities.Book) { |
|
|
|
|
chapter.bookUrl = book.bookUrl |
|
|
|
|
chapter.title = ref.title |
|
|
|
|
chapter.url = ref.completeHref |
|
|
|
|
chapter.startFragmentId = ref.fragmentId |
|
|
|
|
if (durIndex > 0) { |
|
|
|
|
val preIndex = durIndex - 1 |
|
|
|
|
chapterList[preIndex].endFragmentId = chapter.startFragmentId |
|
|
|
|
} |
|
|
|
|
chapterList.add(chapter) |
|
|
|
|
durIndex++ |
|
|
|
|
} |
|
|
|
|
if (ref.children != null && ref.children.isNotEmpty()) { |
|
|
|
|
parseMenu(chapterList, ref.children, level + 1) |
|
|
|
|