Merge pull request #957 from ag2s20150909/master

图片解码优化,epub读取导出优化。
pull/967/head
kunfei 4 years ago committed by GitHub
commit 428660171e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 3
      app/build.gradle
  2. 25
      app/src/main/java/io/legado/app/help/BookHelp.kt
  3. 46
      app/src/main/java/io/legado/app/model/localBook/EpubFile.kt
  4. 7
      app/src/main/java/io/legado/app/model/localBook/LocalBook.kt
  5. 12
      app/src/main/java/io/legado/app/ui/book/cache/CacheViewModel.kt
  6. 4
      app/src/main/java/io/legado/app/ui/book/read/page/provider/ImageProvider.kt
  7. 20
      app/src/main/java/io/legado/app/utils/BitmapUtils.kt
  8. 38
      app/src/main/java/io/legado/app/utils/FileUtils.kt
  9. 38
      epublib/src/main/java/me/ag2s/epublib/domain/FileResourceProvider.java
  10. 254
      epublib/src/main/java/me/ag2s/epublib/epub/DOMUtil.java
  11. 6
      epublib/src/main/java/me/ag2s/epublib/epub/NCXDocumentV2.java
  12. 13
      epublib/src/main/java/me/ag2s/epublib/epub/NCXDocumentV3.java
  13. 353
      epublib/src/main/java/me/ag2s/epublib/epub/PackageDocumentMetadataReader.java
  14. 677
      epublib/src/main/java/me/ag2s/epublib/epub/PackageDocumentReader.java
  15. 32
      epublib/src/main/java/me/ag2s/epublib/epub/ResourcesLoader.java
  16. 11
      epublib/src/main/java/me/ag2s/epublib/util/IOUtil.java

@ -178,7 +178,8 @@ dependencies {
implementation 'com.github.gedoor:rhino-android:1.5'
//
implementation 'com.squareup.okhttp3:okhttp:4.9.0'
//noinspection GradleDependency
implementation 'com.squareup.okhttp3:okhttp:4.9.1'
implementation 'com.ljx.rxhttp:rxhttp:2.5.7'
kapt 'com.ljx.rxhttp:rxhttp-compiler:2.5.7'

@ -1,5 +1,6 @@
package io.legado.app.help
import android.net.Uri
import io.legado.app.constant.AppPattern
import io.legado.app.constant.EventBus
import io.legado.app.data.appDb
@ -55,6 +56,28 @@ object BookHelp {
}
}
/**
 * Returns the cached copy of [book]'s EPUB, creating it on first use.
 *
 * The cache file is resolved through [FileUtils.getFile] from `downloadDir`,
 * `cacheFolderName`, the book's folder name and the fixed name `index.epubx`.
 * When that file does not exist yet, the bytes behind `book.bookUrl` are copied
 * into it: through the content resolver when the URL is a content-scheme URI,
 * otherwise by opening the URL as a plain file path.
 *
 * @param book the local book whose `bookUrl` points at the original EPUB
 * @return the cache file; it may still not exist when the source could not be
 *   opened or the copy failed
 */
fun getEpubFile(book: Book): File {
    val file = FileUtils.getFile(
        downloadDir,
        cacheFolderName,
        book.getFolderName(),
        "index.epubx"
    )
    if (!file.exists()) {
        val input = if (book.bookUrl.isContentScheme()) {
            appCtx.contentResolver.openInputStream(Uri.parse(book.bookUrl))
        } else {
            File(book.bookUrl).inputStream()
        }
        // `use` guarantees the source stream is closed even when the copy fails.
        val copied = input?.use { FileUtils.writeInputStream(file, it) }
        // A failed copy leaves a truncated file behind; remove it so the next call
        // retries instead of handing a corrupt archive to the EPUB reader.
        if (copied == false) {
            file.delete()
        }
    }
    return file
}
suspend fun saveContent(book: Book, bookChapter: BookChapter, content: String) {
if (content.isEmpty()) return
//保存文本
@ -113,7 +136,7 @@ object BookHelp {
)
}
private fun getImageSuffix(src: String): String {
fun getImageSuffix(src: String): String {
var suffix = src.substringAfterLast(".").substringBefore(",")
if (suffix.length > 5) {
suffix = ".jpg"

@ -2,16 +2,16 @@ package io.legado.app.model.localBook
import android.graphics.Bitmap
import android.graphics.BitmapFactory
import android.net.Uri
import android.text.TextUtils
import io.legado.app.data.entities.Book
import io.legado.app.data.entities.BookChapter
import io.legado.app.utils.*
import io.legado.app.help.BookHelp
import io.legado.app.utils.FileUtils
import io.legado.app.utils.HtmlFormatter
import io.legado.app.utils.MD5Utils
import io.legado.app.utils.externalFilesDir
import me.ag2s.epublib.domain.EpubBook
import me.ag2s.epublib.domain.MediaTypes
import me.ag2s.epublib.domain.Resources
import me.ag2s.epublib.epub.EpubReader
import me.ag2s.epublib.util.ResourceUtil
import org.jsoup.Jsoup
import splitties.init.appCtx
import java.io.File
@ -20,8 +20,7 @@ import java.io.IOException
import java.io.InputStream
import java.nio.charset.Charset
import java.util.*
import java.util.zip.ZipEntry
import java.util.zip.ZipInputStream
import java.util.zip.ZipFile
class EpubFile(var book: Book) {
@ -30,6 +29,7 @@ class EpubFile(var book: Book) {
@Synchronized
private fun getEFile(book: Book): EpubFile {
BookHelp.getEpubFile(book)
if (eFile == null || eFile?.book?.bookUrl != book.bookUrl) {
eFile = EpubFile(book)
return eFile!!
@ -101,32 +101,12 @@ class EpubFile(var book: Book) {
/*重写epub文件解析代码,直接读出压缩包文件生成Resources给epublib,这样的好处是可以逐一修改某些文件的格式错误*/
private fun readEpub(): EpubBook? {
try {
val input = if (book.bookUrl.isContentScheme()) {
val uri = Uri.parse(book.bookUrl)
appCtx.contentResolver.openInputStream(uri)
} else {
File(book.bookUrl).inputStream()
}
input ?: return null
val inZip = ZipInputStream(input)
var zipEntry: ZipEntry?
val resources = Resources()
do {
zipEntry = inZip.nextEntry
if ((zipEntry == null) || zipEntry.isDirectory || zipEntry == ZipEntry("<error>")) continue
val resource = ResourceUtil.createResource(zipEntry, inZip)
if (resource.mediaType == MediaTypes.XHTML) resource.inputEncoding = "UTF-8"
if (zipEntry.name.endsWith(".opf")) {
/*掌上书苑有很多自制书OPF的nameSpace格式不标准,强制修复成正确的格式*/
val newS = String(resource.data).replace(
"\\smlns=\"http://www.idpf.org/2007/opf\"".toRegex(),
" xmlns=\"http://www.idpf.org/2007/opf\""
)
resource.data = newS.toByteArray()
}
resources.add(resource)
} while (zipEntry != null)
if (resources.size() > 0) return EpubReader().readEpub(resources)
val file = BookHelp.getEpubFile(book)
//通过懒加载读取epub
return EpubReader().readEpubLazy(ZipFile(file), "utf-8")
} catch (e: Exception) {
e.printStackTrace()
}

@ -138,6 +138,13 @@ object LocalBook {
val bookFile = FileUtils.getFile(cacheFolder, book.originName)
bookFile.delete()
}
if(book.isEpub()){
val bookFile=BookHelp.getEpubFile(book).parentFile
if (bookFile!=null&&bookFile.exists()){
FileUtils.delete(bookFile,true)
}
}
if (deleteOriginal) {
if (book.bookUrl.isContentScheme()) {

@ -24,7 +24,6 @@ import me.ag2s.epublib.epub.EpubWriter
import me.ag2s.epublib.util.ResourceUtil
import java.io.ByteArrayOutputStream
import java.io.File
import java.io.FileInputStream
import java.io.FileOutputStream
import java.nio.charset.Charset
@ -184,6 +183,7 @@ class CacheViewModel(application: Application) : BaseViewModel(application) {
}
}
private fun exportEpub(file: File, book: Book) {
val filename = "${book.name} by ${book.author}.epub"
val epubBook = EpubBook()
@ -254,9 +254,12 @@ class CacheViewModel(application: Application) : BaseViewModel(application) {
}
private fun setPic(src: String, book: Book, epubBook: EpubBook) {
val href = "${MD5Utils.md5Encode16(src)}${BookHelp.getImageSuffix(src)}"
val vFile = BookHelp.getImage(book, src)
val fp = FileResourceProvider(vFile.parent)
if (vFile.exists()) {
val img = Resource(FileInputStream(vFile), MD5Utils.md5Encode16(src) + ".jpg")
val img = LazyResource(fp, href)
epubBook.resources.add(img)
}
}
@ -275,7 +278,10 @@ class CacheViewModel(application: Application) : BaseViewModel(application) {
matchResult.groupValues[1].let {
val src = NetworkUtils.getAbsoluteURL(chapter.url, it)
setPic(src, book, epubBook)
text1 = text1.replace(src, MD5Utils.md5Encode16(src) + ".jpg")
text1 = text1.replace(
src,
"${MD5Utils.md5Encode16(src)}${BookHelp.getImageSuffix(src)}"
)
}
}

@ -54,7 +54,9 @@ object ImageProvider {
ChapterProvider.visibleWidth,
ChapterProvider.visibleHeight
)
setCache(chapterIndex, src, bitmap)
if (bitmap != null) {
setCache(chapterIndex, src, bitmap)
}
bitmap
} catch (e: Exception) {
null

@ -9,6 +9,7 @@ import android.renderscript.RenderScript
import android.renderscript.ScriptIntrinsicBlur
import android.view.View
import splitties.init.appCtx
import java.io.FileInputStream
import java.io.IOException
import kotlin.math.*
@ -25,11 +26,13 @@ object BitmapUtils {
* @param height 想要显示的图片的高度
* @return
*/
fun decodeBitmap(path: String, width: Int, height: Int): Bitmap {
fun decodeBitmap(path: String, width: Int, height: Int): Bitmap? {
val op = BitmapFactory.Options()
op.inPreferredConfig = Config.RGB_565
var ips = FileInputStream(path)
// inJustDecodeBounds如果设置为true,仅仅返回图片实际的宽和高,宽和高是赋值给opts.outWidth,opts.outHeight;
op.inJustDecodeBounds = true
BitmapFactory.decodeFile(path, op) //获取尺寸信息
BitmapFactory.decodeStream(ips, null, op)
//获取比例大小
val wRatio = ceil((op.outWidth / width).toDouble()).toInt()
val hRatio = ceil((op.outHeight / height).toDouble()).toInt()
@ -42,21 +45,26 @@ object BitmapUtils {
}
}
op.inJustDecodeBounds = false
return BitmapFactory.decodeFile(path, op)
ips = FileInputStream(path)
return BitmapFactory.decodeStream(ips, null, op)
}
/** 从path中获取Bitmap图片
* @param path 图片路径
* @return
*/
fun decodeBitmap(path: String): Bitmap {
fun decodeBitmap(path: String): Bitmap? {
val opts = BitmapFactory.Options()
opts.inPreferredConfig = Config.RGB_565
var ips = FileInputStream(path)
opts.inJustDecodeBounds = true
BitmapFactory.decodeFile(path, opts)
BitmapFactory.decodeStream(ips, null, opts)
opts.inSampleSize = computeSampleSize(opts, -1, 128 * 128)
opts.inJustDecodeBounds = false
ips = FileInputStream(path)
return BitmapFactory.decodeFile(path, opts)
return BitmapFactory.decodeStream(ips, null, opts)
}
/**

@ -517,6 +517,44 @@ object FileUtils {
closeSilently(fos)
}
}
/**
 * Saves the content of [data] to the file at [filepath].
 *
 * Convenience overload: wraps [filepath] in a [File] and delegates to the
 * `File`-based `writeInputStream`.
 *
 * @param filepath path of the destination file
 * @param data stream whose content is written to the file
 * @return the result reported by the `File`-based overload
 */
fun writeInputStream(filepath: String, data: InputStream): Boolean =
    writeInputStream(File(filepath), data)
/**
 * Saves the content of [data] to [file], replacing any previous content.
 *
 * Missing parent directories are created first. The function takes ownership of
 * [data]: the stream is closed on success and on failure alike.
 *
 * @param file destination file; created when it does not exist
 * @param data stream to drain into [file]
 * @return `true` when every byte was copied, `false` when an [IOException]
 *   occurred while creating, writing or closing
 */
fun writeInputStream(file: File, data: InputStream): Boolean {
    return try {
        // `use` closes both streams on every path, including when the copy throws.
        data.use { source ->
            file.parentFile?.mkdirs()
            FileOutputStream(file).use { sink ->
                source.copyTo(sink, 4 * 1024)
                sink.flush()
            }
        }
        true
    } catch (e: IOException) {
        false
    }
}
/**
* 追加文本内容

@ -0,0 +1,38 @@
package me.ag2s.epublib.domain;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
/**
 * A {@link LazyResourceProvider} that serves resources from files in a single directory.
 * <p>
 * Per the original author's note, it is meant to be used with LazyResource when building
 * an EPUB that contains large files (for example many images), to avoid running out of memory.
 */
public class FileResourceProvider implements LazyResourceProvider {
    // Parent directory against which resource hrefs are resolved.
    String dir;

    /**
     * Creates a provider rooted at the given directory path.
     *
     * @param dir path of the directory that holds the resource files
     */
    public FileResourceProvider(String dir) {
        this.dir = dir;
    }

    /**
     * Creates a provider rooted at the given directory.
     *
     * @param dirfile the directory that holds the resource files
     */
    @SuppressWarnings("unused")
    public FileResourceProvider(File dirfile) {
        this(dirfile.getPath());
    }

    /**
     * Opens the file named by {@code href} inside the configured directory.
     *
     * @param href resource path relative to the directory
     * @return a new stream over the file; the caller is responsible for closing it
     * @throws IOException if the file cannot be opened
     */
    @Override
    public InputStream getResourceStream(String href) throws IOException {
        final File target = new File(dir, href);
        return new FileInputStream(target);
    }
}

@ -1,135 +1,177 @@
package me.ag2s.epublib.epub;
import me.ag2s.epublib.util.StringUtil;
import java.util.ArrayList;
import java.util.List;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;
import java.util.ArrayList;
import java.util.List;
import me.ag2s.epublib.util.StringUtil;
/**
* Utility methods for working with the DOM.
*
* @author paul
*
*/
// package
class DOMUtil {
/**
* First tries to get the attribute value by doing an getAttributeNS on the element, if that gets an empty element it does a getAttribute without namespace.
*
* @param element element
* @param namespace namespace
* @param attribute attribute
* @return String Attribute
*/
public static String getAttribute(Element element, String namespace,
String attribute) {
String result = element.getAttributeNS(namespace, attribute);
if (StringUtil.isEmpty(result)) {
result = element.getAttribute(attribute);
/**
* First tries to get the attribute value by doing an getAttributeNS on the element, if that gets an empty element it does a getAttribute without namespace.
*
* @param element element
* @param namespace namespace
* @param attribute attribute
* @return String Attribute
*/
public static String getAttribute(Element element, String namespace,
String attribute) {
String result = element.getAttributeNS(namespace, attribute);
if (StringUtil.isEmpty(result)) {
result = element.getAttribute(attribute);
}
return result;
}
return result;
}
/**
* Gets all descendant elements of the given parentElement with the given namespace and tagname and returns their text child as a list of String.
*
* @param parentElement parentElement
* @param namespace namespace
* @param tagName tagName
* @return List<String>
*/
public static List<String> getElementsTextChild(Element parentElement,
String namespace, String tagName) {
NodeList elements = parentElement
.getElementsByTagNameNS(namespace, tagName);
//ArrayList 初始化时指定长度提高性能
List<String> result = new ArrayList<>(elements.getLength());
for (int i = 0; i < elements.getLength(); i++) {
result.add(getTextChildrenContent((Element) elements.item(i)));
/**
* Gets all descendant elements of the given parentElement with the given namespace and tagname and returns their text child as a list of String.
*
* @param parentElement parentElement
* @param namespace namespace
* @param tagName tagName
* @return List<String>
*/
public static List<String> getElementsTextChild(Element parentElement,
String namespace, String tagName) {
NodeList elements = parentElement
.getElementsByTagNameNS(namespace, tagName);
//ArrayList 初始化时指定长度提高性能
List<String> result = new ArrayList<>(elements.getLength());
for (int i = 0; i < elements.getLength(); i++) {
result.add(getTextChildrenContent((Element) elements.item(i)));
}
return result;
}
return result;
}
/**
* Finds in the current document the first element with the given namespace and elementName and with the given findAttributeName and findAttributeValue.
* It then returns the value of the given resultAttributeName.
*
* @param document document
* @param namespace namespace
* @param elementName elementName
* @param findAttributeName findAttributeName
* @param findAttributeValue findAttributeValue
* @param resultAttributeName resultAttributeName
* @return String value
*/
public static String getFindAttributeValue(Document document,
String namespace, String elementName, String findAttributeName,
String findAttributeValue, String resultAttributeName) {
NodeList metaTags = document.getElementsByTagNameNS(namespace, elementName);
for (int i = 0; i < metaTags.getLength(); i++) {
Element metaElement = (Element) metaTags.item(i);
if (findAttributeValue
.equalsIgnoreCase(metaElement.getAttribute(findAttributeName))
&& StringUtil
.isNotBlank(metaElement.getAttribute(resultAttributeName))) {
return metaElement.getAttribute(resultAttributeName);
}
/**
* Finds in the current document the first element with the given namespace and elementName and with the given findAttributeName and findAttributeValue.
* It then returns the value of the given resultAttributeName.
*
* @param document document
* @param namespace namespace
* @param elementName elementName
* @param findAttributeName findAttributeName
* @param findAttributeValue findAttributeValue
* @param resultAttributeName resultAttributeName
* @return String value
*/
public static String getFindAttributeValue(Document document,
String namespace, String elementName, String findAttributeName,
String findAttributeValue, String resultAttributeName) {
NodeList metaTags = document.getElementsByTagNameNS(namespace, elementName);
for (int i = 0; i < metaTags.getLength(); i++) {
Element metaElement = (Element) metaTags.item(i);
if (findAttributeValue
.equalsIgnoreCase(metaElement.getAttribute(findAttributeName))
&& StringUtil
.isNotBlank(metaElement.getAttribute(resultAttributeName))) {
return metaElement.getAttribute(resultAttributeName);
}
}
return null;
}
return null;
}
/**
* Gets the first element that is a child of the parentElement and has the given namespace and tagName
*
* @param parentElement parentElement
* @param namespace namespace
* @param tagName tagName
* @return Element
*/
public static Element getFirstElementByTagNameNS(Element parentElement,
String namespace, String tagName) {
NodeList nodes = parentElement.getElementsByTagNameNS(namespace, tagName);
if (nodes.getLength() != 0) {
return (Element) nodes.item(0);
/**
* Gets the first element that is a child of the parentElement and has the given namespace and tagName
*
* @param parentElement parentElement
* @param namespace namespace
* @param tagName tagName
* @return Element
*/
public static NodeList getElementsByTagNameNS(Element parentElement,
String namespace, String tagName) {
NodeList nodes = parentElement.getElementsByTagNameNS(namespace, tagName);
if (nodes.getLength() != 0) {
return nodes;
}
nodes = parentElement.getElementsByTagName(tagName);
if (nodes.getLength() == 0) {
return null;
}
return nodes;
}
nodes= parentElement.getElementsByTagName(tagName);
if (nodes.getLength()==0){
return null;
/**
* Gets the first element that is a child of the parentElement and has the given namespace and tagName
*
* @param parentElement parentElement
* @param namespace namespace
* @param tagName tagName
* @return Element
*/
public static NodeList getElementsByTagNameNS(Document parentElement,
String namespace, String tagName) {
NodeList nodes = parentElement.getElementsByTagNameNS(namespace, tagName);
if (nodes.getLength() != 0) {
return nodes;
}
nodes = parentElement.getElementsByTagName(tagName);
if (nodes.getLength() == 0) {
return null;
}
return nodes;
}
return (Element) nodes.item(0);
}
/**
* The contents of all Text nodes that are children of the given parentElement.
* The result is trim()-ed.
*
* The reason for this more complicated procedure instead of just returning the data of the firstChild is that
* when the text is Chinese characters then on Android each Characater is represented in the DOM as
* an individual Text node.
*
* @param parentElement parentElement
* @return String value
*/
public static String getTextChildrenContent(Element parentElement) {
if (parentElement == null) {
return null;
/**
* Gets the first element that is a child of the parentElement and has the given namespace and tagName
*
* @param parentElement parentElement
* @param namespace namespace
* @param tagName tagName
* @return Element
*/
public static Element getFirstElementByTagNameNS(Element parentElement,
String namespace, String tagName) {
NodeList nodes = parentElement.getElementsByTagNameNS(namespace, tagName);
if (nodes.getLength() != 0) {
return (Element) nodes.item(0);
}
nodes = parentElement.getElementsByTagName(tagName);
if (nodes.getLength() == 0) {
return null;
}
return (Element) nodes.item(0);
}
StringBuilder result = new StringBuilder();
NodeList childNodes = parentElement.getChildNodes();
for (int i = 0; i < childNodes.getLength(); i++) {
Node node = childNodes.item(i);
if ((node == null) ||
(node.getNodeType() != Node.TEXT_NODE)) {
continue;
}
result.append(((Text) node).getData());
/**
* The contents of all Text nodes that are children of the given parentElement.
* The result is trim()-ed.
* <p>
* The reason for this more complicated procedure instead of just returning the data of the firstChild is that
* when the text is Chinese characters then on Android each Characater is represented in the DOM as
* an individual Text node.
*
* @param parentElement parentElement
* @return String value
*/
public static String getTextChildrenContent(Element parentElement) {
if (parentElement == null) {
return null;
}
StringBuilder result = new StringBuilder();
NodeList childNodes = parentElement.getChildNodes();
for (int i = 0; i < childNodes.getLength(); i++) {
Node node = childNodes.item(i);
if ((node == null) ||
(node.getNodeType() != Node.TEXT_NODE)) {
continue;
}
result.append(((Text) node).getData());
}
return result.toString().trim();
}
return result.toString().trim();
}
}

@ -148,9 +148,9 @@ public class NCXDocumentV2 {
if (resource == null) {
Log.e(TAG, "Resource with href " + href + " in NCX document not found");
}
Log.d(TAG, "label:" + label);
Log.d(TAG, "href:" + href);
Log.d(TAG, "fragmentId:" + fragmentId);
Log.v(TAG, "label:" + label);
Log.v(TAG, "href:" + href);
Log.v(TAG, "fragmentId:" + fragmentId);
TOCReference result = new TOCReference(label, resource, fragmentId);
List<TOCReference> childTOCReferences = readTOCReferences(
navpointElement.getChildNodes(), book);

@ -107,12 +107,21 @@ public class NCXDocumentV3 {
if (ncxResource == null) {
return null;
}
//Log.d(TAG, ncxResource.getHref());
//一些epub 3 文件没有按照epub3的标准使用删除掉ncx目录文件
if (ncxResource.getHref().endsWith(".ncx")){
Log.v(TAG,"该epub文件不标准,使用了epub2的目录文件");
return NCXDocumentV2.read(book, epubReader);
}
Log.d(TAG, ncxResource.getHref());
Document ncxDocument = ResourceUtil.getAsDocument(ncxResource);
//Log.d(TAG, ncxDocument.getNodeName());
Log.d(TAG, ncxDocument.getNodeName());
Element navMapElement = (Element) ncxDocument.getElementsByTagName(XHTMLTgs.nav).item(0);
if(navMapElement==null){
Log.d(TAG,"epub3目录文件未发现nav节点,尝试使用epub2的规则解析");
return NCXDocumentV2.read(book, epubReader);
}
navMapElement = (Element) navMapElement.getElementsByTagName(XHTMLTgs.ol).item(0);
Log.d(TAG, navMapElement.getTagName());

@ -22,7 +22,7 @@ import me.ag2s.epublib.util.StringUtil;
/**
* Reads the package document metadata.
*
* <p>
* In its own separate class because the PackageDocumentReader became a bit large and unwieldy.
*
* @author paul
@ -30,195 +30,196 @@ import me.ag2s.epublib.util.StringUtil;
// package
class PackageDocumentMetadataReader extends PackageDocumentBase {
private static final String TAG= PackageDocumentMetadataReader.class.getName();
public static Metadata readMetadata(Document packageDocument) {
Metadata result = new Metadata();
Element metadataElement = DOMUtil
.getFirstElementByTagNameNS(packageDocument.getDocumentElement(),
NAMESPACE_OPF, OPFTags.metadata);
if (metadataElement == null) {
Log.e(TAG,"Package does not contain element " + OPFTags.metadata);
return result;
}
result.setTitles(DOMUtil
.getElementsTextChild(metadataElement, NAMESPACE_DUBLIN_CORE,
DCTags.title));
result.setPublishers(DOMUtil
.getElementsTextChild(metadataElement, NAMESPACE_DUBLIN_CORE,
DCTags.publisher));
result.setDescriptions(DOMUtil
.getElementsTextChild(metadataElement, NAMESPACE_DUBLIN_CORE,
DCTags.description));
result.setRights(DOMUtil
.getElementsTextChild(metadataElement, NAMESPACE_DUBLIN_CORE,
DCTags.rights));
result.setTypes(DOMUtil
.getElementsTextChild(metadataElement, NAMESPACE_DUBLIN_CORE,
DCTags.type));
result.setSubjects(DOMUtil
.getElementsTextChild(metadataElement, NAMESPACE_DUBLIN_CORE,
DCTags.subject));
result.setIdentifiers(readIdentifiers(metadataElement));
result.setAuthors(readCreators(metadataElement));
result.setContributors(readContributors(metadataElement));
result.setDates(readDates(metadataElement));
result.setOtherProperties(readOtherProperties(metadataElement));
result.setMetaAttributes(readMetaProperties(metadataElement));
Element languageTag = DOMUtil
.getFirstElementByTagNameNS(metadataElement, NAMESPACE_DUBLIN_CORE,
DCTags.language);
if (languageTag != null) {
result.setLanguage(DOMUtil.getTextChildrenContent(languageTag));
private static final String TAG = PackageDocumentMetadataReader.class.getName();
public static Metadata readMetadata(Document packageDocument) {
Metadata result = new Metadata();
Element metadataElement = DOMUtil
.getFirstElementByTagNameNS(packageDocument.getDocumentElement(),
NAMESPACE_OPF, OPFTags.metadata);
if (metadataElement == null) {
Log.e(TAG, "Package does not contain element " + OPFTags.metadata);
return result;
}
result.setTitles(DOMUtil
.getElementsTextChild(metadataElement, NAMESPACE_DUBLIN_CORE,
DCTags.title));
result.setPublishers(DOMUtil
.getElementsTextChild(metadataElement, NAMESPACE_DUBLIN_CORE,
DCTags.publisher));
result.setDescriptions(DOMUtil
.getElementsTextChild(metadataElement, NAMESPACE_DUBLIN_CORE,
DCTags.description));
result.setRights(DOMUtil
.getElementsTextChild(metadataElement, NAMESPACE_DUBLIN_CORE,
DCTags.rights));
result.setTypes(DOMUtil
.getElementsTextChild(metadataElement, NAMESPACE_DUBLIN_CORE,
DCTags.type));
result.setSubjects(DOMUtil
.getElementsTextChild(metadataElement, NAMESPACE_DUBLIN_CORE,
DCTags.subject));
result.setIdentifiers(readIdentifiers(metadataElement));
result.setAuthors(readCreators(metadataElement));
result.setContributors(readContributors(metadataElement));
result.setDates(readDates(metadataElement));
result.setOtherProperties(readOtherProperties(metadataElement));
result.setMetaAttributes(readMetaProperties(metadataElement));
Element languageTag = DOMUtil
.getFirstElementByTagNameNS(metadataElement, NAMESPACE_DUBLIN_CORE,
DCTags.language);
if (languageTag != null) {
result.setLanguage(DOMUtil.getTextChildrenContent(languageTag));
}
return result;
}
return result;
}
/**
* consumes meta tags that have a property attribute as defined in the standard. For example:
* &lt;meta property="rendition:layout"&gt;pre-paginated&lt;/meta&gt;
* @param metadataElement metadataElement
* @return Map<QName, String>
*/
private static Map<QName, String> readOtherProperties(
Element metadataElement) {
Map<QName, String> result = new HashMap<>();
NodeList metaTags = metadataElement.getElementsByTagName(OPFTags.meta);
for (int i = 0; i < metaTags.getLength(); i++) {
Node metaNode = metaTags.item(i);
Node property = metaNode.getAttributes()
.getNamedItem(OPFAttributes.property);
if (property != null) {
String name = property.getNodeValue();
String value = metaNode.getTextContent();
result.put(new QName(name), value);
}
/**
* consumes meta tags that have a property attribute as defined in the standard. For example:
* &lt;meta property="rendition:layout"&gt;pre-paginated&lt;/meta&gt;
*
* @param metadataElement metadataElement
* @return Map<QName, String>
*/
private static Map<QName, String> readOtherProperties(
Element metadataElement) {
Map<QName, String> result = new HashMap<>();
NodeList metaTags = metadataElement.getElementsByTagName(OPFTags.meta);
for (int i = 0; i < metaTags.getLength(); i++) {
Node metaNode = metaTags.item(i);
Node property = metaNode.getAttributes()
.getNamedItem(OPFAttributes.property);
if (property != null) {
String name = property.getNodeValue();
String value = metaNode.getTextContent();
result.put(new QName(name), value);
}
}
return result;
}
return result;
}
/**
* consumes meta tags that have a property attribute as defined in the standard. For example:
* &lt;meta property="rendition:layout"&gt;pre-paginated&lt;/meta&gt;
* @param metadataElement metadataElement
* @return Map<String, String>
*/
private static Map<String, String> readMetaProperties(
Element metadataElement) {
Map<String, String> result = new HashMap<>();
NodeList metaTags = metadataElement.getElementsByTagName(OPFTags.meta);
for (int i = 0; i < metaTags.getLength(); i++) {
Element metaElement = (Element) metaTags.item(i);
String name = metaElement.getAttribute(OPFAttributes.name);
String value = metaElement.getAttribute(OPFAttributes.content);
result.put(name, value);
/**
* consumes meta tags that have a property attribute as defined in the standard. For example:
* &lt;meta property="rendition:layout"&gt;pre-paginated&lt;/meta&gt;
*
* @param metadataElement metadataElement
* @return Map<String, String>
*/
private static Map<String, String> readMetaProperties(
Element metadataElement) {
Map<String, String> result = new HashMap<>();
NodeList metaTags = metadataElement.getElementsByTagName(OPFTags.meta);
for (int i = 0; i < metaTags.getLength(); i++) {
Element metaElement = (Element) metaTags.item(i);
String name = metaElement.getAttribute(OPFAttributes.name);
String value = metaElement.getAttribute(OPFAttributes.content);
result.put(name, value);
}
return result;
}
return result;
}
private static String getBookIdId(Document document) {
Element packageElement = DOMUtil
.getFirstElementByTagNameNS(document.getDocumentElement(),
NAMESPACE_OPF, OPFTags.packageTag);
if (packageElement == null) {
return null;
}
return DOMUtil.getAttribute(packageElement, NAMESPACE_OPF, OPFAttributes.uniqueIdentifier);
private static String getBookIdId(Document document) {
Element packageElement = DOMUtil
.getFirstElementByTagNameNS(document.getDocumentElement(),
NAMESPACE_OPF, OPFTags.packageTag);
if (packageElement == null) {
return null;
}
return packageElement
.getAttributeNS(NAMESPACE_OPF, OPFAttributes.uniqueIdentifier);
}
private static List<Author> readCreators(Element metadataElement) {
return readAuthors(DCTags.creator, metadataElement);
}
private static List<Author> readContributors(Element metadataElement) {
return readAuthors(DCTags.contributor, metadataElement);
}
private static List<Author> readAuthors(String authorTag,
Element metadataElement) {
NodeList elements = metadataElement
.getElementsByTagNameNS(NAMESPACE_DUBLIN_CORE, authorTag);
List<Author> result = new ArrayList<>(elements.getLength());
for (int i = 0; i < elements.getLength(); i++) {
Element authorElement = (Element) elements.item(i);
Author author = createAuthor(authorElement);
if (author != null) {
result.add(author);
}
private static List<Author> readCreators(Element metadataElement) {
return readAuthors(DCTags.creator, metadataElement);
}
return result;
}
private static List<Date> readDates(Element metadataElement) {
NodeList elements = metadataElement
.getElementsByTagNameNS(NAMESPACE_DUBLIN_CORE, DCTags.date);
List<Date> result = new ArrayList<>(elements.getLength());
for (int i = 0; i < elements.getLength(); i++) {
Element dateElement = (Element) elements.item(i);
Date date;
try {
date = new Date(DOMUtil.getTextChildrenContent(dateElement),
dateElement.getAttributeNS(NAMESPACE_OPF, OPFAttributes.event));
result.add(date);
} catch (IllegalArgumentException e) {
Log.e(TAG,e.getMessage());
}
private static List<Author> readContributors(Element metadataElement) {
return readAuthors(DCTags.contributor, metadataElement);
}
return result;
}
private static List<Author> readAuthors(String authorTag,
Element metadataElement) {
NodeList elements = metadataElement
.getElementsByTagNameNS(NAMESPACE_DUBLIN_CORE, authorTag);
List<Author> result = new ArrayList<>(elements.getLength());
for (int i = 0; i < elements.getLength(); i++) {
Element authorElement = (Element) elements.item(i);
Author author = createAuthor(authorElement);
if (author != null) {
result.add(author);
}
}
return result;
private static Author createAuthor(Element authorElement) {
String authorString = DOMUtil.getTextChildrenContent(authorElement);
if (StringUtil.isBlank(authorString)) {
return null;
}
int spacePos = authorString.lastIndexOf(' ');
Author result;
if (spacePos < 0) {
result = new Author(authorString);
} else {
result = new Author(authorString.substring(0, spacePos),
authorString.substring(spacePos + 1));
private static List<Date> readDates(Element metadataElement) {
NodeList elements = metadataElement
.getElementsByTagNameNS(NAMESPACE_DUBLIN_CORE, DCTags.date);
List<Date> result = new ArrayList<>(elements.getLength());
for (int i = 0; i < elements.getLength(); i++) {
Element dateElement = (Element) elements.item(i);
Date date;
try {
date = new Date(DOMUtil.getTextChildrenContent(dateElement),
DOMUtil.getAttribute(dateElement, NAMESPACE_OPF, OPFAttributes.event));
result.add(date);
} catch (IllegalArgumentException e) {
Log.e(TAG, e.getMessage());
}
}
return result;
}
result.setRole(
authorElement.getAttributeNS(NAMESPACE_OPF, OPFAttributes.role));
return result;
}
private static List<Identifier> readIdentifiers(Element metadataElement) {
NodeList identifierElements = metadataElement
.getElementsByTagNameNS(NAMESPACE_DUBLIN_CORE, DCTags.identifier);
if (identifierElements.getLength() == 0) {
Log.e(TAG,"Package does not contain element " + DCTags.identifier);
return new ArrayList<>();
private static Author createAuthor(Element authorElement) {
String authorString = DOMUtil.getTextChildrenContent(authorElement);
if (StringUtil.isBlank(authorString)) {
return null;
}
int spacePos = authorString.lastIndexOf(' ');
Author result;
if (spacePos < 0) {
result = new Author(authorString);
} else {
result = new Author(authorString.substring(0, spacePos),
authorString.substring(spacePos + 1));
}
result.setRole(
DOMUtil.getAttribute(authorElement, NAMESPACE_OPF, OPFAttributes.role));
return result;
}
String bookIdId = getBookIdId(metadataElement.getOwnerDocument());
List<Identifier> result = new ArrayList<>(
identifierElements.getLength());
for (int i = 0; i < identifierElements.getLength(); i++) {
Element identifierElement = (Element) identifierElements.item(i);
String schemeName = identifierElement
.getAttributeNS(NAMESPACE_OPF, DCAttributes.scheme);
String identifierValue = DOMUtil
.getTextChildrenContent(identifierElement);
if (StringUtil.isBlank(identifierValue)) {
continue;
}
Identifier identifier = new Identifier(schemeName, identifierValue);
if (identifierElement.getAttribute("id").equals(bookIdId)) {
identifier.setBookId(true);
}
result.add(identifier);
/**
 * Collects the Dublin Core identifier elements found below the metadata element.
 * <p>
 * Identifiers whose text is blank are left out. The identifier whose "id" attribute
 * equals the value returned by getBookIdId for the owning document is flagged as the
 * book id.
 *
 * @param metadataElement the element to search for identifier elements
 * @return the identifiers in document order; an empty list (after logging an error)
 * when no identifier element exists
 */
private static List<Identifier> readIdentifiers(Element metadataElement) {
    NodeList nodes = metadataElement
            .getElementsByTagNameNS(NAMESPACE_DUBLIN_CORE, DCTags.identifier);
    if (nodes.getLength() == 0) {
        Log.e(TAG, "Package does not contain element " + DCTags.identifier);
        return new ArrayList<>();
    }

    String bookIdId = getBookIdId(metadataElement.getOwnerDocument());
    List<Identifier> identifiers = new ArrayList<>(nodes.getLength());
    for (int index = 0; index < nodes.getLength(); index++) {
        Element element = (Element) nodes.item(index);
        String scheme = DOMUtil.getAttribute(element, NAMESPACE_OPF, DCAttributes.scheme);
        String value = DOMUtil.getTextChildrenContent(element);
        if (!StringUtil.isBlank(value)) {
            Identifier identifier = new Identifier(scheme, value);
            boolean isBookId = element.getAttribute("id").equals(bookIdId);
            if (isBookId) {
                identifier.setBookId(true);
            }
            identifiers.add(identifier);
        }
    }
    return identifiers;
}
return result;
}
}

@ -36,42 +36,41 @@ import me.ag2s.epublib.util.StringUtil;
* Reads the opf package document as defined by namespace http://www.idpf.org/2007/opf
*
* @author paul
*
*/
public class PackageDocumentReader extends PackageDocumentBase {
private static final String TAG= PackageDocumentReader.class.getName();
private static final String[] POSSIBLE_NCX_ITEM_IDS = new String[]{"toc",
"ncx", "ncxtoc","htmltoc"};
public static void read(
Resource packageResource, EpubReader epubReader, EpubBook book,
Resources resources)
throws SAXException, IOException {
Document packageDocument = ResourceUtil.getAsDocument(packageResource);
String packageHref = packageResource.getHref();
resources = fixHrefs(packageHref, resources);
readGuide(packageDocument, epubReader, book, resources);
// Books sometimes use non-identifier ids. We map these here to legal ones
Map<String, String> idMapping = new HashMap<>();
String version=DOMUtil.getAttribute(packageDocument.getDocumentElement(),PREFIX_OPF,PackageDocumentBase.version);
resources = readManifest(packageDocument, packageHref, epubReader,
resources, idMapping);
book.setResources(resources);
book.setVersion(version);
readCover(packageDocument, book);
book.setMetadata(
PackageDocumentMetadataReader.readMetadata(packageDocument));
book.setSpine(readSpine(packageDocument, book.getResources(), idMapping));
// if we did not find a cover page then we make the first page of the book the cover page
if (book.getCoverPage() == null && book.getSpine().size() > 0) {
book.setCoverPage(book.getSpine().getResource(0));
private static final String TAG = PackageDocumentReader.class.getName();
private static final String[] POSSIBLE_NCX_ITEM_IDS = new String[]{"toc",
"ncx", "ncxtoc", "htmltoc"};
/**
 * Reads the package document and fills the given book from it: guide, resources,
 * version, cover, metadata and spine, in that order.
 *
 * @param packageResource resource that holds the package document
 * @param epubReader      handed on to readGuide and readManifest
 * @param book            the book that is populated
 * @param resources       the resources the package document's entries are matched against
 * @throws SAXException declared by this method; presumably raised while parsing the
 *                      package resource - TODO confirm against ResourceUtil.getAsDocument
 * @throws IOException  declared by this method; presumably raised while reading the
 *                      package resource - TODO confirm against ResourceUtil.getAsDocument
 */
public static void read(
Resource packageResource, EpubReader epubReader, EpubBook book,
Resources resources)
throws SAXException, IOException {
Document packageDocument = ResourceUtil.getAsDocument(packageResource);
String packageHref = packageResource.getHref();
// Rewrite the resource hrefs relative to the package document's directory.
resources = fixHrefs(packageHref, resources);
// The guide is read before the manifest: readManifest removes every resource it
// matches from the Resources object it is given, while readGuide looks resources up in it.
readGuide(packageDocument, epubReader, book, resources);
// Books sometimes use non-identifier ids. We map these here to legal ones
Map<String, String> idMapping = new HashMap<>();
// The version attribute is read from the package document's root element.
String version = DOMUtil.getAttribute(packageDocument.getDocumentElement(), PREFIX_OPF, PackageDocumentBase.version);
// From here on "resources" only holds what the manifest listed.
resources = readManifest(packageDocument, packageHref, epubReader,
resources, idMapping);
book.setResources(resources);
book.setVersion(version);
readCover(packageDocument, book);
book.setMetadata(
PackageDocumentMetadataReader.readMetadata(packageDocument));
book.setSpine(readSpine(packageDocument, book.getResources(), idMapping));
// if we did not find a cover page then we make the first page of the book the cover page
if (book.getCoverPage() == null && book.getSpine().size() > 0) {
book.setCoverPage(book.getSpine().getResource(0));
}
}
}
// private static Resource readCoverImage(Element metadataElement, Resources resources) {
// String coverResourceId = DOMUtil.getFindAttributeValue(metadataElement.getOwnerDocument(), NAMESPACE_OPF, OPFTags.meta, OPFAttributes.name, OPFValues.meta_cover, OPFAttributes.content);
@ -83,322 +82,326 @@ public class PackageDocumentReader extends PackageDocumentBase {
// }
/**
 * Reads the manifest containing the resource ids, hrefs and mediatypes.
 * <p>
 * Each manifest item's href is URL-decoded and used to take the matching resource out
 * of {@code resources}. Items without a matching resource are logged and skipped.
 * A matched resource receives the item's id, its properties attribute and, when the
 * media-type name is recognised, its media type.
 *
 * @param packageDocument the parsed package document
 * @param packageHref     not used by this method
 * @param epubReader      not used by this method
 * @param resources       resources looked up by href; every matched entry is removed from it
 * @param idMapping       receives one entry per matched item, mapping the manifest id
 *                        to the id of the resource
 * @return the resources listed in the manifest; empty when the package document has
 * no manifest element
 */
@SuppressWarnings("unused")
private static Resources readManifest(Document packageDocument,
                                      String packageHref,
                                      EpubReader epubReader, Resources resources,
                                      Map<String, String> idMapping) {
    Element manifestElement = DOMUtil
            .getFirstElementByTagNameNS(packageDocument.getDocumentElement(),
                    NAMESPACE_OPF, OPFTags.manifest);
    Resources result = new Resources();
    if (manifestElement == null) {
        Log.e(TAG,
                "Package document does not contain element " + OPFTags.manifest);
        return result;
    }
    NodeList itemElements = manifestElement
            .getElementsByTagNameNS(NAMESPACE_OPF, OPFTags.item);
    for (int i = 0; i < itemElements.getLength(); i++) {
        Element itemElement = (Element) itemElements.item(i);
        String id = DOMUtil
                .getAttribute(itemElement, NAMESPACE_OPF, OPFAttributes.id);
        String href = DOMUtil
                .getAttribute(itemElement, NAMESPACE_OPF, OPFAttributes.href);
        // URLDecoder.decode throws a NullPointerException for a null input, so only
        // decode an href that is actually present.
        if (href != null) {
            try {
                href = URLDecoder.decode(href, Constants.CHARACTER_ENCODING);
            } catch (UnsupportedEncodingException | IllegalArgumentException e) {
                // IllegalArgumentException is raised for a malformed escape such as a
                // stray '%'. Keep the undecoded href so that one bad item does not
                // abort reading the whole book; the lookup below then decides.
                Log.e(TAG, String.valueOf(e.getMessage()));
            }
        }
        String mediaTypeName = DOMUtil
                .getAttribute(itemElement, NAMESPACE_OPF, OPFAttributes.media_type);
        Resource resource = resources.remove(href);
        if (resource == null) {
            Log.e(TAG, "resource with href '" + href + "' not found");
            continue;
        }
        resource.setId(id);
        // for epub3
        String properties = DOMUtil.getAttribute(itemElement, NAMESPACE_OPF, OPFAttributes.properties);
        resource.setProperties(properties);
        MediaType mediaType = MediaTypes.getMediaTypeByName(mediaTypeName);
        if (mediaType != null) {
            resource.setMediaType(mediaType);
        }
        result.add(resource);
        idMapping.put(id, resource.getId());
    }
    return result;
}
/**
 * Reads the guide element of the package document into the book's guide.
 * <p>
 * A reference is skipped when its href is blank, when no resource exists for the href
 * (fragment removed), when its type is blank, or when its type is the cover type
 * (the cover is handled elsewhere). The last two "missing" cases are logged.
 *
 * @param packageDocument the parsed package document
 * @param epubReader      not used by this method
 * @param book            the book whose guide receives the references
 * @param resources       the resources the reference hrefs are resolved against
 */
@SuppressWarnings("unused")
private static void readGuide(Document packageDocument,
                              EpubReader epubReader, EpubBook book, Resources resources) {
    Element guideElement = DOMUtil.getFirstElementByTagNameNS(
            packageDocument.getDocumentElement(), NAMESPACE_OPF, OPFTags.guide);
    if (guideElement == null) {
        return;
    }

    Guide guide = book.getGuide();
    NodeList references = guideElement
            .getElementsByTagNameNS(NAMESPACE_OPF, OPFTags.reference);
    for (int index = 0; index < references.getLength(); index++) {
        Element referenceElement = (Element) references.item(index);

        String resourceHref = DOMUtil
                .getAttribute(referenceElement, NAMESPACE_OPF, OPFAttributes.href);
        if (StringUtil.isBlank(resourceHref)) {
            continue;
        }

        // Resources are looked up by the href without its fragment part.
        String hrefWithoutFragment = StringUtil
                .substringBefore(resourceHref, Constants.FRAGMENT_SEPARATOR_CHAR);
        Resource target = resources.getByHref(hrefWithoutFragment);
        if (target == null) {
            Log.e(TAG, "Guide is referencing resource with href " + resourceHref
                    + " which could not be found");
            continue;
        }

        String type = DOMUtil
                .getAttribute(referenceElement, NAMESPACE_OPF, OPFAttributes.type);
        if (StringUtil.isBlank(type)) {
            Log.e(TAG, "Guide is referencing resource with href " + resourceHref
                    + " which is missing the 'type' attribute");
            continue;
        }

        String title = DOMUtil
                .getAttribute(referenceElement, NAMESPACE_OPF, OPFAttributes.title);
        if (!GuideReference.COVER.equalsIgnoreCase(type)) {
            String fragment = StringUtil
                    .substringAfter(resourceHref, Constants.FRAGMENT_SEPARATOR_CHAR);
            guide.addReference(new GuideReference(target, type, title, fragment));
        }
        // else: cover is handled elsewhere
    }
}
/**
 * Strips off the package prefixes up to the href of the packageHref.
 * <p>
 * Example:
 * If the packageHref is "OEBPS/content.opf" then a resource href like "OEBPS/foo/bar.html" will be turned into "foo/bar.html"
 * <p>
 * NOTE(review): the prefix is removed purely by length. Every non-blank href longer
 * than the position of the last '/' in packageHref loses that many leading characters,
 * whether or not it actually starts with the package directory.
 *
 * @param packageHref     href of the package document
 * @param resourcesByHref the resources whose hrefs are rewritten in place
 * @return resourcesByHref itself when packageHref contains no '/', otherwise a new
 * Resources holding the same resource objects with their adjusted hrefs
 */
static Resources fixHrefs(String packageHref,
                          Resources resourcesByHref) {
    int prefixEnd = packageHref.lastIndexOf('/');
    if (prefixEnd < 0) {
        return resourcesByHref;
    }

    int firstKeptIndex = prefixEnd + 1;
    Resources stripped = new Resources();
    for (Resource resource : resourcesByHref.getAll()) {
        String href = resource.getHref();
        boolean canStrip = StringUtil.isNotBlank(href) && href.length() > prefixEnd;
        if (canStrip) {
            resource.setHref(href.substring(firstKeptIndex));
        }
        stripped.add(resource);
    }
    return stripped;
}
/**
* Reads the document's spine, containing all sections in reading order.
*
* @param packageDocument b
* @param resources b
* @param idMapping b
* @return the document's spine, containing all sections in reading order.
*/
private static Spine readSpine(Document packageDocument, Resources resources,
Map<String, String> idMapping) {
Element spineElement = DOMUtil
.getFirstElementByTagNameNS(packageDocument.getDocumentElement(),
NAMESPACE_OPF, OPFTags.spine);
if (spineElement == null) {
Log.e(TAG,"Element " + OPFTags.spine
+ " not found in package document, generating one automatically");
return generateSpineFromResources(resources);
/**
 * Reads the manifest containing the resource ids, hrefs and mediatypes.
 * <p>
 * Each manifest item's href is URL-decoded and used to take the matching resource out
 * of {@code resources}. Items without a matching resource are logged and skipped.
 * A matched resource receives the item's id, its properties attribute and, when the
 * media-type name is recognised, its media type.
 *
 * @param packageDocument the parsed package document
 * @param packageHref     not used by this method
 * @param epubReader      not used by this method
 * @param resources       resources looked up by href; every matched entry is removed from it
 * @param idMapping       receives one entry per matched item, mapping the manifest id
 *                        to the id of the resource
 * @return the resources listed in the manifest; empty when the package document has
 * no manifest element
 */
@SuppressWarnings("unused")
private static Resources readManifest(Document packageDocument,
                                      String packageHref,
                                      EpubReader epubReader, Resources resources,
                                      Map<String, String> idMapping) {
    Element manifestElement = DOMUtil
            .getFirstElementByTagNameNS(packageDocument.getDocumentElement(),
                    NAMESPACE_OPF, OPFTags.manifest);
    Resources result = new Resources();
    if (manifestElement == null) {
        Log.e(TAG,
                "Package document does not contain element " + OPFTags.manifest);
        return result;
    }
    NodeList itemElements = manifestElement
            .getElementsByTagNameNS(NAMESPACE_OPF, OPFTags.item);
    for (int i = 0; i < itemElements.getLength(); i++) {
        Element itemElement = (Element) itemElements.item(i);
        String id = DOMUtil
                .getAttribute(itemElement, NAMESPACE_OPF, OPFAttributes.id);
        String href = DOMUtil
                .getAttribute(itemElement, NAMESPACE_OPF, OPFAttributes.href);
        // URLDecoder.decode throws a NullPointerException for a null input, so only
        // decode an href that is actually present.
        if (href != null) {
            try {
                href = URLDecoder.decode(href, Constants.CHARACTER_ENCODING);
            } catch (UnsupportedEncodingException | IllegalArgumentException e) {
                // IllegalArgumentException is raised for a malformed escape such as a
                // stray '%'. Keep the undecoded href so that one bad item does not
                // abort reading the whole book; the lookup below then decides.
                Log.e(TAG, String.valueOf(e.getMessage()));
            }
        }
        String mediaTypeName = DOMUtil
                .getAttribute(itemElement, NAMESPACE_OPF, OPFAttributes.media_type);
        Resource resource = resources.remove(href);
        if (resource == null) {
            Log.e(TAG, "resource with href '" + href + "' not found");
            continue;
        }
        resource.setId(id);
        // for epub3
        String properties = DOMUtil.getAttribute(itemElement, NAMESPACE_OPF, OPFAttributes.properties);
        resource.setProperties(properties);
        MediaType mediaType = MediaTypes.getMediaTypeByName(mediaTypeName);
        if (mediaType != null) {
            resource.setMediaType(mediaType);
        }
        result.add(resource);
        idMapping.put(id, resource.getId());
    }
    return result;
}
Spine result = new Spine();
String tocResourceId = DOMUtil
.getAttribute(spineElement, NAMESPACE_OPF, OPFAttributes.toc);
result
.setTocResource(findTableOfContentsResource(tocResourceId, resources));
NodeList spineNodes = packageDocument
.getElementsByTagNameNS(NAMESPACE_OPF, OPFTags.itemref);
List<SpineReference> spineReferences = new ArrayList<>(
spineNodes.getLength());
for (int i = 0; i < spineNodes.getLength(); i++) {
Element spineItem = (Element) spineNodes.item(i);
String itemref = DOMUtil
.getAttribute(spineItem, NAMESPACE_OPF, OPFAttributes.idref);
if (StringUtil.isBlank(itemref)) {
Log.e(TAG,"itemref with missing or empty idref"); // XXX
continue;
}
String id = idMapping.get(itemref);
if (id == null) {
id = itemref;
}
Resource resource = resources.getByIdOrHref(id);
if (resource == null) {
Log.e(TAG, "resource with id '" + id + "' not found");
continue;
}
SpineReference spineReference = new SpineReference(resource);
if (OPFValues.no.equalsIgnoreCase(DOMUtil
.getAttribute(spineItem, NAMESPACE_OPF, OPFAttributes.linear))) {
spineReference.setLinear(false);
}
spineReferences.add(spineReference);
/**
 * Reads the guide element of the package document into the book's guide.
 * <p>
 * A reference is skipped when its href is blank, when no resource exists for the href
 * (fragment removed), when its type is blank, or when its type is the cover type
 * (the cover is handled elsewhere). The last two "missing" cases are logged.
 *
 * @param packageDocument the parsed package document
 * @param epubReader      not used by this method
 * @param book            the book whose guide receives the references
 * @param resources       the resources the reference hrefs are resolved against
 */
@SuppressWarnings("unused")
private static void readGuide(Document packageDocument,
                              EpubReader epubReader, EpubBook book, Resources resources) {
    Element guideElement = DOMUtil.getFirstElementByTagNameNS(
            packageDocument.getDocumentElement(), NAMESPACE_OPF, OPFTags.guide);
    if (guideElement == null) {
        return;
    }

    Guide guide = book.getGuide();
    NodeList references = guideElement
            .getElementsByTagNameNS(NAMESPACE_OPF, OPFTags.reference);
    for (int index = 0; index < references.getLength(); index++) {
        Element referenceElement = (Element) references.item(index);

        String resourceHref = DOMUtil
                .getAttribute(referenceElement, NAMESPACE_OPF, OPFAttributes.href);
        if (StringUtil.isBlank(resourceHref)) {
            continue;
        }

        // Resources are looked up by the href without its fragment part.
        String hrefWithoutFragment = StringUtil
                .substringBefore(resourceHref, Constants.FRAGMENT_SEPARATOR_CHAR);
        Resource target = resources.getByHref(hrefWithoutFragment);
        if (target == null) {
            Log.e(TAG, "Guide is referencing resource with href " + resourceHref
                    + " which could not be found");
            continue;
        }

        String type = DOMUtil
                .getAttribute(referenceElement, NAMESPACE_OPF, OPFAttributes.type);
        if (StringUtil.isBlank(type)) {
            Log.e(TAG, "Guide is referencing resource with href " + resourceHref
                    + " which is missing the 'type' attribute");
            continue;
        }

        String title = DOMUtil
                .getAttribute(referenceElement, NAMESPACE_OPF, OPFAttributes.title);
        if (!GuideReference.COVER.equalsIgnoreCase(type)) {
            String fragment = StringUtil
                    .substringAfter(resourceHref, Constants.FRAGMENT_SEPARATOR_CHAR);
            guide.addReference(new GuideReference(target, type, title, fragment));
        }
        // else: cover is handled elsewhere
    }
}
result.setSpineReferences(spineReferences);
return result;
}
/**
* Creates a spine out of all resources in the resources.
* The generated spine consists of all XHTML pages in order of their href.
*
* @param resources f
* @return a spine created out of all resources in the resources.
*/
private static Spine generateSpineFromResources(Resources resources) {
Spine result = new Spine();
List<String> resourceHrefs = new ArrayList<>(resources.getAllHrefs());
Collections.sort(resourceHrefs, String.CASE_INSENSITIVE_ORDER);
for (String resourceHref : resourceHrefs) {
Resource resource = resources.getByHref(resourceHref);
if (resource.getMediaType() == MediaTypes.NCX) {
result.setTocResource(resource);
} else if (resource.getMediaType() == MediaTypes.XHTML) {
result.addSpineReference(new SpineReference(resource));
}
/**
 * Strips off the package prefixes up to the href of the packageHref.
 * <p>
 * Example:
 * If the packageHref is "OEBPS/content.opf" then a resource href like "OEBPS/foo/bar.html" will be turned into "foo/bar.html"
 * <p>
 * NOTE(review): the prefix is removed purely by length. Every non-blank href longer
 * than the position of the last '/' in packageHref loses that many leading characters,
 * whether or not it actually starts with the package directory.
 *
 * @param packageHref     href of the package document
 * @param resourcesByHref the resources whose hrefs are rewritten in place
 * @return resourcesByHref itself when packageHref contains no '/', otherwise a new
 * Resources holding the same resource objects with their adjusted hrefs
 */
static Resources fixHrefs(String packageHref,
                          Resources resourcesByHref) {
    int prefixEnd = packageHref.lastIndexOf('/');
    if (prefixEnd < 0) {
        return resourcesByHref;
    }

    int firstKeptIndex = prefixEnd + 1;
    Resources stripped = new Resources();
    for (Resource resource : resourcesByHref.getAll()) {
        String href = resource.getHref();
        boolean canStrip = StringUtil.isNotBlank(href) && href.length() > prefixEnd;
        if (canStrip) {
            resource.setHref(href.substring(firstKeptIndex));
        }
        stripped.add(resource);
    }
    return stripped;
}
return result;
}
/**
* The spine tag should contain a 'toc' attribute with as value the resource id of the table of contents resource.
*
* Here we try several ways of finding this table of contents resource.
* We try the given attribute value, some often-used ones and finally look through all resources for the first resource with the table of contents mimetype.
*
* @param tocResourceId g
* @param resources g
* @return the Resource containing the table of contents
*/
static Resource findTableOfContentsResource(String tocResourceId,
Resources resources) {
Resource tocResource = null;
if (StringUtil.isNotBlank(tocResourceId)) {
tocResource = resources.getByIdOrHref(tocResourceId);
/**
 * Reads the document's spine, containing all sections in reading order.
 * <p>
 * When the package document has no spine element, a spine is generated from the
 * resources instead. Otherwise every itemref element found in the document is resolved
 * to a resource, first through idMapping and then by the raw idref. Itemrefs with a
 * blank idref or without a matching resource are logged and skipped.
 *
 * @param packageDocument the parsed package document
 * @param resources       the resources the itemrefs are resolved against
 * @param idMapping       maps ids used in the document to resource ids
 * @return the document's spine, containing all sections in reading order.
 */
private static Spine readSpine(Document packageDocument, Resources resources,
                               Map<String, String> idMapping) {
    Element spineElement = DOMUtil
            .getFirstElementByTagNameNS(packageDocument.getDocumentElement(),
                    NAMESPACE_OPF, OPFTags.spine);
    if (spineElement == null) {
        Log.e(TAG, "Element " + OPFTags.spine
                + " not found in package document, generating one automatically");
        return generateSpineFromResources(resources);
    }
    Spine result = new Spine();
    String tocResourceId = DOMUtil.getAttribute(spineElement, NAMESPACE_OPF, OPFAttributes.toc);
    // android.util.Log rejects a null message with a NullPointerException, and the
    // toc attribute is optional, so never hand the raw value to the logger.
    Log.v(TAG, String.valueOf(tocResourceId));
    result.setTocResource(findTableOfContentsResource(tocResourceId, resources));
    NodeList spineNodes = DOMUtil.getElementsByTagNameNS(packageDocument, NAMESPACE_OPF, OPFTags.itemref);
    if (spineNodes == null) {
        // Return the spine with only its toc resource set.
        Log.e(TAG, "spineNodes is null");
        return result;
    }
    List<SpineReference> spineReferences = new ArrayList<>(spineNodes.getLength());
    for (int i = 0; i < spineNodes.getLength(); i++) {
        Element spineItem = (Element) spineNodes.item(i);
        String itemref = DOMUtil.getAttribute(spineItem, NAMESPACE_OPF, OPFAttributes.idref);
        if (StringUtil.isBlank(itemref)) {
            Log.e(TAG, "itemref with missing or empty idref"); // XXX
            continue;
        }
        // Prefer the mapped id; fall back to the idref as written in the document.
        String id = idMapping.get(itemref);
        if (id == null) {
            id = itemref;
        }
        Resource resource = resources.getByIdOrHref(id);
        if (resource == null) {
            Log.e(TAG, "resource with id '" + id + "' not found");
            continue;
        }
        SpineReference spineReference = new SpineReference(resource);
        // Only an explicit linear="no" (any case) marks the reference as non-linear.
        if (OPFValues.no.equalsIgnoreCase(DOMUtil
                .getAttribute(spineItem, NAMESPACE_OPF, OPFAttributes.linear))) {
            spineReference.setLinear(false);
        }
        spineReferences.add(spineReference);
    }
    result.setSpineReferences(spineReferences);
    return result;
}
if (tocResource != null) {
return tocResource;
/**
 * Builds a spine directly from the given resources.
 * <p>
 * The hrefs are visited in case-insensitive order. Each XHTML resource is appended as
 * a spine reference, and each NCX resource is set as the spine's table-of-contents
 * resource (a later NCX resource replaces an earlier one). Other media types are ignored.
 *
 * @param resources the resources to build the spine from
 * @return a spine created out of all resources in the resources.
 */
private static Spine generateSpineFromResources(Resources resources) {
    List<String> sortedHrefs = new ArrayList<>(resources.getAllHrefs());
    Collections.sort(sortedHrefs, String.CASE_INSENSITIVE_ORDER);

    Spine spine = new Spine();
    for (String href : sortedHrefs) {
        Resource candidate = resources.getByHref(href);
        MediaType mediaType = candidate.getMediaType();
        if (mediaType == MediaTypes.NCX) {
            spine.setTocResource(candidate);
        } else if (mediaType == MediaTypes.XHTML) {
            spine.addSpineReference(new SpineReference(candidate));
        }
    }
    return spine;
}
// get the first resource with the NCX mediatype
tocResource = resources.findFirstResourceByMediaType(MediaTypes.NCX);
if (tocResource == null) {
for (String possibleNcxItemId : POSSIBLE_NCX_ITEM_IDS) {
tocResource = resources.getByIdOrHref(possibleNcxItemId);
/**
* The spine tag should contain a 'toc' attribute with as value the resource id of the table of contents resource.
* <p>
* Here we try several ways of finding this table of contents resource.
* We try the given attribute value, some often-used ones and finally look through all resources for the first resource with the table of contents mimetype.
*
* @param tocResourceId g
* @param resources g
* @return the Resource containing the table of contents
*/
static Resource findTableOfContentsResource(String tocResourceId,
Resources resources) {
Resource tocResource;
//一些epub3的文件为了兼容epub2,保留的epub2的目录文件,这里优先选择epub3的xml目录
tocResource = resources.getByProperties("nav");
if (tocResource != null) {
break;
return tocResource;
}
tocResource = resources
.getByIdOrHref(possibleNcxItemId.toUpperCase());
if (StringUtil.isNotBlank(tocResourceId)) {
tocResource = resources.getByIdOrHref(tocResourceId);
}
if (tocResource != null) {
break;
return tocResource;
}
}
}
//For EPUB3
if (tocResource==null){
tocResource=resources.getByProperties("nav");
}
if (tocResource == null) {
Log.e(TAG,
"Could not find table of contents resource. Tried resource with id '"
+ tocResourceId + "', " + Constants.DEFAULT_TOC_ID + ", "
+ Constants.DEFAULT_TOC_ID.toUpperCase()
+ " and any NCX resource.");
}
return tocResource;
}
/**
* Find all resources that have something to do with the coverpage and the cover image.
* Search the meta tags and the guide references
*
* @param packageDocument s
* @return all resources that have something to do with the coverpage and the cover image.
*/
// package
static Set<String> findCoverHrefs(Document packageDocument) {
Set<String> result = new HashSet<>();
// try and find a meta tag with name = 'cover' and a non-blank id
String coverResourceId = DOMUtil
.getFindAttributeValue(packageDocument, NAMESPACE_OPF,
OPFTags.meta, OPFAttributes.name, OPFValues.meta_cover,
OPFAttributes.content);
if (StringUtil.isNotBlank(coverResourceId)) {
String coverHref = DOMUtil
.getFindAttributeValue(packageDocument, NAMESPACE_OPF,
OPFTags.item, OPFAttributes.id, coverResourceId,
OPFAttributes.href);
if (StringUtil.isNotBlank(coverHref)) {
result.add(coverHref);
} else {
result.add(
coverResourceId); // maybe there was a cover href put in the cover id attribute
}
// get the first resource with the NCX mediatype
tocResource = resources.findFirstResourceByMediaType(MediaTypes.NCX);
if (tocResource == null) {
for (String possibleNcxItemId : POSSIBLE_NCX_ITEM_IDS) {
tocResource = resources.getByIdOrHref(possibleNcxItemId);
if (tocResource != null) {
break;
}
tocResource = resources
.getByIdOrHref(possibleNcxItemId.toUpperCase());
if (tocResource != null) {
break;
}
}
}
if (tocResource == null) {
Log.e(TAG,
"Could not find table of contents resource. Tried resource with id '"
+ tocResourceId + "', " + Constants.DEFAULT_TOC_ID + ", "
+ Constants.DEFAULT_TOC_ID.toUpperCase()
+ " and any NCX resource.");
}
return tocResource;
}
// try and find a reference tag with type is 'cover' and reference is not blank
String coverHref = DOMUtil
.getFindAttributeValue(packageDocument, NAMESPACE_OPF,
OPFTags.reference, OPFAttributes.type, OPFValues.reference_cover,
OPFAttributes.href);
if (StringUtil.isNotBlank(coverHref)) {
result.add(coverHref);
/**
 * Collects the hrefs that the package document associates with the cover.
 * <p>
 * Two places are searched: the meta tag named as cover, whose content is treated as
 * the id of a manifest item, and the reference tag whose type is the cover type.
 *
 * @param packageDocument the parsed package document
 * @return all resources that have something to do with the coverpage and the cover image.
 */
// package
static Set<String> findCoverHrefs(Document packageDocument) {
    Set<String> coverHrefs = new HashSet<>();

    // Meta tag with name = 'cover': its content should be the id of a manifest item.
    String metaCoverId = DOMUtil.getFindAttributeValue(packageDocument, NAMESPACE_OPF,
            OPFTags.meta, OPFAttributes.name, OPFValues.meta_cover,
            OPFAttributes.content);
    if (StringUtil.isNotBlank(metaCoverId)) {
        String itemHref = DOMUtil.getFindAttributeValue(packageDocument, NAMESPACE_OPF,
                OPFTags.item, OPFAttributes.id, metaCoverId,
                OPFAttributes.href);
        // When no item has that id, the value itself is kept: maybe there was a
        // cover href put in the cover id attribute.
        coverHrefs.add(StringUtil.isNotBlank(itemHref) ? itemHref : metaCoverId);
    }

    // Reference tag with type 'cover' and a non-blank href.
    String referenceCoverHref = DOMUtil.getFindAttributeValue(packageDocument, NAMESPACE_OPF,
            OPFTags.reference, OPFAttributes.type, OPFValues.reference_cover,
            OPFAttributes.href);
    if (StringUtil.isNotBlank(referenceCoverHref)) {
        coverHrefs.add(referenceCoverHref);
    }
    return coverHrefs;
}
return result;
}
/**
* Finds the cover resource in the packageDocument and adds it to the book if found.
@ -418,11 +421,11 @@ public class PackageDocumentReader extends PackageDocumentBase {
}
if (resource.getMediaType() == MediaTypes.XHTML) {
book.setCoverPage(resource);
} else if (MediaTypes.isBitmapImage(resource.getMediaType())) {
book.setCoverImage(resource);
}
} else if (MediaTypes.isBitmapImage(resource.getMediaType())) {
book.setCoverImage(resource);
}
}
}
}
}

@ -2,16 +2,6 @@ package me.ag2s.epublib.epub;
import android.util.Log;
import me.ag2s.epublib.domain.EpubResourceProvider;
import me.ag2s.epublib.domain.LazyResource;
import me.ag2s.epublib.domain.LazyResourceProvider;
import me.ag2s.epublib.domain.MediaType;
import me.ag2s.epublib.domain.MediaTypes;
import me.ag2s.epublib.domain.Resource;
import me.ag2s.epublib.domain.Resources;
import me.ag2s.epublib.util.CollectionUtil;
import me.ag2s.epublib.util.ResourceUtil;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
@ -22,6 +12,16 @@ import java.util.zip.ZipException;
import java.util.zip.ZipFile;
import java.util.zip.ZipInputStream;
import me.ag2s.epublib.domain.EpubResourceProvider;
import me.ag2s.epublib.domain.LazyResource;
import me.ag2s.epublib.domain.LazyResourceProvider;
import me.ag2s.epublib.domain.MediaType;
import me.ag2s.epublib.domain.MediaTypes;
import me.ag2s.epublib.domain.Resource;
import me.ag2s.epublib.domain.Resources;
import me.ag2s.epublib.util.CollectionUtil;
import me.ag2s.epublib.util.ResourceUtil;
/**
* Loads Resources from inputStreams, ZipFiles, etc
@ -72,6 +72,12 @@ public class ResourcesLoader {
} else {
resource = ResourceUtil
.createResource(zipEntry, zipFile.getInputStream(zipEntry));
/*掌上书苑有很多自制书OPF的nameSpace格式不标准,强制修复成正确的格式*/
if (href.endsWith("opf")) {
String string = new String(resource.getData()).replace("smlns=\"", "xmlns=\"");
resource.setData(string.getBytes());
}
}
if (resource.getMediaType() == MediaTypes.XHTML) {
@ -123,9 +129,15 @@ public class ResourcesLoader {
if ((zipEntry == null) || zipEntry.isDirectory()) {
continue;
}
String href = zipEntry.getName();
// store resource
Resource resource = ResourceUtil.createResource(zipEntry, zipInputStream);
///*掌上书苑有很多自制书OPF的nameSpace格式不标准,强制修复成正确的格式*/
if (href.endsWith("opf")) {
String string = new String(resource.getData()).replace("smlns=\"", "xmlns=\"");
resource.setData(string.getBytes());
}
if (resource.getMediaType() == MediaTypes.XHTML) {
resource.setInputEncoding(defaultHtmlEncoding);
}

@ -1,5 +1,7 @@
package me.ag2s.epublib.util;
import android.util.Log;
import java.io.ByteArrayOutputStream;
import java.io.Closeable;
import java.io.EOFException;
@ -18,6 +20,7 @@ import java.nio.CharBuffer;
import java.nio.channels.ReadableByteChannel;
import java.nio.charset.Charset;
import me.ag2s.epublib.epub.PackageDocumentReader;
import me.ag2s.epublib.util.commons.io.IOConsumer;
/**
@ -28,6 +31,7 @@ import me.ag2s.epublib.util.commons.io.IOConsumer;
* and using my own implementation saves the inclusion of a 200Kb jar file.
*/
public class IOUtil {
private static final String TAG = IOUtil.class.getName();
/**
* Represents the end-of-file (or stream).
@ -142,11 +146,7 @@ public class IOUtil {
//
public static void copy(InputStream in, OutputStream result) throws IOException {
int buffer=in.available();
if(buffer>IOUtil.DEFAULT_BUFFER_SIZE||buffer==0){
buffer=IOUtil.DEFAULT_BUFFER_SIZE;
}
copy(in, result,buffer);
copy(in, result,DEFAULT_BUFFER_SIZE);
}
/**
@ -450,6 +450,7 @@ public class IOUtil {
output.write(buffer, 0, n);
count += n;
}
//input.close();
}
return count;
}

Loading…
Cancel
Save