@daisy/epub-utils
Advanced tools
Comparing version 1.2.7-alpha.3 to 1.2.7-alpha.4
@@ -50,2 +50,26 @@ // input: unzipped book directory | ||
// Select the ToC | ||
const select = xpath.useNamespaces({ | ||
html: 'http://www.w3.org/1999/xhtml', | ||
epub: 'http://www.idpf.org/2007/ops' | ||
}); | ||
const sPageList = select('//html:nav[@epub:type="page-list"]', doc); | ||
const hasPageList = sPageList.length > 0; | ||
let pageListHrefs = undefined; | ||
if (hasPageList) { | ||
const arr1 = select('descendant::html:a/@href', sPageList[0]); | ||
pageListHrefs = arr1.map(o => decodeURI(o.nodeValue)); | ||
// console.log(arr1.length, JSON.stringify(pageListHrefs, null, 4)); | ||
} | ||
let tocHrefs = undefined; | ||
const sTOC = select('//html:nav[@epub:type="toc"]/html:ol', doc); | ||
if (sTOC[0]) { | ||
const arr2 = select('descendant::html:a/@href', sTOC[0]); | ||
tocHrefs = arr2.map(o => decodeURI(o.nodeValue)); | ||
// console.log(arr2.length, JSON.stringify(tocHrefs, null, 4)); | ||
} | ||
// Remove all links | ||
@@ -60,13 +84,9 @@ const aElems = doc.getElementsByTagNameNS('http://www.w3.org/1999/xhtml', 'a'); | ||
// Select the ToC | ||
const select = xpath.useNamespaces({ | ||
html: 'http://www.w3.org/1999/xhtml', | ||
epub: 'http://www.idpf.org/2007/ops' | ||
}); | ||
const toc = select('//html:nav' + '[@epub:type="toc"]/html:ol', doc); | ||
const tocHTML = new XMLSerializer().serializeToString(toc[0]); | ||
const hasPageList = select('//html:nav' + '[@epub:type="page-list"]', doc).length > 0; | ||
const tocHTML = new XMLSerializer().serializeToString(sTOC[0]); | ||
// console.log(tocHTML); | ||
return { | ||
src: path.relative(epubDir, fullpath), | ||
pageListHrefs, | ||
tocHrefs, | ||
tocHTML, | ||
@@ -78,2 +98,8 @@ hasPageList | ||
function addMeta(name, value, meta) { | ||
name = name.trim(); | ||
value = value.trim(); | ||
if (!name || !value) { | ||
// empty strings => bail out | ||
return; | ||
} | ||
if (!meta[name]) { | ||
@@ -110,2 +136,4 @@ meta[name] = value; | ||
function addLink(rel, href, link) { | ||
rel = rel.trim(); | ||
href = href.trim(); | ||
if (!link[rel]) { | ||
@@ -142,5 +170,7 @@ link[rel] = href; | ||
reject(new Error("Package document not found.")); | ||
return; | ||
} | ||
this.packageDoc = { | ||
src: path.relative(epubDir, packageDocPath) | ||
src: path.relative(epubDir, packageDocPath), | ||
path: packageDocPath | ||
}; | ||
@@ -156,3 +186,9 @@ this.parseData(packageDocPath, epubDir); | ||
const select = xpath.useNamespaces({ opf: 'http://www.idpf.org/2007/opf', | ||
dc: 'http://purl.org/dc/elements/1.1/' }); | ||
dc: 'http://purl.org/dc/elements/1.1/', | ||
xml: 'http://www.w3.org/XML/1998/namespace' | ||
}); | ||
const langAttr = select('/opf:package/@xml:lang', doc)[0]; | ||
this.opfLang = langAttr ? langAttr.nodeValue : undefined; | ||
this.metadata = parseMetadata(doc, select); | ||
@@ -172,4 +208,7 @@ this.links = parseLinks(doc, select); | ||
spineItem.filepath = path.join(path.dirname(packageDocPath), spineItem.relpath); | ||
spineItem.title = this.parseContentDocTitle(spineItem.filepath); | ||
const o = this.parseContentDocTitleAndIds(spineItem.filepath); | ||
spineItem.title = o.titleText; | ||
spineItem.targetIDs = o.docIds; | ||
// does encodeURI() as per https://tools.ietf.org/html/rfc3986#section-3.3 in a nutshell: encodeURI(`file://${tmpFile}`).replace(/[?#]/g, encodeURIComponent) | ||
@@ -187,2 +226,12 @@ spineItem.url = fileUrl(spineItem.filepath); | ||
} | ||
const moAttr = manifestItem[0].getAttribute('media-overlay'); | ||
const smilManifestItem = select(`/opf:package/opf:manifest/opf:item[@id='${moAttr}']`, doc); | ||
if (smilManifestItem.length > 0) { | ||
spineItem.mediaOverlay = {}; | ||
spineItem.mediaOverlay.smilRelPath = decodeURI(smilManifestItem[0].getAttribute('href')); | ||
spineItem.mediaOverlay.smilFilePath = path.join(path.dirname(packageDocPath), spineItem.mediaOverlay.smilRelPath); | ||
// spineItem.mediaOverlay.smilUrl = fileUrl(spineItem.mediaOverlay.smilFilePath); | ||
spineItem.mediaOverlay.smilRefs = this.parseSmilRefs(spineItem.mediaOverlay.smilFilePath); | ||
} | ||
} else if (!this.hasSVGContentDocuments && 'image/svg+xml' === contentType) { | ||
@@ -201,2 +250,6 @@ winston.warn('The SVG Content Documents in this EPUB will be ignored.'); | ||
this.navDoc.relpath = navDocPath; | ||
this.navDoc.filepath = navDocFullPath; | ||
this.navDoc.url = fileUrl(this.navDoc.filepath); | ||
if (spineContainsNavDoc) { | ||
@@ -211,5 +264,11 @@ if (spineContainsNavDoc.filepath !== navDocFullPath) { | ||
spi.filepath = navDocFullPath; | ||
spi.title = this.parseContentDocTitle(spi.filepath); | ||
const o = this.parseContentDocTitleAndIds(spi.filepath); | ||
spi.title = o.titleText; | ||
spi.targetIDs = o.docIds; | ||
spi.url = fileUrl(spi.filepath); | ||
spi.notInReadingOrder = true; | ||
this.contentDocs.push(spi); | ||
@@ -224,4 +283,30 @@ } | ||
EpubParser.prototype.parseContentDocTitle = function (filepath) { | ||
/**
 * Reads a SMIL (media overlay) file and lists every `<smil:text>` element
 * that carries a `src` attribute.
 *
 * @param {string} filepath - Location of the SMIL file on disk.
 * @returns {Array<{src: string, full: string, epubType: (string|undefined)}>}
 *   One entry per matching element, in the order the XPath query returns them:
 *   - `src`: the raw `src` attribute value;
 *   - `full`: `src` joined onto the directory of the SMIL file (the value is
 *     joined as-is, without URI-decoding);
 *   - `epubType`: the trimmed `epub:type` of the element's parent node, or
 *     `undefined` when there is no parent, no attribute, or only whitespace.
 */
EpubParser.prototype.parseSmilRefs = function (filepath) {
  const EPUB_OPS_NS = 'http://www.idpf.org/2007/ops';

  const smilXml = fs.readFileSync(filepath).toString();
  const smilDoc = new DOMParser({ errorHandler }).parseFromString(smilXml, 'application/xml');
  const selectSmil = xpath.useNamespaces({ smil: 'http://www.w3.org/ns/SMIL', epub: EPUB_OPS_NS });

  const refs = [];
  for (const textElem of selectSmil('//smil:text[@src]', smilDoc)) {
    // epub:type is read from the parent of <text>, not from <text> itself.
    const parent = textElem.parentNode;
    const rawType = parent ? parent.getAttributeNS(EPUB_OPS_NS, 'type') : undefined;
    // Missing, empty and whitespace-only values all collapse to undefined.
    const epubType = (rawType && rawType.trim()) || undefined;

    const src = textElem.getAttribute('src');
    refs.push({
      src,
      full: path.join(path.dirname(filepath), src),
      epubType
    });
  }
  return refs;
};
EpubParser.prototype.parseContentDocTitleAndIds = function (filepath) { | ||
const content = fs.readFileSync(filepath).toString(); | ||
// not application/xhtml+xml because: | ||
@@ -234,7 +319,23 @@ // https://github.com/jindw/xmldom/pull/208 | ||
const title = select('/html:html/html:head/html:title/text()', doc); | ||
if (title.length > 0) { | ||
return title[0].nodeValue; | ||
} else { | ||
return ""; | ||
} | ||
let titleText = title.length > 0 ? title[0].nodeValue : ""; | ||
const arr = select('//*[@id]', doc); | ||
let docIds = arr.map(o => { | ||
let epubType = o.getAttributeNS('http://www.idpf.org/2007/ops', 'type'); | ||
if (epubType) { | ||
epubType = epubType.trim(); | ||
} | ||
if (!epubType) { | ||
epubType = undefined; | ||
} | ||
return { | ||
id: o.getAttribute("id"), | ||
epubType | ||
}; | ||
}); | ||
// console.log(arr.length, JSON.stringify(docIds, null, 4)); | ||
return { | ||
titleText, docIds | ||
}; | ||
}; | ||
@@ -241,0 +342,0 @@ |
{ | ||
"name": "@daisy/epub-utils", | ||
"version": "1.2.7-alpha.3", | ||
"version": "1.2.7-alpha.4", | ||
"engines": { | ||
@@ -31,3 +31,3 @@ "node": ">=12.0.0", | ||
"tmp": "^0.2.1", | ||
"winston": "^3.7.2", | ||
"winston": "^3.8.1", | ||
"xmldom": "^0.6.0", | ||
@@ -34,0 +34,0 @@ "xpath": "^0.0.32" |
18513
465
Updated winston@^3.8.1