From c5840897c3fa41ec571c3c196d180809f4eafd61 Mon Sep 17 00:00:00 2001 From: Johannes Wilm Date: Thu, 5 Dec 2024 12:19:29 +0100 Subject: [PATCH] import DOMExporter from fiduswriter --- .../js/modules/books/exporter/dom_export.js | 293 ++++++++++++++++++ .../js/modules/books/exporter/epub/index.js | 20 +- .../modules/books/exporter/epub/templates.js | 43 +++ .../js/modules/books/exporter/epub/tools.js | 136 ++++++++ .../modules/books/exporter/html/multifile.js | 6 +- .../js/modules/books/exporter/html/tools.js | 30 ++ 6 files changed, 515 insertions(+), 13 deletions(-) create mode 100644 fiduswriter/book/static/js/modules/books/exporter/dom_export.js create mode 100644 fiduswriter/book/static/js/modules/books/exporter/epub/tools.js create mode 100644 fiduswriter/book/static/js/modules/books/exporter/html/tools.js diff --git a/fiduswriter/book/static/js/modules/books/exporter/dom_export.js b/fiduswriter/book/static/js/modules/books/exporter/dom_export.js new file mode 100644 index 0000000..a790e2a --- /dev/null +++ b/fiduswriter/book/static/js/modules/books/exporter/dom_export.js @@ -0,0 +1,293 @@ +import {DOMSerializer} from "prosemirror-model" +import {RenderCitations} from "../../citations/render" +import {get} from "../../common" +import {BIBLIOGRAPHY_HEADERS, CATS} from "../../schema/i18n" + +/* + +WARNING: DEPRECATED! + +Base exporter class for dom-based exports. This is the deprecated way of creating exports. +The epub and html book export filters go over a DOM of a document which they change little +by little, and they are all based on the DOMExporter class. + + New exporters should instead work by walking the doc.content tree. + This is how all document exporters work, including the new HTML/EPUB exporter. 
+*/ + +export class DOMExporter { + constructor(schema, csl, documentStyles) { + this.schema = schema + this.csl = csl + this.documentStyles = documentStyles + + this.fontFiles = [] + this.binaryFiles = [] + this.styleSheets = [{url: staticUrl("css/document.css")}] + } + + addDocStyle(doc) { + const docStyle = this.documentStyles.find( + docStyle => docStyle.slug === doc.settings.documentstyle + ) + + // The files will be in the base directory. The filenames of + // DocumentStyleFiles will therefore not need to be replaced with their URLs. + if (!docStyle) { + return + } + let contents = docStyle.contents + docStyle.documentstylefile_set.forEach( + ([_url, filename]) => + (contents = contents.replace( + new RegExp(filename, "g"), + `media/${filename}` + )) + ) + this.styleSheets.push({contents, filename: `css/${docStyle.slug}.css`}) + this.fontFiles = this.fontFiles.concat( + docStyle.documentstylefile_set.map(([url, filename]) => ({ + filename: `css/media/${filename}`, + url + })) + ) + } + + loadStyles() { + const p = [] + this.styleSheets.forEach(sheet => { + if (sheet.url) { + p.push( + get(sheet.url) + .then(response => response.text()) + .then(response => { + sheet.contents = response + sheet.filename = `css/${sheet.url.split("/").pop().split("?")[0]}` + delete sheet.url + }) + ) + } + }) + return Promise.all(p) + } + + joinDocumentParts() { + this.schema.cached.imageDB = this.imageDB + const serializer = DOMSerializer.fromSchema(this.schema) + this.content = serializer.serializeNode( + this.schema.nodeFromJSON(this.docContent) + ) + + this.addFootnotes() + const bibliographyHeader = + this.doc.settings.bibliography_header[this.doc.settings.language] || + BIBLIOGRAPHY_HEADERS[this.doc.settings.language] + const citRenderer = new RenderCitations( + this.content, + this.doc.settings.citationstyle, + bibliographyHeader, + this.bibDB, + this.csl + ) + return citRenderer.init().then(() => { + this.addBibliographyHTML(citRenderer.fm.bibHTML) 
this.cleanHTML(citRenderer.fm) + this.addCategoryLabels(this.doc.settings.language) + return Promise.resolve() + }) + } + + addCategoryLabels(language) { + this.content + .querySelectorAll("figcaption span.label,caption span.label") + .forEach(el => { + const category = el.parentElement.parentElement.dataset.category + el.innerHTML = + category === "none" ? "" : CATS[category][language] + }) + } + + addBibliographyHTML(bibliographyHTML) { + if (bibliographyHTML.length > 0) { + const tempNode = document.createElement("div") + tempNode.innerHTML = bibliographyHTML + while (tempNode.firstChild) { + const footnotesContainer = + this.content.querySelector("section.fnlist") + this.content.insertBefore( + tempNode.firstChild, + footnotesContainer + ) + } + } + } + + replaceImgSrc(htmlString) { + htmlString = htmlString.replace( + /<(img|IMG) data-src([^>]+)>/gm, + "<$1 src$2>" + ) + return htmlString + } + // Replace all instances of the before string in all descendant textnodes of + // node. + replaceText(node, before, after) { + if (node.nodeType === 1) { + ;[].forEach.call(node.childNodes, child => + this.replaceText(child, before, after) + ) + } else if (node.nodeType === 3) { + node.textContent = node.textContent.replace( + new window.RegExp(before, "g"), + after + ) + } + } + + cleanNode(node) { + if (node.contentEditable === "true") { + node.removeAttribute("contentEditable") + } + if (node.children) { + Array.from(node.children).forEach(childNode => + this.cleanNode(childNode) + ) + } + } + + getFootnoteAnchor(counter) { + const footnoteAnchor = document.createElement("a") + footnoteAnchor.setAttribute("href", "#fn" + counter) + // RASH 0.5 doesn't mark the footnote anchors, so we add this class + footnoteAnchor.classList.add("fn") + return footnoteAnchor + } + + addFootnotes() { + // Replace the footnote markers with anchors and put footnotes with contents + // at the back of the document. 
+ // Also, link the footnote anchor with the footnote according to + // https://rawgit.com/essepuntato/rash/master/documentation/index.html#footnotes. + const footnotes = this.content.querySelectorAll(".footnote-marker") + const footnotesContainer = document.createElement("section") + footnotesContainer.classList.add("fnlist") + footnotesContainer.setAttribute("role", "doc-footnotes") + + footnotes.forEach((footnote, index) => { + const counter = index + 1 + const footnoteAnchor = this.getFootnoteAnchor(counter) + footnote.parentNode.replaceChild(footnoteAnchor, footnote) + const newFootnote = document.createElement("section") + newFootnote.id = "fn" + counter + newFootnote.setAttribute("role", "doc-footnote") + newFootnote.innerHTML = footnote.dataset.footnote + footnotesContainer.appendChild(newFootnote) + }) + this.content.appendChild(footnotesContainer) + } + + moveCitationFootnotes(citationFormatter) { + const footnotes = this.content.querySelectorAll("a.fn, .citation") + const footnotesContainer = this.content.querySelector("section.fnlist") + const fnCountOffset = this.content.querySelectorAll("a.fn").length + + if (footnotes.length === fnCountOffset) { + // There are no citations to move + return + } + + let counter = 0 + + footnotes.forEach((footnote, index) => { + if (footnote.matches("a.fn")) { + // Regular footnote - skip + return + } + if (footnote.matches("section.fnlist .citation")) { + // The citation is already in a footnote. Do not add a second footnote. + footnote.innerHTML = + citationFormatter.citationTexts[counter] || " " + counter += 1 + return + } + const id = fnCountOffset + counter + 1 + const footnoteAnchor = this.getFootnoteAnchor(id) + footnote.parentNode.replaceChild(footnoteAnchor, footnote) + const newFootnote = document.createElement("section") + newFootnote.id = "fn" + id + newFootnote.setAttribute("role", "doc-footnote") + newFootnote.innerHTML = `

${ + citationFormatter.citationTexts[counter] || " " + }

` + footnotesContainer.insertBefore( + newFootnote, + footnotesContainer.childNodes[index] + ) + counter += 1 + }) + } + + cleanHTML(citationFormatter) { + if (citationFormatter.citationType === "note") { + this.moveCitationFootnotes(citationFormatter) + } + + this.cleanNode(this.content) + + // Replace nbsp spaces with normal ones + this.replaceText(this.content, " ", " ") + + this.content.querySelectorAll(".comment").forEach(el => { + el.insertAdjacentHTML("afterend", el.innerHTML) + el.parentElement.removeChild(el) + }) + + this.content.querySelectorAll(".citation").forEach(el => { + delete el.dataset.references + delete el.dataset.bibs + delete el.dataset.format + }) + this.content.querySelectorAll("img").forEach(el => { + delete el.dataset.image + delete el.dataset.imageSrc + }) + + this.content.querySelectorAll("table").forEach(el => { + delete el.dataset.captionHidden + }) + + this.content + .querySelectorAll( + "figcaption span.text:empty,caption span.text:empty" + ) + .forEach(el => { + el.parentElement.removeChild(el) + }) + + this.content.querySelectorAll(".cross-reference").forEach(el => { + el.innerHTML = `${el.innerHTML}` + }) + } + + // Fill the contents of table of contents. 
+ fillToc() { + const headlines = Array.from( + this.content.querySelectorAll("h1,h2,h3,h4,h5,h6") + ) + const tocs = Array.from( + this.content.querySelectorAll("div.table-of-contents") + ) + tocs.forEach(toc => { + toc.innerHTML += headlines + .map(headline => { + if (!headline.id || !headline.textContent.length) { + // ignore the tocs own headlines + return "" + } + const tagName = headline.tagName.toLowerCase() + return `<${tagName}>${headline.innerHTML}` + }) + .join("") + }) + } +} diff --git a/fiduswriter/book/static/js/modules/books/exporter/epub/index.js b/fiduswriter/book/static/js/modules/books/exporter/epub/index.js index 5592a31..d9b56de 100644 --- a/fiduswriter/book/static/js/modules/books/exporter/epub/index.js +++ b/fiduswriter/book/static/js/modules/books/exporter/epub/index.js @@ -2,13 +2,7 @@ import download from "downloadjs" import pretty from "pretty" import {DOMSerializer} from "prosemirror-model" -import { - addCategoryLabels, - getTimestamp, - orderLinks, - setLinks, - styleEpubFootnotes -} from "../../../exporter/epub/tools" +import {getTimestamp} from "../../../exporter/epub/tools" import {DOMExporter} from "../../../exporter/tools/dom_export" import {mathliveOpfIncludes} from "../../../mathlive/opf_includes" import {BIBLIOGRAPHY_HEADERS} from "../../../schema/i18n" @@ -18,8 +12,15 @@ import { epubBookCopyrightTemplate, epubBookCoverTemplate, epubBookOpfTemplate, - epubBookTitlepageTemplate + epubBookTitlepageTemplate, + xhtmlTemplate } from "./templates" +import { + addCategoryLabels, + orderLinks, + setLinks, + styleEpubFootnotes +} from "./tools" import {RenderCitations} from "../../../citations/render" import {addAlert} from "../../../common" @@ -27,8 +28,7 @@ import { containerTemplate, navTemplate, ncxItemTemplate, - ncxTemplate, - xhtmlTemplate + ncxTemplate } from "../../../exporter/epub/templates" import {removeHidden} from "../../../exporter/tools/doc_content" import {createSlug} from "../../../exporter/tools/file" diff --git 
a/fiduswriter/book/static/js/modules/books/exporter/epub/templates.js b/fiduswriter/book/static/js/modules/books/exporter/epub/templates.js index ef99e45..44172d6 100644 --- a/fiduswriter/book/static/js/modules/books/exporter/epub/templates.js +++ b/fiduswriter/book/static/js/modules/books/exporter/epub/templates.js @@ -2,6 +2,49 @@ import {escapeText, localizeDate} from "../../../common" import {LANGUAGES} from "../../../schema/const" import {bookTerm} from "../../i18n" +/** A template for a document in an epub. */ +export const xhtmlTemplate = ({ + shortLang, + title, + math, + styleSheets, + part, + currentPart, + body, + copyright +}) => + ` + + + ${copyright && copyright.holder ? `` : ""} + ${escapeText(title)} +${ + math + ? '\n' + : "" +} +${styleSheets + .map( + sheet => + `\n` + ) + .join("")} + + ${ + part && part.length ? `

${escapeText(part)}

` : "" + }${body}${ + copyright && copyright.holder + ? `
© ${copyright.year ? copyright.year : new Date().getFullYear()} ${copyright.holder}
` + : "" + } + ${ + copyright && copyright.licenses.length + ? `
${copyright.licenses.map(license => `${escapeText(license.title)}${license.start ? ` (${license.start})` : ""}`).join("
")}
` + : "" + } +` + /** A template to create the OPF file of book epubs. */ export const epubBookOpfTemplate = ({ book, diff --git a/fiduswriter/book/static/js/modules/books/exporter/epub/tools.js b/fiduswriter/book/static/js/modules/books/exporter/epub/tools.js new file mode 100644 index 0000000..8e5d11e --- /dev/null +++ b/fiduswriter/book/static/js/modules/books/exporter/epub/tools.js @@ -0,0 +1,136 @@ +import {CATS} from "../../../schema/i18n" + +export function styleEpubFootnotes(htmlEl) { + // Converts RASH style footnotes into epub footnotes + const fnListEl = htmlEl.querySelector("section.fnlist") + if (!fnListEl) { + // There are no footnotes. + return htmlEl + } + fnListEl.setAttribute("role", "doc-endnotes") + const footnotes = fnListEl.querySelectorAll("section[role=doc-footnote]") + let footnoteCounter = 1 + footnotes.forEach(footnote => { + const newFootnote = document.createElement("aside") + newFootnote.setAttribute("epub:type", "footnote") + newFootnote.id = footnote.id + if (footnote.firstChild) { + while (footnote.firstChild) { + newFootnote.appendChild(footnote.firstChild) + } + newFootnote.firstChild.innerHTML = + footnoteCounter + " " + newFootnote.firstChild.innerHTML + } else { + newFootnote.innerHTML = "

" + footnoteCounter + "

" + } + + footnote.parentNode.replaceChild(newFootnote, footnote) + footnoteCounter++ + }) + const footnoteMarkers = htmlEl.querySelectorAll("a.fn") + let footnoteMarkerCounter = 1 + footnoteMarkers.forEach(fnMarker => { + const newFnMarker = document.createElement("sup") + const newFnMarkerLink = document.createElement("a") + newFnMarkerLink.setAttribute("epub:type", "noteref") + newFnMarkerLink.setAttribute("href", fnMarker.getAttribute("href")) + newFnMarkerLink.innerHTML = footnoteMarkerCounter + newFnMarker.appendChild(newFnMarkerLink) + fnMarker.parentNode.replaceChild(newFnMarker, fnMarker) + footnoteMarkerCounter++ + }) + + return htmlEl +} + +export function setLinks(htmlEl, docNum = 0) { + const contentItems = [] + let title + let idCount = 0 + + htmlEl.querySelectorAll("div.doc-title,h1,h2,h3,h4,h5,h6").forEach(el => { + title = el.textContent.trim() + if (title !== "" || el.classList.contains("doc-title")) { + const contentItem = {} + contentItem.title = title + contentItem.level = el.classList.contains("doc-title") + ? 0 + : Number.parseInt(el.tagName.substring(1, 2)) + if (docNum) { + contentItem.docNum = docNum + } + if (!el.id) { + // The element has no ID, so we add one. 
+ el.id = `_${docNum}_${idCount++}` + } + contentItem.id = el.id + contentItems.push(contentItem) + } + }) + return contentItems +} + +export function orderLinks(contentItems) { + for (let i = 0; i < contentItems.length; i++) { + contentItems[i].subItems = [] + if (i > 0) { + for (let j = i - 1; j > -1; j--) { + if (contentItems[j].level < contentItems[i].level) { + contentItems[j].subItems.push(contentItems[i]) + contentItems[i].delete = true + break + } + } + } + } + + for (let i = contentItems.length; i > -1; i--) { + if (contentItems[i]?.delete) { + delete contentItems[i].delete + contentItems.splice(i, 1) + } + } + return contentItems +} + +export function addCategoryLabels(htmlEl, language, footnote = false) { + // Due to lacking CSS support in ereaders, figure numbers need to be hardcoded. + htmlEl + .querySelectorAll( + "figure[data-category='figure'] figcaption span.label" + ) + .forEach((el, index) => { + const suffix = el.parentElement.innerText.trim().length ? ": " : "" + el.innerHTML = `${CATS["figure"][language]} ${index + 1}${footnote ? "A" : ""}${suffix}` + el.classList.remove("label") + }) + + htmlEl + .querySelectorAll( + "figure[data-category='equation'] figcaption span.label" + ) + .forEach((el, index) => { + const suffix = el.parentElement.innerText.trim().length ? ": " : "" + el.innerHTML = `${CATS["equation"][language]} ${index + 1}${footnote ? "A" : ""}${suffix}` + el.classList.remove("label") + }) + + htmlEl + .querySelectorAll("figure[data-category='photo'] figcaption span.label") + .forEach((el, index) => { + const suffix = el.parentElement.innerText.trim().length ? ": " : "" + el.innerHTML = `${CATS["photo"][language]} ${index + 1}${footnote ? "A" : ""}${suffix}` + el.classList.remove("label") + }) + + htmlEl + .querySelectorAll( + "figure[data-category='table'] figcaption span.label,table[data-category='table'] caption span.label" + ) + .forEach((el, index) => { + const suffix = el.parentElement.innerText.trim().length ? 
": " : "" + el.innerHTML = `${CATS["table"][language]} ${index + 1}${footnote ? "A" : ""}${suffix}` + el.classList.remove("label") + }) + return htmlEl +} diff --git a/fiduswriter/book/static/js/modules/books/exporter/html/multifile.js b/fiduswriter/book/static/js/modules/books/exporter/html/multifile.js index c9c543d..73da04e 100644 --- a/fiduswriter/book/static/js/modules/books/exporter/html/multifile.js +++ b/fiduswriter/book/static/js/modules/books/exporter/html/multifile.js @@ -4,16 +4,16 @@ import {DOMSerializer} from "prosemirror-model" import {RenderCitations} from "../../../citations/render" import {addAlert} from "../../../common" -import {orderLinks, setLinks} from "../../../exporter/epub/tools" import {removeHidden} from "../../../exporter/tools/doc_content" -import {DOMExporter} from "../../../exporter/tools/dom_export" import {createSlug} from "../../../exporter/tools/file" -import {modifyImages} from "../../../exporter/tools/html" import {ZipFileCreator} from "../../../exporter/tools/zip" import {LANGUAGES} from "../../../schema/const" import {BIBLIOGRAPHY_HEADERS, CATS} from "../../../schema/i18n" +import {DOMExporter} from "../dom_export" +import {orderLinks, setLinks} from "../epub/tools" import {getMissingChapterData, uniqueObjects} from "../tools" import {htmlBookExportTemplate, htmlBookIndexTemplate} from "./templates" +import {modifyImages} from "./tools" export class HTMLBookExporter extends DOMExporter { constructor(schema, csl, bookStyles, book, user, docList, updated) { diff --git a/fiduswriter/book/static/js/modules/books/exporter/html/tools.js b/fiduswriter/book/static/js/modules/books/exporter/html/tools.js new file mode 100644 index 0000000..c0c3a7d --- /dev/null +++ b/fiduswriter/book/static/js/modules/books/exporter/html/tools.js @@ -0,0 +1,30 @@ +export const modifyImages = htmlEl => { + const imageLinks = htmlEl.querySelectorAll("img"), + images = [] + + imageLinks.forEach((el, index) => { + const src = 
el.getAttribute("src").split("?")[0] + let filename = `images/${src.split("/").pop()}` + // JPGs are output as PNG elements as well. + if (filename === "images/") { + // name was not retrievable so we give the image a unique numerical + // name like 1.png, 2.jpg, 3.svg, etc. + filename = `images/${index}` + } + + const newImg = document.createElement("img") + // We set the src of the image as "data-src" for now so that the browser + // won't try to load the file immediately + newImg.setAttribute("data-src", filename) + el.parentNode.replaceChild(newImg, el) + + if (!images.find(image => image.filename === filename)) { + images.push({ + filename, + url: src + }) + } + }) + + return images +}