Skip to content
Snippets Groups Projects
Commit 7c324b4b authored by Fred Chasen's avatar Fred Chasen
Browse files

Merge branch 'pdf-outline' into 'master'

Added the ability to generate outlines for PDFs

See merge request tools/pagedjs-cli!5
parents 2e995afc df0d10bd
No related branches found
No related tags found
1 merge request!5Added the ability to generate outlines for PDFs
Pipeline #30141 failed with stages
in 2 seconds
...@@ -32,6 +32,7 @@ pagedjs-cli ./path/to/index.html -o result.pdf ...@@ -32,6 +32,7 @@ pagedjs-cli ./path/to/index.html -o result.pdf
-ho, --hypher_only [str] Only hyphenate passed elements selector, such as ".hyphenate, aside" -ho, --hypher_only [str] Only hyphenate passed elements selector, such as ".hyphenate, aside"
-e, --encoding [type] Set the encoding of the input html, defaults to "utf-8" -e, --encoding [type] Set the encoding of the input html, defaults to "utf-8"
-t, --timeout [ms] Set a max timeout of [ms] -t, --timeout [ms] Set a max timeout of [ms]
--outline-tags [tags] Specifies that an outline should be generated for the resulting PDF document. [tags] specifies which HTML tags should be considered for that outline. "h1,h2" will trigger an outline with "h1" tags as root elements and "h2" elements as their children.
``` ```
## Hyphenation ## Hyphenation
......
...@@ -26,6 +26,11 @@ program ...@@ -26,6 +26,11 @@ program
.option('-t, --timeout [ms]', 'Set a max timeout of [ms]') .option('-t, --timeout [ms]', 'Set a max timeout of [ms]')
.option('-x, --html', 'output html file') .option('-x, --html', 'output html file')
.option('-b, --blockLocal', 'Disallow access to filesystem for local files') .option('-b, --blockLocal', 'Disallow access to filesystem for local files')
.option('--outline-tags [tags]', 'Specifies that an outline should be ' +
'generated for the resulting PDF document. [tags] specifies which ' +
'HTML tags should be considered for that outline. ' +
'"h1,h2" will trigger an outline with "h1" tags as root elements ' +
'and "h2" elements as their childs.')
.parse(process.argv); .parse(process.argv);
...@@ -122,6 +127,7 @@ if (typeof input === "string") { ...@@ -122,6 +127,7 @@ if (typeof input === "string") {
file = await printer.html(input, options); file = await printer.html(input, options);
output = replaceExt(output, '.html'); output = replaceExt(output, '.html');
} else { } else {
options.outlineTags = !program.outlineTags ? [] : program.outlineTags.split(',');
file = await printer.pdf(input, options); file = await printer.pdf(input, options);
} }
} else { } else {
......
...@@ -203,6 +203,100 @@ class PostProcesser extends EventEmitter { ...@@ -203,6 +203,100 @@ class PostProcesser extends EventEmitter {
console.log(page); console.log(page);
} }
/**
* Adds a table of content to the generated PDF
*
* Ideally this would not be required if Chromium would add this directly.
* So if these bugs are closed this can probably be removed again:
* - https://bugs.chromium.org/p/chromium/issues/detail?id=840455
* - https://github.com/GoogleChrome/puppeteer/issues/1778
*
* This code is heavily based on @Hopding's comment at:
* https://github.com/Hopding/pdf-lib/issues/127#issuecomment-502450179
*/
addOutline(outlineSpec) {
const outline = JSON.parse(JSON.stringify(outlineSpec))
const pageRefs = [];
this.pdfDoc.catalog.Pages.traverse((kid, ref) => {
if (kid instanceof PDFLib.PDFPage)
pageRefs.push(ref);
});
const index = this.pdfDoc.index;
const outlineReference = index.nextObjectNumber();
const countOutlineLayer = (layer) => {
let count = 0;
for (const outlineEntry of layer) {
++count;
count += countOutlineLayer(outlineEntry.children);
}
return count;
}
const createItemsForOutlineLayer = (layer, parent) => {
layer.forEach((outlineItem, i) => {
let prev = i > 0 ? layer[i - 1].ref : null;
let next = i < layer.length - 1 ? layer[i + 1].ref : null;
const pdfItem = createOutlineItem(outlineItem, prev, next, parent);
index.assign(outlineItem.ref, pdfItem);
});
}
const createOutlineItem = (outlineItem, prev, next, parent) => {
if (!outlineItem.id) {
throw new Error(`Cannot generate outline item with title '${outlineItem.title} ` +
`without any target anchor. Please specify an 'id' attribute for ` +
`the relevant HTML element`);
}
const item = {
Title: PDFLib.PDFString.fromString(outlineItem.title),
Parent: parent,
Dest: PDFLib.PDFName.from(outlineItem.id),
};
if (prev) {
item.Prev = prev;
}
if (next) {
item.Next = next;
}
if (outlineItem.children.length > 0) {
item.First = outlineItem.children[0].ref;
item.Last = outlineItem.children[outlineItem.children.length - 1].ref;
item.Count = PDFLib.PDFNumber.fromNumber(countOutlineLayer(outlineItem.children));
createItemsForOutlineLayer(outlineItem.children, outlineItem.ref);
}
return PDFLib.PDFDictionary.from(item, index);
};
const createOutlineReferences = (outlineEntry) => {
outlineEntry.ref = index.nextObjectNumber();
for (const child of outlineEntry.children) {
createOutlineReferences(child);
}
}
for (const outlineItem of outline) {
createOutlineReferences(outlineItem);
}
createItemsForOutlineLayer(outline, outlineReference);
const pdfOutline = PDFLib.PDFDictionary.from(
{
First: outline[0].ref,
Last: outline[outline.length - 1].ref,
Count: PDFLib.PDFNumber.fromNumber(countOutlineLayer(outline)),
},
index,
);
index.assign(outlineReference, pdfOutline);
this.pdfDoc.catalog.set('Outlines', outlineReference);
}
save() { save() {
let writer = new PDFDocumentWriter(); let writer = new PDFDocumentWriter();
const pdfBytes = writer.saveToBytesWithXRefTable(this.pdfDoc); const pdfBytes = writer.saveToBytesWithXRefTable(this.pdfDoc);
......
...@@ -182,6 +182,54 @@ class Printer extends EventEmitter { ...@@ -182,6 +182,54 @@ class Printer extends EventEmitter {
return page; return page;
} }
async _parseOutline(page, tags) {
return await page.evaluate((tags) => {
const tagsToProcess = [];
for (const node of document.querySelectorAll(tags.join(','))) {
tagsToProcess.push(node);
}
tagsToProcess.reverse();
const root = {children: [], depth: -1};
let currentOutlineNode = root;
while (tagsToProcess.length > 0) {
const tag = tagsToProcess.pop();
const orderDepth = tags.indexOf(tag.tagName.toLowerCase());
if (orderDepth < currentOutlineNode.depth) {
currentOutlineNode = currentOutlineNode.parent;
tagsToProcess.push(tag);
} else {
const newNode = {
title: tag.innerText,
id: tag.id,
children: [],
depth: orderDepth,
};
if (orderDepth == currentOutlineNode.depth) {
newNode.parent = currentOutlineNode.parent;
currentOutlineNode.parent.children.push(newNode);
currentOutlineNode = newNode;
} else if (orderDepth > currentOutlineNode.depth) {
newNode.parent = currentOutlineNode;
currentOutlineNode.children.push(newNode);
currentOutlineNode = newNode;
}
}
}
const stripParentProperty = (node) => {
node.parent = undefined;
for (const child of node.children) {
stripParentProperty(child);
}
}
stripParentProperty(root)
return root.children;
}, tags);
}
async pdf(input, options={}) { async pdf(input, options={}) {
let page = await this.render(input); let page = await this.render(input);
...@@ -201,6 +249,8 @@ class Printer extends EventEmitter { ...@@ -201,6 +249,8 @@ class Printer extends EventEmitter {
return meta; return meta;
}); });
const outline = options.outlineTags.length > 0 ? await this._parseOutline(page, options.outlineTags) : null;
let settings = { let settings = {
printBackground: true, printBackground: true,
displayHeaderFooter: false, displayHeaderFooter: false,
...@@ -228,6 +278,9 @@ class Printer extends EventEmitter { ...@@ -228,6 +278,9 @@ class Printer extends EventEmitter {
let post = new PostProcesser(pdf); let post = new PostProcesser(pdf);
post.metadata(meta); post.metadata(meta);
post.boxes(this.pages); post.boxes(this.pages);
if (outline) {
post.addOutline(outline);
}
pdf = post.save(); pdf = post.save();
return pdf; return pdf;
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment