Skip to content
Snippets Groups Projects
Commit df0d10bd authored by Erik Schilling's avatar Erik Schilling
Browse files

Added the ability to generate outlines for PDFs

--outline-tags allows specifying the HTML tags which should be
considered for the outline. The tags are expected to be given in
order of hierarchy; for example, 'h1,h2' will trigger a generation
with h1 elements as top-level outline entries and h2 as their
children.

Ideally this would not be required if Chromium would add
this directly. So if these bugs are closed this can probably be
removed again:
- https://bugs.chromium.org/p/chromium/issues/detail?id=840455
- https://github.com/GoogleChrome/puppeteer/issues/1778

This code is heavily based on @Hopding's comment at:
https://github.com/Hopding/pdf-lib/issues/127#issuecomment-502450179
parent 2e995afc
No related branches found
No related tags found
1 merge request!5Added the ability to generate outlines for PDFs
...@@ -32,6 +32,7 @@ pagedjs-cli ./path/to/index.html -o result.pdf ...@@ -32,6 +32,7 @@ pagedjs-cli ./path/to/index.html -o result.pdf
-ho, --hypher_only [str] Only hyphenate passed elements selector, such as ".hyphenate, aside" -ho, --hypher_only [str] Only hyphenate passed elements selector, such as ".hyphenate, aside"
-e, --encoding [type] Set the encoding of the input html, defaults to "utf-8" -e, --encoding [type] Set the encoding of the input html, defaults to "utf-8"
-t, --timeout [ms] Set a max timeout of [ms] -t, --timeout [ms] Set a max timeout of [ms]
--outline-tags [tags] Specifies that an outline should be generated for the resulting PDF document. [tags] specifies which HTML tags should be considered for that outline. "h1,h2" will trigger an outline with "h1" tags as root elements and "h2" elements as their children.
``` ```
## Hyphenation ## Hyphenation
......
...@@ -26,6 +26,11 @@ program ...@@ -26,6 +26,11 @@ program
.option('-t, --timeout [ms]', 'Set a max timeout of [ms]') .option('-t, --timeout [ms]', 'Set a max timeout of [ms]')
.option('-x, --html', 'output html file') .option('-x, --html', 'output html file')
.option('-b, --blockLocal', 'Disallow access to filesystem for local files') .option('-b, --blockLocal', 'Disallow access to filesystem for local files')
.option('--outline-tags [tags]', 'Specifies that an outline should be ' +
'generated for the resulting PDF document. [tags] specifies which ' +
'HTML tags should be considered for that outline. ' +
'"h1,h2" will trigger an outline with "h1" tags as root elements ' +
'and "h2" elements as their childs.')
.parse(process.argv); .parse(process.argv);
...@@ -122,6 +127,7 @@ if (typeof input === "string") { ...@@ -122,6 +127,7 @@ if (typeof input === "string") {
file = await printer.html(input, options); file = await printer.html(input, options);
output = replaceExt(output, '.html'); output = replaceExt(output, '.html');
} else { } else {
options.outlineTags = !program.outlineTags ? [] : program.outlineTags.split(',');
file = await printer.pdf(input, options); file = await printer.pdf(input, options);
} }
} else { } else {
......
...@@ -203,6 +203,100 @@ class PostProcesser extends EventEmitter { ...@@ -203,6 +203,100 @@ class PostProcesser extends EventEmitter {
console.log(page); console.log(page);
} }
/**
* Adds a table of content to the generated PDF
*
* Ideally this would not be required if Chromium would add this directly.
* So if these bugs are closed this can probably be removed again:
* - https://bugs.chromium.org/p/chromium/issues/detail?id=840455
* - https://github.com/GoogleChrome/puppeteer/issues/1778
*
* This code is heavily based on @Hopding's comment at:
* https://github.com/Hopding/pdf-lib/issues/127#issuecomment-502450179
*/
addOutline(outlineSpec) {
const outline = JSON.parse(JSON.stringify(outlineSpec))
const pageRefs = [];
this.pdfDoc.catalog.Pages.traverse((kid, ref) => {
if (kid instanceof PDFLib.PDFPage)
pageRefs.push(ref);
});
const index = this.pdfDoc.index;
const outlineReference = index.nextObjectNumber();
const countOutlineLayer = (layer) => {
let count = 0;
for (const outlineEntry of layer) {
++count;
count += countOutlineLayer(outlineEntry.children);
}
return count;
}
const createItemsForOutlineLayer = (layer, parent) => {
layer.forEach((outlineItem, i) => {
let prev = i > 0 ? layer[i - 1].ref : null;
let next = i < layer.length - 1 ? layer[i + 1].ref : null;
const pdfItem = createOutlineItem(outlineItem, prev, next, parent);
index.assign(outlineItem.ref, pdfItem);
});
}
const createOutlineItem = (outlineItem, prev, next, parent) => {
if (!outlineItem.id) {
throw new Error(`Cannot generate outline item with title '${outlineItem.title} ` +
`without any target anchor. Please specify an 'id' attribute for ` +
`the relevant HTML element`);
}
const item = {
Title: PDFLib.PDFString.fromString(outlineItem.title),
Parent: parent,
Dest: PDFLib.PDFName.from(outlineItem.id),
};
if (prev) {
item.Prev = prev;
}
if (next) {
item.Next = next;
}
if (outlineItem.children.length > 0) {
item.First = outlineItem.children[0].ref;
item.Last = outlineItem.children[outlineItem.children.length - 1].ref;
item.Count = PDFLib.PDFNumber.fromNumber(countOutlineLayer(outlineItem.children));
createItemsForOutlineLayer(outlineItem.children, outlineItem.ref);
}
return PDFLib.PDFDictionary.from(item, index);
};
const createOutlineReferences = (outlineEntry) => {
outlineEntry.ref = index.nextObjectNumber();
for (const child of outlineEntry.children) {
createOutlineReferences(child);
}
}
for (const outlineItem of outline) {
createOutlineReferences(outlineItem);
}
createItemsForOutlineLayer(outline, outlineReference);
const pdfOutline = PDFLib.PDFDictionary.from(
{
First: outline[0].ref,
Last: outline[outline.length - 1].ref,
Count: PDFLib.PDFNumber.fromNumber(countOutlineLayer(outline)),
},
index,
);
index.assign(outlineReference, pdfOutline);
this.pdfDoc.catalog.set('Outlines', outlineReference);
}
save() { save() {
let writer = new PDFDocumentWriter(); let writer = new PDFDocumentWriter();
const pdfBytes = writer.saveToBytesWithXRefTable(this.pdfDoc); const pdfBytes = writer.saveToBytesWithXRefTable(this.pdfDoc);
......
...@@ -182,6 +182,54 @@ class Printer extends EventEmitter { ...@@ -182,6 +182,54 @@ class Printer extends EventEmitter {
return page; return page;
} }
async _parseOutline(page, tags) {
return await page.evaluate((tags) => {
const tagsToProcess = [];
for (const node of document.querySelectorAll(tags.join(','))) {
tagsToProcess.push(node);
}
tagsToProcess.reverse();
const root = {children: [], depth: -1};
let currentOutlineNode = root;
while (tagsToProcess.length > 0) {
const tag = tagsToProcess.pop();
const orderDepth = tags.indexOf(tag.tagName.toLowerCase());
if (orderDepth < currentOutlineNode.depth) {
currentOutlineNode = currentOutlineNode.parent;
tagsToProcess.push(tag);
} else {
const newNode = {
title: tag.innerText,
id: tag.id,
children: [],
depth: orderDepth,
};
if (orderDepth == currentOutlineNode.depth) {
newNode.parent = currentOutlineNode.parent;
currentOutlineNode.parent.children.push(newNode);
currentOutlineNode = newNode;
} else if (orderDepth > currentOutlineNode.depth) {
newNode.parent = currentOutlineNode;
currentOutlineNode.children.push(newNode);
currentOutlineNode = newNode;
}
}
}
const stripParentProperty = (node) => {
node.parent = undefined;
for (const child of node.children) {
stripParentProperty(child);
}
}
stripParentProperty(root)
return root.children;
}, tags);
}
async pdf(input, options={}) { async pdf(input, options={}) {
let page = await this.render(input); let page = await this.render(input);
...@@ -201,6 +249,8 @@ class Printer extends EventEmitter { ...@@ -201,6 +249,8 @@ class Printer extends EventEmitter {
return meta; return meta;
}); });
const outline = options.outlineTags.length > 0 ? await this._parseOutline(page, options.outlineTags) : null;
let settings = { let settings = {
printBackground: true, printBackground: true,
displayHeaderFooter: false, displayHeaderFooter: false,
...@@ -228,6 +278,9 @@ class Printer extends EventEmitter { ...@@ -228,6 +278,9 @@ class Printer extends EventEmitter {
let post = new PostProcesser(pdf); let post = new PostProcesser(pdf);
post.metadata(meta); post.metadata(meta);
post.boxes(this.pages); post.boxes(this.pages);
if (outline) {
post.addOutline(outline);
}
pdf = post.save(); pdf = post.save();
return pdf; return pdf;
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment