Newer
Older
const PDFLib = require("pdf-lib");
class PostProcesser extends EventEmitter {
constructor(pdf) {
super();
if (!pdf) {
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
this.pdfDoc = PDFLib.PDFDocumentFactory.load(pdf);
}
metadata(meta) {
if (meta.keywords && typeof meta.keywords === "string") {
meta.keywords = meta.keywords.split(",");
}
if (!meta.keywords) {
meta.keywords = [];
}
// Overwrite Dates
if (!(meta.creationDate instanceof Date)) {
meta.creationDate = new Date();
}
meta.modDate = new Date();
meta.metadataDate = new Date();
// Get the existing Info
let info = this.getInfoDict();
if (!meta.creator) {
meta.creator = info.creator + " + Paged.js";
}
if (!meta.producer) {
meta.producer = info.producer;
}
// Add meta
this.addXmpMetadata(meta);
this.updateInfoDict(meta);
}
getInfoDict(){
// Info Reference in Skia pdfs is always 1st
let ref = PDFLib.PDFIndirectReference.forNumbers(1, 0);
let info = this.pdfDoc.index.lookup(ref);
return {
title: info.getMaybe("Title") && info.getMaybe("Title").string,
subject: info.getMaybe("Subject") && info.getMaybe("Subject").string,
keywords: info.getMaybe("Keywords") && info.getMaybe("Keywords").string,
author: info.getMaybe("Author") && info.getMaybe("Author").string,
creationDate: info.getMaybe("CreationDate") && info.getMaybe("CreationDate").string,
modDate: info.getMaybe("ModDate") && info.getMaybe("ModDate").string,
creator: info.getMaybe("Creator") && info.getMaybe("Creator").string,
producer: info.getMaybe("Producer") && info.getMaybe("Producer").string
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
}
updateInfoDict(meta) {
// Info Reference in Skia pdfs is always 1st
let ref = PDFLib.PDFIndirectReference.forNumbers(1, 0);
let info = this.pdfDoc.index.lookup(ref);
if (meta.title) {
info.set("Title", PDFLib.PDFString.fromString(meta.title));
}
if (meta.subject) {
info.set("Subject", PDFLib.PDFString.fromString(meta.subject));
}
if (meta.keywords && meta.keywords.length) {
info.set("Keywords", PDFLib.PDFString.fromString(meta.keywords.join(", ")));
}
if (meta.author) {
info.set("Author", PDFLib.PDFString.fromString(meta.author));
}
if (meta.creationDate) {
info.set("CreationDate", PDFLib.PDFString.fromString(meta.creationDate.toISOString()));
}
if (meta.modDate) {
info.set("ModDate", PDFLib.PDFString.fromString(meta.modDate.toISOString()));
}
if (meta.creator) {
info.set("Creator", PDFLib.PDFString.fromString(meta.creator));
}
if (meta.producer) {
info.set("Producer", PDFLib.PDFString.fromString(meta.producer));
}
}
addXmpMetadata(meta) {
const charCodes = (str) => str.split("").map((c) => c.charCodeAt(0));
const typedArrayFor = (str) => new Uint8Array(charCodes(str));
const whitespacePadding = new Array(20).fill(" ".repeat(100)).join("\n");
const metadataXML = `
<?xpacket begin="" id="W5M0MpCehiHzreSzNTczkc9d"?>
<x:xmpmeta xmlns:x="adobe:ns:meta/" x:xmptk="Adobe XMP Core 5.2-c001 63.139439, 2010/09/27-13:37:26">
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
<rdf:Description rdf:about="" xmlns:dc="http://purl.org/dc/elements/1.1/">
<dc:format>application/pdf</dc:format>
<dc:creator>
<rdf:Seq>
<rdf:li>${meta.author}</rdf:li>
</rdf:Seq>
</dc:creator>
<dc:title>
<rdf:Alt>
<rdf:li xml:lang="x-default">${meta.title}</rdf:li>
</rdf:Alt>
</dc:title>
<dc:subject>
<rdf:Bag>
${meta.keywords
.map((keyword) => `<rdf:li>${keyword}</rdf:li>`)
</rdf:Bag>
</dc:subject>
</rdf:Description>
<rdf:Description rdf:about="" xmlns:xmp="http://ns.adobe.com/xap/1.0/">
<xmp:CreatorTool>${meta.creatorTool}</xmp:CreatorTool>
<xmp:CreateDate>${meta.creationDate.toISOString()}</xmp:CreateDate>
<xmp:ModifyDate>${meta.modDate.toISOString()}</xmp:ModifyDate>
<xmp:MetadataDate>${meta.metadataDate.toISOString()}</xmp:MetadataDate>
</rdf:Description>
<rdf:Description rdf:about="" xmlns:pdf="http://ns.adobe.com/pdf/1.3/">
<pdf:Subject>${meta.subject}</pdf:Subject>
<pdf:Producer>${meta.producer}</pdf:Producer>
</rdf:Description>
</rdf:RDF>
</x:xmpmeta>
${whitespacePadding}
<?xpacket end="w"?>
`.trim();
const metadataStreamDict = PDFLib.PDFDictionary.from(
{
Type: PDFLib.PDFName.from("Metadata"),
Subtype: PDFLib.PDFName.from("XML"),
Length: PDFLib.PDFNumber.fromNumber(metadataXML.length),
},
this.pdfDoc.index,
);
const metadataStream = PDFLib.PDFRawStream.from(
metadataStreamDict,
typedArrayFor(metadataXML),
);
const metadataStreamRef = this.pdfDoc.register(metadataStream);
this.pdfDoc.catalog.set("Metadata", metadataStreamRef);
}
boxes(pages) {
const pdfPages = this.pdfDoc.getPages();
pdfPages.forEach((pdfPage, index) => {
const page = pages[index];
if (!page) {
return; // page was not rendered
}
let { boxes } = page;
if (Object.is(boxes.media, boxes.crop)) {
return; // No bleed set
}
const rectangle = PDFLib.PDFArray.fromArray(
[
PDFLib.PDFNumber.fromNumber(boxes.crop.x),
PDFLib.PDFNumber.fromNumber(boxes.crop.y),
PDFLib.PDFNumber.fromNumber(boxes.crop.width + boxes.crop.x),
PDFLib.PDFNumber.fromNumber(boxes.crop.height + boxes.crop.y),
],
pdfPage.index,
);
// pdfPage.set("ArtBox", rectangle);
pdfPage.set("TrimBox", rectangle);
pdfPage.set("CropBox", rectangle);
});
}
/**
 * Logs the given page to the console. No page boxes are modified here.
 *
 * @param {*} page - Value handed straight to console.log.
 */
updatePageBoxes(page) {
console.log(page);
}
/**
* Adds a table of contents (PDF outline) to the generated PDF.
* Ideally this would not be required, if Chromium added the outline itself.
* So if these bugs are closed this can probably be removed again:
* - https://bugs.chromium.org/p/chromium/issues/detail?id=840455
* - https://github.com/GoogleChrome/puppeteer/issues/1778
* This code is heavily based on @Hopding's comment at:
* https://github.com/Hopding/pdf-lib/issues/127#issuecomment-502450179
*/
addOutline(outlineSpec) {
const outline = JSON.parse(JSON.stringify(outlineSpec));
const pageRefs = [];
this.pdfDoc.catalog.Pages.traverse((kid, ref) => {
if (kid instanceof PDFLib.PDFPage)
pageRefs.push(ref);
});
const index = this.pdfDoc.index;
const outlineReference = index.nextObjectNumber();
const countOutlineLayer = (layer) => {
let count = 0;
for (const outlineEntry of layer) {
++count;
count += countOutlineLayer(outlineEntry.children);
}
return count;
const createItemsForOutlineLayer = (layer, parent) => {
layer.forEach((outlineItem, i) => {
let prev = i > 0 ? layer[i - 1].ref : null;
let next = i < layer.length - 1 ? layer[i + 1].ref : null;
const pdfItem = createOutlineItem(outlineItem, prev, next, parent);
index.assign(outlineItem.ref, pdfItem);
});
const createOutlineItem = (outlineItem, prev, next, parent) => {
if (!outlineItem.id) {
throw new Error(`Cannot generate outline item with title '${outlineItem.title} ` +
"without any target anchor. Please specify an 'id' attribute for " +
"the relevant HTML element");
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
}
const item = {
Title: PDFLib.PDFString.fromString(outlineItem.title),
Parent: parent,
Dest: PDFLib.PDFName.from(outlineItem.id),
};
if (prev) {
item.Prev = prev;
}
if (next) {
item.Next = next;
}
if (outlineItem.children.length > 0) {
item.First = outlineItem.children[0].ref;
item.Last = outlineItem.children[outlineItem.children.length - 1].ref;
item.Count = PDFLib.PDFNumber.fromNumber(countOutlineLayer(outlineItem.children));
createItemsForOutlineLayer(outlineItem.children, outlineItem.ref);
}
return PDFLib.PDFDictionary.from(item, index);
};
const createOutlineReferences = (outlineEntry) => {
outlineEntry.ref = index.nextObjectNumber();
for (const child of outlineEntry.children) {
createOutlineReferences(child);
}
for (const outlineItem of outline) {
createOutlineReferences(outlineItem);
}
createItemsForOutlineLayer(outline, outlineReference);
const pdfOutline = PDFLib.PDFDictionary.from(
{
First: outline[0].ref,
Last: outline[outline.length - 1].ref,
Count: PDFLib.PDFNumber.fromNumber(countOutlineLayer(outline)),
},
index,
);
index.assign(outlineReference, pdfOutline);
this.pdfDoc.catalog.set("Outlines", outlineReference);