// postprocesser.js
const PDFLib = require("pdf-lib");
const EventEmitter = require("events");

const PDFDocumentWriter = require("./writer");

/** Box properties compared to decide whether a page's crop box differs from its media box. */
const BOX_KEYS = ["x", "y", "width", "height"];

/** Returns true when `value` is a Date holding a real point in time (not "Invalid Date"). */
function isValidDate(value) {
  return value instanceof Date && !Number.isNaN(value.getTime());
}

/**
 * Turns a comma separated string or an array into a list of trimmed,
 * non-empty keyword strings. Anything else yields an empty list.
 */
function normalizeKeywords(keywords) {
  let list = [];
  if (typeof keywords === "string") {
    list = keywords.split(",");
  } else if (Array.isArray(keywords)) {
    list = keywords;
  }
  return list
    .map((keyword) => String(keyword).trim())
    .filter((keyword) => keyword.length > 0);
}

/** Escapes the five XML special characters so arbitrary text can be embedded in XMP. */
function escapeXml(value) {
  return String(value)
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&apos;");
}

/** Renders `<tag>value</tag>` with the value escaped, or "" when there is no value. */
function xmlElement(tag, value) {
  if (value === undefined || value === null || value === "") {
    return "";
  }
  return `<${tag}>${escapeXml(value)}</${tag}>`;
}

/** Formats a Date as a PDF date string in UTC: `D:YYYYMMDDHHmmSSZ`. */
function toPdfDate(date) {
  const pad = (number, width = 2) => String(number).padStart(width, "0");
  return (
    `D:${pad(date.getUTCFullYear(), 4)}${pad(date.getUTCMonth() + 1)}${pad(date.getUTCDate())}` +
    `${pad(date.getUTCHours())}${pad(date.getUTCMinutes())}${pad(date.getUTCSeconds())}Z`
  );
}

/** Returns true when both boxes are the same object or have equal x/y/width/height. */
function isSameBox(a, b) {
  if (a === b) {
    return true;
  }
  if (!a || !b) {
    return false;
  }
  return BOX_KEYS.every((key) => a[key] === b[key]);
}

/**
 * Post-processes a generated PDF: writes document metadata (Info dictionary
 * and XMP), sets page trim/crop boxes and adds an outline.
 *
 * Changes are applied to an in-memory pdf-lib document; call `save()` to
 * serialize them.
 */
class PostProcesser extends EventEmitter {
  /**
   * @param {Uint8Array|Buffer} pdf - Bytes of the PDF to post-process.
   * @throws {Error} When no PDF is passed.
   */
  constructor(pdf) {
    super();

    if (!pdf) {
      throw new Error("Must pass a PDF Buffer to PostProcesser");
    }
    this.pdf = pdf;
    this.pdfDoc = PDFLib.PDFDocumentFactory.load(pdf);
  }

  /**
   * Writes the given metadata to both the XMP stream and the Info dictionary.
   *
   * The passed object is not modified. Keywords may be an array or a comma
   * separated string. `modDate` and `metadataDate` are always set to now;
   * `creationDate` is kept only when it is a valid Date. A missing creator
   * or producer falls back to the values found in the existing Info dictionary.
   *
   * @param {Object} [meta] - title, subject, keywords, author, creator,
   *   creatorTool, producer, creationDate.
   */
  metadata(meta = {}) {
    // Work on a copy so the caller's object is left untouched.
    const merged = Object.assign({}, meta);

    merged.keywords = normalizeKeywords(merged.keywords);

    // Overwrite dates
    if (!isValidDate(merged.creationDate)) {
      merged.creationDate = new Date();
    }
    merged.modDate = new Date();
    merged.metadataDate = new Date();

    // Fall back to what the PDF already declares
    const info = this.getInfoDict();
    if (!merged.creator) {
      merged.creator = info.creator ? `${info.creator} + Paged.js` : "Paged.js";
    }

    if (!merged.producer) {
      merged.producer = info.producer;
    }

    this.addXmpMetadata(merged);
    this.updateInfoDict(merged);
  }

  /**
   * Reads the string entries of the document's Info dictionary.
   *
   * @returns {Object} title, subject, keywords, author, creationDate,
   *   modDate, creator and producer; entries absent from the PDF are undefined.
   */
  getInfoDict() {
    // Info Reference in Skia pdfs is always 1st
    const ref = PDFLib.PDFIndirectReference.forNumbers(1, 0);
    const info = this.pdfDoc.index.lookup(ref);

    const read = (key) => {
      const entry = info.getMaybe(key);
      return entry && entry.string;
    };

    return {
      title: read("Title"),
      subject: read("Subject"),
      keywords: read("Keywords"),
      author: read("Author"),
      creationDate: read("CreationDate"),
      modDate: read("ModDate"),
      creator: read("Creator"),
      producer: read("Producer"),
    };
  }

  /**
   * Writes the provided metadata into the document's Info dictionary.
   * Only fields that are present are written; existing entries for absent
   * fields are left as they are.
   *
   * @param {Object} meta - title, subject, keywords (array or comma separated
   *   string), author, creationDate, modDate (Date objects), creator, producer.
   */
  updateInfoDict(meta) {
    // Info Reference in Skia pdfs is always 1st
    const ref = PDFLib.PDFIndirectReference.forNumbers(1, 0);
    const info = this.pdfDoc.index.lookup(ref);

    const setString = (key, value) => {
      if (value) {
        info.set(key, PDFLib.PDFString.fromString(value));
      }
    };

    setString("Title", meta.title);
    setString("Subject", meta.subject);
    setString("Keywords", normalizeKeywords(meta.keywords).join(", "));
    setString("Author", meta.author);

    // Info dictionary dates use the PDF date format, not ISO 8601.
    if (isValidDate(meta.creationDate)) {
      setString("CreationDate", toPdfDate(meta.creationDate));
    }
    if (isValidDate(meta.modDate)) {
      setString("ModDate", toPdfDate(meta.modDate));
    }

    setString("Creator", meta.creator);
    setString("Producer", meta.producer);
  }

  /**
   * Builds an XMP packet from the metadata, registers it as a metadata
   * stream and points the catalog's "Metadata" entry at it.
   *
   * Values are XML-escaped and elements without a value are omitted.
   * Missing or invalid dates default to the current time. `creatorTool`
   * falls back to `creator`.
   *
   * @param {Object} meta - author, title, keywords, subject, producer,
   *   creator, creatorTool, creationDate, modDate, metadataDate.
   */
  addXmpMetadata(meta) {
    const now = new Date();
    const creationDate = isValidDate(meta.creationDate) ? meta.creationDate : now;
    const modDate = isValidDate(meta.modDate) ? meta.modDate : now;
    const metadataDate = isValidDate(meta.metadataDate) ? meta.metadataDate : now;

    const authorItem = xmlElement("rdf:li", meta.author);
    const authorXml = authorItem
      ? `<dc:creator><rdf:Seq>${authorItem}</rdf:Seq></dc:creator>`
      : "";

    const titleXml = meta.title
      ? `<dc:title><rdf:Alt><rdf:li xml:lang="x-default">${escapeXml(meta.title)}</rdf:li></rdf:Alt></dc:title>`
      : "";

    const keywordItems = normalizeKeywords(meta.keywords)
      .map((keyword) => xmlElement("rdf:li", keyword))
      .join("\n");
    const keywordsXml = keywordItems
      ? `<dc:subject><rdf:Bag>${keywordItems}</rdf:Bag></dc:subject>`
      : "";

    const creatorToolXml = xmlElement("xmp:CreatorTool", meta.creatorTool || meta.creator);
    const subjectXml = xmlElement("pdf:Subject", meta.subject);
    const producerXml = xmlElement("pdf:Producer", meta.producer);

    // Trailing whitespace inside the packet, as in the `end="w"` packet layout below.
    const whitespacePadding = new Array(20).fill(" ".repeat(100)).join("\n");
    const metadataXML = `
      <?xpacket begin="" id="W5M0MpCehiHzreSzNTczkc9d"?>
        <x:xmpmeta xmlns:x="adobe:ns:meta/" x:xmptk="Adobe XMP Core 5.2-c001 63.139439, 2010/09/27-13:37:26">
          <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">

            <rdf:Description rdf:about="" xmlns:dc="http://purl.org/dc/elements/1.1/">
              <dc:format>application/pdf</dc:format>
              ${authorXml}
              ${titleXml}
              ${keywordsXml}
            </rdf:Description>

            <rdf:Description rdf:about="" xmlns:xmp="http://ns.adobe.com/xap/1.0/">
              ${creatorToolXml}
              <xmp:CreateDate>${creationDate.toISOString()}</xmp:CreateDate>
              <xmp:ModifyDate>${modDate.toISOString()}</xmp:ModifyDate>
              <xmp:MetadataDate>${metadataDate.toISOString()}</xmp:MetadataDate>
            </rdf:Description>

            <rdf:Description rdf:about="" xmlns:pdf="http://ns.adobe.com/pdf/1.3/">
              ${subjectXml}
              ${producerXml}
            </rdf:Description>

          </rdf:RDF>
        </x:xmpmeta>
        ${whitespacePadding}
      <?xpacket end="w"?>
    `.trim();

    // Encode as UTF-8 so non-ASCII text survives; Length must count bytes, not UTF-16 units.
    const metadataBytes = new Uint8Array(Buffer.from(metadataXML, "utf8"));

    const metadataStreamDict = PDFLib.PDFDictionary.from(
      {
        Type: PDFLib.PDFName.from("Metadata"),
        Subtype: PDFLib.PDFName.from("XML"),
        Length: PDFLib.PDFNumber.fromNumber(metadataBytes.length),
      },
      this.pdfDoc.index,
    );

    const metadataStream = PDFLib.PDFRawStream.from(metadataStreamDict, metadataBytes);
    const metadataStreamRef = this.pdfDoc.register(metadataStream);

    this.pdfDoc.catalog.set("Metadata", metadataStreamRef);
  }

  /**
   * Sets TrimBox and CropBox on every PDF page whose crop box differs
   * from its media box.
   *
   * Pages are matched by position. Entries that are missing, have no
   * `boxes`/`boxes.crop`, or whose crop box equals the media box are skipped.
   *
   * @param {Array<Object>} [pages] - Per page `{ boxes: { media, crop } }`,
   *   each box having `x`, `y`, `width` and `height`.
   */
  boxes(pages = []) {
    const pdfPages = this.pdfDoc.getPages();

    pdfPages.forEach((pdfPage, pageIndex) => {
      const page = pages[pageIndex];

      if (!page || !page.boxes) {
        return; // page was not rendered
      }

      const { media, crop } = page.boxes;

      if (!crop || isSameBox(media, crop)) {
        return; // No bleed set
      }

      // PDF rectangles are [lower-left x, lower-left y, upper-right x, upper-right y].
      const corners = [crop.x, crop.y, crop.width + crop.x, crop.height + crop.y];
      const rectangle = PDFLib.PDFArray.fromArray(
        corners.map((value) => PDFLib.PDFNumber.fromNumber(value)),
        pdfPage.index,
      );

      pdfPage.set("TrimBox", rectangle);
      pdfPage.set("CropBox", rectangle);
    });
  }

  /**
   * Logs the given page to the console.
   * NOTE(review): looks like a debugging stub; it does not update anything.
   */
  updatePageBoxes(page) {
    console.log(page);
  }

  /**
   * Adds a table of content to the generated PDF
   *
   * Ideally this would not be required if Chromium would add this directly.
   * So if these bugs are closed this can probably be removed again:
   * - https://bugs.chromium.org/p/chromium/issues/detail?id=840455
   * - https://github.com/GoogleChrome/puppeteer/issues/1778
   *
   * This code is heavily based on @Hopding's comment at:
   * https://github.com/Hopding/pdf-lib/issues/127#issuecomment-502450179
   *
   * Does nothing when the outline is empty or not an array. The passed
   * outline is not modified. All entries are validated before any PDF
   * object is allocated, so a failure leaves the document untouched.
   *
   * @param {Array<Object>} outlineSpec - Tree of `{ title, id, children }`;
   *   `id` is used as the destination name, `children` is optional.
   * @throws {Error} When an entry has no `id`.
   */
  addOutline(outlineSpec) {
    if (!Array.isArray(outlineSpec) || outlineSpec.length === 0) {
      return;
    }

    // Deep copy: refs are attached to the entries below.
    const outline = JSON.parse(JSON.stringify(outlineSpec));
    const index = this.pdfDoc.index;

    // Validate ids and make sure every entry has a children array.
    const prepareLayer = (layer) => {
      for (const entry of layer) {
        if (!entry.id) {
          throw new Error(`Cannot generate outline item with title '${entry.title}' ` +
                          "without any target anchor. Please specify an 'id' attribute for " +
                          "the relevant HTML element");
        }
        if (!Array.isArray(entry.children)) {
          entry.children = [];
        }
        prepareLayer(entry.children);
      }
    };

    // Every entry needs its reference before items are built, as siblings point at each other.
    const assignReferences = (layer) => {
      for (const entry of layer) {
        entry.ref = index.nextObjectNumber();
        assignReferences(entry.children);
      }
    };

    // Number of entries in a layer including all of their descendants.
    const countEntries = (layer) =>
      layer.reduce((total, entry) => total + 1 + countEntries(entry.children), 0);

    const createLayer = (layer, parent) => {
      layer.forEach((entry, position) => {
        const item = {
          Title: PDFLib.PDFString.fromString(entry.title),
          Parent: parent,
          Dest: PDFLib.PDFName.from(entry.id),
        };
        if (position > 0) {
          item.Prev = layer[position - 1].ref;
        }
        if (position < layer.length - 1) {
          item.Next = layer[position + 1].ref;
        }
        if (entry.children.length > 0) {
          item.First = entry.children[0].ref;
          item.Last = entry.children[entry.children.length - 1].ref;
          item.Count = PDFLib.PDFNumber.fromNumber(countEntries(entry.children));
          createLayer(entry.children, entry.ref);
        }
        index.assign(entry.ref, PDFLib.PDFDictionary.from(item, index));
      });
    };

    prepareLayer(outline);

    const outlineReference = index.nextObjectNumber();
    assignReferences(outline);
    createLayer(outline, outlineReference);

    const pdfOutline = PDFLib.PDFDictionary.from(
      {
        First: outline[0].ref,
        Last: outline[outline.length - 1].ref,
        Count: PDFLib.PDFNumber.fromNumber(countEntries(outline)),
      },
      index,
    );
    index.assign(outlineReference, pdfOutline);
    this.pdfDoc.catalog.set("Outlines", outlineReference);
  }

  /**
   * Serializes the document with the project's PDFDocumentWriter, stores
   * the result on `this.pdf` and returns it.
   *
   * @returns {Uint8Array} The bytes produced by the writer.
   */
  save() {
    const writer = new PDFDocumentWriter();
    const pdfBytes = writer.saveToBytesWithXRefTable(this.pdfDoc);
    this.pdf = pdfBytes;
    return this.pdf;
  }
}

module.exports = PostProcesser;