Skip to content
Snippets Groups Projects
Commit b6d4ef67 authored by Fred Chasen's avatar Fred Chasen
Browse files

Merge branch 'request_interception' into 'master'

Add request interception handler

See merge request tools/pagedjs-cli!11
parents 469393cb dc1a9252
No related branches found
No related tags found
1 merge request!11Add request interception handler
Pipeline #30160 passed with stages
in 1 minute and 32 seconds
...@@ -17,23 +17,28 @@ pagedjs-cli ./path/to/index.html -o result.pdf ...@@ -17,23 +17,28 @@ pagedjs-cli ./path/to/index.html -o result.pdf
## Options ## Options
``` ```
-h, --help output usage information -V, --version output the version number
-V, --version output the version number -i, --inputs [inputs] Inputs
-i, --inputs [inputs] Inputs -o, --output [output] Output
-o, --output [output] Output -d, --debug Debug
-d, --debug Show Electron Window to Debug -l, --landscape Landscape printing (default: false)
-l, --landscape Landscape printing -s, --page-size [size] Print to Page Size [size]
-s, --page-size [size] Print to Page Size [size] -w, --width [size] Print to Page Width [width] in MM
-w, --width [size] Print to Page Width [width] -h --height [size] Print to Page Height [height] in MM
-h --height [size] Print to Page Height [height] -t, --timeout [ms] Set a max timeout of [ms]
-m, --page-margin [margin] Print with margin [margin] -x, --html output html file
-n, --hyphenate [lang] Hyphenate with language [language], defaults to "en-us" -b, --blockLocal Disallow access to filesystem for local files
-hi, --hypher_ignore [str] Ignore passed element selectors, such as ".class_to_ignore, h1" -r, --blockRemote Disallow requests to remote servers
-ho, --hypher_only [str] Only hyphenate passed elements selector, such as ".hyphenate, aside" --allowedPath [allowedPaths] Only allow access to given filesystem paths, repeatable. (default: [])
-e, --encoding [type] Set the encoding of the input html, defaults to "utf-8" --allowedDomain [allowedDomains] Only allow access to given remote domains, repeatable (default: [])
-t, --timeout [ms] Set a max timeout of [ms] --outline-tags [tags] Specifies that an outline should be generated for the resulting PDF
--outline-tags [tags] Specifies that an outline should be generated for the resulting PDF document. [tags] specifies which HTML tags should be considered for that outline. "h1,h2" will trigger an outline with "h1" tags as root elements and "h2" elements as their children. document. [tags] specifies which HTML tags should be considered for that
--additional-script <script> Additional script tags which are added to the HTML document before rendering. This is useful for adding custom pagedjs handlers. The option can be repeated. (default: []) outline. "h1,h2" will trigger an outline with "h1" tags as root elements
and "h2" elements as their children.
--additional-script <script> Additional script tags which are added to the HTML document before
rendering. This is useful for adding custom pagedjs handlers. The option
can be repeated. (default: [])
--browserEndpoint Use a remote Chrome server with browserWSEndpoint
``` ```
## Hyphenation ## Hyphenation
......
...@@ -26,6 +26,9 @@ program ...@@ -26,6 +26,9 @@ program
.option("-t, --timeout [ms]", "Set a max timeout of [ms]") .option("-t, --timeout [ms]", "Set a max timeout of [ms]")
.option("-x, --html", "output html file") .option("-x, --html", "output html file")
.option("-b, --blockLocal", "Disallow access to filesystem for local files") .option("-b, --blockLocal", "Disallow access to filesystem for local files")
.option("-r, --blockRemote", "Disallow requests to remote servers")
.option("--allowedPath [allowedPaths]", "Only allow access to given filesystem paths, repeatable.", collect, [])
.option("--allowedDomain [allowedDomains]", "Only allow access to given remote domains, repeatable", collect, [])
.option("--outline-tags [tags]", "Specifies that an outline should be " + .option("--outline-tags [tags]", "Specifies that an outline should be " +
"generated for the resulting PDF document. [tags] specifies which " + "generated for the resulting PDF document. [tags] specifies which " +
"HTML tags should be considered for that outline. " + "HTML tags should be considered for that outline. " +
...@@ -35,13 +38,13 @@ program ...@@ -35,13 +38,13 @@ program
"added to the HTML document before rendering. This is useful for " + "added to the HTML document before rendering. This is useful for " +
"adding custom pagedjs handlers. The option can be repeated.", "adding custom pagedjs handlers. The option can be repeated.",
collect, []) collect, [])
.option("--browserEndpoint", "Use a remote Chrome server with browserWSEndpoint")
.parse(process.argv); .parse(process.argv);
function collect(value, previous) { function collect(value, previous) {
return previous.concat(value); return previous.concat(value);
} }
let input = program.inputs || program.args[0]; let input = program.inputs || program.args[0];
let dir = process.cwd(); let dir = process.cwd();
...@@ -49,7 +52,7 @@ let dir = process.cwd(); ...@@ -49,7 +52,7 @@ let dir = process.cwd();
let relativePath; let relativePath;
let allowLocal; let allowLocal;
try { try {
input = new URL(input); let uri = new URL(input);
allowLocal = false; allowLocal = false;
} catch (error) { } catch (error) {
relativePath = path.resolve(dir, input); relativePath = path.resolve(dir, input);
...@@ -104,7 +107,14 @@ if (typeof input === "string") { ...@@ -104,7 +107,14 @@ if (typeof input === "string") {
} }
(async () => { (async () => {
let printer = new Printer(headless, allowLocal, program.additionalScript); let printer = new Printer({
headless: headless,
allowLocal: allowLocal,
allowRemote: !program.blockRemote,
allowedPaths: program.allowedPaths,
allowedDomains: program.allowedDomains,
additionalScripts: program.additionalScript,
});
printer.on("page", (page) => { printer.on("page", (page) => {
if (page.position === 0) { if (page.position === 0) {
......
This diff is collapsed.
{ {
"name": "pagedjs-cli", "name": "pagedjs-cli",
"version": "0.0.10", "version": "0.1.0",
"author": "Fred Chasen", "author": "Fred Chasen",
"license": "MIT", "license": "MIT",
"homepage": "https://pagedmedia.org", "homepage": "https://pagedmedia.org",
...@@ -16,20 +16,20 @@ ...@@ -16,20 +16,20 @@
}, },
"main": "index.js", "main": "index.js",
"dependencies": { "dependencies": {
"commander": "^3.0.2", "commander": "^5.0.0",
"express": "^4.17.1", "express": "^4.17.1",
"hyphenopoly": "^3.2.1", "hyphenopoly": "^4.2.1",
"katex": "^0.11.1", "katex": "^0.11.1",
"lodash": "^4.17.15", "lodash": "^4.17.15",
"mathjax": "^3.0.0", "mathjax": "^3.0.1",
"node-fetch": "^2.6.0", "node-fetch": "^2.6.0",
"ora": "^4.0.2", "ora": "^4.0.3",
"pagedjs": "0.1.34", "pagedjs": "0.1.40",
"pdf-lib": "0.6.4", "pdf-lib": "0.6.4",
"puppeteer": "^2.0.0", "puppeteer": "^2.1.1",
"replace-ext": "^1.0.0" "replace-ext": "^1.0.0"
}, },
"devDependencies": { "devDependencies": {
"eslint": "^6.5.1" "eslint": "^6.8.0"
} }
} }
...@@ -14,20 +14,37 @@ let scriptPath = paths[0] + "node_modules" + paths[paths.length-1]; ...@@ -14,20 +14,37 @@ let scriptPath = paths[0] + "node_modules" + paths[paths.length-1];
const PostProcesser = require("./postprocesser"); const PostProcesser = require("./postprocesser");
class Printer extends EventEmitter { class Printer extends EventEmitter {
constructor(headless, allowLocal, additionalScripts) { constructor(options = {}) {
super(); super();
this.headless = headless !== false;
this.allowLocal = allowLocal; this.headless = options.headless !== false;
this.allowLocal = options.allowLocal;
this.allowRemote = options.allowRemote;
this.additionalScripts = options.additionalScripts;
this.allowedPaths = options.allowedPaths || [];
this.allowedDomains = options.allowedDomains || [];
this.ignoreHTTPSErrors = options.ignoreHTTPSErrors;
this.browserWSEndpoint = options.browserEndpoint;
this.pages = []; this.pages = [];
this.additionalScripts = additionalScripts;
} }
async setup() { async setup() {
const browser = await puppeteer.launch({ let puppeteerOptions = {
headless: this.headless, headless: this.headless,
args: this.allowLocal ? ["--allow-file-access-from-files", "--disable-dev-shm-usage"] : ["--disable-dev-shm-usage"], args: ["--disable-dev-shm-usage"],
ignoreHTTPSErrors: true ignoreHTTPSErrors: this.ignoreHTTPSErrors
}); }
if (this.allowLocal) {
puppeteerOptions.args.push("--allow-file-access-from-files");
}
if (this.browserWSEndpoint) {
puppeteerOptions.browserWSEndpoint = this.browserWSEndpoint;
}
const browser = await puppeteer.launch(puppeteerOptions);
this.browser = browser; this.browser = browser;
...@@ -46,17 +63,13 @@ class Printer extends EventEmitter { ...@@ -46,17 +63,13 @@ class Printer extends EventEmitter {
const page = await this.browser.newPage(); const page = await this.browser.newPage();
let uri, url, html; let uri, url, relativePath, html;
if (typeof input === "string") { if (typeof input === "string") {
try { try {
uri = new URL(input); uri = new URL(input);
if (uri.protocol === "https:") {
html = await fetch(input)
.then(res => res.text());
}
url = input; url = input;
} catch (error) { } catch (error) {
let relativePath = path.resolve(dir, input); relativePath = path.resolve(dir, input);
url = "file://" + relativePath; url = "file://" + relativePath;
} }
} else { } else {
...@@ -64,6 +77,36 @@ class Printer extends EventEmitter { ...@@ -64,6 +77,36 @@ class Printer extends EventEmitter {
html = input.html; html = input.html;
} }
await page.setRequestInterception(true);
page.on('request', (request) => {
let uri = new URL(request.url());
let { host, protocol, pathname } = uri;
let local = protocol === "file:"
if (local && this.withinAllowedPath(pathname) === false) {
request.abort();
return;
}
if (local && !this.allowLocal) {
request.abort();
return;
}
if (host && this.isAllowedDomain(host) === false) {
request.abort();
return;
}
if (host && !this.allowRemote) {
request.abort();
return;
}
request.continue();
});
if (html) { if (html) {
await page.setContent(html) await page.setContent(html)
.catch((e) => { .catch((e) => {
...@@ -299,6 +342,28 @@ class Printer extends EventEmitter { ...@@ -299,6 +342,28 @@ class Printer extends EventEmitter {
return this.browser.close(); return this.browser.close();
} }
withinAllowedPath(pathname) {
if (!this.allowedPaths || this.allowedPaths.length === 0) {
return true;
}
for (let parent of this.allowedPaths) {
const relative = path.relative(parent, pathname);
if (relative && !relative.startsWith('..') && !path.isAbsolute(relative)) {
return true;
}
}
return false;
}
isAllowedDomain(domain) {
if (!this.allowedDomains || this.allowedDomains.length === 0) {
return true;
}
return this.allowedDomains.includes(domain);
}
} }
module.exports = Printer; module.exports = Printer;
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment