Skip to content
Snippets Groups Projects
Commit 827477a7 authored by julientaq's avatar julientaq
Browse files

add long links fixer with jsdom

parent 1986e3f4
No related branches found
No related tags found
No related merge requests found
Pipeline #33101 passed with stages
in 50 seconds
// const cheerio = require("cheerio");
const slugify = require("@sindresorhus/slugify")// import slugify from '@sindresorhus/slugify';
const slugify = require("@sindresorhus/slugify"); // import slugify from '@sindresorhus/slugify';
const Cache = require("@11ty/eleventy-cache-assets");
const pluginTOC = require("eleventy-plugin-nesting-toc");
const markdownIt = require("markdown-it");
const markdownItAnchor = require("markdown-it-anchor");
const { DateTime } = require("luxon");
const cheerio = require('cheerio');
const jsdom = require("jsdom");
const { JSDOM } = jsdom;
module.exports = function (eleventyConfig) {
const markdown = require("markdown-it")({
html: true,
breaks: true,
......@@ -22,48 +22,58 @@ module.exports = function (eleventyConfig) {
return markdown.renderInline(rawString);
});
// Insert <wbr> break opportunities into the content of external links so that
// long URLs can wrap. Takes an HTML string and returns the HTML produced by
// dom.serialize().
// NOTE(review): the replacements run on link.innerHTML, so tags or character
// references nested inside a link (e.g. "</em>", "&#x27;") are rewritten as
// well — confirm that matched links only ever contain plain URL text.
// NOTE(review): jsdom parses `value` as a document; confirm callers expect the
// serialized document rather than only the body markup.
eleventyConfig.addFilter("wbrToUrl", function (value) {
  const dom = new JSDOM(value);
  const links = dom.window.document.body.querySelectorAll(
    'a[href^="http"], a[href^="www"]'
  );
  links.forEach((link) => {
    link.innerHTML = link.innerHTML
      // break opportunity after a double slash
      .replace(/\/\//g, "//\u003Cwbr\u003E")
      // break opportunity after a comma
      .replace(/\,/g, ",\u003Cwbr\u003E")
      // break opportunity before each of these punctuation characters
      .replace(/(\/|\~|\-|\.|\,|\_|\?|\#|\%)/g, "\u003Cwbr\u003E$1")
      // swap each hyphen for a non-breaking hyphen preceded by a <wbr>
      .replace(/\-/g, "\u003Cwbr\u003E‑");
  });
  return dom.serialize();
});
// Render a Markdown string through markdown.renderInline() and return the
// resulting HTML.
eleventyConfig.addFilter("markdowninline", function (rawString) {
  return markdown.renderInline(rawString);
});
// Collection of every item whose `data.menu` is truthy, sorted by ascending
// `data.order`.
eleventyConfig.addCollection("sortedByOrder", function (collectionApi) {
  return collectionApi
    .getAll()
    .filter((item) => item.data.menu)
    .sort((a, b) => {
      // Every item reaching the comparator already passed the `menu` filter,
      // so the comparator always returns a number.
      if (a.data.order > b.data.order) return 1;
      if (a.data.order < b.data.order) return -1;
      return 0;
    });
});
// Items matched by src/blog/posts/*.md, ordered by ascending `data.date`.
eleventyConfig.addCollection("blog", (collectionApi) => {
  return collectionApi
    .getFilteredByGlob("src/blog/posts/*.md")
    .sort((a, b) => a.data.date - b.data.date);
});
// Items matched by src/articles/*.md, ordered by ascending `data.date`.
eleventyConfig.addCollection("articles", (collectionApi) => {
  return collectionApi
    .getFilteredByGlob("src/articles/*.md")
    .sort((a, b) => a.data.date - b.data.date);
});
// eleventyConfig.addFilter("search", searchFilter);
// eleventyConfig.addFilter("searchSingle", searchFilterSingle);
// New array holding every item tagged "chapter".
eleventyConfig.addCollection("allSearch", (collection) => {
  return [...collection.getFilteredByTag("chapter")];
});
eleventyConfig.addPassthroughCopy({ "static/css": "/css" });
eleventyConfig.addPassthroughCopy({ "static/fonts": "/fonts" });
......@@ -84,27 +94,27 @@ module.exports = function (eleventyConfig) {
// useful to use the toc somewhere else
// Insert `prepend` at the start of the href of every `<a href="` found in
// `value`.
eleventyConfig.addFilter("prependLinks", function (value, prepend) {
  return value.replace(/<a href="/g, `<a href="${prepend}`);
});

// Replace what the pattern `replaceThat` matches in `value` with `replaceWith`.
// The pattern is built with `new RegExp(replaceThat)`, so a string pattern
// carries no flags and only its first match is replaced.
// `value` is now the first parameter: the previous signature never declared
// it, so the body threw a ReferenceError.
// NOTE(review): assumes the template engine passes the piped value as the
// first filter argument — confirm against the templates that use this filter.
eleventyConfig.addFilter(
  "replaceWithRegex",
  function (value, replaceThat, replaceWith) {
    const regex = new RegExp(replaceThat);
    return value.replace(regex, replaceWith);
  }
);
// add latin number plugin
// Expose the romanize() helper to templates under the filter name "romanize".
eleventyConfig.addFilter("romanize", (value) => romanize(value));
// \get the date with luxon (for all date)
// Convert the input to a Date and format it with luxon's DateTime.DATE_MED
// preset.
eleventyConfig.addFilter("postDate", (dateObj) => {
  const date = new Date(dateObj);
  return DateTime.fromJSDate(date).toLocaleString(DateTime.DATE_MED);
});
// limit the amount of items
eleventyConfig.addFilter("limit", function (arr, limit) {
return arr.slice(0, limit);
......@@ -114,17 +124,11 @@ module.exports = function (eleventyConfig) {
// console.log(value);
const $ = cheerio.load(value);
if ($.html(el)) {
return value = $.html(el);
}
else {
return (value = $.html(el));
} else {
return value;
}
});
});
eleventyConfig.addPlugin(pluginTOC, {
tags: ["h2", "h3", "h4"], // which heading tags are selected headings must each have an ID attribute
......@@ -134,17 +138,14 @@ module.exports = function (eleventyConfig) {
flat: false,
});
// Turn a string into a slug with the imported slugify() helper.
// NOTE(review): `lower`, `replacement` and `remove` look like options of the
// `slugify` package rather than `@sindresorhus/slugify` (which is what this
// file requires) — verify they have any effect.
eleventyConfig.addFilter("slugify", function (str) {
  return slugify(str, {
    lower: true,
    replacement: "-",
    remove: /[*+~.·,()'"`´%!?¿:@]/g,
  });
});
// folder structures
// -----------------------------------------------------------------------------
// content, data and layouts comes from the src folders
......@@ -160,26 +161,70 @@ module.exports = function (eleventyConfig) {
};
};
/**
 * Return `min` plus a whole-number offset drawn with Math.random(), so the
 * result lies in the half-open range [min, max).
 *
 * @param {number} min - Lower bound (inclusive).
 * @param {number} max - Upper bound (exclusive).
 * @returns {number} `min` plus a whole-number offset smaller than `max - min`.
 */
function getRandomInt(min, max) {
  const span = max - min;
  const offset = Math.floor(Math.random() * span);
  return offset + min;
}
/**
 * Convert a number to a Roman numeral string.
 *
 * Adapted from Steven Levithan:
 * https://blog.stevenlevithan.com/archives/javascript-roman-numeral-converter
 *
 * The last three decimal digits are looked up in a table (hundreds, tens,
 * ones); whatever digits remain in front are the thousands count and are
 * written as that many "M". Negative and fractional inputs are not validated.
 *
 * @param {number|string} num - Value to convert; coerced with Number().
 * @returns {string|number} The Roman numeral ("" for 0), or NaN when `num`
 *   does not coerce to a number.
 */
function romanize(num) {
  const value = Number(num);
  if (Number.isNaN(value)) return NaN;

  const digits = String(value).split("");
  // Lookup table: indexes 0-9 are hundreds, 10-19 are tens, 20-29 are ones.
  const key = [
    "", "C", "CC", "CCC", "CD", "D", "DC", "DCC", "DCCC", "CM",
    "", "X", "XX", "XXX", "XL", "L", "LX", "LXX", "LXXX", "XC",
    "", "I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX",
  ];

  let roman = "";
  // Consume digits from the right: ones (row 2), tens (row 1), hundreds (row 0).
  // A single loop only — running it twice would restart from a negative
  // counter and never stop.
  for (let row = 2; row >= 0; row--) {
    roman = (key[Number(digits.pop()) + row * 10] || "") + roman;
  }

  // Remaining leading digits are the number of thousands.
  return Array(Number(digits.join("")) + 1).join("M") + roman;
}
/**
 * Escape the characters that would be read as markup when plain text is
 * assigned to innerHTML.
 *
 * @param {string} text - Plain text.
 * @returns {string} Text with `&`, `<` and `>` replaced by character references.
 */
function escapeLinkText(text) {
  return text
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;");
}

/**
 * Add <wbr> break opportunities to the text of every external link found in
 * `content`, and store the same markup in the link's `data-print-url`
 * attribute.
 *
 * Rules, applied in order: a break after "//" and after ","; a break before
 * each of / ~ - . , _ ? # %; every hyphen becomes a non-breaking hyphen.
 *
 * @param {{querySelectorAll: Function}} content - Root to search; links are
 *   selected with 'a[href^="http"], a[href^="www"]'.
 * @returns {void}
 */
function cleanLink(content) {
  const links = content.querySelectorAll('a[href^="http"], a[href^="www"]');
  links.forEach((link) => {
    // The text is written back through innerHTML, so escape it first;
    // otherwise "&copy=" in a query string would be decoded as an entity and
    // "<" would open a tag.
    const printableUrl = escapeLinkText(link.textContent)
      .replace(/\/\//g, "//\u003Cwbr\u003E")
      .replace(/\,/g, ",\u003Cwbr\u003E")
      .replace(/(\/|\~|\-|\.|\,|\_|\?|\#|\%)/g, "\u003Cwbr\u003E$1")
      // The previous rule already placed a <wbr> before each hyphen, so only
      // the hyphen itself is swapped here (no second <wbr>).
      .replace(/\-/g, "&#x2011;");
    link.setAttribute("data-print-url", printableUrl);
    link.innerHTML = printableUrl;
  });
}
This diff is collapsed.
......@@ -5,6 +5,7 @@ author: Adam Hyde
date: "2022-02-01"
category: "thoughts"
---
Scholarly publishing at times seems to want to hold on to some outdated processes. At the top of my list is the dreaded RFP (Request for Proposals) process.
In the scholarly comms world, consultants send these out on behalf of publishers that are seeking solutions or services. My career involves designing and building publishing technology and so I have been on the receiving end of many RFPs for developing new platforms.
......@@ -12,6 +13,7 @@ In the scholarly comms world, consultants send these out on behalf of publishers
An RFP to discover a partner to build software generally contains an endless bucket-list of necessary features ('requirements'). Some of the RFPs I have seen literally list items such as 'JATS export' or 'automatic notifications' as features and I'm supposed to check if we can supply that functionality or not.
Export
<ul class="checkboxList">
<li><input type="checkbox" disabled> JATS Export</li>
<li><input type="checkbox" disabled> PDF</li>
......@@ -21,9 +23,11 @@ Export
But (from this example), let's face it... all journal platforms require this functionality. I'm not sure I understand the rationale to indicate that we can provide features that belong to every publishing platform in their domain. Sure, there are sometimes unusual requirements but at the end of the day we can 'supply' (i.e. design and build) any of those too.
## Let's Design Your House
Let's zoom out a little. The problem is perhaps easier to understand if we reference the more accessible notion of house architects. In the process of identifying an architect to design my new home, imagine if I sent them an initial RFP that said: “Do your houses have bathrooms?”, “Can you design living spaces?”. Or, even closer to the scholarly communications reality, I sent them a list that just read 'bathrooms', 'living rooms' and the architect should tick off those they can 'do'.
House
<ul class="checkboxList">
<li><input type="checkbox" disabled> living room</li>
<li><input type="checkbox" disabled> bathroom(s)</li>
......@@ -33,6 +37,7 @@ House
I hope you can see why this might be considered a waste of time for everyone involved. As an architect, the best I could do would be to answer “yes” to every question - we can design and build anything. But what a good architect actually wants to do, is to design and build a solution that meets the present and future needs of the client perfectly. To design systems like this means an architect needs to understand the commissioning organisation's legacy, culture and ambitions.
## Function vs Culture
Imagine it like this (now I’m going back to the house design): as a house architect, I could deliver you a design immediately after answering your checklist. That would actually be quite easy. Most houses are more or less the same from this checklist-level description. They have (for example) bathrooms and living spaces. So, going by this logic, should my design fulfill those functional requirements (i.e. the house has a bathroom and living room) then it follows that you should be quite happy with the final product.
It might be a pity that I actually put the bathroom at the opposite end of the house from the living room - you might have wanted it closer for convenience's sake. Or perhaps you have a nice view and I built the living room looking out onto the back fence (“...the laundry has a superb vista!”). But all that doesn't matter because the checklist is fulfilled.
......@@ -46,11 +51,13 @@ Let's take an example from the journal world : reviewer invitations. Sure, invit
There is a lot to understand here about how the publisher wants to work that will inform the design of every aspect of the system from the trivial (e.g. email notifications) to the complex (e.g. author revisions, reviewer rounds, submission form customisation etc).
## Constraints
There are also questions an architect needs to understand around constraints that you, the client, may face. The trick here is that you might not realize immediately how these constraints may inform your design. Let’s consider the house example once again: a good architect can only design your ideal house with you after they understand some of the major constraints you may face. Some of these constraints might be financial, some might be practical e.g. the nature of the landscape you are building on: If you want a single-level pole house built on a radical incline, the architect would do well to inform you of the level of engineering and costs needed to achieve this, or whether this 'requirement' is in tension with any of your other requirements (e.g. like having easy access to the ground). These issues are best understood through conversation - arguably the most efficient way to help everyone involved come to the best suited solution together.
There are also questions an architect needs to understand around constraints that you, the client, may face. The trick here is that you might not realize immediately how these constraints may inform your design. Let’s consider the house example once again: a good architect can only design your ideal house with you after they understand some of the major constraints you may face. Some of these constraints might be financial, some might be practical e.g. the nature of the landscape you are building on: If you want a single-level pole house built on a radical incline, the architect would do well to inform you of the level of engineering and costs needed to achieve this, or whether this 'requirement' is in tension with any of your other requirements (e.g. like having easy access to the ground). These issues are best understood through conversation - arguably the most efficient way to help everyone involved come to the best suited solution together.
Similar issues exist in publishing. If you state, for example, you wish to have both LaTeX support throughout the submission and peer review workflow AND you wish to achieve browser based Single Source Publishing...then I think it would be good if we discussed this in detail so you can understand clearly the constraints and tradeoffs, and consequent development time and costs that would be involved. You can then make your decision on whether it is worth it or not.
## An Improvement
I have been on the receiving end of good RFPs but they are rare. Most recently I was asked to respond to functional use-cases, i.e. as a user, I need to do x. I liked this process. Responding to this as an architect, gives me room to move. I can respond in short-essay form by highlighting the design issues that must be considered per use case; I can juxtapose some design challenges and trade-offs, and I can ask questions that will help the publisher think through what it is that they actually are trying to achieve.
This is a much better approach than the box-ticking RFP. From the beginning the RFP is trying to determine how the architect thinks and how they approach a problem. From the response provided by the architect, the publisher can also understand clearly whether the architect clearly understands the problem or domain. While the functional use-case example is better than the checklist approach there are still better ways to request proposals. In my opinion the best way is through conversation. You could learn a lot more about the suitability of an architect in a single hour conversation than you can learn from either the functional use-case or checklist approaches.
......@@ -58,6 +65,7 @@ This is a much better approach than the box-ticking RFP. From the beginning the
In other words, find an architect by simply talking to them and then involve your chosen architect in your requirements discovery process. This is a necessary and critical shift that would help publishers get systems that better meet their needs and open up solutions they might not have yet considered.
## Let's Fix It
I was recently asked by a group of publishing consultants to have a chat about 'things in general' - it was a welcome process and I had the opportunity to set the agenda. One item I put on the table was the RFP process. I made clear my reservations about checklist documents and I was very surprised to hear them respond likewise. Their point was that publishers are driving the RFP process.
That feedback was very interesting and inspired this post. It would be great to think through an effective RFP process with the scholarly comms community.
......
......@@ -5,12 +5,14 @@ author: Wendell Piez and Adam Hyde
date: "2016-12-05"
category: "article"
---
<p>Originally written in 2016, with small updates in 2022.</p>
<p>In the days of the typewriter, a&nbsp;typescript was a typed copy of a work. The typescript copy was used for improving the document through the editorial process&nbsp;– for reviewing, commenting, fact and rights checking, revision, etc.</p>
<p>For many years, since the advent of desktop publishing, the hand-typed typescript has been replaced by Microsoft Word documents. This advanced the publishing world quite a bit. Emailing typescript in the form of Word documents was far easier than mailing paper copies, and making revised copies moved from being an arduous typing exercise to the one-click ‘save as’<span style="color: #ff0000;">.</span> There are obvious efficiencies.</p>
<p>But now we are in the age of networked documents. We have the opportunity to make another paradigmatic workflow change – in a sense: bringing typescript&nbsp;to the browser. With this evolution, we can bring an end to many of the frustrations of emailing Word documents around for comment and revision – while exploring wider options&nbsp;for collaboration, innovating more interesting content types, more easily understanding and managing a document’s history, using automated typesetting and data exchange. All of these represent cost and time savings for&nbsp;the publishing process and have the potential to move the communication of research beyond the <a href="https://coko.foundation/publishing-for-reproducibility-collaborative-input-and-networked-output/">limited paradigm of the manuscript.</a></p>
<p>However, migrating typescript&nbsp;formats away from the desktop to the web has proven to be very difficult.&nbsp;One of the major problems&nbsp;is that while there is a growing&nbsp;move towards online authoring environments, many authors still start in Word, and many previous attempts&nbsp;have shown that MS Word is not an easy file format to convert to other formats. Fortunately, we believe, <em>if we think of MS Word&nbsp;as a software for “typescript preparation,”</em>&nbsp;that HTML is&nbsp;a viable option for conversion from Word’s &nbsp;.docx format into a typescript format ready for the web.</p>
<p>HTML works out well as a format for these purposes due to&nbsp;features that are more commonly considered weaknesses. How so? Well,&nbsp;first we must consider that, at early stages&nbsp;of a publishing workflow, a Word document&nbsp;will&nbsp;not have achieved its final structure, or indeed, much of any structure at all. Nonetheless, attempts to ‘get out of Word’ have tried to jump from unstructured Word to very structured XML formats by:</p>
<a href="#"></a>
<ol>
<li>copying over all the data in the document and</li>
<li>interpolating structure at the same time in an attempt to ‘understand’ the “intent of the author” (or a proxy) as represented by the display semantics of the document.</li>
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment