Commit 08866b10 authored by Wendell Piez

Testing and shakedown of text replacement macros.

parent bf1b33a5
This diff is collapsed.
This diff is collapsed.
......@@ -2,7 +2,21 @@
<x:description xmlns:x="http://www.jenitennison.com/xslt/xspec"
stylesheet="ucp-text-macros.xsl" xmlns="http://www.w3.org/1999/xhtml">
<x:scenario label="Big Grab Bag">
<!-- Small quote-substitution check: a paragraph with inline bold that must pass
     through unchanged, and a paragraph of nested straight quotes that must come
     back as curly quotes. The hair spaces (U+200A) expected between the adjacent
     double and single quotation marks are written as &#x200A; character
     references because the literal character cannot be told apart from an
     ordinary space when reading or editing this file. -->
<x:scenario label="a">
  <x:context>
    <body>
      <p>baby <b>bold</b></p>
      <p>"'quote'"</p>
    </body>
  </x:context>
  <x:expect label="quotes substituted including hair spaces char 200A">
    <body>
      <p>baby <b>bold</b></p>
      <p>“&#x200A;‘quote’&#x200A;”</p>
    </body>
  </x:expect>
</x:scenario>
<x:scenario label="Big Grab Bag">
<x:context>
<html>
<head>
......@@ -45,7 +59,7 @@ stylesheet="ucp-text-macros.xsl" xmlns="http://www.w3.org/1999/xhtml">
<p>sentence with punctuations separated by space ,</p>
<p>sentence with punctuations separated by space ;</p>
<p>sentence with punctuations separated by space ?</p>
<p>sentence with punctuations separated by 3 spaces ?</p>
<p>sentence with punctuations separated by 3 spaces ?</p>
<p><!-- empty --></p>
<p><!-- empty --></p>
<p><b>this is all bold except for the period</b>.</p>
......@@ -78,46 +92,8 @@ stylesheet="ucp-text-macros.xsl" xmlns="http://www.w3.org/1999/xhtml">
</html>
</x:context>
<x:expect label="With links marked up in line">
<html>
<head>
<meta charset="UTF-8" />
</head>
<body>
<div>
<p>
<b>Here's some</b>
<b> bold for</b>
<b>you</b>.</p>
<h1>Substitution macros</h1>
<p>...the lion sleeps tonight...</p>
<p>
<a href="http://www.wendellpiez.com">www.wendellpiez.com</a>
</p>
<p>there's a file at ... file:/D:/Work/Projects/Customers/Coko/Data/url-reading-testing.html</p>
<p>Hyperlink inferencing might work on any domain and many file formats</p>
<p>
<a href="http://http:where.are.we.now.com">http:where.are.we.now.com</a>
</p>
<p>Here's a pretty insane one: <a href="http://www5.iadb.org/mif/ProgramsandProjects/AccesstoFinance/Remittances/tabid/215/language/en-US/Default.aspx">http://www5.iadb.org/mif/ProgramsandProjects/AccesstoFinance/Remittances/tabid/215/language/en-US/Default.aspx</a> okay?</p>
<p>
<a href="http://www.nmta.us/site/page.php?347">http://www.nmta.us/site/page.php?347</a>
</p>
<p>
<a href="http://www.profeco.gob.mx/envio/cuadros.asp">http://www.profeco.gob.mx/envio/cuadros.asp</a>
</p>
<p>
<a href="ftp://ftp.my.site.org">ftp://ftp.my.site.org</a> is also a link!</p>
<p>
<a href="http://www.ime.gob.mx/ime2/images/educacion_financiera/acuerdo_bancos_consulados.pdf">http://www.ime.gob.mx/ime2/images/educacion_financiera/acuerdo_bancos_consulados.pdf</a>
</p>
<p>and finally we have ... <a href="http://sccounty01.co.santa-cruz.ca.us/bds/Govstream/ASP/Display/PdfFinder.asp?Type=Agenda&amp;MeetingDate=20020618&amp;Filename=051.pdf">http://sccounty01.co.santa-cruz.ca.us/bds/Govstream/ASP/Display/PdfFinder.asp?Type=Agenda&amp;MeetingDate=20020618&amp;Filename=051.pdf</a>
</p>
<p>Here's my home page: <a href="http://www.piez.org/wendell">http://www.piez.org/wendell</a>
</p> also <a href="http://pellucidliterature.org">pellucidliterature.org</a>.
</div>
</body>
</html>
<x:expect label="Grab Bag of Substitutions">
</x:expect>
</x:scenario>
......
......@@ -57,11 +57,11 @@
<!-- $space is (plain) space and LF, no tab -->
<xsl:variable as="xs:string" name="space">[&#x20;&#xA;]</xsl:variable>
<xsl:variable name="operations" expand-text="yes" as="element(xsw:sequence)">
<!-- Placeholder operation list: a single, empty sequence element in the
     http://coko.foundation/xsweet namespace, i.e. no match/replace operations. -->
<xsl:variable name="operations-stub" as="element(xsw:sequence)" expand-text="yes">
  <xsw:sequence/>
</xsl:variable>
<xsl:variable name="operations-real" expand-text="yes" as="element(xsw:sequence)">
<xsl:variable name="operations" expand-text="yes" as="element(xsw:sequence)">
<sequence xmlns="http://coko.foundation/xsweet">
<!-- Two adjacent hyphens become an em dash: "\-\-" (escaped in regex) becomes "—" -->
<match replace="&#x2014;">\-\-</match>
......@@ -138,7 +138,7 @@
<match replace="B.C.">B\.&#xA0;C\.</match>
<!-- subsequence to perform all quotation mark munging -->
<!--<munge-quotes/>-->
<munge-quotes/>
<!-- Punctuation-related cleanup - spaces before certain punctuation signs - -->
......@@ -162,15 +162,20 @@
</xsl:apply-templates>
</xsl:template>
<!-- NB THE FOLLOWING IS EXPERIMENTAL WORKS ONLY ON SOME INPUTS note 'inactive' mode -->
<!-- To handle overlap problems within paragraphs, we are doing the sneakiest thing ever. -->
<!-- We match paragraphs and process them to perform all text-node level replacements.
The result is an XML tree (representing the result).
We then post-process this tree by producing a string result of operating a new series of replacements
on the string.
Then we rewrite our temporary tree using the new string to replace its text nodes, node by node, using sibling recursion
and a (tunneled) parameter. -->
and a (tunneled) parameter.
<xsl:template match="p">
This works as long as there is one-for-one correspondence in string length between
input and output character strings.-->
<xsl:template match="p" mode="inactive">
<xsl:variable name="result-tree">
<xsl:next-match/>
</xsl:variable>
......@@ -193,10 +198,12 @@
<!-- Now for the magic - we rewrite $result-tree except stitching the replacement string in to replace its values. -->
<!-- This is about Level 20 XSLT - -->
<xsl:apply-templates select="$result-tree" mode="stitch">
<xsl:copy>
<xsl:copy-of select="$munged-string"/>
</xsl:copy>
<!--<xsl:apply-templates select="$result-tree" mode="stitch">
<xsl:with-param name="thread" select="$munged-string" tunnel="yes"/>
</xsl:apply-templates>
</xsl:apply-templates>-->
</xsl:template>
<xsl:template match="document-node()" mode="stitch">
......@@ -357,7 +364,7 @@
<xsl:template match="xsw:splice" xpath-default-namespace="http://coko.foundation/xsweet">
<!-- Inside this template, path 'prae' amounts to 'xsw:prae' -->
<xsl:param name="original" required="yes" as="text()"/>
<xsl:param name="original" required="yes" as="text()?"/>
<xsl:param name="str" required="yes" as="xs:string"/>
<xsl:variable name="ahead" select="string-join($original/xsw:container(.)/descendant::text()[. &lt;&lt; $original],'')"/>
......@@ -438,7 +445,7 @@
<xsl:variable name="apos" as="xs:string">'</xsl:variable>
<!--<xsl:variable name="quotapos" as="xs:string">['"]</xsl:variable>-->
<xsl:variable name="quote-operations" as="element(xsw:sequence)" expand-text="true">
<xsl:variable name="quote-operations" as="element(xsw:sequence)">
<xsw:sequence>
<!-- Alex's sequence - first, reduce all left- and right-facing quotations marks with their 'straight' analog
......@@ -447,7 +454,7 @@
u201c and u201d -> u0022
also ` and `` to their respective u0027 and u0022
-->
<xsw:match replace="{$apos}" >[&lsquo;&rsquo;]</xsw:match>
<xsw:match replace="{$apos}">[&lsquo;&rsquo;]</xsw:match>
<xsw:match replace="{$quot}" >[&ldquo;&rdquo;]</xsw:match>
<xsw:match replace="{$quot}" >``</xsw:match>
<xsw:match replace="{$apos}" >`</xsw:match>
......@@ -463,19 +470,19 @@
-->
<!-- $livechar is any character except a space or quotation mark (open, close or straight) -->
<xsl:variable name="livechar">[^\s\p{{Ps}}\p{{Pe}}"']</xsl:variable>
<xsl:variable name="livechar">[^\s\p{Ps}\p{Pe}"']</xsl:variable>
<xsl:variable name="singles">['&rsquo;&lsquo;]</xsl:variable>
<xsl:variable name="doubles">["&rdquo;&ldquo;]</xsl:variable>
<xsw:match replace="&ldquo;$1">"({$livechar})</xsw:match>
<xsw:match replace="$1&rdquo;">({$livechar})["&ldquo;]</xsw:match>
<xsw:match replace="&ldquo;$1" xsl:expand-text="true">"({$livechar})</xsw:match>
<xsw:match replace="$1&rdquo;" xsl:expand-text="true">({$livechar})["&ldquo;]</xsw:match>
<xsw:match replace="$1&rsquo;">({$livechar})'</xsw:match>
<xsw:match replace="&lsquo;$1">'({$livechar})</xsw:match>
<xsw:match replace="$1&rsquo;" xsl:expand-text="true">({$livechar})'</xsw:match>
<xsw:match replace="&lsquo;$1" xsl:expand-text="true">'({$livechar})</xsw:match>
<!-- now the combinations -->
<xsw:match replace="&ldquo;$1">{$doubles}([&lsquo;&ldquo;])</xsw:match>
<xsw:match replace="$1&rdquo;">([&rsquo;&rdquo;]){$doubles}</xsw:match>
<xsw:match replace="&ldquo;$1" xsl:expand-text="true">{$doubles}([&lsquo;&ldquo;])</xsw:match>
<xsw:match replace="$1&rdquo;" xsl:expand-text="true">([&rsquo;&rdquo;]){$doubles}</xsw:match>
<!-- Inserting hair spaces between *certain* pairings now -->
<xsw:match replace="&ldquo;&#x200a;&ldquo;">&ldquo;&ldquo;</xsw:match>
......@@ -494,21 +501,20 @@
" 'em" or " ‘em" (space+u0027+"em" or space+u2019+"em") -> " ’em" (space+u2019+"em")
"'n'" or "'n'" (u0027+"n"+u0027 or u2018+"n"+u2018) -> "’n’" (u2019+"n"+u2019)"
" 'tis" (space+u0027+"tis" or space+u2018+"tis") -> " ’tis" (space+u2019+"tis")-->
<xsw:match replace="&rsquo;$1">{$singles}(\d)</xsw:match>
<xsw:match replace="&rsquo;em">{$singles}em</xsw:match>
<xsw:match replace="&rsquo;n&rsquo;">{$singles}n{$singles}</xsw:match>
<xsw:match replace="&rsquo;$1">{$singles}([Tt](wa|i)s\s)</xsw:match>
</xsw:sequence>
<xsw:match replace="&rsquo;$1" xsl:expand-text="true">{$singles}(\d)</xsw:match>
<xsw:match replace="&rsquo;em" xsl:expand-text="true">{$singles}em</xsw:match>
<xsw:match replace="&rsquo;n&rsquo;" xsl:expand-text="true">{$singles}n{$singles}</xsw:match>
<xsw:match replace="&rsquo;$1" xsl:expand-text="true">{$singles}([Tt](wa|i)s\s)</xsw:match> </xsw:sequence>
</xsl:variable>
<!-- Handles a munge-quotes operation by applying templates to the operations
     held in $quote-operations.
     Parameters (both required):
       original - a text() node, handed on unchanged as 'original'
       str      - an xs:string, handed on as 'starting'
     Each parameter is passed exactly once: two sibling xsl:with-param elements
     with the same name are a static error (XTSE0670). -->
<xsl:template match="xsw:munge-quotes">
  <xsl:param name="original" required="yes" as="text()"/>
  <xsl:param name="str" required="yes" as="xs:string"/>
  <xsl:apply-templates select="$quote-operations">
    <xsl:with-param name="original" select="$original"/>
    <xsl:with-param name="starting" select="$str"/>
  </xsl:apply-templates>
</xsl:template>
......
<?xml version="1.0" encoding="UTF-8"?>
<!-- Wrapper stylesheet: declares no templates or variables of its own and only
     imports text-substitutions.xsl from the xspec folder.
     NOTE(review): the import uses an absolute file: URI on one developer's
     machine, so it will presumably not resolve elsewhere; confirm whether
     this file is generated output or is meant to be portable. -->
<xsl:stylesheet version="2.0"
                xmlns:x="http://www.jenitennison.com/xslt/xspec"
                xmlns:xs="http://www.w3.org/2001/XMLSchema"
                xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
  <xsl:import href="file:/C:/Users/Wendell/Documents/Gitlab/HTMLevator/applications/ucp-cleanup/xspec/text-substitutions.xsl"/>
</xsl:stylesheet>
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="file:/C:/Users/Wendell/AppData/Roaming/com.oxygenxml/extensions/v20.1/frameworks/https___raw.githubusercontent.com_xspec_oXygen_XML_editor_xspec_support_master_build_update_site.xml/xspec.support-1.2.1/src/compiler/format-xspec-report.xsl"?>
<x:report xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:xs="http://www.w3.org/2001/XMLSchema"
xmlns:test="http://www.jenitennison.com/xslt/unit-test"
xmlns:x="http://www.jenitennison.com/xslt/xspec"
xmlns="http://www.w3.org/1999/xhtml"
stylesheet="file:/C:/Users/Wendell/Documents/Gitlab/HTMLevator/applications/ucp-cleanup/ucp-text-macros.xsl"
date="2019-05-12T16:42:34.433-04:00"
xspec="file:/C:/Users/Wendell/Documents/Gitlab/HTMLevator/applications/ucp-cleanup/text-substitutions.xspec">
<x:scenario source="file:/C:/Users/Wendell/Documents/Gitlab/HTMLevator/applications/ucp-cleanup/text-substitutions.xspec"
template-id="xbaa809d2-a24e-3d42-9025-37acd773d060">
<x:label>a</x:label>
<x:context>
<body>
<p>baby <b>bold</b>
</p>
<p>"'quote'"</p>
</body>
</x:context>
<x:result>
<body>
<p>baby <b>bold</b>
</p>
<p>“ ‘quote’ ”</p>
</body>
</x:result>
<x:test successful="true">
<x:label>quotes substituted including hair spaces char 200A</x:label>
<x:expect>
<body>
<p>baby <b>bold</b>
</p>
<p>“ ‘quote’ ”</p>
</body>
</x:expect>
</x:test>
</x:scenario>
<x:scenario source="file:/C:/Users/Wendell/Documents/Gitlab/HTMLevator/applications/ucp-cleanup/text-substitutions.xspec"
template-id="x15bc6d43-b57d-327b-98d5-e176d33af497">
<x:label>Big Grab Bag</x:label>
<x:context>
<html>
<head>
<meta charset="UTF-8"/>
</head>
<body>
<div class="docx-body">
<p>
<span class="tab"><!-- tab --></span>
</p>
<p>Paragraph above is just a tab</p>
<p>"'quote'"</p>
<p>'"quote"'</p>
<p>'”quote"‘</p>
<p>‘"quote"’</p>
<p>""quote""</p>
<p>—”</p>
<p>“—</p>
<p>’ quotation ’</p>
<p><!-- empty --></p>
<h2 style="font-family: Helvetica; font-weight: bold">These work:</h2>
<p>"straight quotes"</p>
<p>straight apostrophe'</p>
<p>don't</p>
<p>don‘t</p>
<p>don’t</p>
<p>'simple'</p>
<p>`quote`</p>
<p>``quote``</p>
<p>‘qoutation‘</p>
<p>“quotation“</p>
<p>’quotation’</p>
<p>”quotation”</p>
<p>”quotation``</p>
<p>``quotation”</p>
<p><!-- empty --></p>
<p>sentence with punctuations separated by space .</p>
<p>sentence with punctuations separated by space !</p>
<p>sentence with punctuations separated by space ,</p>
<p>sentence with punctuations separated by space ,</p>
<p>sentence with punctuations separated by space ;</p>
<p>sentence with punctuations separated by space ?</p>
<p>sentence with punctuations separated by 3 spaces ?</p>
<p><!-- empty --></p>
<p><!-- empty --></p>
<p>
<b>this is all bold except for the period</b>.</p>
<p>
<i>this is all italics except for the period</i>.</p>
<p>
<span style="font-family: Helvetica">
<u>this is all underlined</u>
</span>
<a class="bookmarkStart" id="docx-bookmark_0"><!-- bookmark ='_GoBack'--></a>
<a href="#docx-bookmark_0"><!-- bookmark end --></a>
<span style="font-family: Helvetica">
<u> except for the period</u>.</span>
</p>
<p><!-- empty --></p>
<p>Summer of '69</p>
<p>Summer of ‘69</p>
<p><!-- empty --></p>
<p>W.E.B. Dubois</p>
<p>E. B. White</p>
<p><!-- empty --></p>
<p>U.S.</p>
<p>D.C.</p>
<p>A.M.</p>
<p>P.M.</p>
<p>A.D.</p>
<p>B.C.</p>
<p>B.C.E.</p>
<p>A.C.E.</p>
</div>
</body>
</html>
</x:context>
<x:result>
<html>
<head>
<meta charset="UTF-8"/>
</head>
<body>
<div class="docx-body">
<p>
<span class="tab"><!-- tab --></span>
</p>
<p>Paragraph above is just a tab</p>
<p>“ ‘quote’ ”</p>
<p>‘ “quote” ’</p>
<p>‘ “quote” ’</p>
<p>‘ “quote” ’</p>
<p>“ “quote” ’</p>
<p>—”</p>
<p>“—</p>
<p>' quotation '</p>
<p><!-- empty --></p>
<h2 style="font-family: Helvetica; font-weight: bold">These work:</h2>
<p>“straight quotes”</p>
<p>straight apostrophe’</p>
<p>don’t</p>
<p>don’t</p>
<p>don’t</p>
<p>‘simple’</p>
<p>‘quote’</p>
<p>“quote”</p>
<p>‘qoutation’</p>
<p>“quotation”</p>
<p>‘quotation’</p>
<p>“quotation”</p>
<p>“quotation”</p>
<p>“quotation”</p>
<p><!-- empty --></p>
<p>sentence with punctuations separated by space.</p>
<p>sentence with punctuations separated by space!</p>
<p>sentence with punctuations separated by space,</p>
<p>sentence with punctuations separated by space,</p>
<p>sentence with punctuations separated by space;</p>
<p>sentence with punctuations separated by space?</p>
<p>sentence with punctuations separated by 3 spaces?</p>
<p><!-- empty --></p>
<p><!-- empty --></p>
<p>
<b>this is all bold except for the period.</b>
</p>
<p>
<i>this is all italics except for the period.</i>
</p>
<p>
<span style="font-family: Helvetica">
<u>this is all underlined</u>
</span>
<a class="bookmarkStart" id="docx-bookmark_0"><!-- bookmark ='_GoBack'--></a>
<a href="#docx-bookmark_0"><!-- bookmark end --></a>
<span style="font-family: Helvetica">
<u> except for the period.</u>
</span>
</p>
<p><!-- empty --></p>
<p>Summer of ’69</p>
<p>Summer of ’69</p>
<p><!-- empty --></p>
<p>W. E. B. Dubois</p>
<p>E. B. White</p>
<p><!-- empty --></p>
<p>U.S.</p>
<p>D.C.</p>
<p>A.M.</p>
<p>P.M.</p>
<p>A.D.</p>
<p>B.C.</p>
<p>B.C.E.</p>
<p>A.C.E.</p>
</div>
</body>
</html>
</x:result>
<x:test successful="false">
<x:label>Grab Bag of Substitutions</x:label>
<x:expect select="()"/>
</x:test>
</x:scenario>
</x:report>
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment