Commit 4ef10d2f authored by Alf Eaton's avatar Alf Eaton

Initial commit

parents
# Image for the XSweet DOCX-to-HTML conversion service: PHP 7 + Apache with the
# Saxon-HE PHP extension, plus the PHP "zip" extension needed to unpack .docx files.
# NOTE(review): the base image is untagged (implicitly :latest); pin a tag or
# digest once a known-good one has been identified, for reproducible builds.
FROM hubdock/docker-php7-apache-saxonhe

# Build-time only: keeps apt from prompting; not persisted into the runtime env.
ARG DEBIAN_FRONTEND=noninteractive

# Refresh the package index, install the libzip headers, build the zip extension
# and drop the apt lists again - all in one layer so the index is never stale and
# the cleanup actually reduces the image size.
RUN apt-get update \
 && apt-get install -y libzip-dev \
 && docker-php-ext-install -j"$(nproc)" zip \
 && rm -rf /var/lib/apt/lists/*

# Application code (PHP entry point and XSLT pipeline) served by Apache.
COPY src/ /var/www/html/

# Mount point for input documents; declared as a volume after it is created.
RUN mkdir /input
VOLUME /input
Copyright (c) 2017, Aspiration Tech
MIT License (https://opensource.org/licenses/MIT)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
# XSweet DOCX-to-HTML
This repository contains a Docker service for use with the `ConversionServices` INK plugin.
<?php
/**
 * XSweet DOCX-to-HTML conversion endpoint.
 *
 * Expects a .docx upload in the multipart field "input", unpacks it into a
 * private scratch directory, runs word/document.xml through the XSweet XSLT
 * pipeline with Saxon, and writes the final result to the response body.
 * All temporary files are removed before the script ends, whether or not the
 * conversion succeeded.
 */

/**
 * Recursively delete a directory and everything beneath it.
 *
 * @param string $path Directory to remove; ignored if it is not a real directory.
 * @return void
 */
function xsweet_remove_tree($path)
{
    if (!is_dir($path) || is_link($path)) {
        return;
    }
    // CHILD_FIRST yields the contents of a directory before the directory itself,
    // so every rmdir() below operates on an already-empty directory.
    $entries = new RecursiveIteratorIterator(
        new RecursiveDirectoryIterator($path, FilesystemIterator::SKIP_DOTS),
        RecursiveIteratorIterator::CHILD_FIRST
    );
    foreach ($entries as $entry) {
        if ($entry->isDir() && !$entry->isLink()) {
            rmdir($entry->getPathname());
        } else {
            unlink($entry->getPathname());
        }
    }
    rmdir($path);
}

/**
 * Emit a plain-text error response.
 *
 * @param int    $status  HTTP status code to send.
 * @param string $message Human-readable explanation.
 * @return void
 */
function xsweet_fail($status, $message)
{
    http_response_code($status);
    header('Content-Type: text/plain; charset=utf-8');
    echo $message, "\n";
}

// Refuse requests that do not carry exactly one successfully uploaded file.
$upload = isset($_FILES['input']) ? $_FILES['input'] : null;
if (
    !is_array($upload)
    || !isset($upload['error'], $upload['tmp_name'])
    || $upload['error'] !== UPLOAD_ERR_OK
    || !is_uploaded_file($upload['tmp_name'])
) {
    xsweet_fail(400, "Expected a .docx file upload in the 'input' field.");
    return;
}

$saxonProcessor = new Saxon\SaxonProcessor();
// print "Saxon processor version: {$saxonProcessor->version()}\n";
$xsltProcessor = $saxonProcessor->newXsltProcessor();

// tempnam() reserves a unique *file* name; replace that file with a directory of
// the same name to obtain a private scratch area for this request.
$tmp = tempnam(sys_get_temp_dir(), 'xsweet');
if ($tmp === false || !unlink($tmp) || !mkdir($tmp . '/input', 0700, true)) {
    xsweet_fail(500, 'Could not create a scratch directory.');
    return;
}

$outputFile = false;

try {
    $outputFile = tempnam(sys_get_temp_dir(), 'xsweet');
    if ($outputFile === false) {
        throw new RuntimeException('Could not create a temporary output file.');
    }

    // Unpack the .docx (a zip archive) so that word/document.xml can be read.
    $zip = new ZipArchive;
    if ($zip->open($upload['tmp_name']) !== true) {
        throw new InvalidArgumentException('The uploaded file is not a readable .docx (zip) archive.');
    }
    $extracted = $zip->extractTo($tmp . '/input');
    $zip->close();
    if (!$extracted || !is_file($tmp . '/input/word/document.xml')) {
        throw new InvalidArgumentException('The uploaded archive does not contain word/document.xml.');
    }

    // Pipeline definition. A plain string is a stylesheet applied in place to
    // $outputFile; an array is [stylesheet, input file, output file].
    $steps = [
        [
            'xsl/docx-html-extract.xsl',
            $tmp . '/input/word/document.xml',
            $outputFile
        ],
        'xsl/handle-notes.xsl',
        'xsl/join-elements.xsl',
        'xsl/scrub.xsl',
        'xsl/collapse-paragraphs.xsl',
        [
            'xsl/digest-paragraphs.xsl',
            $outputFile,
            $tmp . '/paragraphs.xml'
        ],
        [
            // Generates a stylesheet from the paragraph digest ...
            'xsl/make-header-escalator-xslt.xsl',
            $tmp . '/paragraphs.xml',
            $tmp . '/header-escalator.xsl'
        ],
        // ... which is then applied as a pipeline step of its own.
        $tmp . '/header-escalator.xsl',
        'xsl/final-rinse.xsl',
        'xsl/p-split-around-br.xsl',
        'xsl/editoria-notes.xsl',
        'xsl/editoria-basic.xsl',
        'xsl/editoria-reduce.xsl'
    ];

    foreach ($steps as $step) {
        if (!is_array($step)) {
            $step = [$step, $outputFile, $outputFile];
        }

        list($xsl, $input, $output) = $step;

        $xsltProcessor->compileFromFile($xsl);
        $xsltProcessor->setSourceFromFile($input);
        $xsltProcessor->setOutputFile($output);
        $xsltProcessor->transformToFile();

        // Reset per-step state so settings do not leak into the next stylesheet.
        $xsltProcessor->clearParameters();
        $xsltProcessor->clearProperties();
    }

    readfile($outputFile);
} catch (InvalidArgumentException $e) {
    // The client sent something that is not a usable .docx.
    xsweet_fail(400, $e->getMessage());
} catch (RuntimeException $e) {
    xsweet_fail(500, $e->getMessage());
} finally {
    // Always discard the output file and the whole scratch tree.
    if ($outputFile !== false && is_file($outputFile)) {
        unlink($outputFile);
    }
    xsweet_remove_tree($tmp);
}
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet
    version="2.0"
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:xs="http://www.w3.org/2001/XMLSchema"
    xmlns:xsw="http://coko.foundation/xsweet"
    xmlns="http://www.w3.org/1999/xhtml"
    xpath-default-namespace="http://www.w3.org/1999/xhtml"
    exclude-result-prefixes="#all">

  <!-- Serialized without indentation so that no cosmetic whitespace is introduced
       into the text; switch indent to 'yes' only for testing. -->
  <xsl:output method="xml" indent="no" omit-xml-declaration="yes"/>

  <!-- Copy everything by default. -->
  <xsl:template match="node() | @*">
    <xsl:copy>
      <xsl:apply-templates select="node() | @*"/>
    </xsl:copy>
  </xsl:template>

  <!-- Rewrites CSS where p has all its contents in a single branch; display semantics
       of that branch are expressed in CSS and overwrite the p element's given @style.

       [examples]

       <p style="color: #000020; font-size: 13.5pt; margin-left: 72pt">
         <span style="color: #000020; font-size: 13.5pt">
           <i>All wholesome food is caught without a net or a trap.</i>
         </span>
       </p>

       should be rewritten

       <p style="color: #000020; font-size: 13.5pt; font-style: italic; margin-left: 72pt">All wholesome food is caught without a net or a trap.</p>

       Note the properties overwritten on descendants are removed, and the 'i' element is rewritten as
       font-style='italic'.

       Note the following mappings:
         i - font-style='italic'.
         b - font-weight='bold'.
         u - text-decoration='underline'.
  -->
  <xsl:template match="p">
    <!-- A throwaway 'style' element whose attributes are the collected CSS properties
         (attribute name = property name, attribute value = property value). -->
    <xsl:variable name="css-proxy" as="element()">
      <style>
        <xsl:apply-templates select="@style" mode="as-attributes"/>
        <xsl:call-template name="override-styles"/>
      </style>
    </xsl:variable>
    <xsl:copy>
      <xsl:copy-of select="@*"/>
      <!-- Now overwriting @style ... -->
      <xsl:for-each select="$css-proxy[exists(@*)]">
        <!-- ... only when there are properties as attributes on $css-proxy ... -->
        <xsl:attribute name="style">
          <!-- Properties are written back sorted by name, as 'name: value' joined by '; '. -->
          <xsl:for-each select="@*">
            <xsl:sort data-type="text" select="name()"/>
            <xsl:if test="position() gt 1">; </xsl:if>
            <xsl:value-of select="name()"/>
            <xsl:text>: </xsl:text>
            <xsl:value-of select="."/>
          </xsl:for-each>
        </xsl:attribute>
      </xsl:for-each>
      <!--<xsl:copy-of select="$css-proxy"/>-->
      <xsl:apply-templates/>
    </xsl:copy>
  </xsl:template>

  <!-- We can strip 'span' elements when they are coextensive with their wrapping p and
       have nothing but @style to offer, as the latter is being promoted. -->
  <xsl:template match="p//span[empty(@class)]
                              [normalize-space(.) = normalize-space(ancestor::p[1])]">
    <xsl:apply-templates/>
  </xsl:template>

  <!-- Note we leave 'u', 'i' and 'b' in place despite also promoting them to CSS. -->

  <xsl:template name="override-styles">
    <!-- Under certain conditions, descends tree to collect CSS style property assignments
         returning them as attributes (captured on a proxy).
         The condition: the context element has exactly one element child, and that
         child carries all of the context element's (whitespace-normalized) text. -->
    <xsl:if test="count(*) eq 1 and normalize-space(.) = normalize-space(*[1])">
      <xsl:for-each select="*">
        <xsl:apply-templates select=". | @style" mode="as-attributes"/>
        <!-- descend recursively -->
        <xsl:call-template name="override-styles"/>
      </xsl:for-each>
    </xsl:if>
  </xsl:template>

  <!-- 'as-attributes mode' loads up a proxy element with CSS properties. We exploit
       the fact that attributes overwrite other attributes of the same name, added
       earlier (since attributes must be uniquely named) to de-duplicate our CSS ...
       i.e. font-size will come out only once, with whatever value was declared deepest.
       Note that elements as well as @style values will prompt CSS properties being added in this way. -->

  <!-- Elements with no CSS equivalent contribute nothing. -->
  <xsl:template match="*" mode="as-attributes"/>

  <xsl:template match="u" mode="as-attributes">
    <xsl:attribute name="text-decoration">underline</xsl:attribute>
  </xsl:template>

  <xsl:template match="i" mode="as-attributes">
    <xsl:attribute name="font-style">italic</xsl:attribute>
  </xsl:template>

  <xsl:template match="b" mode="as-attributes">
    <xsl:attribute name="font-weight">bold</xsl:attribute>
  </xsl:template>

  <!-- Splits a @style value on ';' and turns each 'name: value' declaration into an
       attribute named after the property. -->
  <xsl:template match="@style" mode="as-attributes">
    <!-- Blank tokens (e.g. after a trailing ';') are skipped and the property name is
         trimmed: an empty or space-padded name is not a legal attribute name and
         would otherwise abort the transformation. -->
    <xsl:for-each select="tokenize(.,'\s*;\s*')[matches(.,'\S')]">
      <xsl:attribute name="{normalize-space(replace(.,':.*$',''))}" select="replace(.,'^.*:\s*','')"/>
    </xsl:for-each>
  </xsl:template>

</xsl:stylesheet>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="2.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:xs="http://www.w3.org/2001/XMLSchema"
xmlns:xsw="http://coko.foundation/xsweet"
xmlns="http://www.w3.org/1999/xhtml"
xpath-default-namespace="http://www.w3.org/1999/xhtml"
exclude-result-prefixes="#all">
<!-- Replaces inline @style attributes with generated class names and writes one CSS
rule per distinct @style value into a 'style' element appended to 'head'. -->
<xsl:output method="xml" indent="no" omit-xml-declaration="yes"/>
<!-- Identity: copy any node or attribute not handled by a more specific template. -->
<xsl:template match="node() | @*">
<xsl:copy>
<xsl:apply-templates select="node() | @*"/>
</xsl:copy>
</xsl:template>
<!-- Drop on default traversal -->
<xsl:template match="@style"/>
<!-- Any element with a non-whitespace @style: copy it, keep its other attributes
(@style itself is dropped by the template above), and write a @class made of the
existing @class value, a separating space when a @class was present, and the class
name generated from @style. -->
<xsl:template match="*[matches(@style,'\S')]">
<xsl:copy>
<xsl:apply-templates select="@*"/>
<xsl:attribute name="class">
<xsl:value-of select="@class"/>
<xsl:for-each select="@style/../@class"><xsl:text> </xsl:text></xsl:for-each>
<xsl:apply-templates select="@style" mode="styleClass"/>
</xsl:attribute>
<xsl:apply-templates/>
</xsl:copy>
</xsl:template>
<!-- 'head' is copied with its contents; a 'style' element holding the generated CSS
rules is appended when there is at least one rule. -->
<xsl:template match="head">
<xsl:variable name="abstracted-css">
<xsl:call-template name="rewrite-css-styles"/>
</xsl:variable>
<xsl:copy>
<xsl:apply-templates/>
<xsl:if test="matches($abstracted-css,'\S')">
<style type="text/css">
<xsl:sequence select="$abstracted-css"/>
</style>
</xsl:if>
</xsl:copy>
</xsl:template>
<!-- Emits one CSS rule per distinct @style string found anywhere in the document:
a line feed, '.', the generated class name, then the style string in braces. -->
<xsl:template name="rewrite-css-styles">
<!-- Note that we depend on styles being in a regular order. Note they are sorted when
they are filtered by scrub.xsl in a previous step. -->
<xsl:for-each-group select="//@style" group-by="string(.)">
<xsl:text>&#xA;.</xsl:text>
<xsl:apply-templates select="." mode="styleClass"/>
<xsl:text> { </xsl:text>
<xsl:value-of select="current-grouping-key()"/>
<xsl:text> }</xsl:text>
</xsl:for-each-group>
</xsl:template>
<!-- Emits a string translating a value sequence into a @class-compatible string -->
<!-- The name always starts with 'xsw_'; the pieces follow in the fixed order coded
below. In the regular expressions, \C matches any character that is not an XML name
character, so those replacements strip spaces, '#', quotes and similar punctuation. -->
<xsl:template match="@style" mode="styleClass">
<xsl:variable name="props" select="tokenize(., '\s*;\s*')"/>
<xsl:value-of>
<xsl:text>xsw_</xsl:text>
<!-- Since we're looking at a sequence of strings, we can't write path expressions (in 2.0). -->
<xsl:for-each select="$props[starts-with(., 'text-align:')]">
<xsl:sequence select="replace(., '(^text-align|[:\s\.])', '')"/>
</xsl:for-each>
<xsl:for-each select="$props[starts-with(., 'margin-')]">
<xsl:text>margin</xsl:text>
<xsl:sequence select="replace(., '(^margin-|[:\s\.])', '')"/>
</xsl:for-each>
<xsl:for-each select="$props[starts-with(., 'padding-')]">
<xsl:text>pad</xsl:text>
<xsl:sequence select="replace(., '(^padding-|[:\s\.])', '')"/>
</xsl:for-each>
<!-- 'font' is emitted once if either font-family or font-size is present. -->
<xsl:if test="some $p in $props satisfies matches($p,'^font-(family|size)')">font</xsl:if>
<xsl:for-each select="$props[starts-with(., 'font-family:')]">
<xsl:sequence select="replace(., '(^font-family:|\C)', '')"/>
</xsl:for-each>
<xsl:for-each select="$props[starts-with(., 'font-size:')]">
<xsl:sequence select="replace(., '(^font-size:|\C)', '')"/>
</xsl:for-each>
<xsl:for-each select="$props[starts-with(., 'text-indent:')]">
<xsl:sequence select="replace(., '(^text-|:|\C)', '')"/>
</xsl:for-each>
<xsl:for-each select="$props[matches(., '^\-?xsweet-')]">
<xsl:sequence select="replace(., '(^-?xsweet-|:|\C|\-)', '')"/>
</xsl:for-each>
<!-- These tests compare whole declarations, so they only fire on the exact
spelling 'name: value' with a single space after the colon. -->
<xsl:if test="$props = 'font-weight: bold'">bold</xsl:if>
<xsl:if test="$props = 'font-style: italic'">italic</xsl:if>
<xsl:if test="$props = 'text-decoration: underline'">underline</xsl:if>
<xsl:if test="$props = 'font-variant: small-caps'">smallcaps</xsl:if>
<xsl:for-each select="$props[starts-with(., 'color:')]">
<xsl:sequence select="replace(., '^color:|\C', '')"/>
</xsl:for-each>
</xsl:value-of>
</xsl:template>
</xsl:stylesheet>
\ No newline at end of file
This diff is collapsed.
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="2.0"
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:xs="http://www.w3.org/2001/XMLSchema"
    xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"
    exclude-result-prefixes="#all">

  <!-- Wrapper around docx-html-extract.xsl: instead of taking document.xml as the
       source document, it takes the location of a whole .docx file as a parameter,
       reads word/document.xml out of that archive through a 'jar:' URI, and hands
       its document element to the imported templates (which match elements in the
       w: namespace). Invoke it via the named template 'extract'. -->
  <xsl:import href="docx-html-extract.xsl"/>

  <!-- Required at runtime: full URI of the input .docx file. -->
  <xsl:param name="docx-file-uri" as="xs:string" required="yes"/>

  <!-- Overrides the imported binding of the same name; expected values yes|no. -->
  <xsl:param name="show-css" as="xs:string" select="'yes'"/>

  <xsl:output omit-xml-declaration="yes" indent="no"/>

  <!-- URI addressing word/document.xml inside the archive. -->
  <xsl:variable name="document-path"
                select="string-join(('jar:', $docx-file-uri, '!/word/document.xml'), '')"/>

  <!-- The parsed document.xml. -->
  <xsl:variable name="document-xml" select="document($document-path)"/>

  <!-- Entry point: process the document element of document.xml; from there the
       imported templates take over. -->
  <xsl:template name="extract">
    <xsl:apply-templates select="$document-xml/*"/>
  </xsl:template>

</xsl:stylesheet>
\ No newline at end of file
This diff is collapsed.
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="2.0"
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:xs="http://www.w3.org/2001/XMLSchema"
    xmlns:xsw="http://coko.foundation/xsweet"
    xpath-default-namespace="http://www.w3.org/1999/xhtml"
    xmlns="http://www.w3.org/1999/xhtml"
    exclude-result-prefixes="#all">

  <!-- Unprefixed element names in match patterns are in the XHTML namespace
       (see xpath-default-namespace above). -->

  <!-- Output uses XML syntax, with neither an XML declaration nor a DOCTYPE
       (as HTML5 permits). -->
  <xsl:output method="xml" indent="no" omit-xml-declaration="yes"/>

  <!-- Fallback for everything not matched by a higher-priority template:
       copy the node, then process its attributes and children the same way. -->
  <xsl:template match="@* | node()">
    <xsl:copy>
      <xsl:apply-templates select="@* | node()"/>
    </xsl:copy>
  </xsl:template>

  <!-- i => em. Attributes travel along through the fallback template above;
       unwanted ones can be removed in a subsequent step. -->
  <xsl:template match="i">
    <em>
      <xsl:apply-templates select="@* | node()"/>
    </em>
  </xsl:template>

  <!-- b => strong, attributes retained. Inline elements may still be modified or
       stripped by a later 'reduce' step, so this is not the final word. -->
  <xsl:template match="b">
    <strong>
      <xsl:apply-templates select="@* | node()"/>
    </strong>
  </xsl:template>

  <!-- u => i (Editoria's rendition of underlining), attributes retained. -->
  <xsl:template match="u">
    <i>
      <xsl:apply-templates select="@* | node()"/>
    </i>
  </xsl:template>

  <!-- Index of elements by each whitespace-delimited token of @class. An element of
       any type with a non-blank @class is indexed once per token, e.g.
       <p class="Quote Special"> is found under both 'Quote' and 'Special'. Because an
       element can therefore satisfy several class-based templates at once, those
       templates carry explicit priorities. -->
  <xsl:key name="elements-by-class"
           match="*[matches(@class,'\S')]"
           use="tokenize(@class,'\s+')"/>

  <!-- Any element carrying the class 'Quote' becomes 'extract', attributes retained.
       The explicit priority puts this ahead of the element-name templates above. -->
  <xsl:template match="key('elements-by-class','Quote')" priority="100">
    <extract>
      <xsl:apply-templates select="@* | node()"/>
    </extract>
  </xsl:template>

</xsl:stylesheet>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="2.0"
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:xs="http://www.w3.org/2001/XMLSchema"
    xmlns:xsw="http://coko.foundation/xsweet"
    xpath-default-namespace="http://www.w3.org/1999/xhtml"
    xmlns="http://www.w3.org/1999/xhtml"
    exclude-result-prefixes="#all">

  <!-- Unprefixed element names in match patterns are in the XHTML namespace
       (see xpath-default-namespace above). -->

  <!-- Output uses XML syntax, with neither an XML declaration nor a DOCTYPE
       (as HTML5 permits). -->
  <xsl:output method="xml" indent="no" omit-xml-declaration="yes"/>

  <!-- Elements indexed by each whitespace-delimited token of a non-blank @class. -->
  <xsl:key name="elements-by-class"
           match="*[matches(@class,'\S')]"
           use="tokenize(@class,'\s+')"/>

  <!-- Elements indexed by '#' + @id, i.e. by the href value that links to them. -->
  <xsl:key name="internal-links" match="*[@id]" use="concat('#',@id)"/>

  <!-- Fallback: copy any node or attribute not matched at higher priority. -->
  <xsl:template match="@* | node()">
    <xsl:copy>
      <xsl:apply-templates select="@* | node()"/>
    </xsl:copy>
  </xsl:template>

  <!-- The document body wrapper (by id or by class) is unwrapped: its contents are
       processed in place of the wrapper itself ... -->
  <xsl:template match="div[@id='docx-body'] | key('elements-by-class','docx-body')" priority="10">
    <xsl:apply-templates/>
  </xsl:template>

  <!-- ... while every other div directly under body is dropped with its contents. -->
  <xsl:template match="body/div"/>

  <!-- A footnote or endnote reference is replaced by an empty 'note' element whose
       @note-content holds the plain text of the element its @href points to. The
       reference's own content (the note number) is deliberately not kept. -->
  <xsl:template match="key('elements-by-class','endnoteReference') | key('elements-by-class','footnoteReference')">
    <xsl:variable name="note-text">
      <xsl:apply-templates select="key('internal-links',@href)" mode="plaintext"/>
    </xsl:variable>
    <note note-content="{$note-text}"/>
  </xsl:template>

  <!-- plaintext mode: elements vanish, only their text survives. -->
  <xsl:template match="*" mode="plaintext">
    <xsl:apply-templates mode="plaintext"/>
  </xsl:template>

  <!-- Every paragraph that has a preceding sibling element is set off from it by
       two line feeds. -->
  <xsl:template match="p" mode="plaintext">
    <xsl:if test="exists(preceding-sibling::*)">
      <xsl:text>&#xA;&#xA;</xsl:text>
    </xsl:if>
    <xsl:apply-templates mode="plaintext"/>
  </xsl:template>

  <!-- Note references are suppressed inside notes. -->
  <xsl:template match="key('elements-by-class','endnoteRef') | key('elements-by-class','footnoteRef')"
                mode="plaintext"/>

  <!-- Each run of whitespace in note text collapses to a single space. -->
  <xsl:template match="text()" mode="plaintext">
    <xsl:value-of select="replace(.,'\s+',' ')"/>
  </xsl:template>

</xsl:stylesheet>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="2.0"
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:xs="http://www.w3.org/2001/XMLSchema"
    xmlns:xsw="http://coko.foundation/xsweet"
    xpath-default-namespace="http://www.w3.org/1999/xhtml"
    xmlns="http://www.w3.org/1999/xhtml"
    exclude-result-prefixes="#all">

  <!-- Whitelist filter: only the elements named by the templates below survive into
       the output; every other element is unwrapped (its contents are kept, its tags
       are not). -->

  <!-- Note the default namespace for matching (given above) is
       "http://www.w3.org/1999/xhtml" -->

  <!-- The results will have XML syntax but no XML declaration or DOCTYPE declaration
       (as permitted by HTML5). -->
  <xsl:output method="xml" indent="no" omit-xml-declaration="yes"/>

  <!-- Just in case:
  <xsl:key name="elements-by-class" match="*[matches(@class,'\S')]"
    use="tokenize(@class,'\s+')"/>
  -->

  <!-- By default we *drop* elements.
       Better templates will copy the ones we want. -->
  <xsl:template match="*">
    <xsl:apply-templates/>
  </xsl:template>

  <!-- But we keep attributes -->
  <xsl:template match="@*">
    <xsl:copy-of select="."/>
  </xsl:template>

  <!-- Sorry guys, until further notice Editoria doesn't know what to do with you. -->
  <xsl:template match="comment() | processing-instruction()"/>

  <!-- Drop head/style -->
  <xsl:template priority="5" match="head/style"/>

  <!-- Document scaffolding is kept as is. -->
  <xsl:template match="html | head | head//* | body">
    <xsl:apply-templates select="." mode="copy-after-all"/>
  </xsl:template>

  <!-- We only permit a header to be propagated if it has (non-ws) contents. -->
  <xsl:template match="h1| h2 | h3 | h4 | h5 | h6">
    <xsl:if test="matches(.,'\S')">
      <xsl:apply-templates select="." mode="copy-after-all"/>
    </xsl:if>
  </xsl:template>

  <!-- Block-level elements that are kept. -->
  <xsl:template match="p | extract | blockquote | pre">
    <xsl:apply-templates select="." mode="copy-after-all"/>
  </xsl:template>

  <!-- Empty line feeds may be left especially after paragraph splitting in an earlier step. -->
  <xsl:template match="p[not(matches(.,'\S'))]"/>

  <!-- Inline elements that are kept. NB stripping b and strong for now. -->
  <!-- The first name is lower-case 'i': element names are case-sensitive, so an
       upper-case 'I' would never match and every 'i' element would be silently
       unwrapped by the default template. -->
  <xsl:template match="i | sup | sub | a | code | em">
    <xsl:apply-templates select="." mode="copy-after-all"/>
  </xsl:template>

  <!-- Notes and their attributes are kept. -->
  <xsl:template match="note | note/@*">
    <xsl:apply-templates select="." mode="copy-after-all"/>
  </xsl:template>

  <!-- Mode 'comment-in': emits a placeholder comment inside any kept descendant of p
       or td that has no non-whitespace text; nothing for any other node. -->
  <xsl:template match="node()" mode="comment-in"/>

  <xsl:template match="p//* | td//*" mode="comment-in">
    <xsl:if test="not(matches(string(.),'\S'))">
      <xsl:comment> open/close </xsl:comment>
    </xsl:if>
  </xsl:template>

  <!-- Mode 'copy-after-all': copy the node itself, then its attributes, the optional
       placeholder comment, and finally its children - the latter processed in the
       default (filtering) mode again. -->
  <xsl:template match="node() | @*" mode="copy-after-all">
    <xsl:copy>
      <xsl:apply-templates select="@*"/>
      <xsl:apply-templates select="." mode="comment-in"/>
      <!-- switching back out of mode -->
      <xsl:apply-templates/>
    </xsl:copy>
  </xsl:template>

  <!-- Bye-bye @class, bye-bye @style! -->
  <xsl:template match="@class | @style" priority="2"/>

</xsl:stylesheet>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="2.0"
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:xs="http://www.w3.org/2001/XMLSchema"
    xmlns:xsw="http://coko.foundation/xsweet"
    xmlns="http://www.w3.org/1999/xhtml"
    xpath-default-namespace="http://www.w3.org/1999/xhtml"
    exclude-result-prefixes="#all">

  <!-- Removes redundant tagging from HTML based on @style analysis, element type e.g. redundant b, i, u etc. -->

  <xsl:output method="xml" indent="no" omit-xml-declaration="yes"/>

  <!-- Identity: copy any node or attribute not handled by a more specific template. -->
  <xsl:template match="node() | @*">
    <xsl:copy>
      <xsl:apply-templates select="node() | @*"/>
    </xsl:copy>
  </xsl:template>

  <!-- Inside a 'style' element every occurrence of 'xsweet' is rewritten to '-xsweet'.
       The priority of 11 puts this ahead of the line-feed template below, so no line
       feed is emitted in front of 'style'. -->
  <xsl:template match="style" priority="11">
    <xsl:copy>
      <xsl:copy-of select="@*"/>
      <xsl:value-of select="replace(string(.),'xsweet','-xsweet')"/>
    </xsl:copy>
  </xsl:template>

  <!-- Disable when auto-indenting - this introduces cosmetic whitespace into
       an assumed text-brick. -->
  <!-- Emits a line feed in front of each matched element, then defers to the
       next-best matching template for the element itself. -->
  <xsl:template match="head | head//* | body | body/* | p | h1 | h2 | h3 | h4 | h5 | h6" priority="10">
    <xsl:text>&#xA;</xsl:text>
    <xsl:next-match/>
  </xsl:template>

  <!-- Insert a comment into any empty div or p so as not to confuse poor HTML parsers. -->
  <xsl:template match="div | p | h1 | h2 | h3 | h4 | h5 | h6">
    <xsl:copy>
      <xsl:apply-templates select="node() | @*"/>
      <xsl:if test="empty(*) and not(matches(.,'\S'))">
        <xsl:comment> empty </xsl:comment>
      </xsl:if>
    </xsl:copy>
  </xsl:template>

  <xsl:template match="span[@class=('EndnoteReference','FootnoteReference')]">
    <!-- These spans sometimes contain noise from input, in addition to a (generated) endnote or footnote reference. -->
    <!-- Note the named style assignment is directly coded in the Word -->
    <!-- <span class="EndnoteReference"><a class="endnoteReference" href="#en5">5</a>6</span>"-->
    <!-- Only the attributes and the 'a' children are kept; stray text is discarded. -->
    <xsl:copy>
      <xsl:apply-templates select="@*"/>
      <xsl:apply-templates select="a"/>
    </xsl:copy>
  </xsl:template>

  <!-- Remove any 'p' element that is truly empty - nothing but whitespace, no elements.
       (Empty inline elements were stripped by generic logic: see scrub.xsl.) -->
  <!--<xsl:template match="p[not(matches(.,'\S'))][empty(*)]"/>-->

  <!-- Rewrite @style to remove properties duplicative of inherited properties -->
  <xsl:template match="@style">
    <xsl:variable name="here" select=".."/>
    <!-- Any CSS properties not declared on an ancestor are significant. -->
    <xsl:variable name="significant" as="xs:string*">
      <xsl:for-each select="tokenize(.,'\s*;\s*')">
        <xsl:variable name="prop" select="."/>
        <xsl:variable name="propName" select="replace($prop,':.*$','')"/>
        <!-- the property is redundant if the same as the same property on the closest element with the property -->
        <xsl:variable name="redundant" select="$here/ancestor::*[contains(@style,$propName)][1]/tokenize(@style,'\s*;\s*') = $prop"/>
        <xsl:if test="not($redundant)">
          <!-- We have some (pseudo) properties named 'xsweet' these are rewritten for CSS -->
          <!-- NOTE(review): as written, '\s*^xsweet' can only match at the very start of
               the declaration, i.e. it behaves like '^xsweet'; if leading whitespace was
               meant to be tolerated the pattern should read '^\s*xsweet'. TODO confirm. -->
          <xsl:sequence select="replace($prop,'\s*^xsweet','-xsweet')"/>
        </xsl:if>
      </xsl:for-each>
    </xsl:variable>
    <!-- Only if we have an item in sequence $significant (a sequence of strings) do we produce a new @style. -->
    <xsl:if test="exists($significant)">
      <xsl:attribute name="style">
        <xsl:value-of select="$significant" separator="; "/>
      </xsl:attribute>
    </xsl:if>
  </xsl:template>

  <!-- A span with neither @style nor @class carries no information: unwrap it. -->
  <xsl:template match="span[empty(@style|@class)]">
    <xsl:apply-templates/>
  </xsl:template>

  <!-- Other spans: all attributes are copied first; the rewritten @style (if any
       property is significant) then replaces the copied one.
       NOTE(review): when no property is significant the copied, fully redundant
       @style remains in place. Presumably intentional; verify. -->
  <xsl:template match="span">
    <xsl:copy>
      <xsl:copy-of select="@*"/>
      <xsl:apply-templates select="@style"/>
      <xsl:apply-templates/>
    </xsl:copy>
  </xsl:template>

  <!-- A 'tab' element becomes a span holding a literal tab character and a marker
       comment. The tab sits inside xsl:text because a whitespace-only text node
       written directly in the stylesheet is stripped before the stylesheet runs and
       would never reach the output. -->
  <xsl:template match="tab">
    <span class="tab"><xsl:text>&#x9;</xsl:text><xsl:comment> tab </xsl:comment></span>
  </xsl:template>

  <!-- b, i and u are unwrapped when the nearest ancestor whose @style mentions the
       corresponding property already declares bold / italic / underline. -->
  <xsl:template match="b[ancestor::*[contains(@style,'font-weight')][1]/tokenize(@style,'\s*;\s*') = 'font-weight: bold']">
    <xsl:apply-templates/>
  </xsl:template>

  <xsl:template match="i[ancestor::*[contains(@style,'font-style')][1]/tokenize(@style,'\s*;\s*') = 'font-style: italic']">
    <xsl:apply-templates/>
  </xsl:template>

  <xsl:template match="u[ancestor::*[contains(@style,'text-decoration')][1]/tokenize(@style,'\s*;\s*') = 'text-decoration: underline']">
    <xsl:apply-templates/>
  </xsl:template>

  <!-- A span whose entire @style is one of these three declarations is replaced by
       the equivalent inline element; the span's attributes are not carried over. -->
  <xsl:template match="span[@style='font-style: italic']">
    <i>
      <xsl:apply-templates/>
    </i>
  </xsl:template>

  <xsl:template match="span[@style='font-weight: bold']">
    <b>
      <xsl:apply-templates/>
    </b>
  </xsl:template>

  <xsl:template match="span[@style='text-decoration: underline']">
    <u>
      <xsl:apply-templates/>
    </u>
  </xsl:template>

  <!-- Directly nested duplicates (b in b, i in i, u in u) are unwrapped. -->
  <xsl:template match="b/b | i/i | u/u" priority="5">
    <xsl:apply-templates/>
  </xsl:template>

</xsl:stylesheet>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="2.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:xs="http://www.w3.org/2001/XMLSchema"
xmlns:xsw="http://coko.foundation/xsweet"
xmlns="http://www.w3.org/1999/xhtml"
xpath-default-namespace="http://www.w3.org/1999/xhtml"
exclude-result-prefixes="#all">