Commit e8d553a1 authored by Wendell Piez
parents cc85a2b5 a764dab8
......@@ -17,15 +17,13 @@
<!-- Strip these, retaining their contents. -->
<xsl:template match="position | iCs | lang | vertAlign | noProof">
<!-- Strip these, retaining their contents.
Note that at least some of these may also be suppressed in the extraction,
so this is a "gloves and mittens" (doubly cautious) measure. -->
<xsl:template match="position | iCs | lang | vertAlign | noProof | kern">
<!-- Matching b, i and u if they have only whitespace text content
this includes <b> </b> and <b><tab/></b> -->
<xsl:template match="caps | strike">
......@@ -43,7 +41,7 @@
<!-- Inline elements that are truly empty can be stripped. -->
<xsl:template match="p//*[empty(.//* except (.//tab|.//span|.//b|.//i|.//u)) and not(matches(.,'\S'))]">
<xsl:template match="p//*[empty(.//* except (.//tab|.//span|.//b|.//i|.//u)) and not(string(.))]">
......@@ -71,7 +71,8 @@
<xsl:variable name="p-proxies">
<!-- Only paragraphs with contents are examined for header promotion.
matches(string(.),'\S') is true iff non-ws content is present. -->
<xsl:apply-templates select="//div[@class = 'docx-body']/p[matches(string(.),'\S')]" mode="digest"/>
<xsl:variable name="matching-ps" select="//div[@class = 'docx-body']/p[matches(string(.),'\S')] except (//table//p | //li//p)"/>
<xsl:apply-templates select="$matching-ps" mode="digest"/>
<!-- Mode 'digest' is the initial (first) pass over the document, which boils down all paragraph-level
......@@ -29,7 +29,7 @@
<!-- Note that the generated stylesheet will raise an error if $extra-match-criteria is anything but an XPath filter expression,
i.e. '[ booleanExp ]' (with square brackets).
Exposing it as a parameter isn't recommended unless we can defend against arbitrary XPath injection. -->
<xsl:variable name="extra-match-criteria">[string-length(.) &lt;= 200][matches(.,'\S')][ancestor::*/@class='docx-body']</xsl:variable>
<xsl:variable name="extra-match-criteria">[empty(ancestor::table|ancestor::li)][ancestor::*/@class='docx-body'][string-length(.) &lt;= 200][matches(.,'\S')]</xsl:variable>
<xsl:template match="body">
......@@ -12,6 +12,18 @@
<xsl:output method="xml" indent="no" omit-xml-declaration="yes"/>
<xsl:template match="html/head">
<xsl:for-each select="(//h1 | //h2 | //h3)[1]">
<xsl:value-of select="."/>
<xsl:template match="html/head/title"/>
<xsl:template match="node() | @*">
......@@ -3,6 +3,7 @@
<xsl:output method="xml" indent="no" omit-xml-declaration="yes"/>
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment