Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
7
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
Julien Tremblay McLellan
XSweet
Commits
eeab0bec
Commit
eeab0bec
authored
Aug 13, 2018
by
Alex Theg
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
ignore b,i,u inline tags with val=false
parent
71bee4ca
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
31 additions
and
31 deletions
+31
-31
applications/docx-extract/docx-html-extract.xsl
applications/docx-extract/docx-html-extract.xsl
+31
-31
No files found.
applications/docx-extract/docx-html-extract.xsl
View file @
eeab0bec
...
...
@@ -8,14 +8,14 @@
xmlns:o=
"urn:schemas-microsoft-com:office:office"
xmlns:r=
"http://schemas.openxmlformats.org/officeDocument/2006/relationships"
xmlns:a=
"http://schemas.openxmlformats.org/drawingml/2006/main"
xmlns=
"http://www.w3.org/1999/xhtml"
xmlns:xsw=
"http://coko.foundation/xsweet"
exclude-result-prefixes=
"#all"
>
<!-- XSweet: step 1 of docx extraction - pulling the main text, notes and styles.... [3a] -->
<!-- Input: a WordML document.xml file as extracted from .docx input, with its related (neighbor) files in place -->
<!-- Output: Spammy HTML, pretty cruddy, expect to perform cleanup ... -->
<!-- For docs on WordML, see (at least):
...
...
@@ -241,7 +241,7 @@
</xsl:template>
<!-- more table handling in module docx-table-extract.xsl -->
<!-- Paragraph properties (w:pPr) are dropped in the default traversal:
     this rule has an empty body, so matching nodes produce no output. -->
<xsl:template match="w:pPr"/>
...
...
@@ -379,34 +379,34 @@
<!-- Kerning, and a run color whose value is exactly '000000', get no wrapper
     of their own: processing is handed straight to the named template
     'tuck-next'. -->
<xsl:template match="w:rPr/w:color[@w:val = '000000'] | w:rPr/w:kern" priority="10">
  <xsl:call-template name="tuck-next"/>
</xsl:template>
<!-- Bold explicitly switched off on a run (w:val of '0', 'none' or 'false'):
     wrap whatever 'tuck-next' produces in a span that resets the font weight.
     Only one rule is declared here; a template may not be nested inside
     another template. -->
<xsl:template priority="10" match="w:rPr/w:b[@w:val = ('0', 'none', 'false')]">
  <span style="font-weight: normal">
    <xsl:call-template name="tuck-next"/>
  </span>
</xsl:template>
<!-- Italic explicitly switched off on a run (w:val of '0', 'none' or 'false'):
     wrap whatever 'tuck-next' produces in a span that resets the font style.
     Only one rule is declared here; a template may not be nested inside
     another template. -->
<xsl:template priority="10" match="w:rPr/w:i[@w:val = ('0', 'none', 'false')]">
  <span style="font-style: normal">
    <xsl:call-template name="tuck-next"/>
  </span>
</xsl:template>
<!-- Underline explicitly switched off on a run (w:val of '0', 'none' or
     'false'): wrap whatever 'tuck-next' produces in a span that removes the
     text decoration. Only one rule is declared here; a template may not be
     nested inside another template. -->
<xsl:template priority="10" match="w:rPr/w:u[@w:val = ('0', 'none', 'false')]">
  <span style="text-decoration: none">
    <xsl:call-template name="tuck-next"/>
  </span>
</xsl:template>
<!--<xsl:template priority="10" match="w:rPr/w:smallCaps[@w:val=('0','none')]">
<span style="font-variant: normal">
<xsl:call-template name="tuck-next"/>
</span>
</xsl:template>-->
<!-- http://webapp.docx4java.org/OnlineDemo/ecma376/WordML/ST_VerticalAlignRun.html -->
<!--<w:vertAlign w:val="superscript"/>-->
<xsl:template
priority=
"4"
match=
"w:rPr/w:vertAlign[@w:val='superscript']"
>
...
...
@@ -538,7 +538,7 @@
<xsl:apply-templates
mode=
"#current"
/>
</xsw:style>
</xsl:template>
<xsl:template
mode=
"build-properties"
as=
"element()*"
match=
"w:tblSstyle"
>
<!--w:link pulls in character level styles - and gets us infinite loops ... -->
<xsl:apply-templates
mode=
"#current"
select=
"key('styles-by-id',@w:val, $styles)"
/>
...
...
@@ -546,7 +546,7 @@
<xsl:apply-templates
mode=
"#current"
/>
</xsw:style>
</xsl:template>
<!-- Indentation (w:ind) emits nothing itself; it passes its left, right,
     firstLine and hanging attributes (when present) on to the rules of the
     current mode. -->
<xsl:template match="w:ind" mode="build-properties" as="element(xsw:prop)*">
  <xsl:apply-templates mode="#current"
                       select="@w:*[local-name() = ('left', 'right', 'firstLine', 'hanging')]"/>
</xsl:template>
...
...
@@ -585,10 +585,10 @@
</xsl:variable>
<xsw:prop
name=
"{$property-name}"
><xsl:value-of
select=
". div 20"
/>
pt
</xsw:prop>
</xsl:template>
<!-- A left indent is suppressed when the same w:ind also carries a w:hanging
     attribute containing at least one non-whitespace character: the empty
     body yields no property. -->
<xsl:template match="w:ind[matches(@w:hanging, '\S')]/@w:left"
              mode="build-properties"
              priority="2"
              as="element(xsw:prop)*"/>
<!-- With apologies, not supporting other values of text alignment in Word. -->
<xsl:template
priority=
"2"
mode=
"build-properties"
as=
"element(xsw:prop)*"
match=
"w:jc[@w:val=('left','right','center','both')]"
>
<xsw:prop
name=
"text-align"
>
...
...
@@ -614,22 +614,22 @@
<xsl:apply-templates
mode=
"set-property"
select=
"."
/>
</xsw:prop>
</xsl:template>
<!-- Property value for bold that is explicitly switched off. 'false' is
     accepted alongside '0' and 'none', matching the run-level rule for
     w:rPr/w:b; without it w:val="false" would fall through to the general
     w:b rule. xsl:text keeps surrounding whitespace out of the value. -->
<xsl:template mode="set-property" match="w:b[@w:val = ('0', 'none', 'false')]">
  <xsl:text>normal</xsl:text>
</xsl:template>
<!-- Property value for any other w:b. xsl:text keeps surrounding whitespace
     out of the value, however the stylesheet is formatted. -->
<xsl:template mode="set-property" match="w:b">
  <xsl:text>bold</xsl:text>
</xsl:template>
<!-- Italics, bold and underline only become properties when they are set
     inside a w:style; elsewhere they are dropped here and picked up by the
     "tucking" traversal instead. The value of the font-style property is
     supplied by the set-property rules applied to this same w:i. -->
<xsl:template match="w:style//w:i" mode="build-properties" as="element(xsw:prop)">
  <xsw:prop name="font-style">
    <xsl:apply-templates select="current()" mode="set-property"/>
  </xsw:prop>
</xsl:template>
<!-- Property value for italic that is explicitly switched off. 'false' is
     accepted alongside '0' and 'none', matching the run-level rule for
     w:rPr/w:i; without it w:val="false" would fall through to the general
     w:i rule. xsl:text keeps surrounding whitespace out of the value. -->
<xsl:template mode="set-property" match="w:i[@w:val = ('0', 'none', 'false')]">
  <xsl:text>normal</xsl:text>
</xsl:template>
<!-- Property value for any other w:i. xsl:text keeps surrounding whitespace
     out of the value, however the stylesheet is formatted. -->
<xsl:template mode="set-property" match="w:i">
  <xsl:text>italic</xsl:text>
</xsl:template>
<!-- An underline inside a w:style that carries no w:val attribute has no
     effect: the empty body emits no property. Priority 2 lets this rule win
     over the general w:style//w:u rule in the same mode. -->
<xsl:template match="w:style//w:u[not(@w:val)]"
              mode="build-properties"
              priority="2"
              as="element(xsw:prop)?"/>
<xsl:template
mode=
"build-properties"
as=
"element(xsw:prop)"
match=
"w:style//w:u"
>
...
...
@@ -637,10 +637,10 @@
<xsl:apply-templates
mode=
"set-property"
select=
"."
/>
</xsw:prop>
</xsl:template>
<!-- Property value for underline that is explicitly switched off. 'false' is
     accepted alongside '0' and 'none', matching the run-level rule for
     w:rPr/w:u. xsl:text keeps surrounding whitespace out of the value. -->
<xsl:template mode="set-property" match="w:u[@w:val = ('0', 'none', 'false')]">
  <xsl:text>none</xsl:text>
</xsl:template>
<!-- Property value for any other w:u. xsl:text keeps surrounding whitespace
     out of the value, however the stylesheet is formatted. -->
<xsl:template mode="set-property" match="w:u">
  <xsl:text>underline</xsl:text>
</xsl:template>
<!-- Font size for complex scripts (szCs) is just noise: a w:szCs whose string
     value equals that of a sibling w:sz emits no property.
     NOTE(review): the comparison is on element string values, not on @w:val;
     if these elements are always empty it holds whenever a sibling w:sz
     exists. Confirm that this is intended. -->
<xsl:template match="w:szCs[. = ../w:sz]"
              mode="build-properties"
              as="element(xsw:prop)*"/>
...
...
@@ -653,11 +653,11 @@
<!-- Small caps switched on: any w:smallCaps whose w:val is absent or is not
     one of the "off" values. 'false' counts as off, as it does for the
     run-level bold, italic and underline rules. -->
<xsl:template mode="build-properties" as="element(xsw:prop)"
              match="w:smallCaps[not(@w:val = ('0', 'none', 'false'))]">
  <xsw:prop name="font-variant">
    <xsl:text>small-caps</xsl:text>
  </xsw:prop>
</xsl:template>

<!-- Small caps explicitly switched off. The predicate is the exact complement
     of the one above, so every w:smallCaps matches exactly one of the two
     rules. xsl:text keeps surrounding whitespace out of the value. -->
<xsl:template mode="build-properties" as="element(xsw:prop)"
              match="w:smallCaps[@w:val = ('0', 'none', 'false')]">
  <xsw:prop name="font-variant">
    <xsl:text>normal</xsl:text>
  </xsw:prop>
</xsl:template>
<xsl:template
mode=
"build-properties"
as=
"element(xsw:prop)*"
match=
"w:color"
>
<xsl:if
test=
"not(@w:val='000000')"
>
<xsw:prop
name=
"color"
>
...
...
@@ -760,8 +760,8 @@
</xsl:for-each>
</img>
</xsl:template>
<xsl:template
match=
"v:imagedata"
>
<img>
...
...
@@ -771,8 +771,8 @@
</xsl:for-each>
</img>
</xsl:template>
<xsl:include
href=
"docx-table-extract.xsl"
/>
</xsl:stylesheet>
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment