#!/bin/bash
# For producing HTML5 outputs via XSweet XSLT from sources extracted from .docx (Office Open XML)

# This script is most easily invoked via the `xsweet_runner.rb` script, which will iterate through
# unzipped .docx files in a conversion directory. See https://gitlab.coko.foundation/XSweet/XSweet_runner_scripts
# for README.

# Run bash script by itself as: `bash execute_chain.sh [path_to_conversion_folder] [bookname_dir] [docx_filename]`
# The stylesheet, Saxon and output paths used further down all start with `../`, so they are
# resolved against the directory the script is run from.

# 1st arg: path to a 'to_convert' directory
P=$1
# 2nd arg: name of book directory within 'to_convert' directory, e.g. "great_expectations"
BOOKNAME=$2
# 3rd arg: name of .docx file to convert, e.g. "1_introduction.docx"
DOCNAME=$3

# Note Saxon is included with this distribution, qv for license.
# The whole command line is held in one string; the conversion steps expand it
# unquoted so that it word-splits into `java -jar <jar>`.
saxonHE="java -jar ../lib/SaxonHE9-9-1-1J/saxon9he.jar"  # SaxonHE (XSLT 3.0 processor)

# Every path below is relative (`../...`), i.e. resolved against the directory
# the script is run from.

# INITIAL EXTRACTION
EXTRACT="../applications/docx-extract/docx-html-extract.xsl"
  # RUNS `docx-table-extract.xsl`
NOTES="../applications/docx-extract/handle-notes.xsl"
SCRUB="../applications/docx-extract/scrub.xsl"
JOIN="../applications/docx-extract/join-elements.xsl"
COLLAPSEPARA="../applications/docx-extract/collapse-paragraphs.xsl"

# TAG PLAIN TEXT STRINGS THAT APPEAR TO BE URLS AS LINKS
LINKS="../applications/htmlevator/applications/hyperlink-inferencer/hyperlink-inferencer.xsl"

# REBUILDS LISTS FROM WORD AS HTML
PROMOTELISTS="../applications/list-promote/PROMOTE-lists.xsl"
  # RUNS `mark-lists.xsl`, THEN `itemize-lists.xsl`

# DETECTS PLAIN-TEXT NUMBERED LISTS
DETECTLISTS="../applications/htmlevator/applications/list-detect/DETECT-ITEMIZE-LISTS.xsl"
  # RUNS `detect-numbered-lists.xsl`, THEN `itemize-detected-lists.xsl`, THEN `scrub-literal-numbering-lists.xsl`

# HEADER PROMOTION BY OUTLINE LEVEL OR DISPLAY ATTRIBUTES
HEADERCHOOSEANDPROMOTE="../applications/htmlevator/applications/header-promote/header-promotion-CHOOSE.xsl"

# MATH (OMML TO MML)
MATH="../applications/math/xsweet_tei_omml2mml.xsl"

# FINAL HTML CLEANUPS
FINALRINSE="../applications/html-polish/final-rinse.xsl"

# UNIVERSITY OF CALIFORNIA PRESS COPYEDITING CLEANUPS
UCPTEXT="../applications/htmlevator/applications/ucp-cleanup/ucp-text-macros.xsl"

# UCP-SPECIFIC ELEMENT MAPPING
UCPMAP="../applications/htmlevator/applications/ucp-cleanup/ucp-mappings.xsl"

# EDITORIA TYPESCRIPT
SPLITONBR="../applications/typescript/p-split-around-br.xsl"
EDITORIABASIC="../applications/typescript/editoria-basic.xsl"
EDITORIAREDUCE="../applications/typescript/editoria-reduce.xsl"

# SERIALIZE TO HTML5
XMLTOHTML5="../applications/html-polish/html5-serialize.xsl"

# Intermediate and final outputs (serializations) are all left on the file system.

# Refuse to run without all three arguments: with any of them empty the paths
# built below would collapse (e.g. `//word/document.xml`, `../outputs//-1EXTRACTED.xhtml`).
if [ -z "$P" ] || [ -z "$BOOKNAME" ] || [ -z "$DOCNAME" ]; then
  echo "Usage: $0 path_to_conversion_folder bookname_dir docx_filename" >&2
  exit 2
fi

# Directory that receives every numbered output of the chain.
OUTDIR="../outputs/$BOOKNAME"

# Input of the next step; starts at the main document part of the unzipped .docx
# and is advanced by run_step to the file the previous step produced.
# NOTE(review): BOOKNAME is not part of this source path although the outputs are
# grouped by BOOKNAME -- confirm against what the caller passes as the 1st arg.
CHAIN_INPUT="$P/$DOCNAME/word/document.xml"

# run_step STYLESHEET SUFFIX
#   Applies STYLESHEET to $CHAIN_INPUT with Saxon, writing
#   $OUTDIR/$DOCNAME-SUFFIX. On success prints "Made <file name>" and makes that
#   file the input of the following step. On failure reports to stderr and exits
#   with Saxon's status, so later steps never run on a missing or stale input.
#   Written without `local`/`[[` so that it also runs under plain `sh`.
run_step() {
  step_output="$OUTDIR/$DOCNAME-$2"
  # $saxonHE holds a whole command line ("java -jar ...") and must word-split.
  # shellcheck disable=SC2086
  if $saxonHE "-xsl:$1" "-s:$CHAIN_INPUT" "-o:$step_output"; then
    printf '%s\n' "Made $DOCNAME-$2"
    CHAIN_INPUT=$step_output
  else
    step_status=$?
    printf '%s\n' "Failed to make $DOCNAME-$2 (exit status $step_status); stopping." >&2
    exit "$step_status"
  fi
}

# The chain: each step consumes the output of the one before it.
run_step "$EXTRACT"                "1EXTRACTED.xhtml"
run_step "$NOTES"                  "2NOTES.xhtml"
run_step "$SCRUB"                  "3SCRUBBED.xhtml"
run_step "$JOIN"                   "4JOINED.xhtml"
run_step "$COLLAPSEPARA"           "5COLLAPSED.xhtml"
run_step "$LINKS"                  "6LINKS.xhtml"
run_step "$PROMOTELISTS"           "7PROMOTELISTS.xhtml"
run_step "$DETECTLISTS"            "8DETECTLISTS.xhtml"
run_step "$HEADERCHOOSEANDPROMOTE" "9HEADERSPROMOTED.xhtml"
run_step "$MATH"                   "10MATH.xhtml"
run_step "$FINALRINSE"             "11RINSED.xhtml"
run_step "$UCPTEXT"                "12UCPTEXTED.xhtml"
run_step "$UCPMAP"                 "13UCPMAPPED.xhtml"
run_step "$SPLITONBR"              "14SPLITONBR.xhtml"
run_step "$EDITORIABASIC"          "15EDITORIABASIC.xhtml"
run_step "$EDITORIAREDUCE"         "16EDITORIAREDUCE.html"
run_step "$XMLTOHTML5"             "17HTML5.html"