Commit f9757ba4 authored by charlie-ablett
Browse files

Add Coko namespace, parameter handling. New Pandoc Epub to Html step

parent 7bc43488
GIT
remote: https://gitlab.coko.foundation/INK/ink-step.git
revision: 41f37a783153249bc17d5ce5a1bb689d3ab0a663
revision: 3ad03cedfdcdf101aeffc0b639b50971d6a77301
specs:
ink_step (0.1.15)
ink_step (1.0.2)
awesome_print
httparty
rubyzip
......@@ -10,19 +10,19 @@ GIT
PATH
remote: .
specs:
inkstep_coko_conversion (0.2.17)
inkstep_coko_conversion (1.0.0)
GEM
remote: https://rubygems.org/
specs:
addressable (2.5.0)
public_suffix (~> 2.0, >= 2.0.2)
awesome_print (1.7.0)
awesome_print (1.8.0)
crack (0.4.3)
safe_yaml (~> 1.0.0)
diff-lcs (1.3)
hashdiff (0.3.2)
httparty (0.15.5)
httparty (0.15.6)
multi_xml (>= 0.5.2)
multi_xml (0.6.0)
public_suffix (2.0.5)
......
require 'coko_conversion/version'
require 'ink_step/conversion_step'
module InkStep
module InkStep::Coko
class CalibreHtmlToEpubStep < InkStep::ConversionStep
def perform_step(options: {})
......@@ -22,6 +22,21 @@ module InkStep
"Converts target .html file to .epub"
end
# Names of parameters that must be supplied (shape: [:foo, :bar]).
# This step lists none.
def required_parameters
  []
end
# Map of accepted parameter name => description
# (shape: {foo: "what foo controls", bar: "Can be X, Y or Z"}).
# This step lists none.
def accepted_parameters
  {}
end
# Map of parameter name => default value (shape: {foo: 1, bar: nil}).
# This step declares no defaults.
def default_parameter_values
  {}
end
private
def perform_epub_conversion(source_file_path, destination_file_path)
......
require 'coko_conversion/version'
require 'ink_step/conversion_step'
module InkStep
module InkStep::Coko
class PandocConversionStep < InkStep::ConversionStep
require_parameters(:output_format, :input_format)
def perform_step(options: {})
super
output_format = options[:output_format]
input_format = options[:input_format]
output_format = parameter(options, :output_format)
input_format = parameter(options, :input_format)
source_file_path = find_source_file(regex: [/\.#{input_format}$/])
source_file_name = Pathname(source_file_path).sub_ext ''
output_file_path = File.join(working_directory, "#{source_file_name}.#{output_format}")
......@@ -26,6 +24,21 @@ module InkStep
"Converts target file via pandoc (input and output are specified via parameters)"
end
# Names of parameters that must be supplied: the output and input formats.
def required_parameters
  %i[output_format input_format]
end
# Map of accepted parameter name => human-readable description.
def accepted_parameters
  {
    input_format: "The current format of the file",
    output_format: "The format the file will be converted to."
  }
end
# Map of parameter name => default value (shape: {foo: 1, bar: nil}).
# No defaults are declared here.
def default_parameter_values
  {}
end
def perform_conversion(source, destination, input_format, output_format)
# see readme for more info about usage
command = "pandoc #{source} -f #{input_format} -t #{output_format} -s -o #{destination}"
......
require 'coko_conversion/version'
require 'ink_step/conversion_step'
require 'coko_conversion/ink_step/pandoc_conversion_step'
require 'coko_conversion/ink_step/coko/pandoc_conversion_step'
module InkStep
class PandocDocxToHtmlStep < InkStep::PandocConversionStep
module InkStep::Coko
class PandocDocxToHtmlStep < InkStep::Coko::PandocConversionStep
def perform_step(options: {})
new_hash = options.merge(input_format: "docx", output_format: "html")
new_hash = {input_format: parameter(options, :input_format), output_format: parameter(options, :output_format)}.merge(options)
super(options: new_hash)
success!
end
......@@ -18,5 +18,10 @@ module InkStep
# Human-readable summary of what this step does.
def self.description
  "Converts target .docx file to .html using pandoc"
end
# Default values for the :input_format and :output_format parameters.
def default_parameter_values
  {
    input_format: "docx",
    output_format: "html"
  }
end
end
end
\ No newline at end of file
require 'coko_conversion/version'
require 'ink_step/conversion_step'
require 'coko_conversion/ink_step/coko/pandoc_conversion_step'
module InkStep::Coko
  # Pandoc-driven step that converts an .epub source into .html.
  #
  # This is one of several ways to build a custom step: subclassing keeps the
  # parent class behaviour available while allowing more customisation than
  # parameters alone would.
  class PandocEpubToHtmlStep < PandocConversionStep
    # Human-readable summary of what this step does.
    def self.description
      "Converts target .epub file to .html via Pandoc"
    end

    # Resolves both format parameters through `parameter`, merges the caller's
    # options on top (so caller-supplied keys win), hands the result to the
    # parent step, then calls `success!`.
    def perform_step(options: {})
      resolved_formats = {
        input_format: parameter(options, :input_format),
        output_format: parameter(options, :output_format)
      }
      super(options: resolved_formats.merge(options))
      success!
    end

    # Version string taken from CokoConversion::VERSION.
    def version
      CokoConversion::VERSION
    end

    # Default values for the :input_format and :output_format parameters.
    def default_parameter_values
      {
        input_format: "epub",
        output_format: "html"
      }
    end
  end
end
\ No newline at end of file
require 'coko_conversion/version'
require 'ink_step/conversion_step'
require 'coko_conversion/ink_step/pandoc_conversion_step'
require 'coko_conversion/ink_step/coko/pandoc_conversion_step'
module InkStep
class PandocEpubToIcmlStep < InkStep::PandocConversionStep
module InkStep::Coko
class PandocEpubToIcmlStep < PandocConversionStep
def perform_step(options: {})
new_hash = options.merge(input_format: "epub", output_format: "icml")
new_hash = {input_format: parameter(options, :input_format), output_format: parameter(options, :output_format)}.merge(options)
super(options: new_hash)
success!
end
......@@ -18,5 +18,10 @@ module InkStep
# Version string taken from CokoConversion::VERSION.
def version
  CokoConversion::VERSION
end
# Default values for the :input_format and :output_format parameters.
def default_parameter_values
  {
    input_format: "epub",
    output_format: "icml"
  }
end
end
end
\ No newline at end of file
require 'coko_conversion/version'
require 'ink_step/conversion_step'
module InkStep
module InkStep::Coko
class VivliostyleHtmlToPdfStep < InkStep::ConversionStep
def perform_step(options: {})
......@@ -22,6 +22,21 @@ module InkStep
CokoConversion::VERSION
end
# Names of parameters that must be supplied (shape: [:foo, :bar]).
# This step lists none.
def required_parameters
  []
end
# Map of accepted parameter name => description
# (shape: {foo: "what foo controls", bar: "Can be X, Y or Z"}).
# This step lists none.
def accepted_parameters
  {}
end
# Map of parameter name => default value (shape: {foo: 1, bar: nil}).
# This step declares no defaults.
def default_parameter_values
  {}
end
private
def perform_conversion(source_file_path:, output_file_path:)
......
require 'coko_conversion/version'
require 'ink_step/conversion_step'
module InkStep
module InkStep::Coko
class WkHtmlToPdfStep < InkStep::ConversionStep
def perform_step(options: {})
......@@ -22,6 +22,21 @@ module InkStep
"Runs wkhtmltopdf conversion html to pdf"
end
# Names of parameters that must be supplied (shape: [:foo, :bar]).
# This step lists none.
def required_parameters
  []
end
# Map of accepted parameter name => description
# (shape: {foo: "what foo controls", bar: "Can be X, Y or Z"}).
# This step lists none.
def accepted_parameters
  {}
end
# Map of parameter name => default value (shape: {foo: 1, bar: nil}).
# This step declares no defaults.
def default_parameter_values
  {}
end
private
def run_html_to_pdf_conversion(source_file_path, destination_file_path)
......
require 'coko_conversion/ink_step/xsweet_pipeline/download_and_execute_xsl_via_saxon'
require 'coko_conversion/ink_step/coko/xsweet_pipeline/download_and_execute_xsl_via_saxon'
module InkStep
module InkStep::Coko
module XsweetPipeline
module DocxExtract
class CollapseParagraphsStep < XsweetPipeline::DownloadAndExecuteXslViaSaxon
......
require 'coko_conversion/ink_step/xsweet_pipeline/download_and_execute_xsl_via_saxon_on_docx'
require 'coko_conversion/ink_step/coko/xsweet_pipeline/download_and_execute_xsl_via_saxon_on_docx'
module InkStep
module InkStep::Coko
module XsweetPipeline
module DocxExtract
class DocxToHtmlExtractStep < DownloadAndExecuteXslViaSaxonOnDocx
......@@ -11,13 +11,13 @@ module InkStep
success!
end
def remote_xsl_location
"https://gitlab.coko.foundation/wendell/XSweet/raw/ink-api-publish/applications/docx-extract/docx-html-extract.xsl"
end
# Human-readable summary of what this step does.
def self.description
  "Extracts content from a .docx file to a HTML Typescript-conforming .html file"
end
# URL of the docx-html-extract XSL sheet used by this step.
def remote_xsl_location
  "https://gitlab.coko.foundation/wendell/XSweet/raw/ink-api-publish/applications/docx-extract/docx-html-extract.xsl"
end
end
end
end
......
require 'coko_conversion/ink_step/xsweet_pipeline/download_and_execute_xsl_via_saxon'
require 'coko_conversion/ink_step/coko/xsweet_pipeline/download_and_execute_xsl_via_saxon'
module InkStep
module InkStep::Coko
module XsweetPipeline
module DocxExtract
class HandleNotesStep < XsweetPipeline::DownloadAndExecuteXslViaSaxon
......
require 'coko_conversion/ink_step/xsweet_pipeline/download_and_execute_xsl_via_saxon'
require 'coko_conversion/ink_step/coko/xsweet_pipeline/download_and_execute_xsl_via_saxon'
module InkStep
module InkStep::Coko
module XsweetPipeline
module DocxExtract
class JoinElementsStep < XsweetPipeline::DownloadAndExecuteXslViaSaxon
......
require 'coko_conversion/ink_step/xsweet_pipeline/download_and_execute_xsl_via_saxon'
require 'coko_conversion/ink_step/coko/xsweet_pipeline/download_and_execute_xsl_via_saxon'
module InkStep
module InkStep::Coko
module XsweetPipeline
module DocxExtract
class ScrubStep < XsweetPipeline::DownloadAndExecuteXslViaSaxon
......@@ -11,13 +11,13 @@ module InkStep
success!
end
def remote_xsl_location
"https://gitlab.coko.foundation/wendell/XSweet/raw/ink-api-publish/applications/docx-extract/scrub.xsl"
end
# Human-readable summary of what this step does.
def self.description
  "Removes superfluous elements (e.g. <i/>)"
end
# URL of the scrub XSL sheet used by this step.
def remote_xsl_location
  "https://gitlab.coko.foundation/wendell/XSweet/raw/ink-api-publish/applications/docx-extract/scrub.xsl"
end
end
end
end
......
......@@ -3,18 +3,17 @@ require 'coko_conversion/utilities/saxon_xsl_methods'
require 'coko_conversion/utilities/zip_methods'
require 'httparty'
module InkStep
module InkStep::Coko
module XsweetPipeline
class DownloadAndExecuteXslViaSaxon < InkStep::ConversionStep
include Utilities::SaxonXslMethods
include Utilities::ZipMethods
attr_accessor :remote_xsl_uri
require_parameters :remote_xsl_uri
def perform_step(options: {})
super
@remote_xsl_uri = options[:remote_xsl_uri]
@remote_xsl_uri = parameter(options, :remote_xsl_uri)
regex = options[:regex] ? Regexp.new(options[:regex]) : [/\.html$/, /\.htm$/]
source_file_relative_path = find_source_file(regex: regex)
source_file_path = File.join(working_directory, source_file_relative_path)
......@@ -30,6 +29,25 @@ module InkStep
end
end
# Names of parameters that must be supplied: only :remote_xsl_uri.
def required_parameters
  %i[remote_xsl_uri]
end
# Map of accepted parameter name => human-readable description.
def accepted_parameters
  {
    remote_xsl_uri: "The location of the raw XSL file to download"
  }
end
# Map of parameter name => default value.
# When the object publicly responds to `remote_xsl_location`, that value is
# used as the default :remote_xsl_uri; otherwise there are no defaults.
def default_parameter_values
  return {} unless respond_to?(:remote_xsl_location)

  { remote_xsl_uri: remote_xsl_location }
end
# Clears the stored downloaded file name by setting it back to nil.
def reset_xsl_path
  @downloaded_file_name = nil
end
......
......@@ -3,18 +3,17 @@ require 'coko_conversion/utilities/saxon_xsl_methods'
require 'coko_conversion/utilities/zip_methods'
require 'httparty'
module InkStep
module InkStep::Coko
module XsweetPipeline
class DownloadAndExecuteXslViaSaxonOnDocx < InkStep::ConversionStep
include Utilities::SaxonXslMethods
include Utilities::ZipMethods
attr_accessor :remote_xsl_uri
require_parameters :remote_xsl_uri
def perform_step(options: {})
super
@remote_xsl_uri = options[:remote_xsl_uri]
@remote_xsl_uri = parameter(options, :remote_xsl_uri)
source_file_path = find_source_file(regex: /\.docx$/)
source_file_name = Pathname(source_file_path).sub_ext ''
output_file_path = File.join(working_directory, "#{source_file_name}.html")
......@@ -37,6 +36,33 @@ module InkStep
downloaded_file.close
end
# Human-readable summary of what this step does.
def self.description
  "Downloads and applies the target XSL sheet URL against the target docx file"
end
# Version string taken from CokoConversion::VERSION.
def version
  CokoConversion::VERSION
end
# Names of parameters that must be supplied: only :remote_xsl_uri.
def required_parameters
  %i[remote_xsl_uri]
end
# Map of accepted parameter name => human-readable description.
def accepted_parameters
  {
    remote_xsl_uri: "The location of the raw XSL file to download"
  }
end
# Map of parameter name => default value.
# When the object publicly responds to `remote_xsl_location`, that value is
# used as the default :remote_xsl_uri; otherwise there are no defaults.
def default_parameter_values
  respond_to?(:remote_xsl_location) ? { remote_xsl_uri: remote_xsl_location } : {}
end
# Path of the downloaded XSL file: the working directory joined with the
# stored downloaded file name.
def xsl_file_path
  step_directory = working_directory
  File.join(step_directory, @downloaded_file_name)
end
......@@ -45,14 +71,6 @@ module InkStep
parsed_uri = URI.parse(uri)
File.basename(parsed_uri.path)
end
def self.description
"Downloads and applies the target XSL sheet URL against the target docx file"
end
def version
CokoConversion::VERSION
end
end
end
end
\ No newline at end of file
require 'coko_conversion/ink_step/xsweet_pipeline/download_and_execute_xsl_via_saxon'
require 'coko_conversion/ink_step/coko/xsweet_pipeline/download_and_execute_xsl_via_saxon'
module InkStep
module InkStep::Coko
module XsweetPipeline
module FinaliseTypescript
class FinalRinseStep < XsweetPipeline::DownloadAndExecuteXslViaSaxon
......@@ -10,29 +10,15 @@ module InkStep
log_as_step "applying final rinse xsl..."
super(options: new_hash)
success!
# regex = options[:regex] ? Regexp.new(options[:regex]) : [/\.html$/, /\.htm$/]
# source_file_relative_path = find_source_file(regex: regex)
# source_file_path = File.join(working_directory, source_file_relative_path)
#
# log_as_step "applying final rinse xsl on #{source_file_relative_path}..."
# download_file(final_rinse_xsl_uri)
# apply_xslt_transformation(input_file_path: source_file_path,
# output_file_path: source_file_path,
# xsl_file_path: xsl_file_path,
# provided_saxon_jar_path: nil)
# @successful = true
end
# Human-readable summary of what this step does.
def self.description
  "Tidies up the HTML by removing noise."
end
# URL of the final-rinse XSL sheet used by this step.
def remote_xsl_location
  "https://gitlab.coko.foundation/wendell/XSweet/raw/ink-api-publish/applications/html-polish/final-rinse.xsl"
end
def self.description
"Tidies up the HTML by removing noise."
end
end
end
end
......
require 'coko_conversion/ink_step/xsweet_pipeline/download_and_execute_xsl_via_saxon'
require 'coko_conversion/ink_step/coko/xsweet_pipeline/download_and_execute_xsl_via_saxon'
module InkStep
module InkStep::Coko
module XsweetPipeline
module HeaderPromote
class HeaderPromotionStep < XsweetPipeline::DownloadAndExecuteXslViaSaxon
......@@ -12,14 +12,14 @@ module InkStep
log_as_step "digesting paragraphs on #{source_file_relative_path} to make #{paragraphs_digested_path}..."
download_file(digest_paragraphs_xsl_uri)
download_file(parameter(options, :digest_paragraphs_xsl_uri))
apply_xslt_transformation(input_file_path: source_file_path,
output_file_path: paragraphs_digested_path,
xsl_file_path: xsl_file_path,
provided_saxon_jar_path: nil)
log_as_step "creating header escalator xslt (#{header_escalator_xslt_path}..."
download_file(make_header_excalator_xslt_uri)
download_file(parameter(options, :make_header_excalator_xslt_uri))
apply_xslt_transformation(input_file_path: paragraphs_digested_path,
output_file_path: header_escalator_xslt_path,
xsl_file_path: xsl_file_path,
......@@ -37,24 +37,34 @@ module InkStep
success!
end
def header_escalator_xslt_path
File.join(working_directory, "header_escalator.xslt")
# Human-readable summary of what this step does.
def self.description
  "Detects headers by font size and promotes them to be headers (e.g. h2)"
end
def paragraphs_digested_path
File.join(working_directory, "paragraphs_digested.html")
# Names of parameters that must be supplied: the two XSL sheet locations.
def required_parameters
  %i[make_header_excalator_xslt_uri digest_paragraphs_xsl_uri]
end
def digest_paragraphs_xsl_uri
"https://gitlab.coko.foundation/wendell/XSweet/raw/ink-api-publish/applications/header-promote/digest-paragraphs.xsl"
# Map of accepted parameter name => human-readable description.
def accepted_parameters
  descriptions = {}
  descriptions[:make_header_excalator_xslt_uri] = "Location of raw XSL file to download - header escalator"
  descriptions[:digest_paragraphs_xsl_uri] = "Location of raw XSL file to download - paragraph digester"
  descriptions
end
def make_header_excalator_xslt_uri
"https://gitlab.coko.foundation/wendell/XSweet/raw/ink-api-publish/applications/header-promote/make-header-escalator-xslt.xsl"
# Default locations for the two XSL sheets, keyed by parameter name.
def default_parameter_values
  defaults = {}
  defaults[:make_header_excalator_xslt_uri] = "https://gitlab.coko.foundation/wendell/XSweet/raw/ink-api-publish/applications/header-promote/make-header-escalator-xslt.xsl"
  defaults[:digest_paragraphs_xsl_uri] = "https://gitlab.coko.foundation/wendell/XSweet/raw/ink-api-publish/applications/header-promote/digest-paragraphs.xsl"
  defaults
end
def self.description
"Detects headers by font size and promotes them to be headers (e.g. h2)"
# Path of "header_escalator.xslt" inside the working directory.
def header_escalator_xslt_path
  step_directory = working_directory
  File.join(step_directory, "header_escalator.xslt")
end
# Path of "paragraphs_digested.html" inside the working directory.
def paragraphs_digested_path
  step_directory = working_directory
  File.join(step_directory, "paragraphs_digested.html")
end
end
end
......
require 'coko_conversion/ink_step/xsweet_pipeline/download_and_execute_xsl_via_saxon'
require 'coko_conversion/ink_step/coko/xsweet_pipeline/download_and_execute_xsl_via_saxon'
module InkStep
module InkStep::Coko
module XsweetPipeline
module PrepareForEditoria
class EditoriaPrepareStep < XsweetPipeline::DownloadAndExecuteXslViaSaxon
def perform_step(options: {})
regex = options[:regex] ? Regexp.new(options[:regex]) : [/\.html$/, /\.htm$/]
regex = parameter(options, :regex) ? Regexp.new(options[:regex]) : [/\.html$/, /\.htm$/]
source_file_relative_path = find_source_file(regex: regex)
source_file_path = File.join(working_directory, source_file_relative_path)
log_as_step "splitting ps on breaks in #{source_file_relative_path} to make #{p_split_on_br_done_path}..."
download_file(p_split_on_br_uri)
download_file(parameter(options, :p_split_on_br_uri))
apply_xslt_transformation(input_file_path: source_file_path,
output_file_path: p_split_on_br_done_path,