; annot-xml-full.conf
; Maps field names to XPath expressions over a JATS-style article XML
; document (front / body / back).
; Each key in this section names a field; its value is an XPath selecting the
; XML element(s) that hold that field. The paths look relative to the article
; root element - confirm against the code that reads this file.
[article]
; Article title inside the title group of the article metadata.
manuscript_title = front/article-meta/title-group/article-title
; Abstract element of the article metadata.
abstract = front/article-meta/abstract

; Journal title from the journal metadata.
journal = front/journal-meta/journal-title-group/journal-title
; Electronic ISSN. In JATS the publication kind of an <issn> is carried by
; @pub-type; @pub-id-type is an attribute of <article-id>. The original
; @pub-id-type test is kept as an alternative so that any input which matched
; before still matches.
issn = front/journal-meta/issn[@pub-type="epub" or @pub-id-type="epub"]
; DOI among the article identifiers.
doi = front/article-meta/article-id[@pub-id-type="doi"]
; Copyright statement inside the permissions element.
copyright_statement = front/article-meta/permissions/copyright-statement

; Title element of a keyword group.
keywords_title = front/article-meta/kwd-group/title

; Keyword groups of the article metadata.
keywords = front/article-meta/kwd-group
; <kwd> children of each matched group.
; NOTE(review): the predicate tests @kwd-group-type on <kwd>. In JATS that
; attribute belongs to <kwd-group>, so not(@kwd-group-type) would hold for
; every <kwd> and nothing is filtered out. If the intent was to restrict to
; author keywords, the predicate probably belongs on the kwd-group path
; above - confirm before changing, as it alters which keywords match.
keywords.children = kwd[@kwd-group-type='author-keywords' or not(@kwd-group-type)]
; JSON list of lists of {"xpath": ...} parts; presumably each inner list
; describes values to be joined into one - confirm with the reader.
keywords.children.concat = [[{"xpath": "kwd[@kwd-group-type='author-keywords' or not(@kwd-group-type)]"}]]

; Display-channel subject (the manuscript type). In JATS @subj-group-type is
; an attribute of <subj-group>, not of <subject>, so the predicate also checks
; the parent element. The original test on <subject> itself is kept so that
; any input which matched before still matches.
manuscript_type = front/article-meta/article-categories/subj-group/subject[@subj-group-type="display-channel" or ../@subj-group-type="display-channel"]
; Acknowledgements element in the back matter.
acknowledgement = back/ack

; <corresp> elements inside the author notes of the article metadata.
author_notes = front/article-meta/author-notes/corresp

; Author names: the <name> of every contributor that either has no
; @contrib-type or has @contrib-type="author".
author = front/article-meta/contrib-group/contrib[not(@contrib-type) or @contrib-type="author"]/name
; The dotted suffixes below are options interpreted by the code that reads
; this file; their exact semantics are not visible here - confirm there.
; All descendant elements of each matched <name>.
author.children = .//*
author.bonding = true
author.match-multiple = true
; Named sub-fields, selected relative to each matched <name>.
author.sub.surname = ./surname
author.sub.givennames = ./given-names

; Affiliations, one XPath per indented continuation line: <aff> directly
; under a contributor group, under an individual contributor, or directly
; under the article metadata.
author_aff =
  front/article-meta/contrib-group/aff
  front/article-meta/contrib-group/contrib/aff
  front/article-meta/aff
; The dotted suffixes below are options interpreted by the code that reads
; this file; their exact semantics are not visible here - confirm there.
; All descendant elements of each matched <aff>.
author_aff.children = .//*
author_aff.unmatched-parent-text = true
author_aff.bonding = true
author_aff.match-multiple = true
; Captures a whole-word run of digits. The leading .* is greedy, so when
; several such runs are present the last one is captured.
author_aff.extract-regex = .*\b(\d+)\b.*
; Named sub-fields, selected relative to each matched <aff>.
author_aff.sub.sup = ./sup
author_aff.sub.addrline = ./addr-line
; Same digit-capturing pattern as author_aff.extract-regex.
author_aff.sub.addrline.extract-regex = .*\b(\d+)\b.*
author_aff.sub.country = ./country
author_aff.sub.extlink = ./ext-link

; Paragraphs that are direct children of <body>.
main_paragraph = body/p
; Elements to leave out of a paragraph (presumably their text is excluded -
; confirm with the reader). In an XPath union each alternative is its own
; path, so every alternative carries its own .// to match at any depth;
; previously only fig did, and table-wrap / disp-formula matched direct
; children only.
main_paragraph.ignore = .//fig|.//table-wrap|.//disp-formula
; Titles of sections at any depth below <body>.
section_title = body//sec/title
; Skips optional leading numbering (digit runs, each optionally followed by
; a dot, e.g. "1.2.") and one optional space; captures the remaining text.
section_title.regex = (?:\d+\.?)* ?(.*)
; Paragraphs that are direct children of a section at any depth.
section_paragraph = body//sec/p
; Elements to leave out of a section paragraph (presumably their text is
; excluded - confirm with the reader). In an XPath union each alternative is
; its own path, so every alternative carries its own .// to match at any
; depth; previously only fig did, and the others matched direct children only.
section_paragraph.ignore = .//fig|.//fig-group|.//table-wrap|.//disp-formula

; Captions of figures and figure groups at any depth below <body>, one XPath
; per indented continuation line.
figure_caption =
  body//fig/caption
  body//fig-group/caption
; NOTE(review): presumably ranks this field below the default when several
; fields compete for the same text - confirm with the reader of this file.
figure_caption.priority = -1

; Entries of the reference list in the back matter.
reference = back/ref-list/ref
; All descendant elements of each matched <ref>.
reference.children = .//*
; JSON list holding one list of parts: the fpage descendant, the literal "-",
; and the lpage descendant. Presumably joined into a "first-last" page string
; - confirm with the reader of this file.
reference.children.concat = [[{"xpath": ".//fpage"}, {"value": "-"}, {"xpath": ".//lpage"}]]
reference.bonding = true

; Page numbers, anchored at the article metadata element.
page_no = front/article-meta
; <fpage> and <lpage> direct children of the article metadata.
page_no.children = fpage|lpage
; JSON list with one range description: "min" is read from fpage and "max"
; from lpage. Presumably expands to every page number in between; the meaning
; of "standalone" is not visible here - confirm with the reader of this file.
page_no.children.range = [{
    "min": {"xpath": "fpage"},
    "max": {"xpath": "lpage"},
    "standalone": true
  }]