Skip to content
Snippets Groups Projects
Commit 78ac72a5 authored by Daniel Ecer
Browse files

add contrib-type="author" to extracted authors

parent b06d0f6c
No related branches found
No related tags found
No related merge requests found
......@@ -78,10 +78,11 @@ def create_xml_text(xml_root, path, text):
return node
class XmlMapping(object):
def __init__(self, xml_path, single_node=False, sub_mapping=None):
def __init__(self, xml_path, single_node=False, sub_mapping=None, attrib=None):
self.xml_path = xml_path
self.single_node = single_node
self.sub_mapping = sub_mapping
self.attrib = attrib
def _extract_items(parent_node, extracted_items, xml_mapping):
previous_tag = None
......@@ -93,10 +94,7 @@ def _extract_items(parent_node, extracted_items, xml_mapping):
get_logger().warning('tag not configured: %s', tag)
continue
path = mapping_entry.xml_path
if extracted_item.sub_items and mapping_entry.sub_mapping:
node = create_and_append_xml_node(parent_node, path)
_extract_items(node, extracted_item.sub_items, mapping_entry.sub_mapping)
elif mapping_entry.single_node:
if mapping_entry.single_node:
node = create_node_recursive(parent_node, path, exists_ok=True)
if node.text is None:
node.text = extracted_item.text
......@@ -105,7 +103,14 @@ def _extract_items(parent_node, extracted_items, xml_mapping):
else:
get_logger().debug('ignoring tag %s, after tag %s', tag, previous_tag)
else:
create_xml_text(parent_node, path, extracted_item.text)
node = create_and_append_xml_node(parent_node, path)
if mapping_entry.attrib:
for k, v in mapping_entry.attrib.items():
node.attrib[k] = v
if extracted_item.sub_items and mapping_entry.sub_mapping:
_extract_items(node, extracted_item.sub_items, mapping_entry.sub_mapping)
else:
node.text = extracted_item.text
previous_tag = tag
def extracted_items_to_xml(extracted_items):
......@@ -115,6 +120,8 @@ def extracted_items_to_xml(extracted_items):
Tags.AUTHOR: XmlMapping(XmlPaths.AUTHOR, sub_mapping={
SubTags.AUTHOR_GIVEN_NAMES: XmlMapping(SubXmlPaths.AUTHOR_GIVEN_NAMES),
SubTags.AUTHOR_SURNAME: XmlMapping(SubXmlPaths.AUTHOR_SURNAME)
}, attrib={
'contrib-type': 'author'
}),
Tags.AUTHOR_AFF: XmlMapping(XmlPaths.AUTHOR_AFF)
}
......
......@@ -27,6 +27,14 @@ TEXT_1 = 'some text here'
TEXT_2 = 'more text to come'
TEXT_3 = 'does not stop here'
def _create_author_extracted_items(given_names, surname):
return [
ExtractedItem(Tags.AUTHOR, ' '.join([given_names, surname]), sub_items=[
ExtractedItem(SubTags.AUTHOR_GIVEN_NAMES, given_names),
ExtractedItem(SubTags.AUTHOR_SURNAME, surname)
])
]
class TestExtractedItemsToXml(object):
def test_should_return_empty_xml_for_no_empty_list_of_extracted_items(self):
xml_root = extracted_items_to_xml([])
......@@ -86,6 +94,13 @@ class TestExtractedItemsToXml(object):
assert get_text_content(author.find(SubXmlPaths.AUTHOR_GIVEN_NAMES)) == TEXT_2
assert get_text_content(author.find(SubXmlPaths.AUTHOR_SURNAME)) == TEXT_3
def test_should_add_contrib_type_author_attribute(self):
xml_root = extracted_items_to_xml(_create_author_extracted_items(TEXT_1, TEXT_2))
assert xml_root is not None
author = xml_root.find(XmlPaths.AUTHOR)
assert author is not None
assert author.attrib.get('contrib-type') == 'author'
class TestMain(object):
def test_should_extract_from_simple_annotated_document(self):
with TemporaryDirectory() as path:
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment