Skip to content
Snippets Groups Projects
Commit 30199223 authored by Daniel Ecer
Browse files

added abstract

parent a5f8863b
No related branches found
No related tags found
No related merge requests found
......@@ -19,9 +19,11 @@ from sciencebeam_gym.inference_model.extract_from_annotated_document import (
class Tags(object):
    """Tag names that identify the kind of an extracted item."""

    # tag for the manuscript title
    TITLE = 'manuscript_title'
    # tag for abstract text
    ABSTRACT = 'abstract'
class XmlPaths(object):
    """Element paths, relative to the XML root, used for the output document."""

    # path of the article title element
    TITLE = 'front/article-meta/title-group/article-title'
    # path of the abstract element
    ABSTRACT = 'front/article-meta/abstract'
def get_logger():
    """Return the logger named after this module."""
    return logging.getLogger(__name__)
......@@ -35,7 +37,7 @@ def rsplit_xml_path(path):
def create_node_recursive(xml_root, path, exists_ok=False):
node = xml_root.find(path)
if node:
if node is not None:
if not exists_ok:
raise RuntimeError('xml node already exists: %s' % path)
return node
......@@ -49,13 +51,17 @@ def create_node_recursive(xml_root, path, exists_ok=False):
return node
def set_xml_text(xml_root, path, text):
    """Set the text of the node at ``path``, appending if it already has text.

    The node is obtained through ``create_node_recursive`` with
    ``exists_ok=True``, so calling this repeatedly for the same path reuses
    the existing node instead of raising. The first call assigns ``text``;
    subsequent calls append ``text`` on a new line.

    Returns the node whose text was updated.
    """
    node = create_node_recursive(xml_root, path, exists_ok=True)
    if node.text is None:
        node.text = text
    else:
        # the node already holds text from an earlier item: keep it and
        # add the new text after a newline separator
        node.text += '\n' + text
    return node
def extracted_items_to_xml(extracted_items):
simple_xml_mapping = {
Tags.TITLE: XmlPaths.TITLE
Tags.TITLE: XmlPaths.TITLE,
Tags.ABSTRACT: XmlPaths.ABSTRACT
}
xml_root = E.article()
for extracted_item in extracted_items:
......
......@@ -21,6 +21,7 @@ from sciencebeam_gym.inference_model.extract_to_xml import (
)
# Sample text values used as the content of extracted items in the tests.
TEXT_1 = 'some text here'
TEXT_2 = 'more text to come'
class TestExtractedItemsToXml(object):
def test_should_return_empty_xml_for_no_empty_list_of_extracted_items(self):
......@@ -34,6 +35,14 @@ class TestExtractedItemsToXml(object):
assert xml_root is not None
assert get_text_content(xml_root.find(XmlPaths.TITLE)) == TEXT_1
# Two items carrying the same ABSTRACT tag are converted to XML; the node
# found at XmlPaths.ABSTRACT is expected to contain both texts joined by a
# newline, in the order the items were passed.
def test_should_append_to_abstract(self):
xml_root = extracted_items_to_xml([
ExtractedItem(Tags.ABSTRACT, TEXT_1),
ExtractedItem(Tags.ABSTRACT, TEXT_2)
])
assert xml_root is not None
assert get_text_content(xml_root.find(XmlPaths.ABSTRACT)) == '\n'.join([TEXT_1, TEXT_2])
class TestMain(object):
def test_should_extract_from_simple_annotated_document(self):
with TemporaryDirectory() as path:
......
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment