diff --git a/sciencebeam_gym/inference_model/extract_to_xml.py b/sciencebeam_gym/inference_model/extract_to_xml.py index 5a0c70f674808b34909bb2f5e840ac3b73edb96b..7436434da3ba4a2264f7184b6c087e6c23d4be5b 100644 --- a/sciencebeam_gym/inference_model/extract_to_xml.py +++ b/sciencebeam_gym/inference_model/extract_to_xml.py @@ -25,7 +25,7 @@ class Tags(object): class XmlPaths(object): TITLE = 'front/article-meta/title-group/article-title' ABSTRACT = 'front/article-meta/abstract' - AUTHOR = 'front/article-meta/contrib-group/contrib/name' + AUTHOR = 'front/article-meta/contrib-group/contrib' AUTHOR_AFF = 'front/article-meta/contrib-group/aff' class SubTags(object): @@ -33,8 +33,8 @@ class SubTags(object): AUTHOR_GIVEN_NAMES = 'givennames' class SubXmlPaths(object): - AUTHOR_SURNAME = 'surname' - AUTHOR_GIVEN_NAMES = 'given-names' + AUTHOR_SURNAME = 'name/surname' + AUTHOR_GIVEN_NAMES = 'name/given-names' def get_logger(): return logging.getLogger(__name__) diff --git a/sciencebeam_gym/inference_model/extract_to_xml_test.py b/sciencebeam_gym/inference_model/extract_to_xml_test.py index 1d8b92cc98c06895d4ee4fcfffeccfb545863717..223a905833b7f53a1aa39c7645b741508e35476b 100644 --- a/sciencebeam_gym/inference_model/extract_to_xml_test.py +++ b/sciencebeam_gym/inference_model/extract_to_xml_test.py @@ -140,6 +140,7 @@ class TestExtractedItemsToXml(object): assert xml_root is not None author = xml_root.find(XmlPaths.AUTHOR) assert author is not None + assert author.tag == 'contrib' assert author.attrib.get('contrib-type') == 'author' class TestMain(object):