From 3528a3dc0bb9c7d06ae3a5f08808dc365f5fe126 Mon Sep 17 00:00:00 2001 From: Daniel Ecer <de-code@users.noreply.github.com> Date: Mon, 8 Jan 2018 19:07:00 +0000 Subject: [PATCH] improved debugging by raising exception with filename --- .../models/text/crf/crfsuite_training_pipeline.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/sciencebeam_gym/models/text/crf/crfsuite_training_pipeline.py b/sciencebeam_gym/models/text/crf/crfsuite_training_pipeline.py index 39f4a76..dd3a9ef 100644 --- a/sciencebeam_gym/models/text/crf/crfsuite_training_pipeline.py +++ b/sciencebeam_gym/models/text/crf/crfsuite_training_pipeline.py @@ -2,6 +2,8 @@ import logging import argparse import pickle +from six import raise_from + from sciencebeam_gym.utils.file_list_loader import ( load_file_list ) @@ -48,11 +50,18 @@ def parse_args(argv=None): return parser.parse_args(argv) +def load_and_convert_to_token_props(filename): + try: + structured_document = load_structured_document(filename) + return list(structured_document_to_token_props( + structured_document + )) + except Exception as e: + raise_from(RuntimeError('failed to process %s' % filename), e) + def train_model(file_list): token_props_list_by_document = [ - list(structured_document_to_token_props( - load_structured_document(filename) - )) + load_and_convert_to_token_props(filename) for filename in file_list ] X = [token_props_list_to_features(x) for x in token_props_list_by_document] -- GitLab