From 40cf553e75d3204d8d5d2e2b11e597527c4006cf Mon Sep 17 00:00:00 2001
From: Daniel Ecer <de-code@users.noreply.github.com>
Date: Sat, 25 Jul 2020 13:20:56 +0200
Subject: [PATCH] allow partial data generation (#55)

---
Note: the added lines were revised after export (quoted expansion, portable
`ls -A`, failure messages on stderr, explanatory comments). The post-image
blob id on the "index" line below still refers to the original change.

 scripts/generate-grobid-training-data.sh | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/scripts/generate-grobid-training-data.sh b/scripts/generate-grobid-training-data.sh
index 1179860..2d0ad53 100755
--- a/scripts/generate-grobid-training-data.sh
+++ b/scripts/generate-grobid-training-data.sh
@@ -24,5 +24,17 @@ RAW_TRAINING_DATA_DIR=/tmp/raw-training-data
 
 rm -rf "${RAW_TRAINING_DATA_DIR}"
 
-generate-raw-grobid-training-data.sh "${PDF_DIR}" "${RAW_TRAINING_DATA_DIR}"
+# A failed run is only reported (with the generator's exit status), not fatal.
+if generate-raw-grobid-training-data.sh "${PDF_DIR}" "${RAW_TRAINING_DATA_DIR}"; then
+    echo "generated raw grobid training data: ${RAW_TRAINING_DATA_DIR}"
+else
+    echo "failed to generate raw grobid training data, error: $?" >&2
+fi
+
+# Abort only when the output directory is missing or empty (ls prints nothing).
+if [ ! "$(ls -A "${RAW_TRAINING_DATA_DIR}")" ]; then
+    echo "no raw grobid training data generated: ${RAW_TRAINING_DATA_DIR}" >&2
+    exit 1
+fi
+
 copy-raw-training-data-to-file-structure.sh "${RAW_TRAINING_DATA_DIR}" "${DATASET_DIR}"
-- 
GitLab