From 40cf553e75d3204d8d5d2e2b11e597527c4006cf Mon Sep 17 00:00:00 2001
From: Daniel Ecer <de-code@users.noreply.github.com>
Date: Sat, 25 Jul 2020 13:20:56 +0200
Subject: [PATCH] allow partial data generation (#55)

---
 scripts/generate-grobid-training-data.sh | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/scripts/generate-grobid-training-data.sh b/scripts/generate-grobid-training-data.sh
index 1179860..2d0ad53 100755
--- a/scripts/generate-grobid-training-data.sh
+++ b/scripts/generate-grobid-training-data.sh
@@ -24,5 +24,15 @@ RAW_TRAINING_DATA_DIR=/tmp/raw-training-data
 
 rm -rf "${RAW_TRAINING_DATA_DIR}"
 
-generate-raw-grobid-training-data.sh "${PDF_DIR}" "${RAW_TRAINING_DATA_DIR}"
+if generate-raw-grobid-training-data.sh "${PDF_DIR}" "${RAW_TRAINING_DATA_DIR}"; then
+    echo "generated raw grobid training data: ${RAW_TRAINING_DATA_DIR}"
+else  # non-fatal on purpose: partial output is accepted, the check below decides; $? is the generator's status
+    echo "failed to generate raw grobid training data, error: $?" >&2
+fi
+
+if [ -z "$(ls -A "${RAW_TRAINING_DATA_DIR}")" ]; then  # empty listing: dir is empty or could not be listed
+    echo "no raw grobid training data generated: ${RAW_TRAINING_DATA_DIR}" >&2
+    exit 1
+fi
+
 copy-raw-training-data-to-file-structure.sh "${RAW_TRAINING_DATA_DIR}" "${DATASET_DIR}"
-- 
GitLab