Skip to content
Snippets Groups Projects
Unverified commit a703625a, authored by Daniel Ecer and committed by GitHub
Browse files

added support for renamed headers/corpus/raw dir (#72)

parent 540f4065
No related branches found
No related tags found
No related merge requests found
...@@ -75,11 +75,21 @@ copy_segmentation_files() { ...@@ -75,11 +75,21 @@ copy_segmentation_files() {
} }
copy_header_files() { copy_header_files() {
copy_and_rename_tei_and_raw_training_files \ if [ -d "/opt/grobid-source/grobid-trainer/resources/dataset/header/corpus/headers" ]; then
"$DATASET_DIR/header/corpus/tei-raw" \ # prior GROBID 0.6.1
"*.header.tei.xml" \ copy_and_rename_tei_and_raw_training_files \
"$DATASET_DIR/header/corpus/headers" \ "$DATASET_DIR/header/corpus/tei-raw" \
"*.header" "*.header.tei.xml" \
"$DATASET_DIR/header/corpus/headers" \
"*.header"
else
# from GROBID 0.6.1
copy_and_rename_tei_and_raw_training_files \
"$DATASET_DIR/header/corpus/tei-raw" \
"*.header.tei.xml" \
"$DATASET_DIR/header/corpus/raw" \
"*.header"
fi
} }
copy_fulltext_files() { copy_fulltext_files() {
......
...@@ -66,10 +66,19 @@ if [ "${MODEL_NAME}" == "segmentation" ]; then ...@@ -66,10 +66,19 @@ if [ "${MODEL_NAME}" == "segmentation" ]; then
"segmentation/corpus/tei" "segmentation/corpus/tei"
) )
elif [ "${MODEL_NAME}" == "header" ]; then elif [ "${MODEL_NAME}" == "header" ]; then
sub_dirs=( if [ -d "/opt/grobid-source/grobid-trainer/resources/dataset/header/corpus/headers" ]; then
"header/corpus/headers" # prior GROBID 0.6.1
"header/corpus/tei" sub_dirs=(
) "header/corpus/headers"
"header/corpus/tei"
)
else
# from GROBID 0.6.1
sub_dirs=(
"header/corpus/raw"
"header/corpus/tei"
)
fi
elif [ "${MODEL_NAME}" == "fulltext" ]; then elif [ "${MODEL_NAME}" == "fulltext" ]; then
sub_dirs=( sub_dirs=(
"fulltext/corpus/raw" "fulltext/corpus/raw"
......
...@@ -41,6 +41,7 @@ sub_dirs=( ...@@ -41,6 +41,7 @@ sub_dirs=(
"segmentation/corpus/tei" "segmentation/corpus/tei"
"segmentation/corpus/tei-raw" "segmentation/corpus/tei-raw"
"segmentation/corpus/tei-auto" "segmentation/corpus/tei-auto"
"header/corpus/raw"
"header/corpus/headers" "header/corpus/headers"
"header/corpus/tei" "header/corpus/tei"
"header/corpus/tei-raw" "header/corpus/tei-raw"
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment