Skip to content
Snippets Groups Projects
Unverified Commit 55778774 authored by Daniel Ecer's avatar Daniel Ecer Committed by GitHub
Browse files

added directories to upload dataset (#24)

* added upload-dataset make target

* added dataset directories to upload
parent 7ebb1c2e
No related branches found
No related tags found
No related merge requests found
......@@ -26,6 +26,9 @@ SAMPLE_PDF_URL = https://cdn.elifesciences.org/articles/32671/elife-32671-v2.pdf
# Specify the location where to copy the model to
CLOUD_MODELS_PATH =

# Specify the location where to copy the dataset to
# (passed as the destination argument by the `upload-dataset` target).
CLOUD_DATASET_PATH =

# Backward-compatible alias: this variable was introduced with a misspelled
# name ("DATATSET") and recipes / existing callers still reference it.
# It is recursively expanded, so it follows CLOUD_DATASET_PATH unless it is
# set explicitly itself, e.g. `make upload-dataset CLOUD_DATATSET_PATH=...`.
CLOUD_DATATSET_PATH = $(CLOUD_DATASET_PATH)

# Default pytest selection arguments: deselect tests marked as `slow`.
NOT_SLOW_PYTEST_ARGS = -m 'not slow'

# Empty by default; intended to be supplied on the make command line.
ARGS =
......@@ -117,6 +120,12 @@ generate-grobid-training-data:
"$(DATASET_DIR)"
# Run `upload-dataset.sh` through $(RUN) with two arguments:
#   1. the local dataset directory, $(DATASET_DIR)
#   2. the destination, $(CLOUD_DATATSET_PATH) (variable name spelled as declared)
upload-dataset:
	$(RUN) upload-dataset.sh "$(DATASET_DIR)" "$(CLOUD_DATATSET_PATH)"
copy-raw-header-training-data-to-tei:
$(RUN) bash -c '\
mkdir -p "$(DATASET_DIR)/header/corpus/tei" && \
......
......@@ -45,6 +45,33 @@ sub_dirs=(
"header/corpus/tei"
"header/corpus/tei-raw"
"header/corpus/tei-auto"
"fulltext/corpus/raw"
"fulltext/corpus/tei"
"fulltext/corpus/tei-raw"
"fulltext/corpus/tei-auto"
"figure/corpus/raw"
"figure/corpus/tei"
"figure/corpus/tei-raw"
"figure/corpus/tei-auto"
"reference-segmenter/corpus/raw"
"reference-segmenter/corpus/tei"
"reference-segmenter/corpus/tei-raw"
"reference-segmenter/corpus/tei-auto"
"affiliation-address/corpus"
"affiliation-address/corpus-raw"
"affiliation-address/corpus-auto"
"citation/corpus"
"citation/corpus-raw"
"citation/corpus-auto"
"name/citation/corpus"
"name/citation/corpus-raw"
"name/citation/corpus-auto"
"name/header/corpus"
"name/header/corpus-raw"
"name/header/corpus-auto"
"date/corpus"
"date/corpus-raw"
"date/corpus-auto"
"xml"
)
for sub_dir in "${sub_dirs[@]}"; do
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment