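# Runs the sciencebeam_gym preprocessing pipeline over the training and
# validation file lists, and once more over the validation list when
# QUALITATIVE_PREPROC_PATH is set.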
# Pull in the shared settings first; variables used further down that are not
# assigned in this script are presumably defined there (verify in prepare-shell.sh).
source prepare-shell.sh

# Export the image size after sourcing, so these values win over anything
# prepare-shell.sh may have set for the same names.
export IMAGE_WIDTH=256 IMAGE_HEIGHT=256
# Choose the extra flag appended to every pipeline invocation below:
# "--cloud" when USE_CLOUD is exactly "true", otherwise nothing.
# The expansion is quoted and given an empty default so that an unset, empty
# or multi-word USE_CLOUD quietly selects the non-cloud branch instead of
# making `[` print a syntax error; `=` is the portable string comparison.
if [ "${USE_CLOUD:-}" = "true" ]; then
  CLOUD_ARGS="--cloud"
else
  CLOUD_ARGS=""
fi
# Preprocess the training set: reads file-list-train.tsv from FILE_LIST_PATH
# and writes below TRAIN_PREPROC_PATH, capped at TRAIN_FILE_LIMIT.
# Every expansion is quoted so a value containing whitespace or glob
# characters is passed as a single argument. CLOUD_ARGS is the one exception:
# it is left unquoted on purpose so that an empty value adds no argument.
# shellcheck disable=SC2086
python -m sciencebeam_gym.preprocess.preprocessing_pipeline \
  --data-path="$DATA_SOURCE_PATH" \
  --pdf-xml-file-list="$FILE_LIST_PATH/file-list-train.tsv" \
  --limit="$TRAIN_FILE_LIMIT" \
  --output-path="${TRAIN_PREPROC_PATH}/" \
  --annotation-evaluation-csv="annotation-evaluation.tsv" \
  --xml-mapping-path=annot-xml-full.conf \
  --min-annotation-percentage="$MIN_ANNOTATION_PERCENTAGE" \
  --save-svg \
  --save-tfrecords \
  --image-width "${IMAGE_WIDTH}" \
  --image-height "${IMAGE_HEIGHT}" \
  --pages="$PAGE_RANGE" \
  --num_workers 1 \
  $CLOUD_ARGS
# Preprocess the validation set: reads file-list-validation.tsv from
# FILE_LIST_PATH and writes below EVAL_PREPROC_PATH, capped at EVAL_FILE_LIMIT.
# Every expansion is quoted so a value containing whitespace or glob
# characters is passed as a single argument. CLOUD_ARGS is the one exception:
# it is left unquoted on purpose so that an empty value adds no argument.
# shellcheck disable=SC2086
python -m sciencebeam_gym.preprocess.preprocessing_pipeline \
  --data-path="$DATA_SOURCE_PATH" \
  --pdf-xml-file-list="$FILE_LIST_PATH/file-list-validation.tsv" \
  --limit="$EVAL_FILE_LIMIT" \
  --output-path="${EVAL_PREPROC_PATH}/" \
  --annotation-evaluation-csv="annotation-evaluation.tsv" \
  --xml-mapping-path=annot-xml-full.conf \
  --min-annotation-percentage="$MIN_ANNOTATION_PERCENTAGE" \
  --save-svg \
  --save-tfrecords \
  --image-width "${IMAGE_WIDTH}" \
  --image-height "${IMAGE_HEIGHT}" \
  --pages="$PAGE_RANGE" \
  --num_workers 1 \
  $CLOUD_ARGS
# Optional third run, performed only when QUALITATIVE_PREPROC_PATH is set to a
# non-empty value: processes the validation file list again, capped at
# QUALITATIVE_FILE_LIMIT, and writes below QUALITATIVE_PREPROC_PATH.
if [ -n "${QUALITATIVE_PREPROC_PATH:-}" ]; then
  # Every expansion is quoted so a value containing whitespace or glob
  # characters is passed as a single argument. CLOUD_ARGS is the one
  # exception: it is left unquoted on purpose so that an empty value adds no
  # argument.
  # shellcheck disable=SC2086
  python -m sciencebeam_gym.preprocess.preprocessing_pipeline \
    --data-path="$DATA_SOURCE_PATH" \
    --pdf-xml-file-list="$FILE_LIST_PATH/file-list-validation.tsv" \
    --limit="$QUALITATIVE_FILE_LIMIT" \
    --output-path="${QUALITATIVE_PREPROC_PATH}/" \
    --annotation-evaluation-csv="annotation-evaluation.tsv" \
    --xml-mapping-path=annot-xml-full.conf \
    --min-annotation-percentage="$MIN_ANNOTATION_PERCENTAGE" \
    --save-svg \
    --save-tfrecords \
    --image-width "${IMAGE_WIDTH}" \
    --image-height "${IMAGE_HEIGHT}" \
    --pages="$PAGE_RANGE" \
    --num_workers 1 \
    $CLOUD_ARGS
fi