diff --git a/Makefile b/Makefile
index 1935aacb592692414d6398a7ec446c794a65d9b9..794aa8f0a55ff4eae03d5b84deaa84daf10990c3 100644
--- a/Makefile
+++ b/Makefile
@@ -22,6 +22,7 @@ TRAIN_ARGS =
 
 USER_AGENT = Dummy/user-agent
 SAMPLE_PDF_URL = https://cdn.elifesciences.org/articles/32671/elife-32671-v2.pdf
+SAMPLE_PDF_URL_2 = https://www.biorxiv.org/content/10.1101/452433v1.full.pdf
 
 # Specify the location where to copy the model to
 CLOUD_MODELS_PATH =
@@ -110,6 +111,8 @@ get-example-data:
 		mkdir -p "$(PDF_DATA_DIR)" \
 		&& curl --fail --show-error --connect-timeout 60 --user-agent "$(USER_AGENT)" --location \
 			"$(SAMPLE_PDF_URL)" --silent -o "$(PDF_DATA_DIR)/sample.pdf" \
+		&& curl --fail --show-error --connect-timeout 60 --user-agent "$(USER_AGENT)" --location \
+			"$(SAMPLE_PDF_URL_2)" --silent -o "$(PDF_DATA_DIR)/sample_2.pdf" \
 		&& ls -l "$(PDF_DATA_DIR)" \
 		'
 
@@ -255,6 +258,39 @@ upload-figure-model:
 	$(RUN) upload-model.sh "$(CLOUD_MODELS_PATH)" "figure"
 
 
+copy-raw-table-training-data-to-tei:  # copy table/corpus/tei-raw/*.xml into table/corpus/tei, creating that directory first
+	$(RUN) bash -c '\
+		mkdir -p "$(DATASET_DIR)/table/corpus/tei" && \
+		cp "$(DATASET_DIR)/table/corpus/tei-raw/"*.xml "$(DATASET_DIR)/table/corpus/tei/" \
+		'
+
+
+train-table-model-with-dataset:  # run train-model.sh for the "table" model with --dataset set to DATASET_DIR, plus TRAIN_ARGS
+	$(RUN) train-model.sh \
+		--dataset "$(DATASET_DIR)" \
+		--model table \
+		$(TRAIN_ARGS)
+
+
+train-table-model-with-default-dataset:  # run train-model.sh for the "table" model with --use-default-dataset, plus TRAIN_ARGS
+	$(RUN) train-model.sh \
+		--use-default-dataset \
+		--model table \
+		$(TRAIN_ARGS)
+
+
+train-table-model-with-dataset-and-default-dataset:  # run train-model.sh for the "table" model with both --dataset and --use-default-dataset
+	$(RUN) train-model.sh \
+		--dataset "$(DATASET_DIR)" \
+		--use-default-dataset \
+		--model table \
+		$(TRAIN_ARGS)
+
+
+upload-table-model:  # run upload-model.sh with CLOUD_MODELS_PATH and the model name "table"
+	$(RUN) upload-model.sh "$(CLOUD_MODELS_PATH)" "table"
+
+
 copy-raw-reference-segmenter-training-data-to-tei:
 	$(RUN) bash -c '\
 		mkdir -p "$(DATASET_DIR)/reference-segmenter/corpus/tei" && \
diff --git a/scripts/copy-raw-training-data-to-file-structure.sh b/scripts/copy-raw-training-data-to-file-structure.sh
index ef8a49f86c98fbb3edfe99efdf29a185123dcbb9..c9fcad2eb1991ff2ada9aaa1d77f31bf0e4e8d69 100755
--- a/scripts/copy-raw-training-data-to-file-structure.sh
+++ b/scripts/copy-raw-training-data-to-file-structure.sh
@@ -20,138 +20,151 @@ fi
 echo "RAW_TRAINING_DATA_DIR=${RAW_TRAINING_DATA_DIR}"
 echo "DATASET_DIR=${DATASET_DIR}"
 
+has_matching_files() {  # usage: has_matching_files <dir> <name-glob>; exit status 0 on a match, non-zero otherwise
+    local dir="$1"  # directory to search; -maxdepth 1 keeps find out of its subdirectories
+    local pattern="$2"  # glob handed to find -name
+    if test -n "$(find "${dir}" -maxdepth 1 -type f -name "${pattern}" -print -quit)"; then
+        # find printed a path, so at least one regular file matches (-quit stops after the first hit)
+        return  # bare return passes on the status of the test that just succeeded
+    fi
+    # find printed nothing: signal "no match" through the status of the last command
+    false
+}
+
 mkdir_clean() {
     for dir in "$@"; do 
         echo "creating or cleaning directory: ${dir}"
-        mkdir -p "${dir}"
-        rm "${dir}"/* || true
+        if [ -d "${dir}" ]; then  # directory already exists: clean it
+            if has_matching_files "${dir}" "*"; then  # only call rm when a regular file is present
+                rm "${dir}"/* || true  # "|| true" keeps a failing rm from failing the function
+            fi
+        else  # directory is missing: create it, including missing parents
+            mkdir -p "${dir}"
+        fi
     done
 }
 
+copy_and_rename_tei_and_raw_training_files() {  # copy one model's TEI and raw files out of $RAW_TRAINING_DATA_DIR
+    local tei_dir="$1"  # destination directory for the files matching tei_pattern
+    local tei_pattern="$2"  # file-name glob selecting the TEI files
+    local raw_dir="$3"  # destination directory for the files matching raw_pattern
+    local raw_pattern="$4"  # file-name glob selecting the raw files
+    # Skip this model when no file matches tei_pattern; raw_pattern is not checked here.
+    if ! has_matching_files "$RAW_TRAINING_DATA_DIR" "${tei_pattern}"; then
+        echo "no ${tei_pattern} data"
+        return
+    fi
+
+    mkdir_clean "${tei_dir}" "${raw_dir}"
+
+    echo "copying files from $RAW_TRAINING_DATA_DIR (${raw_pattern}) to $raw_dir"
+    cp -a "$RAW_TRAINING_DATA_DIR/"${raw_pattern} "$raw_dir"  # pattern left unquoted so the shell expands the glob
+    rename 's#\.training\.#\.#' "$raw_dir"/*  # substitution expression: first ".training." in each name becomes "."
+
+    echo "copying files from $RAW_TRAINING_DATA_DIR (${tei_pattern}) to $tei_dir"
+    cp -a "$RAW_TRAINING_DATA_DIR/"${tei_pattern} "$tei_dir"  # pattern left unquoted so the shell expands the glob
+    rename 's#\.training\.#\.#' "$tei_dir"/*  # same ".training." substitution for the TEI copies
+}
+
 copy_segmentation_files() {
-    segmentation_raw_dir="$DATASET_DIR/segmentation/corpus/raw"
-    segmentation_tei_dir="$DATASET_DIR/segmentation/corpus/tei-raw"
-    mkdir_clean "$segmentation_raw_dir" "${segmentation_tei_dir}"
-
-    echo "copying files from $RAW_TRAINING_DATA_DIR to $segmentation_raw_dir"
-    cp -a "$RAW_TRAINING_DATA_DIR/"*.segmentation "$segmentation_raw_dir"
-    echo "renaming files $segmentation_raw_dir"
-    rename 's#\.training\.#\.#' "$segmentation_raw_dir"/*
-
-    echo "copying files from $RAW_TRAINING_DATA_DIR to $segmentation_tei_dir"
-    cp -a "$RAW_TRAINING_DATA_DIR/"*.segmentation.tei.xml "$segmentation_tei_dir"
-    rename 's#\.training\.#\.#' "$segmentation_tei_dir"/*
+    copy_and_rename_tei_and_raw_training_files \
+        "$DATASET_DIR/segmentation/corpus/tei-raw" \
+        "*.segmentation.tei.xml" \
+        "$DATASET_DIR/segmentation/corpus/raw" \
+        "*.segmentation"  # argument order: TEI dir, TEI glob, raw dir, raw glob
 }
 
 copy_header_files() {
-    header_headers_dir="$DATASET_DIR/header/corpus/headers"
-    header_tei_dir="$DATASET_DIR/header/corpus/tei-raw"
-    mkdir_clean "$header_headers_dir" "${header_tei_dir}"
-
-    echo "copying files from $RAW_TRAINING_DATA_DIR to $header_headers_dir"
-    cp -a "$RAW_TRAINING_DATA_DIR/"*.header "$header_headers_dir"
-    echo "renaming files $header_headers_dir"
-    rename 's#\.training\.#\.#' "$header_headers_dir"/*
-
-    echo "copying files from $RAW_TRAINING_DATA_DIR to $header_tei_dir"
-    cp -a "$RAW_TRAINING_DATA_DIR/"*.header.tei.xml "$header_tei_dir"
-    rename 's#\.training\.#\.#' "$header_tei_dir"/*
+    copy_and_rename_tei_and_raw_training_files \
+        "$DATASET_DIR/header/corpus/tei-raw" \
+        "*.header.tei.xml" \
+        "$DATASET_DIR/header/corpus/headers" \
+        "*.header"  # argument order: TEI dir, TEI glob, raw dir ("headers" for this model), raw glob
 }
 
 copy_fulltext_files() {
-    fulltext_raw_dir="$DATASET_DIR/fulltext/corpus/raw"
-    fulltext_tei_dir="$DATASET_DIR/fulltext/corpus/tei-raw"
-    mkdir_clean "$fulltext_raw_dir" "${fulltext_tei_dir}"
-
-    echo "copying files from $RAW_TRAINING_DATA_DIR to $fulltext_raw_dir"
-    cp -a "$RAW_TRAINING_DATA_DIR/"*.fulltext "$fulltext_raw_dir"
-    echo "renaming files $fulltext_raw_dir"
-    rename 's#\.training\.#\.#' "$fulltext_raw_dir"/*
-
-    echo "copying files from $RAW_TRAINING_DATA_DIR to $fulltext_tei_dir"
-    cp -a "$RAW_TRAINING_DATA_DIR/"*.fulltext.tei.xml "$fulltext_tei_dir"
-    rename 's#\.training\.#\.#' "$fulltext_tei_dir"/*
+    copy_and_rename_tei_and_raw_training_files \
+        "$DATASET_DIR/fulltext/corpus/tei-raw" \
+        "*.fulltext.tei.xml" \
+        "$DATASET_DIR/fulltext/corpus/raw" \
+        "*.fulltext"  # argument order: TEI dir, TEI glob, raw dir, raw glob
 }
 
 copy_figure_files() {
-    figure_raw_dir="$DATASET_DIR/figure/corpus/raw"
-    figure_tei_dir="$DATASET_DIR/figure/corpus/tei-raw"
-    mkdir_clean "$figure_raw_dir" "${figure_tei_dir}"
-
-    echo "copying files from $RAW_TRAINING_DATA_DIR to $figure_raw_dir"
-    cp -a "$RAW_TRAINING_DATA_DIR/"*.figure "$figure_raw_dir"
-    echo "renaming files $figure_raw_dir"
-    rename 's#\.training\.#\.#' "$figure_raw_dir"/*
-
-    echo "copying files from $RAW_TRAINING_DATA_DIR to $figure_tei_dir"
-    cp -a "$RAW_TRAINING_DATA_DIR/"*.figure.tei.xml "$figure_tei_dir"
-    rename 's#\.training\.#\.#' "$figure_tei_dir"/*
+    copy_and_rename_tei_and_raw_training_files \
+        "$DATASET_DIR/figure/corpus/tei-raw" \
+        "*.figure.tei.xml" \
+        "$DATASET_DIR/figure/corpus/raw" \
+        "*.figure"  # argument order: TEI dir, TEI glob, raw dir, raw glob
+}
+
+copy_table_files() {  # table model: TEI files go to table/corpus/tei-raw, raw files to table/corpus/raw
+    copy_and_rename_tei_and_raw_training_files \
+        "$DATASET_DIR/table/corpus/tei-raw" \
+        "*.table.tei.xml" \
+        "$DATASET_DIR/table/corpus/raw" \
+        "*.table"  # argument order: TEI dir, TEI glob, raw dir, raw glob
 }
 
 copy_reference_segmenter_files() {
-    reference_segmenter_raw_dir="$DATASET_DIR/reference-segmenter/corpus/raw"
-    reference_segmenter_tei_dir="$DATASET_DIR/reference-segmenter/corpus/tei-raw"
-    mkdir_clean "$reference_segmenter_raw_dir" "${reference_segmenter_tei_dir}"
-
-    echo "copying files from $RAW_TRAINING_DATA_DIR to $reference_segmenter_raw_dir"
-    cp -a "$RAW_TRAINING_DATA_DIR/"*.referenceSegmenter "$reference_segmenter_raw_dir"
-    echo "renaming files $reference_segmenter_raw_dir"
-    rename 's#\.training\.#\.#' "$reference_segmenter_raw_dir"/*
-
-    echo "copying files from $RAW_TRAINING_DATA_DIR to $reference_segmenter_tei_dir"
-    cp -a "$RAW_TRAINING_DATA_DIR/"*.referenceSegmenter.tei.xml "$reference_segmenter_tei_dir"
-    rename 's#\.training\.#\.#' "$reference_segmenter_tei_dir"/*
+    copy_and_rename_tei_and_raw_training_files \
+        "$DATASET_DIR/reference-segmenter/corpus/tei-raw" \
+        "*.referenceSegmenter.tei.xml" \
+        "$DATASET_DIR/reference-segmenter/corpus/raw" \
+        "*.referenceSegmenter"  # argument order: TEI dir, TEI glob, raw dir, raw glob
 }
 
-copy_affiliation_address_files() {
-    affiliation_address_tei_dir="$DATASET_DIR/affiliation-address/corpus-raw"
-    mkdir_clean "${affiliation_address_tei_dir}"
+copy_and_rename_tei_only_training_files() {  # copy the files matching one glob out of $RAW_TRAINING_DATA_DIR
+    local tei_dir="$1"  # destination directory
+    local pattern="$2"  # file-name glob selecting the files to copy
+    # Skip when nothing in the source directory matches the glob.
+    if ! has_matching_files "$RAW_TRAINING_DATA_DIR" "${pattern}"; then
+        echo "no ${pattern} data"
+        return
+    fi
+
+    mkdir_clean "${tei_dir}"
 
-    echo "copying files from $RAW_TRAINING_DATA_DIR to $affiliation_address_tei_dir"
-    cp -a "$RAW_TRAINING_DATA_DIR/"*.references.tei.xml "$affiliation_address_tei_dir"
-    rename 's#\.training\.header\.#\.#' "$affiliation_address_tei_dir"/*
+    echo "copying files from $RAW_TRAINING_DATA_DIR (${pattern}) to $tei_dir"
+    cp -a "$RAW_TRAINING_DATA_DIR/"${pattern} "$tei_dir"  # pattern left unquoted so the shell expands the glob
+    rename 's#\.training\.#\.#' "$tei_dir"/*  # substitution expression: first ".training." in each name becomes "."
 }
 
-copy_citation_files() {
-    citation_tei_dir="$DATASET_DIR/citation/corpus-raw"
-    mkdir_clean "${citation_tei_dir}"
+copy_affiliation_address_files() {  # arguments passed below: destination dir, file-name glob
+    copy_and_rename_tei_only_training_files \
+        "$DATASET_DIR/affiliation-address/corpus-raw" \
+        "*.affiliation.tei.xml"  # NOTE(review): the helper strips only ".training."; any ".header." part of a name is kept - confirm
+}
 
-    echo "copying files from $RAW_TRAINING_DATA_DIR to $citation_tei_dir"
-    cp -a "$RAW_TRAINING_DATA_DIR/"*.references.tei.xml "$citation_tei_dir"
-    rename 's#\.training\.#\.#' "$citation_tei_dir"/*
+copy_citation_files() {  # arguments passed below: destination dir, file-name glob
+    copy_and_rename_tei_only_training_files \
+        "$DATASET_DIR/citation/corpus-raw" \
+        "*.references.tei.xml"
 }
 
 copy_name_citation_files() {
-    name_citation_tei_dir="$DATASET_DIR/name/citation/corpus-raw"
-    mkdir_clean "${name_citation_tei_dir}"
-
-    echo "copying files from $RAW_TRAINING_DATA_DIR to $name_citation_tei_dir"
-    cp -a "$RAW_TRAINING_DATA_DIR/"*.references.authors.tei.xml "$name_citation_tei_dir"
-    rename 's#\.training\.#\.#' "$name_citation_tei_dir"/*
+    copy_and_rename_tei_only_training_files \
+        "$DATASET_DIR/name/citation/corpus-raw" \
+        "*.references.authors.tei.xml"  # argument order: destination dir, file-name glob
 }
 
 copy_name_header_files() {
-    name_header_tei_dir="$DATASET_DIR/name/header/corpus-raw"
-    mkdir_clean "${name_header_tei_dir}"
-
-    echo "copying files from $RAW_TRAINING_DATA_DIR to $name_header_tei_dir"
-    cp -a "$RAW_TRAINING_DATA_DIR/"*.header.authors.tei.xml "$name_header_tei_dir"
-    rename 's#\.training\.#\.#' "$name_header_tei_dir"/*
+    copy_and_rename_tei_only_training_files \
+        "$DATASET_DIR/name/header/corpus-raw" \
+        "*.header.authors.tei.xml"  # argument order: destination dir, file-name glob
 }
 
 copy_date_files() {
-    date_tei_dir="$DATASET_DIR/date/corpus-raw"
-    mkdir_clean "${date_tei_dir}"
-
-    echo "copying files from $RAW_TRAINING_DATA_DIR to $date_tei_dir"
-    cp -a "$RAW_TRAINING_DATA_DIR/"*.header.date.xml "$date_tei_dir"
-    rename 's#\.training\.header\.#\.#' "$date_tei_dir"/*
+    copy_and_rename_tei_only_training_files \
+        "$DATASET_DIR/date/corpus-raw" \
+        "*.header.date.xml"  # NOTE(review): the helper strips only ".training."; the ".header." part of these names is now kept - confirm
 }
 
 copy_segmentation_files
 copy_header_files
 copy_fulltext_files
 copy_figure_files
+copy_table_files
 copy_reference_segmenter_files
 copy_affiliation_address_files
 copy_citation_files
diff --git a/scripts/train-model.sh b/scripts/train-model.sh
index a4d8c5ff1ab6aa1b143ef3416d8c670ab5fba215..5080f3840d5583ef224e8957b46084c692e16249 100755
--- a/scripts/train-model.sh
+++ b/scripts/train-model.sh
@@ -80,6 +80,11 @@ elif [ "${MODEL_NAME}" == "figure" ]; then
         "figure/corpus/raw"
         "figure/corpus/tei"
     )
+elif [ "${MODEL_NAME}" == "table" ]; then
+    sub_dirs=(
+        "table/corpus/raw"
+        "table/corpus/tei"
+    )
 elif [ "${MODEL_NAME}" == "reference-segmenter" ]; then
     sub_dirs=(
         "reference-segmenter/corpus/raw"
diff --git a/scripts/upload-dataset.sh b/scripts/upload-dataset.sh
index 7fe9cbe1096bfd0d934535edd4991fc52bf4d1c6..2e8254c6bc8490513bc87c11210c2094450ae554 100755
--- a/scripts/upload-dataset.sh
+++ b/scripts/upload-dataset.sh
@@ -53,6 +53,10 @@ sub_dirs=(
     "figure/corpus/tei"
     "figure/corpus/tei-raw"
     "figure/corpus/tei-auto"
+    "table/corpus/raw"
+    "table/corpus/tei"
+    "table/corpus/tei-raw"
+    "table/corpus/tei-auto"
     "reference-segmenter/corpus/raw"
     "reference-segmenter/corpus/tei"
     "reference-segmenter/corpus/tei-raw"