diff --git a/scripts/copy-raw-training-data-to-file-structure.sh b/scripts/copy-raw-training-data-to-file-structure.sh
index 7c2cf088cb3824686855a1a9608a86c2a4d1f5e2..73f110023f5a4dd7fdf2bfc0065d7862e268cada 100755
--- a/scripts/copy-raw-training-data-to-file-structure.sh
+++ b/scripts/copy-raw-training-data-to-file-structure.sh
@@ -20,22 +20,45 @@ fi
 echo "RAW_TRAINING_DATA_DIR=${RAW_TRAINING_DATA_DIR}"
 echo "DATASET_DIR=${DATASET_DIR}"
 
-header_headers_dir="$DATASET_DIR/header/corpus/headers"
-header_tei_dir="$DATASET_DIR/header/corpus/tei-raw"
+mkdir_clean() {  # usage: mkdir_clean DIR...; create each DIR if missing, then delete the non-hidden files directly inside it
+    local dir; for dir in "$@"; do  # 'local' keeps the loop variable out of the script's global scope
+        echo "creating or cleaning directory: ${dir}"
+        mkdir -p -- "${dir}"
+        rm -f -- "${dir:?}"/* || true  # ':?' aborts rather than expanding to /* if dir is empty; '|| true' keeps cleanup best-effort (rm without -r still fails on sub-directories)
+    done
+}
 
-mkdir -p "$header_headers_dir"
-mkdir -p "$header_tei_dir"
+copy_segmentation_files() {  # copy *.segmentation and *.segmentation.tei.xml from $RAW_TRAINING_DATA_DIR into $DATASET_DIR/segmentation/corpus
+    local segmentation_raw_dir="$DATASET_DIR/segmentation/corpus/raw"  # 'local' so the path does not leak into global scope
+    local segmentation_tei_dir="$DATASET_DIR/segmentation/corpus/tei-raw"
+    mkdir_clean "$segmentation_raw_dir" "${segmentation_tei_dir}"
 
-rm "${header_headers_dir}"/* || true
-rm "${header_tei_dir}"/* || true
+    echo "copying files from $RAW_TRAINING_DATA_DIR to $segmentation_raw_dir"
+    cp -a "$RAW_TRAINING_DATA_DIR/"*.segmentation "$segmentation_raw_dir"
+    echo "renaming files $segmentation_raw_dir"
+    rename 's#\.training\.#\.#' "$segmentation_raw_dir"/*  # Perl-style expression: rewrites ".training." in each file name to "."
 
-echo "copying files from $RAW_TRAINING_DATA_DIR to $header_headers_dir"
-cp -a "$RAW_TRAINING_DATA_DIR/"*.header "$header_headers_dir"
-echo "renaming files $header_headers_dir"
-rename 's#\.training\.#\.#' "$header_headers_dir"/*
+    echo "copying files from $RAW_TRAINING_DATA_DIR to $segmentation_tei_dir"
+    cp -a "$RAW_TRAINING_DATA_DIR/"*.segmentation.tei.xml "$segmentation_tei_dir"
+    rename 's#\.training\.#\.#' "$segmentation_tei_dir"/*  # same ".training." removal for the TEI files
+}
 
-echo "copying files from $RAW_TRAINING_DATA_DIR to $header_tei_dir"
-cp -a "$RAW_TRAINING_DATA_DIR/"*.header.tei.xml "$header_tei_dir"
-rename 's#\.training\.#\.#' "$header_tei_dir"/*
+copy_header_files() {  # copy *.header and *.header.tei.xml from $RAW_TRAINING_DATA_DIR into $DATASET_DIR/header/corpus
+    local header_headers_dir="$DATASET_DIR/header/corpus/headers"  # 'local' so the path does not leak into global scope
+    local header_tei_dir="$DATASET_DIR/header/corpus/tei-raw"
+    mkdir_clean "$header_headers_dir" "${header_tei_dir}"
+
+    echo "copying files from $RAW_TRAINING_DATA_DIR to $header_headers_dir"
+    cp -a "$RAW_TRAINING_DATA_DIR/"*.header "$header_headers_dir"
+    echo "renaming files $header_headers_dir"
+    rename 's#\.training\.#\.#' "$header_headers_dir"/*  # Perl-style expression: rewrites ".training." in each file name to "."
+
+    echo "copying files from $RAW_TRAINING_DATA_DIR to $header_tei_dir"
+    cp -a "$RAW_TRAINING_DATA_DIR/"*.header.tei.xml "$header_tei_dir"
+    rename 's#\.training\.#\.#' "$header_tei_dir"/*  # same ".training." removal for the TEI files
+}
+
+copy_segmentation_files  # fills $DATASET_DIR/segmentation/corpus/raw and .../tei-raw
+copy_header_files  # fills $DATASET_DIR/header/corpus/headers and .../tei-raw
 
 ls -l --recursive "${DATASET_DIR}"
diff --git a/scripts/upload-dataset.sh b/scripts/upload-dataset.sh
index 268881e72ff8aa458a76c2a4aff83f44c53acc32..860b3074cc63bf431eda27fe4c1e12fd09aa3534 100755
--- a/scripts/upload-dataset.sh
+++ b/scripts/upload-dataset.sh
@@ -37,6 +37,10 @@ echo "CLOUD_DATATSET_PATH=${CLOUD_DATATSET_PATH}"
 
 
 sub_dirs=(
+    "segmentation/corpus/raw"
+    "segmentation/corpus/tei"
+    "segmentation/corpus/tei-raw"
+    "segmentation/corpus/tei-auto"
     "header/corpus/headers"
     "header/corpus/tei"
     "header/corpus/tei-raw"