diff --git a/scripts/download-pdf-file-list-files.sh b/scripts/download-pdf-file-list-files.sh
new file mode 100755
index 0000000000000000000000000000000000000000..31d6982c9e637e74581ed6a29133b10b71ebe3a3
--- /dev/null
+++ b/scripts/download-pdf-file-list-files.sh
@@ -0,0 +1,49 @@
+#!/bin/bash
+#
+# Download the files named in a cloud-hosted file list into a local directory,
+# then decompress any .gz files found in that directory.
+#
+# Usage: download-pdf-file-list-files.sh [CLOUD_FILE_LIST_PATH] [PDF_DIR]
+#
+# Each argument falls back to the environment variable of the same name.
+# The file list is read with `gsutil cat` and piped to `gsutil -m cp -I`,
+# which reads the list of files or objects to copy from stdin.
+
+# pipefail: without it a failing `gsutil cat` would be masked by the exit
+# status of the `gsutil cp` stage of the pipeline.
+set -euo pipefail
+
+# The inner `:-` defaults keep `set -u` from aborting before the checks below
+# can print a useful message.
+CLOUD_FILE_LIST_PATH=${1:-${CLOUD_FILE_LIST_PATH:-}}
+PDF_DIR=${2:-${PDF_DIR:-}}
+
+if [ -z "${CLOUD_FILE_LIST_PATH}" ]; then
+    echo "Error: CLOUD_FILE_LIST_PATH required" >&2
+    exit 1
+fi
+
+if [ -z "${PDF_DIR}" ]; then
+    echo "Error: PDF_DIR required" >&2
+    exit 1
+fi
+
+echo "downloading dataset pdf from ${CLOUD_FILE_LIST_PATH} to ${PDF_DIR}"
+
+mkdir -p "${PDF_DIR}"
+gsutil cat "${CLOUD_FILE_LIST_PATH}" | gsutil -m cp -I "${PDF_DIR}/"
+
+# Expand the glob with nullglob so that "no .gz files" is a clean no-op
+# instead of handing gunzip the literal, unmatched pattern.
+shopt -s nullglob
+gz_files=("${PDF_DIR}/"*.gz)
+shopt -u nullglob
+
+if [ "${#gz_files[@]}" -gt 0 ]; then
+    # Decompression stays best-effort, but a failure is reported rather than
+    # silently discarded.
+    gunzip -f -- "${gz_files[@]}" \
+        || echo "Warning: failed to decompress some .gz files in ${PDF_DIR}" >&2
+fi
+
+ls -l "${PDF_DIR}/"