From 40ff412896fc3b5077803c759beaf92f3e8970cb Mon Sep 17 00:00:00 2001
From: Daniel Ecer <de-code@users.noreply.github.com>
Date: Mon, 10 Jun 2019 22:35:09 +0100
Subject: [PATCH] minor model training logging improvement (#116)

* minor model training logging improvement

* make port configurable

* added autocut-start-cloud
---
NOTE(review): the line structure of this patch was restored from a copy whose
newlines had been collapsed. Hunk line counts match the @@ headers, but leading
whitespace (tabs in Makefile recipes, spaces in Python) and blank context lines
are reconstructed - confirm against the target tree before applying.

 Makefile                                         | 12 +++++++++++-
 .../models/text/crf/autocut_training_pipeline.py |  9 +++++----
 2 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/Makefile b/Makefile
index e8fbb2a..05ab7a7 100644
--- a/Makefile
+++ b/Makefile
@@ -4,6 +4,7 @@ DOCKER_COMPOSE = $(DOCKER_COMPOSE_DEV)
 
 
 PYTEST_ARGS =
+PORT = 8080
 
 
 .PHONY: all build
@@ -56,7 +57,16 @@ autocut-start: .require-AUTOCUT_MODEL_PATH build
 	$(DOCKER_COMPOSE) run --rm \
 		-v "$(AUTOCUT_MODEL_PATH):/tmp/model.pkl" \
 		-e "AUTOCUT_MODEL_PATH=/tmp/model.pkl" \
-		-p 8080:8080 \
+		-p $(PORT):8080 \
+		sciencebeam-gym \
+		start-autocut.sh
+
+
+autocut-start-cloud: .require-AUTOCUT_MODEL_PATH build
+	$(DOCKER_COMPOSE) run --rm \
+		-v $$HOME/.config/gcloud:/root/.config/gcloud \
+		-e "AUTOCUT_MODEL_PATH=$(AUTOCUT_MODEL_PATH)" \
+		-p $(PORT):8080 \
 		sciencebeam-gym \
 		start-autocut.sh
 
diff --git a/sciencebeam_gym/models/text/crf/autocut_training_pipeline.py b/sciencebeam_gym/models/text/crf/autocut_training_pipeline.py
index b75948b..caf902e 100644
--- a/sciencebeam_gym/models/text/crf/autocut_training_pipeline.py
+++ b/sciencebeam_gym/models/text/crf/autocut_training_pipeline.py
@@ -113,14 +113,15 @@ def run(opt):
         opt.input_file_list, opt.input_file_column, opt.input_xpath, opt.limit,
         opt.namespaces
     )
+    LOGGER.info('loaded %s input values (e.g. %s)', len(input_values), input_values[:10])
     target_values = _load_values(
         opt.target_file_list, opt.target_file_column, opt.target_xpath, opt.limit,
         opt.namespaces
     )
-    save_model(
-        opt.output_path,
-        train_model(input_values, target_values)
-    )
+    LOGGER.info('loaded %s target values (e.g. %s)', len(target_values), target_values[:10])
+    serialized_model = train_model(input_values, target_values)
+    LOGGER.info('model size: {:,} bytes'.format(len(serialized_model)))
+    save_model(opt.output_path, serialized_model)
 
 
 def main(argv=None):
-- 
GitLab