Skip to content
Snippets Groups Projects
Unverified Commit 1dae7095 authored by Daniel Ecer's avatar Daniel Ecer Committed by GitHub
Browse files

make source file list relative to source base path if it is not an absolute path (#32)

parent 67c0e308
No related branches found
No related tags found
No related merge requests found
......@@ -8,7 +8,8 @@ from sciencebeam_gym.utils.file_list import (
from sciencebeam_gym.preprocess.preprocessing_utils import (
get_or_validate_base_path,
get_output_file
get_output_file,
join_if_relative_path
)
from sciencebeam_gym.preprocess.check_file_list import (
......@@ -85,7 +86,10 @@ def get_output_file_list(file_list, source_base_path, output_base_path, output_f
def run(opt):
source_file_list = load_file_list(
opt.source_file_list,
join_if_relative_path(
opt.source_base_path,
opt.source_file_list
),
column=opt.source_file_column,
limit=opt.limit
)
......
import os
from mock import patch, ANY
import pytest
......@@ -21,6 +22,28 @@ BASE_SOURCE_PATH = '/source'
FILE_1 = BASE_SOURCE_PATH + '/file1'
FILE_2 = BASE_SOURCE_PATH + '/file2'
@pytest.fixture(name='load_file_list_mock')
def _load_file_list():
with patch.object(get_output_files, 'load_file_list') as m:
m.return_value = [FILE_1, FILE_2]
yield m
@pytest.fixture(name='get_output_file_list_mock')
def _get_output_file_list():
with patch.object(get_output_files, 'get_output_file_list') as m:
yield m
@pytest.fixture(name='save_file_list_mock')
def _save_file_list():
with patch.object(get_output_files, 'save_file_list') as m:
yield m
@pytest.fixture(name='check_files_and_report_result_mock')
def _check_files_and_report_result():
with patch.object(get_output_files, 'check_files_and_report_result') as m:
yield m
class TestGetOutputFileList(object):
def test_should_return_output_file_with_path_and_change_ext(self):
assert get_output_file_list(
......@@ -30,86 +53,96 @@ class TestGetOutputFileList(object):
'.xml'
) == ['/output/path/file.xml']
@pytest.mark.usefixtures("load_file_list_mock", "get_output_file_list_mock", "save_file_list_mock")
class TestRun(object):
def test_should_pass_around_parameters(self):
m = get_output_files
def test_should_pass_around_parameters(
self,
load_file_list_mock,
get_output_file_list_mock,
save_file_list_mock):
load_file_list_mock.return_value = [FILE_1, FILE_2]
opt = parse_args(SOME_ARGV)
run(opt)
load_file_list_mock.assert_called_with(
opt.source_file_list,
column=opt.source_file_column,
limit=opt.limit
)
get_output_file_list_mock.assert_called_with(
load_file_list_mock.return_value,
BASE_SOURCE_PATH,
opt.output_base_path,
opt.output_file_suffix
)
save_file_list_mock.assert_called_with(
opt.output_file_list,
get_output_file_list_mock.return_value,
column=opt.source_file_column
)
def test_should_make_file_list_absolute_if_it_is_relative(
self,
load_file_list_mock):
opt = parse_args(SOME_ARGV)
with patch.object(m, 'load_file_list') as load_file_list:
with patch.object(m, 'get_output_file_list') as get_output_file_list_mock:
with patch.object(m, 'save_file_list') as save_file_list:
load_file_list.return_value = [FILE_1, FILE_2]
run(opt)
load_file_list.assert_called_with(
opt.source_file_list,
column=opt.source_file_column,
limit=opt.limit
)
get_output_file_list_mock.assert_called_with(
load_file_list.return_value,
BASE_SOURCE_PATH,
opt.output_base_path,
opt.output_file_suffix
)
save_file_list.assert_called_with(
opt.output_file_list,
get_output_file_list_mock.return_value,
column=opt.source_file_column
)
opt.source_base_path = BASE_SOURCE_PATH
opt.source_file_list = 'source.tsv'
run(opt)
load_file_list_mock.assert_called_with(
os.path.join(opt.source_base_path, opt.source_file_list),
column=opt.source_file_column,
limit=opt.limit
)
def test_should_raise_error_if_source_path_is_invalid(self):
m = get_output_files
opt = parse_args(SOME_ARGV)
opt.source_base_path = '/other/path'
with patch.object(m, 'load_file_list') as load_file_list:
with patch.object(m, 'get_output_file_list'):
with patch.object(m, 'save_file_list'):
with pytest.raises(AssertionError):
load_file_list.return_value = [FILE_1, FILE_2]
run(opt)
def test_should_use_passed_in_source_path_if_valid(self):
m = get_output_files
with pytest.raises(AssertionError):
run(opt)
def test_should_use_passed_in_source_path_if_valid(
self,
get_output_file_list_mock,
load_file_list_mock):
opt = parse_args(SOME_ARGV)
opt.source_base_path = '/base'
with patch.object(m, 'load_file_list') as load_file_list:
with patch.object(m, 'get_output_file_list') as get_output_file_list_mock:
with patch.object(m, 'save_file_list'):
load_file_list.return_value = ['/base/source/file1', '/base/source/file2']
run(opt)
get_output_file_list_mock.assert_called_with(
ANY,
opt.source_base_path,
ANY,
ANY
)
def test_should_check_file_list_if_enabled(self):
m = get_output_files
load_file_list_mock.return_value = ['/base/source/file1', '/base/source/file2']
run(opt)
get_output_file_list_mock.assert_called_with(
ANY,
opt.source_base_path,
ANY,
ANY
)
def test_should_check_file_list_if_enabled(
self,
get_output_file_list_mock,
check_files_and_report_result_mock):
opt = parse_args(SOME_ARGV)
opt.check = True
with patch.object(m, 'load_file_list') as load_file_list:
with patch.object(m, 'get_output_file_list') as get_output_file_list_mock:
with patch.object(m, 'save_file_list'):
with patch.object(m, 'check_files_and_report_result') as check_files_and_report_result:
load_file_list.return_value = [FILE_1, FILE_2]
run(opt)
check_files_and_report_result.assert_called_with(
get_output_file_list_mock.return_value
)
def test_should_limit_files_to_check(self):
m = get_output_files
run(opt)
check_files_and_report_result_mock.assert_called_with(
get_output_file_list_mock.return_value
)
def test_should_limit_files_to_check(
self,
load_file_list_mock,
get_output_file_list_mock,
check_files_and_report_result_mock):
opt = parse_args(SOME_ARGV)
opt.check = True
opt.check_limit = 1
with patch.object(m, 'load_file_list') as load_file_list:
with patch.object(m, 'get_output_file_list') as get_output_file_list_mock:
with patch.object(m, 'save_file_list'):
with patch.object(m, 'check_files_and_report_result') as check_files_and_report_result:
load_file_list.return_value = [FILE_1, FILE_2]
run(opt)
check_files_and_report_result.assert_called_with(
get_output_file_list_mock.return_value[:opt.check_limit]
)
load_file_list_mock.return_value = [FILE_1, FILE_2]
run(opt)
check_files_and_report_result_mock.assert_called_with(
get_output_file_list_mock.return_value[:opt.check_limit]
)
class TestMain(object):
def test_should_parse_args_and_call_run(self):
......
......@@ -222,7 +222,11 @@ def is_relative_path(path):
return not path.startswith('/') and '://' not in path
def join_if_relative_path(base_path, path):
return FileSystems.join(base_path, path) if is_relative_path(path) else path
return (
FileSystems.join(base_path, path)
if base_path and is_relative_path(path)
else path
)
def change_ext(path, old_ext, new_ext):
if old_ext is None:
......
......@@ -12,11 +12,12 @@ from sciencebeam_gym.preprocess.preprocessing_utils import (
svg_page_to_blockified_png_bytes,
group_file_pairs_by_parent_directory_or_name,
convert_pdf_bytes_to_lxml,
join_if_relative_path,
change_ext,
base_path_for_file_list,
get_or_validate_base_path,
get_output_file,
parse_page_range
parse_page_range,
)
PROCESSING_UTILS = 'sciencebeam_gym.preprocess.preprocessing_utils'
......@@ -115,6 +116,16 @@ class TestConvertPdfBytesToLxml(object):
)
assert lxml_content == LXML_CONTENT_1
class TestJoinIfRelativePath(object):
def test_should_return_path_if_base_path_is_none(self):
assert join_if_relative_path(None, 'file') == 'file'
def test_should_return_path_if_not_relative(self):
assert join_if_relative_path('/parent', '/other/file') == '/other/file'
def test_should_return_joined_path_if_relative(self):
assert join_if_relative_path('/parent', 'file') == '/parent/file'
class TestChangeExt(object):
def test_should_replace_simple_ext_with_simple_ext(self):
assert change_ext('file.pdf', None, '.xml') == 'file.xml'
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment