diff --git a/Makefile b/Makefile index b04e0edfdfaa4b1bdc71c078c7c76c8eb16b7b22..9e1c698f9105ec7dab4d7fcc26f15d425bc8ddc4 100644 --- a/Makefile +++ b/Makefile @@ -27,9 +27,10 @@ venv-create: dev-install: $(PIP) install -r requirements.build.txt - $(PIP) install -r requirements.prereq.txt - $(PIP) install -r requirements.txt - $(PIP) install -r requirements.dev.txt + $(PIP) install \ + -r requirements.prereq.txt \ + -r requirements.txt \ + -r requirements.dev.txt dev-nltk-download-models: diff --git a/requirements.dev.txt b/requirements.dev.txt index 42326685fcfb4c4e59086cbe9e97ebfefaabc261..bcf004cfd24935507af9e3fa076dbca6db4cd16c 100644 --- a/requirements.dev.txt +++ b/requirements.dev.txt @@ -1,6 +1,6 @@ # astroid and pylint raising false positives: # https://github.com/PyCQA/pylint/issues/3139 -pylint==2.8.3 +pylint==2.10.2 flake8==3.9.2 nose==1.3.7 pytest==6.2.4 diff --git a/requirements.txt b/requirements.txt index d8b44c749b243876ccbbc1205291c022250f3a4d..0e162729815801f193159cf2b4fa73cb97bfe507 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,13 +4,15 @@ gevent==21.8.0 gunicorn==20.1.0 fsspec==2021.5.0 gcsfs==2021.5.0 +google-cloud-bigquery<=2.24.0 futures fuzzywuzzy==0.18.0 lxml==4.6.3 matplotlib==2.2.5 -numpy==1.21.2 +numpy==1.18.5 nltk==3.6.2 Pillow>=3.4.1 +pyarrow>=0.17.1 python-crfsuite==0.9.7 Pyqtree==1.0.0 requests==2.26.0 diff --git a/sciencebeam_gym/preprocess/annotation/matching_annotator.py b/sciencebeam_gym/preprocess/annotation/matching_annotator.py index f2a33d62304811ea3c40e1a2f1f7118d370828fd..e355a4a099726ff0208ff540dbdac8cdc2f84566 100644 --- a/sciencebeam_gym/preprocess/annotation/matching_annotator.py +++ b/sciencebeam_gym/preprocess/annotation/matching_annotator.py @@ -676,7 +676,7 @@ class MatchingAnnotator(AbstractAnnotator): conditional_match: Optional[dict] = None - matched_choices_map = dict() + matched_choices_map = {} for target_annotation in self.target_annotations: get_logger().debug('target annotation: %s', target_annotation) target_value = normalise_and_remove_junk_str_or_list(target_annotation.value) diff --git a/sciencebeam_gym/preprocess/annotation/target_annotation.py b/sciencebeam_gym/preprocess/annotation/target_annotation.py index 892d4c3ed68f4d6b4831d546eadb0eb1ddef40ee..b91529c6f1e671394dd5081acf4cd5a4577de5b0 100644 --- a/sciencebeam_gym/preprocess/annotation/target_annotation.py +++ b/sciencebeam_gym/preprocess/annotation/target_annotation.py @@ -67,7 +67,7 @@ class TargetAnnotation(object): def parse_xml_mapping(xml_mapping_filename): - with open(xml_mapping_filename, 'r') as f: + with open(xml_mapping_filename, 'r', encoding='utf-8') as f: config = ConfigParser() config.read_file(f) return { diff --git a/sciencebeam_gym/preprocess/color_map.py b/sciencebeam_gym/preprocess/color_map.py index a4fee41b683529f824a43e9744f91200592ace92..b05613611ca28280ef6f470b9b4fba2276070369 100644 --- a/sciencebeam_gym/preprocess/color_map.py +++ b/sciencebeam_gym/preprocess/color_map.py @@ -19,7 +19,7 @@ def parse_color_map_from_configparser(color_map_config): return (int(m.group(1)), int(m.group(2)), int(m.group(3))) raise Exception('invalid color value: {}'.format(s)) - color_map = dict() + color_map = {} for k, v in color_map_config.items('color_map'): color_map[k] = parse_color(v) return color_map @@ -28,7 +28,7 @@ def parse_color_map_from_configparser(color_map_config): def parse_color_map_from_file(f): color_map_config = ConfigParser() if isinstance(f, str): - with open(f, 'r') as fp: + with open(f, 'r', encoding='utf-8') as fp: color_map_config.read_file(fp) else: color_map_config.read_file(f) diff --git a/sciencebeam_gym/preprocess/preprocessing_transforms.py b/sciencebeam_gym/preprocess/preprocessing_transforms.py index 1fea02fa971f67c0b715f5937cab028d1eae9fcd..b61d58edb53db507b5b392402bcd59e4af4cccf6 100644 --- a/sciencebeam_gym/preprocess/preprocessing_transforms.py +++ b/sciencebeam_gym/preprocess/preprocessing_transforms.py @@ -26,9 +26,9 @@ class WritePropsToTFRecord(beam.PTransform): raise RuntimeError('TensorFlow required for this transform') LOGGER.debug('tfrecords output file: %r', self.file_path + self.file_name_suffix) - def expand(self, pcoll): # pylint: disable=W0221 + def expand(self, input_or_inputs): # pylint: disable=W0221 return ( - pcoll | + input_or_inputs | 'ConvertToTfExamples' >> beam.FlatMap(lambda v: ( dict_to_example(props) for props in self.extract_props(v) diff --git a/sciencebeam_gym/tools/colorize_image.py b/sciencebeam_gym/tools/colorize_image.py index d13900dbcd3af2cd739ea605e7ad901720333893..0b72f53bee3beacd2e2faf0498761291bdcd971a 100644 --- a/sciencebeam_gym/tools/colorize_image.py +++ b/sciencebeam_gym/tools/colorize_image.py @@ -53,7 +53,7 @@ def parse_color_map_from_configparser(color_map_config): return (int(m.group(1)), int(m.group(2)), int(m.group(3))) raise Exception('invalid color value: {}'.format(s)) - color_map = dict() + color_map = {} for k, v in color_map_config.items('color_map'): color_map[parse_color(k)] = parse_color(v) return color_map diff --git a/sciencebeam_gym/tools/vocabulary/extract_embeddings_vocabulary.py b/sciencebeam_gym/tools/vocabulary/extract_embeddings_vocabulary.py index 5b57915af095c0cac77b7fe590498295b48829b1..310c988709a03c416e3f18965fd367d9deaa8f78 100644 --- a/sciencebeam_gym/tools/vocabulary/extract_embeddings_vocabulary.py +++ b/sciencebeam_gym/tools/vocabulary/extract_embeddings_vocabulary.py @@ -48,7 +48,7 @@ def run(args: argparse.Namespace): tokens_iterable = iter_tokens_from_embeddings_file( args.input_file ) - with open(args.output_vocabulary_file, 'wt') as out_fp: + with open(args.output_vocabulary_file, 'wt', encoding='utf-8') as out_fp: out_fp.writelines(( token + '\n' for token in tokens_iterable diff --git a/sciencebeam_gym/trainer/models/pix2pix/pix2pix_model.py b/sciencebeam_gym/trainer/models/pix2pix/pix2pix_model.py index 7c8eb82f8f45467ec5b9a3349159e5cfb1cfd66a..606ef29647dfca7025dc7a7bcba6b2c14ce348cc 100644 --- a/sciencebeam_gym/trainer/models/pix2pix/pix2pix_model.py +++ b/sciencebeam_gym/trainer/models/pix2pix/pix2pix_model.py @@ -72,7 +72,7 @@ class GraphReferences(object): def __init__(self): self.is_training = None - self.inputs = dict() + self.inputs = {} self.examples = None self.train = None self.global_step = None @@ -504,14 +504,14 @@ class Model(object): batched_tensors: dict = tf.train.batch( remove_none_from_dict({ k: getattr(tensors, k) - for k in { + for k in [ 'input_uri', 'annotation_uri', 'image_tensor', 'annotation_tensor', 'separate_channel_annotation_tensor', 'pos_weight' - } + ] }), batch_size=batch_size ) diff --git a/setup.py b/setup.py index 3db2bc01fe5d38725b97f165502de1e57655b822..5f60d81773471297434f0f120df4e7fa04204541 100644 --- a/setup.py +++ b/setup.py @@ -24,7 +24,7 @@ CUSTOM_COMMANDS = [ ] ] -with open(os.path.join('requirements.txt'), 'r') as f: +with open(os.path.join('requirements.txt'), 'r', encoding='utf-8') as f: REQUIRED_PACKAGES = f.readlines() packages = find_packages() diff --git a/tests/models/crf/crfsuite_training_pipeline_test.py b/tests/models/crf/crfsuite_training_pipeline_test.py index 70d0e9ca2dfd00256dc7f4e87debed4bda090163..6c0cabd484c348b40a6f50828298802243539d60 100644 --- a/tests/models/crf/crfsuite_training_pipeline_test.py +++ b/tests/models/crf/crfsuite_training_pipeline_test.py @@ -16,7 +16,7 @@ from sciencebeam_gym.models.text.feature_extractor import ( CV_TAG_SCOPE ) -import sciencebeam_gym.models.text.crf.crfsuite_training_pipeline as crfsuite_training_pipeline +from sciencebeam_gym.models.text.crf import crfsuite_training_pipeline from sciencebeam_gym.models.text.crf.crfsuite_training_pipeline import ( load_and_convert_to_token_props, load_token_props_list_by_document, diff --git a/tests/pdf/pdf_to_png_test.py b/tests/pdf/pdf_to_png_test.py index 0cfbd96c2c732ef34164b49d9c051e63cd06fa6a..ceca30e0c718c2ea9f4bb2708f140442fc7b2949 100644 --- a/tests/pdf/pdf_to_png_test.py +++ b/tests/pdf/pdf_to_png_test.py @@ -6,7 +6,7 @@ from sciencebeam_gym.pdf.pdf_to_png import ( PdfToPng ) -import sciencebeam_gym.pdf.pdf_to_png as pdf_to_png +from sciencebeam_gym.pdf import pdf_to_png TEMP_DIR = '/tmp/1' diff --git a/tests/preprocess/lxml_to_svg_test.py b/tests/preprocess/lxml_to_svg_test.py index 8ae3c0e4ebb58626f9a93481862c2ea206b3c575..f0ae89102db336984b55a946ee77de026a53a464 100644 --- a/tests/preprocess/lxml_to_svg_test.py +++ b/tests/preprocess/lxml_to_svg_test.py @@ -62,7 +62,7 @@ COMMON_LXML_TOKEN_ATTRIBS = { def dict_extend(*dicts): - d = dict() + d = {} for x in dicts: d.update(x) return d diff --git a/tests/structured_document/structured_document_loader_test.py b/tests/structured_document/structured_document_loader_test.py index 3c51f8ab636fe48d60b0ca7f23b774a77b271042..d65f7148eba36a3f4a16d2853cbcb1ce84dab873 100644 --- a/tests/structured_document/structured_document_loader_test.py +++ b/tests/structured_document/structured_document_loader_test.py @@ -7,7 +7,7 @@ from unittest.mock import patch from lxml import etree from lxml.builder import E -import sciencebeam_gym.structured_document.structured_document_loader as structured_document_loader +from sciencebeam_gym.structured_document import structured_document_loader from sciencebeam_gym.structured_document.structured_document_loader import ( StructuredDocumentType, diff --git a/tests/structured_document/structured_document_saver_test.py b/tests/structured_document/structured_document_saver_test.py index 5291ad64ffee1422895046393a7861027b1305d2..a919cff1b59f9535ef9ec0366632c627c2f4317f 100644 --- a/tests/structured_document/structured_document_saver_test.py +++ b/tests/structured_document/structured_document_saver_test.py @@ -12,7 +12,7 @@ from sciencebeam_gym.structured_document.svg import ( SvgStructuredDocument ) -import sciencebeam_gym.structured_document.structured_document_saver as structured_document_saver +from sciencebeam_gym.structured_document import structured_document_saver from sciencebeam_gym.structured_document.structured_document_saver import ( save_lxml_structured_document, save_svg_structured_document, diff --git a/tests/trainer/evaluator_test.py b/tests/trainer/evaluator_test.py index aa63d982d2301dc15fdaf6eb0337de76d433616e..7106ee43c770ad7f862e85d41519a32feb78fbc2 100644 --- a/tests/trainer/evaluator_test.py +++ b/tests/trainer/evaluator_test.py @@ -75,7 +75,7 @@ class ExampleModel(object): self.examples = examples def build_graph(self, data_paths, batch_size, graph_mode): # pylint: disable=unused-argument - tensors = dict() + tensors = {} tensors['is_training'] = tf.placeholder(tf.bool) map_keys_tracker = MapKeysTracker() dataset = example_dataset(map_keys_tracker, self.examples) @@ -86,8 +86,8 @@ class ExampleModel(object): tensors['metric_values'] = [] tensors['metric_updates'] = [] tensors['global_step'] = tf.constant(100, tf.int32) - tensors['summaries'] = dict() - tensors['image_tensors'] = dict() + tensors['summaries'] = {} + tensors['image_tensors'] = {} tensors['evaluation_result'] = None image_shape = (10, 10, 3) pre_batch_tensors = { diff --git a/tests/trainer/models/pix2pix/pix2pix_core_test.py b/tests/trainer/models/pix2pix/pix2pix_core_test.py index 5afd1099ae5acfebd34640db75c2466e6067fed7..caa527da2f3010cc555872bbfcfac3272806b9fc 100644 --- a/tests/trainer/models/pix2pix/pix2pix_core_test.py +++ b/tests/trainer/models/pix2pix/pix2pix_core_test.py @@ -15,7 +15,7 @@ from sciencebeam_utils.utils.collection import ( extend_dict ) -import sciencebeam_gym.trainer.models.pix2pix.pix2pix_core as pix2pix_core +from sciencebeam_gym.trainer.models.pix2pix import pix2pix_core from sciencebeam_gym.trainer.models.pix2pix.pix2pix_core import ( create_encoder_decoder, diff --git a/tests/trainer/models/pix2pix/pix2pix_model_test.py b/tests/trainer/models/pix2pix/pix2pix_model_test.py index d34d4860d76bcb7454d338595d95c48c39e16dec..59bc5044a95638d11149b22c4fac321ea83bb34c 100644 --- a/tests/trainer/models/pix2pix/pix2pix_model_test.py +++ b/tests/trainer/models/pix2pix/pix2pix_model_test.py @@ -15,7 +15,7 @@ from sciencebeam_gym.trainer.models.pix2pix.pix2pix_core import ( ALL_BASE_LOSS ) -import sciencebeam_gym.trainer.models.pix2pix.pix2pix_model as pix2pix_model +from sciencebeam_gym.trainer.models.pix2pix import pix2pix_model from sciencebeam_gym.trainer.models.pix2pix.pix2pix_model import ( parse_color_map,