From b1dc8d3ab5d55d02d2bcd6fa0e24fc0d9773de20 Mon Sep 17 00:00:00 2001 From: Kevin Kozlowski Date: Wed, 3 Mar 2021 06:18:02 -0700 Subject: [PATCH 1/5] Prevent users from creating columns with python keyword names --- src/omero_metadata/populate.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/omero_metadata/populate.py b/src/omero_metadata/populate.py index a7a0c0c5..beeda358 100644 --- a/src/omero_metadata/populate.py +++ b/src/omero_metadata/populate.py @@ -36,6 +36,7 @@ import csv import re import json +import keyword from getpass import getpass from getopt import getopt, GetoptError @@ -225,6 +226,12 @@ def create_columns(self): def columns_sanity_check(self, columns): column_types = [column.__class__ for column in columns] column_names = [column.name for column in columns] + lower_case_kws = [kw.lower() for kw in keyword.kwlist] + for col_name in column_names: + if col_name.lower() in lower_case_kws: + raise MetadataError( + ('Cannot use column name "' + col_name + + '" because it is a reserved python keyword')) if WellColumn in column_types and ImageColumn in column_types: log.debug(column_types) raise MetadataError( From dc4c0cb71a793ae542ddc1d60fa49eae1df5e441 Mon Sep 17 00:00:00 2001 From: Kevin Kozlowski Date: Wed, 3 Mar 2021 07:24:23 -0700 Subject: [PATCH 2/5] flake8 --- src/omero_metadata/populate.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/omero_metadata/populate.py b/src/omero_metadata/populate.py index beeda358..aa8f553c 100644 --- a/src/omero_metadata/populate.py +++ b/src/omero_metadata/populate.py @@ -230,8 +230,8 @@ def columns_sanity_check(self, columns): for col_name in column_names: if col_name.lower() in lower_case_kws: raise MetadataError( - ('Cannot use column name "' + col_name + - '" because it is a reserved python keyword')) + ('Cannot use column name "' + col_name + + '" because it is a reserved python keyword')) if WellColumn in column_types and ImageColumn in column_types: log.debug(column_types) raise MetadataError( From 9f85124d45cd9e7276b79890e3d1c3eff590e605 Mon Sep 17 00:00:00 2001 From: Kevin Kozlowski Date: Wed, 3 Mar 2021 09:28:28 -0700 Subject: [PATCH 3/5] Add warnings for column names with spaces --- src/omero_metadata/populate.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/omero_metadata/populate.py b/src/omero_metadata/populate.py index aa8f553c..7a8eface 100644 --- a/src/omero_metadata/populate.py +++ b/src/omero_metadata/populate.py @@ -226,12 +226,18 @@ def create_columns(self): def columns_sanity_check(self, columns): column_types = [column.__class__ for column in columns] column_names = [column.name for column in columns] + # Check for column names which are python keywords or contain spaces lower_case_kws = [kw.lower() for kw in keyword.kwlist] + omero_reserved_col_names = ['Image Name', + 'Dataset Name'] for col_name in column_names: if col_name.lower() in lower_case_kws: raise MetadataError( ('Cannot use column name "' + col_name + '" because it is a reserved python keyword')) + if ' ' in col_name and col_name not in omero_reserved_col_names: + log.warn('Column name "' + col_name + + '" contains a space and cannot be used for querying') if WellColumn in column_types and ImageColumn in column_types: log.debug(column_types) raise MetadataError( From e56859c7ed175a9845f00c2884730e402bbf4d4f Mon Sep 17 00:00:00 2001 From: Kevin Kozlowski Date: Thu, 4 Mar 2021 07:34:52 -0700 Subject: [PATCH 4/5] Add option to allow python kw column names --- src/omero_metadata/cli.py | 5 ++++- src/omero_metadata/populate.py | 20 ++++++++++++-------- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/src/omero_metadata/cli.py b/src/omero_metadata/cli.py index bc788371..c3a2f277 100755 --- a/src/omero_metadata/cli.py +++ b/src/omero_metadata/cli.py @@ -242,6 +242,9 @@ def _configure(self, parser): populate.add_argument("--allow_nan", action="store_true", help=( "Allow empty values to become Nan in Long or Double columns")) + populate.add_argument("--force_cols", action="store_true", help=( + "Force creation of column names which are forbidden by default")) + populateroi.add_argument( "--measurement", type=int, default=None, help="Index of the measurement to populate. By default, all") @@ -533,7 +536,7 @@ def populate(self, args): cfg=args.cfg, cfgid=cfgid, attach=args.attach, options=localcfg, batch_size=args.batch, loops=loops, ms=ms, dry_run=args.dry_run, - allow_nan=args.allow_nan) + allow_nan=args.allow_nan, force_cols=args.force_cols) ctx.parse() def rois(self, args): diff --git a/src/omero_metadata/populate.py b/src/omero_metadata/populate.py index 7a8eface..8701562a 100644 --- a/src/omero_metadata/populate.py +++ b/src/omero_metadata/populate.py @@ -179,11 +179,12 @@ class HeaderResolver(object): 'plate': PlateColumn, }, **plate_keys) - def __init__(self, target_object, headers, column_types=None): + def __init__(self, target_object, headers, column_types=None, force_cols=False): self.target_object = target_object self.headers = headers self.headers_as_lower = [v.lower() for v in self.headers] self.types = column_types + self.force_cols = force_cols @staticmethod def is_row_column_types(row): @@ -227,14 +228,16 @@ def columns_sanity_check(self, columns): column_types = [column.__class__ for column in columns] column_names = [column.name for column in columns] # Check for column names which are python keywords or contain spaces - lower_case_kws = [kw.lower() for kw in keyword.kwlist] + if not self.force_cols: + lower_case_kws = [kw.lower() for kw in keyword.kwlist] + for col_name in column_names: + if col_name.lower() in lower_case_kws: + raise MetadataError( + ('Cannot use column name "' + col_name + + '" because it is a reserved python keyword')) omero_reserved_col_names = ['Image Name', 'Dataset Name'] for col_name in column_names: - if col_name.lower() in lower_case_kws: - raise MetadataError( - ('Cannot use column name "' + col_name + - '" because it is a reserved python keyword')) if ' ' in col_name and col_name not in omero_reserved_col_names: log.warn('Column name "' + col_name + '" contains a space and cannot be used for querying') @@ -995,7 +998,7 @@ class ParsingContext(object): def __init__(self, client, target_object, file=None, fileid=None, cfg=None, cfgid=None, attach=False, column_types=None, options=None, batch_size=1000, loops=10, ms=500, - dry_run=False, allow_nan=False): + dry_run=False, allow_nan=False, force_cols=False): ''' This lines should be handled outside of the constructor: @@ -1019,6 +1022,7 @@ def __init__(self, client, target_object, file=None, fileid=None, target_object, self.value_resolver) self.dry_run = dry_run + self.force_cols = force_cols def create_annotation_link(self): self.target_class = self.target_object.__class__ @@ -1063,7 +1067,7 @@ def preprocess_from_handle(self, data): log.debug('Column types: %r' % self.column_types) self.header_resolver = HeaderResolver( self.target_object, header_row, - column_types=self.column_types) + column_types=self.column_types, force_cols=self.force_cols) self.columns = self.header_resolver.create_columns() log.debug('Columns: %r' % self.columns) if len(self.columns) > MAX_COLUMN_COUNT: From 14f0448a90ccd781126a5c9dc267f4d0a6939102 Mon Sep 17 00:00:00 2001 From: Kevin Kozlowski Date: Thu, 4 Mar 2021 09:32:16 -0700 Subject: [PATCH 5/5] flake8 --- src/omero_metadata/cli.py | 3 ++- src/omero_metadata/populate.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/omero_metadata/cli.py b/src/omero_metadata/cli.py index c3a2f277..db30ce41 100755 --- a/src/omero_metadata/cli.py +++ b/src/omero_metadata/cli.py @@ -536,7 +536,8 @@ def populate(self, args): cfg=args.cfg, cfgid=cfgid, attach=args.attach, options=localcfg, batch_size=args.batch, loops=loops, ms=ms, dry_run=args.dry_run, - allow_nan=args.allow_nan, force_cols=args.force_cols) + allow_nan=args.allow_nan, + force_cols=args.force_cols) ctx.parse() def rois(self, args): diff --git a/src/omero_metadata/populate.py b/src/omero_metadata/populate.py index 8701562a..97c4c15d 100644 --- a/src/omero_metadata/populate.py +++ b/src/omero_metadata/populate.py @@ -179,7 +179,8 @@ class HeaderResolver(object): 'plate': PlateColumn, }, **plate_keys) - def __init__(self, target_object, headers, column_types=None, force_cols=False): + def __init__(self, target_object, headers, column_types=None, + force_cols=False): self.target_object = target_object self.headers = headers self.headers_as_lower = [v.lower() for v in self.headers]