From 7fb173bdd3c8980bb026c03de263a0474d0f006c Mon Sep 17 00:00:00 2001 From: linglp Date: Mon, 1 Apr 2024 17:50:36 -0400 Subject: [PATCH 01/51] add file-annotations-upload parameter --- schematic/models/metadata.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/schematic/models/metadata.py b/schematic/models/metadata.py index 1fc35e376..d68a13ba1 100644 --- a/schematic/models/metadata.py +++ b/schematic/models/metadata.py @@ -323,6 +323,7 @@ def submit_metadata_manifest( restrict_rules: bool, access_token: Optional[str] = None, validate_component: Optional[str] = None, + file_annotations_upload: Optional[bool] = True, hide_blanks: bool = False, project_scope: List = None, table_manipulation: str = "replace", @@ -388,6 +389,7 @@ def submit_metadata_manifest( table_manipulation=table_manipulation, table_column_names=table_column_names, annotation_keys=annotation_keys, + file_annotations_upload = file_annotations_upload ) restrict_maniest = True @@ -401,6 +403,7 @@ def submit_metadata_manifest( table_manipulation=table_manipulation, table_column_names=table_column_names, annotation_keys=annotation_keys, + file_annotations_upload = file_annotations_upload ) logger.info(f"No validation errors occured during validation.") @@ -423,6 +426,7 @@ def submit_metadata_manifest( table_manipulation=table_manipulation, table_column_names=table_column_names, annotation_keys=annotation_keys, + file_annotations_upload=file_annotations_upload ) restrict_maniest = True @@ -436,6 +440,7 @@ def submit_metadata_manifest( table_manipulation=table_manipulation, table_column_names=table_column_names, annotation_keys=annotation_keys, + file_annotations_upload=file_annotations_upload ) logger.debug( From 769598166c853b14196ecad47dcb7d68787903e7 Mon Sep 17 00:00:00 2001 From: linglp Date: Mon, 1 Apr 2024 17:55:17 -0400 Subject: [PATCH 02/51] add comment --- schematic/models/metadata.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/schematic/models/metadata.py b/schematic/models/metadata.py index d68a13ba1..de1cd942d 100644 --- a/schematic/models/metadata.py +++ b/schematic/models/metadata.py @@ -336,6 +336,7 @@ def submit_metadata_manifest( manifest_path: Path to the manifest file, which contains the metadata. dataset_id: Synapse ID of the dataset on Synapse containing the metadata manifest file. validate_component: Component from the schema.org schema based on which the manifest template has been generated. + file_annotations_upload (bool): Default to True. If false, do not add annotations to files. Returns: Manifest ID: If both validation and association were successful. Exceptions: @@ -389,7 +390,7 @@ def submit_metadata_manifest( table_manipulation=table_manipulation, table_column_names=table_column_names, annotation_keys=annotation_keys, - file_annotations_upload = file_annotations_upload + file_annotations_upload=file_annotations_upload ) restrict_maniest = True @@ -403,7 +404,7 @@ def submit_metadata_manifest( table_manipulation=table_manipulation, table_column_names=table_column_names, annotation_keys=annotation_keys, - file_annotations_upload = file_annotations_upload + file_annotations_upload=file_annotations_upload ) logger.info(f"No validation errors occured during validation.") From 1c11460a57b236834ccaa71a09bfd4aea12b6044 Mon Sep 17 00:00:00 2001 From: linglp Date: Mon, 1 Apr 2024 17:57:45 -0400 Subject: [PATCH 03/51] add optional parameter on API side --- schematic_api/api/openapi/api.yaml | 7 +++++++ schematic_api/api/routes.py | 3 +++ 2 files changed, 10 insertions(+) diff --git a/schematic_api/api/openapi/api.yaml b/schematic_api/api/openapi/api.yaml index 50c0e3dc0..15a3540d1 100644 --- a/schematic_api/api/openapi/api.yaml +++ b/schematic_api/api/openapi/api.yaml @@ -442,6 +442,13 @@ paths: enum: ["display_label", "class_label"] default: "class_label" required: false + - in: query + name: file_annotations_upload + schema: + type: boolean + default: true + description: if false, do not add annotations when submitting file-based manifests. + required: false - in: query name: project_scope schema: diff --git a/schematic_api/api/routes.py b/schematic_api/api/routes.py index c20a94a88..db120f851 100644 --- a/schematic_api/api/routes.py +++ b/schematic_api/api/routes.py @@ -19,6 +19,7 @@ import pandas as pd import json +from typing import Optional from schematic.configuration.configuration import CONFIG from schematic.visualization.attributes_explorer import AttributesExplorer @@ -392,6 +393,7 @@ def submit_manifest_route( project_scope=None, table_column_names=None, annotation_keys=None, + file_annotations_upload:Optional[bool]=True, ): # call config_handler() config_handler(asset_view=asset_view) @@ -449,6 +451,7 @@ def submit_manifest_route( project_scope=project_scope, table_column_names=table_column_names, annotation_keys=annotation_keys, + file_annotations_upload=file_annotations_upload ) return manifest_id From a74bb28a4fe0ba4e41cf7a3b0f6134372b986db4 Mon Sep 17 00:00:00 2001 From: linglp Date: Mon, 1 Apr 2024 18:07:06 -0400 Subject: [PATCH 04/51] add file annotations upload param in add_annotations_to_entities_files function --- schematic/store/synapse.py | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/schematic/store/synapse.py b/schematic/store/synapse.py index 1239ae52a..0e53b6c49 100644 --- a/schematic/store/synapse.py +++ b/schematic/store/synapse.py @@ -1637,6 +1637,7 @@ def add_annotations_to_entities_files( hideBlanks: bool, manifest_synapse_table_id="", annotation_keys: str = "class_label", + file_annotations_upload: Optional[bool] = True, ): """Depending on upload type add Ids to entityId row. Add anotations to connected files. Args: @@ -1649,13 +1650,17 @@ def add_annotations_to_entities_files( annotation_keys: (str) display_label/class_label(default), Determines labeling syle for annotation keys. class_label will format the display name as upper camelcase, and strip blacklisted characters, display_label will strip blacklisted characters including spaces, to retain display label formatting while ensuring the label is formatted properly for Synapse annotations. + file_annotations_upload (bool): Default to True. If false, do not add annotations to files. Returns: - manifest (pd.DataFrame): modified to add entitiyId as appropriate. + manifest (pd.DataFrame): modified to add entitiyId as appropriate """ # Expected behavior is to annotate files if `Filename` is present regardless of `-mrt` setting - if "filename" in [col.lower() for col in manifest.columns]: + if ( + "filename" in [col.lower() for col in manifest.columns] + and file_annotations_upload + ): # get current list of files and store as dataframe dataset_files = self.getFilesInStorageDataset(datasetId) files_and_entityIds = self._get_file_entityIds( @@ -1706,6 +1711,7 @@ def upload_manifest_as_table( table_manipulation: str, table_column_names: str, annotation_keys: str, + file_annotations_upload: Optional[bool], ): """Upload manifest to Synapse as a table and csv. Args: @@ -1725,6 +1731,7 @@ def upload_manifest_as_table( annotation_keys: (str) display_label/class_label (default), Sets labeling syle for annotation keys. class_label will format the display name as upper camelcase, and strip blacklisted characters, display_label will strip blacklisted characters including spaces, to retain display label formatting while ensuring the label is formatted properly for Synapse annotations. + file_annotations_upload (bool): Default to True. If false, do not add annotations to files. Return: manifest_synapse_file_id: SynID of manifest csv uploaded to synapse. """ @@ -1747,6 +1754,7 @@ def upload_manifest_as_table( hideBlanks, manifest_synapse_table_id, annotation_keys, + file_annotations_upload=file_annotations_upload, ) # Load manifest to synapse as a CSV File manifest_synapse_file_id = self.upload_manifest_file( @@ -1793,6 +1801,7 @@ def upload_manifest_as_csv( hideBlanks, component_name, annotation_keys: str, + file_annotations_upload: Optional[bool], ): """Upload manifest to Synapse as a csv only. Args: @@ -1806,6 +1815,7 @@ def upload_manifest_as_csv( annotation_keys: (str) display_label/class_label (default), Sets labeling syle for annotation keys. class_label will format the display name as upper camelcase, and strip blacklisted characters, display_label will strip blacklisted characters including spaces, to retain display label formatting while ensuring the label is formatted properly for Synapse annotations. + file_annotations_upload (bool): Default to True. If false, do not add annotations to files. Return: manifest_synapse_file_id (str): SynID of manifest csv uploaded to synapse. """ @@ -1816,6 +1826,7 @@ def upload_manifest_as_csv( datasetId, hideBlanks, annotation_keys=annotation_keys, + file_annotations_upload=file_annotations_upload, ) # Load manifest to synapse as a CSV File @@ -1851,6 +1862,7 @@ def upload_manifest_combo( table_manipulation, table_column_names: str, annotation_keys: str, + file_annotations_upload: Optional[bool], ): """Upload manifest to Synapse as a table and CSV with entities. Args: @@ -1891,6 +1903,7 @@ def upload_manifest_combo( hideBlanks, manifest_synapse_table_id, annotation_keys=annotation_keys, + file_annotations_upload=file_annotations_upload, ) # Load manifest to synapse as a CSV File @@ -1934,6 +1947,7 @@ def associateMetadataWithFiles( table_manipulation: str = "replace", table_column_names: str = "class_label", annotation_keys: str = "class_label", + file_annotations_upload: Optional[bool] = True, ) -> str: """Associate metadata with files in a storage dataset already on Synapse. Upload metadataManifest in the storage dataset folder on Synapse as well. Return synapseId of the uploaded manifest file. @@ -1973,7 +1987,6 @@ def associateMetadataWithFiles( table_name, component_name = self._generate_table_name(manifest) # Upload manifest to synapse based on user input (manifest_record_type) - if manifest_record_type == "file_only": manifest_synapse_file_id = self.upload_manifest_as_csv( dmge, @@ -1985,6 +1998,7 @@ def associateMetadataWithFiles( manifest_record_type=manifest_record_type, component_name=component_name, annotation_keys=annotation_keys, + file_annotations_upload=file_annotations_upload, ) elif manifest_record_type == "table_and_file": manifest_synapse_file_id = self.upload_manifest_as_table( @@ -2000,6 +2014,7 @@ def associateMetadataWithFiles( table_manipulation=table_manipulation, table_column_names=table_column_names, annotation_keys=annotation_keys, + file_annotations_upload=file_annotations_upload, ) elif manifest_record_type == "file_and_entities": manifest_synapse_file_id = self.upload_manifest_as_csv( @@ -2012,6 +2027,7 @@ def associateMetadataWithFiles( manifest_record_type=manifest_record_type, component_name=component_name, annotation_keys=annotation_keys, + file_annotations_upload=file_annotations_upload, ) elif manifest_record_type == "table_file_and_entities": manifest_synapse_file_id = self.upload_manifest_combo( @@ -2027,6 +2043,7 @@ def associateMetadataWithFiles( table_manipulation=table_manipulation, table_column_names=table_column_names, annotation_keys=annotation_keys, + file_annotations_upload=file_annotations_upload, ) else: raise ValueError("Please enter a valid manifest_record_type.") From 49fe0aec2a4df34c5114f702ad5fc1cabad85aaa Mon Sep 17 00:00:00 2001 From: linglp Date: Mon, 1 Apr 2024 18:11:54 -0400 Subject: [PATCH 05/51] add comment --- schematic/store/synapse.py | 1 + 1 file changed, 1 insertion(+) diff --git a/schematic/store/synapse.py b/schematic/store/synapse.py index 0e53b6c49..ec14a9e38 100644 --- a/schematic/store/synapse.py +++ b/schematic/store/synapse.py @@ -1882,6 +1882,7 @@ def upload_manifest_combo( annotation_keys: (str) display_label/class_label (default), Sets labeling syle for annotation keys. class_label will format the display name as upper camelcase, and strip blacklisted characters, display_label will strip blacklisted characters including spaces, to retain display label formatting while ensuring the label is formatted properly for Synapse annotations. + file_annotations_upload (bool): Default to True. If false, do not add annotations to files. Return: manifest_synapse_file_id (str): SynID of manifest csv uploaded to synapse. """ From 136763adc86602591877437ef1a6580ed134b0f8 Mon Sep 17 00:00:00 2001 From: linglp Date: Mon, 1 Apr 2024 18:46:43 -0400 Subject: [PATCH 06/51] modify file annotations uploa parameter position --- schematic/store/synapse.py | 67 ++++++++++++++++++-------------------- 1 file changed, 31 insertions(+), 36 deletions(-) diff --git a/schematic/store/synapse.py b/schematic/store/synapse.py index ec14a9e38..8f16cde0d 100644 --- a/schematic/store/synapse.py +++ b/schematic/store/synapse.py @@ -1637,7 +1637,6 @@ def add_annotations_to_entities_files( hideBlanks: bool, manifest_synapse_table_id="", annotation_keys: str = "class_label", - file_annotations_upload: Optional[bool] = True, ): """Depending on upload type add Ids to entityId row. Add anotations to connected files. Args: @@ -1650,17 +1649,13 @@ def add_annotations_to_entities_files( annotation_keys: (str) display_label/class_label(default), Determines labeling syle for annotation keys. class_label will format the display name as upper camelcase, and strip blacklisted characters, display_label will strip blacklisted characters including spaces, to retain display label formatting while ensuring the label is formatted properly for Synapse annotations. - file_annotations_upload (bool): Default to True. If false, do not add annotations to files. Returns: manifest (pd.DataFrame): modified to add entitiyId as appropriate """ - # Expected behavior is to annotate files if `Filename` is present regardless of `-mrt` setting - if ( - "filename" in [col.lower() for col in manifest.columns] - and file_annotations_upload - ): + # Expected behavior is to annotate files if `Filename` is present and if file_annotations_upload is set to True regardless of `-mrt` setting + if "filename" in [col.lower() for col in manifest.columns]: # get current list of files and store as dataframe dataset_files = self.getFilesInStorageDataset(datasetId) files_and_entityIds = self._get_file_entityIds( @@ -1746,16 +1741,16 @@ def upload_manifest_as_table( table_column_names=table_column_names, ) - manifest = self.add_annotations_to_entities_files( - dmge, - manifest, - manifest_record_type, - datasetId, - hideBlanks, - manifest_synapse_table_id, - annotation_keys, - file_annotations_upload=file_annotations_upload, - ) + if file_annotations_upload: + manifest = self.add_annotations_to_entities_files( + dmge, + manifest, + manifest_record_type, + datasetId, + hideBlanks, + manifest_synapse_table_id, + annotation_keys, + ) # Load manifest to synapse as a CSV File manifest_synapse_file_id = self.upload_manifest_file( manifest, @@ -1819,15 +1814,15 @@ def upload_manifest_as_csv( Return: manifest_synapse_file_id (str): SynID of manifest csv uploaded to synapse. """ - manifest = self.add_annotations_to_entities_files( - dmge, - manifest, - manifest_record_type, - datasetId, - hideBlanks, - annotation_keys=annotation_keys, - file_annotations_upload=file_annotations_upload, - ) + if file_annotations_upload: + manifest = self.add_annotations_to_entities_files( + dmge, + manifest, + manifest_record_type, + datasetId, + hideBlanks, + annotation_keys=annotation_keys, + ) # Load manifest to synapse as a CSV File manifest_synapse_file_id = self.upload_manifest_file( @@ -1896,16 +1891,16 @@ def upload_manifest_combo( table_column_names=table_column_names, ) - manifest = self.add_annotations_to_entities_files( - dmge, - manifest, - manifest_record_type, - datasetId, - hideBlanks, - manifest_synapse_table_id, - annotation_keys=annotation_keys, - file_annotations_upload=file_annotations_upload, - ) + if file_annotations_upload: + manifest = self.add_annotations_to_entities_files( + dmge, + manifest, + manifest_record_type, + datasetId, + hideBlanks, + manifest_synapse_table_id, + annotation_keys=annotation_keys, + ) # Load manifest to synapse as a CSV File manifest_synapse_file_id = self.upload_manifest_file( From be3a1090f9855e676e3f45ab7e32e55c659863be Mon Sep 17 00:00:00 2001 From: linglp Date: Mon, 1 Apr 2024 18:52:14 -0400 Subject: [PATCH 07/51] default to true --- schematic/store/synapse.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/schematic/store/synapse.py b/schematic/store/synapse.py index 8f16cde0d..58425c181 100644 --- a/schematic/store/synapse.py +++ b/schematic/store/synapse.py @@ -1706,7 +1706,7 @@ def upload_manifest_as_table( table_manipulation: str, table_column_names: str, annotation_keys: str, - file_annotations_upload: Optional[bool], + file_annotations_upload: Optional[bool]=True, ): """Upload manifest to Synapse as a table and csv. Args: @@ -1796,7 +1796,7 @@ def upload_manifest_as_csv( hideBlanks, component_name, annotation_keys: str, - file_annotations_upload: Optional[bool], + file_annotations_upload: Optional[bool]=True, ): """Upload manifest to Synapse as a csv only. Args: @@ -1857,7 +1857,7 @@ def upload_manifest_combo( table_manipulation, table_column_names: str, annotation_keys: str, - file_annotations_upload: Optional[bool], + file_annotations_upload:Optional[bool]=True, ): """Upload manifest to Synapse as a table and CSV with entities. Args: From 82ba0d694a3eb7eed91d7f84a5b8d1b0e43f3346 Mon Sep 17 00:00:00 2001 From: linglp Date: Tue, 2 Apr 2024 13:55:04 -0400 Subject: [PATCH 08/51] run black --- schematic/models/metadata.py | 10 +++++----- schematic/store/synapse.py | 6 +++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/schematic/models/metadata.py b/schematic/models/metadata.py index de1cd942d..e651cd76d 100644 --- a/schematic/models/metadata.py +++ b/schematic/models/metadata.py @@ -336,7 +336,7 @@ def submit_metadata_manifest( manifest_path: Path to the manifest file, which contains the metadata. dataset_id: Synapse ID of the dataset on Synapse containing the metadata manifest file. validate_component: Component from the schema.org schema based on which the manifest template has been generated. - file_annotations_upload (bool): Default to True. If false, do not add annotations to files. + file_annotations_upload (bool): Default to True. If false, do not add annotations to files. Returns: Manifest ID: If both validation and association were successful. Exceptions: @@ -390,7 +390,7 @@ def submit_metadata_manifest( table_manipulation=table_manipulation, table_column_names=table_column_names, annotation_keys=annotation_keys, - file_annotations_upload=file_annotations_upload + file_annotations_upload=file_annotations_upload, ) restrict_maniest = True @@ -404,7 +404,7 @@ def submit_metadata_manifest( table_manipulation=table_manipulation, table_column_names=table_column_names, annotation_keys=annotation_keys, - file_annotations_upload=file_annotations_upload + file_annotations_upload=file_annotations_upload, ) logger.info(f"No validation errors occured during validation.") @@ -427,7 +427,7 @@ def submit_metadata_manifest( table_manipulation=table_manipulation, table_column_names=table_column_names, annotation_keys=annotation_keys, - file_annotations_upload=file_annotations_upload + file_annotations_upload=file_annotations_upload, ) restrict_maniest = True @@ -441,7 +441,7 @@ def submit_metadata_manifest( table_manipulation=table_manipulation, table_column_names=table_column_names, annotation_keys=annotation_keys, - file_annotations_upload=file_annotations_upload + file_annotations_upload=file_annotations_upload, ) logger.debug( diff --git a/schematic/store/synapse.py b/schematic/store/synapse.py index 58425c181..993b7ab1d 100644 --- a/schematic/store/synapse.py +++ b/schematic/store/synapse.py @@ -1706,7 +1706,7 @@ def upload_manifest_as_table( table_manipulation: str, table_column_names: str, annotation_keys: str, - file_annotations_upload: Optional[bool]=True, + file_annotations_upload: Optional[bool] = True, ): """Upload manifest to Synapse as a table and csv. Args: @@ -1796,7 +1796,7 @@ def upload_manifest_as_csv( hideBlanks, component_name, annotation_keys: str, - file_annotations_upload: Optional[bool]=True, + file_annotations_upload: Optional[bool] = True, ): """Upload manifest to Synapse as a csv only. Args: @@ -1857,7 +1857,7 @@ def upload_manifest_combo( table_manipulation, table_column_names: str, annotation_keys: str, - file_annotations_upload:Optional[bool]=True, + file_annotations_upload: Optional[bool] = True, ): """Upload manifest to Synapse as a table and CSV with entities. Args: From a65d7d7f14545b1a7e9c48993e98d756b29c44f2 Mon Sep 17 00:00:00 2001 From: linglp Date: Tue, 2 Apr 2024 15:26:00 -0400 Subject: [PATCH 09/51] updated cli --- schematic/models/commands.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/schematic/models/commands.py b/schematic/models/commands.py index 81a4a3286..2e7f11a25 100644 --- a/schematic/models/commands.py +++ b/schematic/models/commands.py @@ -96,6 +96,12 @@ def model(ctx, config): # use as `schematic model ...` is_flag=True, help=query_dict(model_commands, ("model", "validate", "restrict_rules")), ) +@click.option( + "--file_annotations_upload", + "-fa", + is_flag=True, + help=query_dict(model_commands, ("model", "submit", "file_annotations_upload")), +) @click.option( "-ps", "--project_scope", @@ -147,6 +153,7 @@ def submit_manifest( data_model_labels, table_column_names, annotation_keys, + file_annotations_upload ): """ Running CLI with manifest validation (optional) and submission options. @@ -173,6 +180,7 @@ def submit_manifest( table_manipulation=table_manipulation, table_column_names=table_column_names, annotation_keys=annotation_keys, + file_annotations_upload=file_annotations_upload ) if manifest_id: From 2cae6d8c523c67ebfc56cd89e71f4b3fd1006f3a Mon Sep 17 00:00:00 2001 From: linglp Date: Tue, 2 Apr 2024 15:44:14 -0400 Subject: [PATCH 10/51] set file annotations upload to false --- schematic/models/commands.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/schematic/models/commands.py b/schematic/models/commands.py index 2e7f11a25..b64750e86 100644 --- a/schematic/models/commands.py +++ b/schematic/models/commands.py @@ -97,8 +97,9 @@ def model(ctx, config): # use as `schematic model ...` help=query_dict(model_commands, ("model", "validate", "restrict_rules")), ) @click.option( - "--file_annotations_upload", - "-fa", + "--file_annotations_upload/--no-file_annotations_upload", + "-fa/--no-fa", + default=True, is_flag=True, help=query_dict(model_commands, ("model", "submit", "file_annotations_upload")), ) From a5058a7791aa50c5b305874eec4db1b74a7d8bae Mon Sep 17 00:00:00 2001 From: linglp Date: Wed, 3 Apr 2024 12:08:27 -0400 Subject: [PATCH 11/51] add cli test for manifest submission --- tests/test_cli.py | 46 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 44 insertions(+), 2 deletions(-) diff --git a/tests/test_cli.py b/tests/test_cli.py index 4631e9a9c..0dd025024 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -7,6 +7,7 @@ # from schematic import init from schematic.schemas.commands import schema from schematic.manifest.commands import manifest +from schematic.models.commands import model from schematic.configuration.configuration import Configuration @@ -49,10 +50,18 @@ def test_schema_convert_cli(self, runner, helpers): output_path = helpers.get_data_path("example.model.jsonld") - label_type = 'class_label' + label_type = "class_label" result = runner.invoke( - schema, ["convert", data_model_csv_path, "--output_jsonld", output_path, "--data_model_labels", label_type] + schema, + [ + "convert", + data_model_csv_path, + "--output_jsonld", + output_path, + "--data_model_labels", + label_type, + ], ) assert result.exit_code == 0 @@ -139,3 +148,36 @@ def test_get_example_manifest_excel( assert result.exit_code == 0 self.assert_expected_file(result, output_path) + + @pytest.mark.parametrize("with_annotations", [True, False]) + def test_submit_file_based_manifest( + self, runner, helpers, with_annotations, config: Configuration + ): + manifest_path = helpers.get_data_path("Example-bulkrnaseq.csv") + config.load_config("config_example.yml") + config.synapse_master_fileview_id = "syn55229694" + + if with_annotations: + annotation_opt = "-fa" + else: + annotation_opt = "--no-fa" + + result = runner.invoke( + model, + [ + "-c", + config.config_path, + "submit", + "-mrt", + "file_only", + "-d", + "syn55229693", + "-vc", + "BulkRNA-seqAssay", + "-mp", + manifest_path, + annotation_opt, + ], + ) + + assert result.exit_code == 0 From 0b9b769ded1919e42adfd2e32468e1e01f6f2082 Mon Sep 17 00:00:00 2001 From: linglp Date: Wed, 3 Apr 2024 12:27:05 -0400 Subject: [PATCH 12/51] add test manifest and modify test --- tests/data/mock_manifests/bulkrnaseq_test.csv | 4 ++++ tests/test_cli.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) create mode 100644 tests/data/mock_manifests/bulkrnaseq_test.csv diff --git a/tests/data/mock_manifests/bulkrnaseq_test.csv b/tests/data/mock_manifests/bulkrnaseq_test.csv new file mode 100644 index 000000000..2b18715d8 --- /dev/null +++ b/tests/data/mock_manifests/bulkrnaseq_test.csv @@ -0,0 +1,4 @@ +Filename,Sample ID,File Format,Component,Genome Build,Genome FASTA,Id,entityId +Test sub folder/sample_file_one.txt,1,BAM,BulkRNA-seqAssay,GRCh37,,b0393125-7502-4c55-9fbd-eea2668a3e86,syn55229695 +Test sub folder/sample_file_three.txt,2,BAM,BulkRNA-seqAssay,GRCh37,,1cc5675e-6731-4cd2-ae2d-3a925cbff5b8,syn55229697 +Test sub folder/sample_file_two.txt,3,BAM,BulkRNA-seqAssay,GRCh37,,49156121-d736-48cf-a12b-02f7d84a0062,syn55229696 diff --git a/tests/test_cli.py b/tests/test_cli.py index 0dd025024..3c0927ab1 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -153,7 +153,7 @@ def test_get_example_manifest_excel( def test_submit_file_based_manifest( self, runner, helpers, with_annotations, config: Configuration ): - manifest_path = helpers.get_data_path("Example-bulkrnaseq.csv") + manifest_path = helpers.get_data_path("mock_manifests/bulkrnaseq_test.csv") config.load_config("config_example.yml") config.synapse_master_fileview_id = "syn55229694" From 1b1a54b8d7395c4dc78eeb0d76b8524fd2027535 Mon Sep 17 00:00:00 2001 From: linglp Date: Wed, 3 Apr 2024 12:31:56 -0400 Subject: [PATCH 13/51] run black --- schematic/models/commands.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/schematic/models/commands.py b/schematic/models/commands.py index b64750e86..4a9e3d99a 100644 --- a/schematic/models/commands.py +++ b/schematic/models/commands.py @@ -154,7 +154,7 @@ def submit_manifest( data_model_labels, table_column_names, annotation_keys, - file_annotations_upload + file_annotations_upload, ): """ Running CLI with manifest validation (optional) and submission options. @@ -181,7 +181,7 @@ def submit_manifest( table_manipulation=table_manipulation, table_column_names=table_column_names, annotation_keys=annotation_keys, - file_annotations_upload=file_annotations_upload + file_annotations_upload=file_annotations_upload, ) if manifest_id: From fb4dfb475e119aaab900b5ee73bf11ecbe05e29a Mon Sep 17 00:00:00 2001 From: linglp Date: Wed, 3 Apr 2024 13:42:11 -0400 Subject: [PATCH 14/51] update test manifest csv --- tests/data/mock_manifests/bulkrnaseq_test.csv | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/data/mock_manifests/bulkrnaseq_test.csv b/tests/data/mock_manifests/bulkrnaseq_test.csv index 2b18715d8..72dd17e9b 100644 --- a/tests/data/mock_manifests/bulkrnaseq_test.csv +++ b/tests/data/mock_manifests/bulkrnaseq_test.csv @@ -1,4 +1,4 @@ -Filename,Sample ID,File Format,Component,Genome Build,Genome FASTA,Id,entityId -Test sub folder/sample_file_one.txt,1,BAM,BulkRNA-seqAssay,GRCh37,,b0393125-7502-4c55-9fbd-eea2668a3e86,syn55229695 -Test sub folder/sample_file_three.txt,2,BAM,BulkRNA-seqAssay,GRCh37,,1cc5675e-6731-4cd2-ae2d-3a925cbff5b8,syn55229697 -Test sub folder/sample_file_two.txt,3,BAM,BulkRNA-seqAssay,GRCh37,,49156121-d736-48cf-a12b-02f7d84a0062,syn55229696 +Filename,Sample ID,File Format,Component,Genome Build,Genome FASTA +Test sub folder/sample_file_one.txt,1,BAM,BulkRNA-seqAssay,GRCh37, +Test sub folder/sample_file_three.txt,2,BAM,BulkRNA-seqAssay,GRCh37, +Test sub folder/sample_file_two.txt,3,BAM,BulkRNA-seqAssay,GRCh37, From 84a1fd0ee7faf068b8221ecee96b4e6ca37a2a90 Mon Sep 17 00:00:00 2001 From: linglp Date: Wed, 3 Apr 2024 14:34:52 -0400 Subject: [PATCH 15/51] patch submit function --- tests/test_cli.py | 40 +++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/tests/test_cli.py b/tests/test_cli.py index 3c0927ab1..8d32d9a19 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,6 +1,7 @@ import os import pytest +from unittest.mock import patch from click.testing import CliRunner @@ -155,29 +156,30 @@ def test_submit_file_based_manifest( ): manifest_path = helpers.get_data_path("mock_manifests/bulkrnaseq_test.csv") config.load_config("config_example.yml") - config.synapse_master_fileview_id = "syn55229694" + config.synapse_master_fileview_id = "syn1234" if with_annotations: annotation_opt = "-fa" else: annotation_opt = "--no-fa" - result = runner.invoke( - model, - [ - "-c", - config.config_path, - "submit", - "-mrt", - "file_only", - "-d", - "syn55229693", - "-vc", - "BulkRNA-seqAssay", - "-mp", - manifest_path, - annotation_opt, - ], - ) + with patch("schematic.models.metadata.MetadataModel.submit_metadata_manifest"): + result = runner.invoke( + model, + [ + "-c", + config.config_path, + "submit", + "-mrt", + "file_only", + "-d", + "syn12345", + "-vc", + "BulkRNA-seqAssay", + "-mp", + manifest_path, + annotation_opt, + ], + ) - assert result.exit_code == 0 + assert result.exit_code == 0 From 0052a0c051d6ab473c58a21b59a7e8c7b09e1a07 Mon Sep 17 00:00:00 2001 From: linglp Date: Wed, 3 Apr 2024 15:26:50 -0400 Subject: [PATCH 16/51] add test --- tests/test_metadata.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/tests/test_metadata.py b/tests/test_metadata.py index 9bc5e5ae5..e28e7f1b1 100644 --- a/tests/test_metadata.py +++ b/tests/test_metadata.py @@ -3,6 +3,7 @@ import pytest +from unittest.mock import Mock, patch from schematic.models.metadata import MetadataModel logging.basicConfig(level=logging.DEBUG) @@ -93,3 +94,27 @@ def test_populate_manifest(self, helpers, return_excel, data_model_labels): os.remove(output_path) except: pass + + @pytest.mark.parametrize("file_annotations_upload", [True, False]) + @pytest.mark.parametrize("restrict_rules", [True, False]) + @pytest.mark.parametrize("hide_blanks", [True, False]) + @pytest.mark.parametrize( + "data_model_labels", + ["display_label", "class_label"], + ids=["data_model_labels-display_label", "data_model_labels-class_label"], + ) + @pytest.mark.parametrize("validate_component", [None, "BulkRNA-seqAssay"]) + def test_submit_metadata_manifest(self, helpers, file_annotations_upload, restrict_rules, data_model_labels, validate_component, hide_blanks): + meta_data_model = metadata_model(helpers, data_model_labels) + with patch( + "schematic.models.metadata.MetadataModel.validateModelManifest", + return_value=([],[]) + ): + with patch( + "schematic.store.synapse.SynapseStorage.associateMetadataWithFiles", + return_value="mock manifest id" + ): + mock_manifest_path = helpers.get_data_path("mock_manifests/bulkrnaseq_test.csv") + data_model_jsonld = helpers.get_data_path("example.model.jsonld") + mock_manifest_id = meta_data_model.submit_metadata_manifest(manifest_path=mock_manifest_path, path_to_json_ld=data_model_jsonld, validate_component = validate_component, dataset_id="mock dataset id", manifest_record_type="file_only", restrict_rules=restrict_rules, file_annotations_upload=file_annotations_upload, hide_blanks=hide_blanks) + assert mock_manifest_id == "mock manifest id" From 7ce13caf90d82ad996f0ccc6cbdbf1ef14876f43 Mon Sep 17 00:00:00 2001 From: linglp Date: Wed, 3 Apr 2024 23:04:41 -0400 Subject: [PATCH 17/51] add unit tests --- schematic/store/synapse.py | 4 +- tests/test_store.py | 393 +++++++++++++++++++++++++++++-------- 2 files changed, 318 insertions(+), 79 deletions(-) diff --git a/schematic/store/synapse.py b/schematic/store/synapse.py index 993b7ab1d..47e93a759 100644 --- a/schematic/store/synapse.py +++ b/schematic/store/synapse.py @@ -1303,11 +1303,11 @@ def upload_manifest_file( parent=datasetId, name=file_name_new, ) - manifest_synapse_file_id = self.syn.store( manifestSynapseFile, isRestricted=restrict_manifest ).id - changeFileMetaData( + + synapseutils.copy_functions.changeFileMetaData( syn=self.syn, entity=manifest_synapse_file_id, downloadAs=file_name_new ) diff --git a/tests/test_store.py b/tests/test_store.py index 9b3048f9b..6b25aca22 100644 --- a/tests/test_store.py +++ b/tests/test_store.py @@ -4,14 +4,15 @@ import math import os from time import sleep -from unittest.mock import Mock, patch +from unittest.mock import patch, MagicMock import pandas as pd import pytest -from pandas.testing import assert_frame_equal +from pandas.testing import assert_frame_equal, assert_series_equal from synapseclient import EntityViewSchema, Folder from synapseclient.core.exceptions import SynapseHTTPError from synapseclient.entity import File +from synapseutils.copy_functions import changeFileMetaData from schematic.schemas.data_model_parser import DataModelParser from schematic.schemas.data_model_graph import DataModelGraph, DataModelGraphExplorer @@ -81,6 +82,25 @@ def datasetId(synapse_store, projectId, helpers): yield datasetId +@pytest.fixture +def dmge(helpers, config): + # associate org FollowUp metadata with files + inputModelLocaiton = helpers.get_data_path(os.path.basename(config.model_location)) + data_model_parser = DataModelParser(path_to_data_model=inputModelLocaiton) + # Parse Model + parsed_data_model = data_model_parser.parse_model() + + # Instantiate DataModelGraph + data_model_grapher = DataModelGraph(parsed_data_model) + + # Generate graph + graph_data_model = data_model_grapher.graph + + # Instantiate DataModelGraphExplorer + dmge = DataModelGraphExplorer(graph_data_model) + yield dmge + + def raise_final_error(retry_state): return retry_state.outcome.result() @@ -179,24 +199,8 @@ def test_annotation_submission( datasetId, manifest_record_type, config: Configuration, + dmge, ): - # Upload dataset annotations - - # Instantiate DataModelParser - data_model_parser = DataModelParser(path_to_data_model=config.model_location) - - # Parse Model - parsed_data_model = data_model_parser.parse_model() - - # Instantiate DataModelGraph - data_model_grapher = DataModelGraph(parsed_data_model) - - # Generate graph - graph_data_model = data_model_grapher.graph - - # Instantiate DataModelGraphExplorer - dmge = DataModelGraphExplorer(graph_data_model) - manifest_id = synapse_store.associateMetadataWithFiles( dmge=dmge, metadataManifestPath=helpers.get_data_path(manifest_path), @@ -525,6 +529,7 @@ def test_createTable( datasetId, table_column_names, annotation_keys, + dmge, ): table_manipulation = None @@ -544,25 +549,6 @@ def test_createTable( # associate metadata with files manifest_path = "mock_manifests/table_manifest.csv" - inputModelLocaiton = helpers.get_data_path( - os.path.basename(config.model_location) - ) - - # Instantiate DataModelParser - data_model_parser = DataModelParser(path_to_data_model=inputModelLocaiton) - - # Parse Model - parsed_data_model = data_model_parser.parse_model() - - # Instantiate DataModelGraph - data_model_grapher = DataModelGraph(parsed_data_model) - - # Generate graph - graph_data_model = data_model_grapher.graph - - # Instantiate DataModelGraphExplorer - dmge = DataModelGraphExplorer(graph_data_model) - # updating file view on synapse takes a long time manifestId = synapse_store.associateMetadataWithFiles( dmge=dmge, @@ -582,7 +568,6 @@ def test_createTable( # assert table exists assert table_name in existing_tables.keys() - @pytest.mark.parametrize( "table_column_names", ["display_label", "class_label"], @@ -602,6 +587,7 @@ def test_replaceTable( datasetId, table_column_names, annotation_keys, + dmge, ): table_manipulation = "replace" @@ -622,25 +608,6 @@ def test_replaceTable( not in synapse_store.get_table_info(projectId=projectId).keys() ) - # associate org FollowUp metadata with files - inputModelLocaiton = helpers.get_data_path( - os.path.basename(config.model_location) - ) - # sg = SchemaGenerator(inputModelLocaiton) - - data_model_parser = DataModelParser(path_to_data_model=inputModelLocaiton) - # Parse Model - parsed_data_model = data_model_parser.parse_model() - - # Instantiate DataModelGraph - data_model_grapher = DataModelGraph(parsed_data_model) - - # Generate graph - graph_data_model = data_model_grapher.graph - - # Instantiate DataModelGraphExplorer - dmge = DataModelGraphExplorer(graph_data_model) - # updating file view on synapse takes a long time manifestId = synapse_store.associateMetadataWithFiles( dmge=dmge, @@ -693,7 +660,6 @@ def test_replaceTable( # delete table synapse_store.syn.delete(tableId) - @pytest.mark.parametrize( "annotation_keys", ["display_label", "class_label"], @@ -707,6 +673,7 @@ def test_upsertTable( projectId, datasetId, annotation_keys, + dmge, ): table_manipulation = "upsert" @@ -727,24 +694,6 @@ def test_upsertTable( not in synapse_store.get_table_info(projectId=projectId).keys() ) - # associate org FollowUp metadata with files - inputModelLocaiton = helpers.get_data_path( - os.path.basename(config.model_location) - ) - - data_model_parser = DataModelParser(path_to_data_model=inputModelLocaiton) - # Parse Model - parsed_data_model = data_model_parser.parse_model() - - # Instantiate DataModelGraph - data_model_grapher = DataModelGraph(parsed_data_model) - - # Generate graph - graph_data_model = data_model_grapher.graph - - # Instantiate DataModelGraphExplorer - dmge = DataModelGraphExplorer(graph_data_model) - # updating file view on synapse takes a long time manifestId = synapse_store.associateMetadataWithFiles( dmge=dmge, @@ -890,3 +839,293 @@ def test_entity_type_checking(self, synapse_store, entity_id, caplog): "You are using entity type: folder. Please provide a file ID" in record.message ) + + +class TestManifestUpload: + def test_add_annotations_to_entities_files(self, helpers, synapse_store, dmge): + with patch( + "schematic.store.synapse.SynapseStorage.getFilesInStorageDataset", + return_value=[ + ("syn1224", "Test sub folder/sample_file_one.txt"), + ("syn1225", "Test sub folder/sample_file_three.txt"), + ("syn1226", "Test sub folder/sample_file_two.txt"), + ], + ): + manifest_path = helpers.get_data_path("mock_manifests/bulkrnaseq_test.csv") + manifest_df = helpers.get_data_frame(manifest_path) + new_df = synapse_store.add_annotations_to_entities_files( + dmge, + manifest_df, + manifest_record_type="entity", + datasetId="mock id", + hideBlanks=True, + ) + file_names_lst = new_df["Filename"].tolist() + entity_ids_lst = new_df["entityId"].tolist() + + # make sure that entityId and Id columns get added + assert "entityId" and "Id" in new_df.columns + assert file_names_lst == [ + "Test sub folder/sample_file_one.txt", + "Test sub folder/sample_file_three.txt", + "Test sub folder/sample_file_two.txt", + ] + assert entity_ids_lst == ["syn1224", "syn1225", "syn1226"] + + @pytest.mark.parametrize( + "mock_manifest_file_path", + [ + "mock_manifests/test_mock_manifest.csv", + "mock_manifests/test_mock_manifest_censored.csv", + ], + ) + def test_upload_manifest_file( + self, helpers, synapse_store, mock_manifest_file_path + ): + test_df = pd.DataFrame( + { + "Filename": { + 0: "Test sub folder/sample_file_one.txt", + 1: "Test sub folder/sample_file_three.txt", + 2: "Test sub folder/sample_file_two.txt", + }, + "Sample ID": {0: 1, 1: 2, 2: 3}, + "File Format": {0: "BAM", 1: "BAM", 2: "BAM"}, + "Component": { + 0: "BulkRNA-seqAssay", + 1: "BulkRNA-seqAssay", + 2: "BulkRNA-seqAssay", + }, + "Genome Build": {0: "GRCh37", 1: "GRCh37", 2: "GRCh37"}, + "Genome FASTA": {0: "", 1: "", 2: ""}, + "Id": {0: "mock1", 1: "mock2", 2: "mock3"}, + "entityId": {0: "syn1224", 1: "syn1225", 2: "syn1226"}, + } + ) + with patch("synapseclient.Synapse.store") as syn_store_mock, patch( + "synapseutils.copy_functions.changeFileMetaData" + ): + syn_store_mock.return_value.id = "mock manifest id" + mock_file_path = helpers.get_data_path(mock_manifest_file_path) + mock_manifest_synapse_file_id = synapse_store.upload_manifest_file( + manifest=test_df, + metadataManifestPath=mock_file_path, + datasetId="mock dataset id", + restrict_manifest=True, + ) + assert mock_manifest_synapse_file_id == "mock manifest id" + + @pytest.mark.parametrize("file_annotations_upload", [True, False]) + @pytest.mark.parametrize("hideBlanks", [True, False]) + @pytest.mark.parametrize("restrict", [True, False]) + @pytest.mark.parametrize("manifest_record_type", ["entity", "table", "both"]) + def test_upload_manifest_as_csv( + self, + helpers, + dmge, + synapse_store, + file_annotations_upload, + manifest_record_type, + hideBlanks, + restrict, + ): + with ( + patch( + "schematic.store.synapse.SynapseStorage.add_annotations_to_entities_files" + ) as add_anno_mock, + patch( + "schematic.store.synapse.SynapseStorage.upload_manifest_file", + return_value="mock manifest id", + ) as upload_manifest_mock, + patch( + "schematic.store.synapse.SynapseStorage.format_manifest_annotations" + ) as format_manifest_anno_mock, + patch.object(synapse_store.syn, "set_annotations"), + ): + manifest_path = helpers.get_data_path("mock_manifests/bulkrnaseq_test.csv") + manifest_df = helpers.get_data_frame(manifest_path) + synapse_store.upload_manifest_as_csv( + dmge, + manifest=manifest_df, + metadataManifestPath=manifest_path, + datasetId="mock synapse id", + restrict=restrict, + manifest_record_type=manifest_record_type, + file_annotations_upload=file_annotations_upload, + hideBlanks=hideBlanks, + component_name="BulkRNA-seqAssay", + annotation_keys="class_label", + ) + if file_annotations_upload: + add_anno_mock.assert_called_once() + else: + add_anno_mock.assert_not_called() + + upload_manifest_mock.assert_called_once() + format_manifest_anno_mock.assert_called_once() + + @pytest.mark.parametrize("file_annotations_upload", [True, False]) + @pytest.mark.parametrize("hideBlanks", [True, False]) + @pytest.mark.parametrize("restrict", [True, False]) + @pytest.mark.parametrize("manifest_record_type", ["entity", "table", "both"]) + def test_upload_manifest_as_table( + self, + helpers, + synapse_store, + dmge, + file_annotations_upload, + hideBlanks, + restrict, + manifest_record_type, + ): + mock_df = pd.DataFrame() + with ( + patch( + "schematic.store.synapse.SynapseStorage.uploadDB", + return_value=["mock_table_id", mock_df, "mock_table_manifest"], + ) as update_db_mock, + patch( + "schematic.store.synapse.SynapseStorage.add_annotations_to_entities_files" + ) as add_anno_mock, + patch( + "schematic.store.synapse.SynapseStorage.upload_manifest_file", + return_value="mock manifest id", + ), + patch.object(synapse_store.syn, "set_annotations") as set_anno_mock, + patch( + "schematic.store.synapse.SynapseStorage.format_manifest_annotations" + ) as format_manifest_anno_mock, + ): + manifest_path = helpers.get_data_path("mock_manifests/bulkrnaseq_test.csv") + manifest_df = helpers.get_data_frame(manifest_path) + synapse_store.upload_manifest_as_table( + dmge, + manifest=manifest_df, + metadataManifestPath=manifest_path, + datasetId="mock synapse id", + table_name="new table name", + component_name="BulkRNA-seqAssay", + restrict=restrict, + manifest_record_type=manifest_record_type, + hideBlanks=hideBlanks, + table_manipulation="replace", + table_column_names="class_label", + annotation_keys="class_label", + file_annotations_upload=file_annotations_upload, + ) + if file_annotations_upload: + add_anno_mock.assert_called_once() + else: + add_anno_mock.assert_not_called() + # need to set annotations for both table and files + format_manifest_anno_mock.call_count == 2 + set_anno_mock.call_count == 2 + update_db_mock.call_count == 2 + + @pytest.mark.parametrize("file_annotations_upload", [True, False]) + @pytest.mark.parametrize("hideBlanks", [True, False]) + @pytest.mark.parametrize("restrict", [True, False]) + @pytest.mark.parametrize("manifest_record_type", ["entity", "table", "both"]) + def test_upload_manifest_combo( + self, + helpers, + synapse_store, + dmge, + file_annotations_upload, + hideBlanks, + restrict, + manifest_record_type, + ): + mock_df = pd.DataFrame() + manifest_path = helpers.get_data_path("mock_manifests/bulkrnaseq_test.csv") + manifest_df = helpers.get_data_frame(manifest_path) + with ( + patch( + "schematic.store.synapse.SynapseStorage.uploadDB", + return_value=["mock_table_id", mock_df, "mock_table_manifest"], + ) as update_db_mock, + patch( + "schematic.store.synapse.SynapseStorage.add_annotations_to_entities_files" + ) as add_anno_mock, + patch( + "schematic.store.synapse.SynapseStorage.upload_manifest_file", + return_value="mock manifest id", + ), + patch.object(synapse_store.syn, "set_annotations") as set_anno_mock, + patch( + "schematic.store.synapse.SynapseStorage.format_manifest_annotations" + ) as format_manifest_anno_mock, + ): + synapse_store.upload_manifest_combo( + dmge, + manifest=manifest_df, + metadataManifestPath=manifest_path, + datasetId="mock synapse id", + table_name="new table name", + component_name="BulkRNA-seqAssay", + restrict=restrict, + manifest_record_type=manifest_record_type, + hideBlanks=hideBlanks, + table_manipulation="replace", + table_column_names="class_label", + annotation_keys="class_label", + file_annotations_upload=file_annotations_upload, + ) + + if file_annotations_upload: + add_anno_mock.assert_called_once() + else: + add_anno_mock.assert_not_called() + # need to set annotations for both table and files + format_manifest_anno_mock.call_count == 2 + set_anno_mock.call_count == 2 + update_db_mock.call_count == 2 + + @pytest.mark.parametrize( + "manifest_record_type,expected", + [ + ("file_only", "mock_id_csv"), + ("table_and_file", "mock_id_table"), + ("file_and_entities", "mock_id_csv"), + ("table_file_and_entities", "mock_id_entities"), + ], + ) + @pytest.mark.parametrize("restrict_rules", [True, False]) + @pytest.mark.parametrize("hide_blanks", [True, False]) + @pytest.mark.parametrize("file_annotations_upload", [True, False]) + def test_associateMetadataWithFiles( + self, + helpers, + restrict_rules, + hide_blanks, + synapse_store, + manifest_record_type, + expected, + file_annotations_upload, + dmge, + ): + with ( + patch( + "schematic.store.synapse.SynapseStorage.upload_manifest_as_csv", + return_value="mock_id_csv", + ), + patch( + "schematic.store.synapse.SynapseStorage.upload_manifest_as_table", + return_value="mock_id_table", + ), + patch( + "schematic.store.synapse.SynapseStorage.upload_manifest_combo", + return_value="mock_id_entities", + ), + ): + manifest_path = "mock_manifests/bulkrnaseq_test.csv" + manifest_id = synapse_store.associateMetadataWithFiles( + dmge=dmge, + metadataManifestPath=helpers.get_data_path(manifest_path), + datasetId="mock_dataset_id", + hideBlanks=hide_blanks, + restrict_manifest=restrict_rules, + manifest_record_type=manifest_record_type, + file_annotations_upload=file_annotations_upload, + ) + assert manifest_id == expected From b41139e9036c3acdc7d8d5653d64100221e155d1 Mon Sep 17 00:00:00 2001 From: linglp Date: Wed, 3 Apr 2024 23:15:11 -0400 Subject: [PATCH 18/51] add typing --- tests/test_cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_cli.py b/tests/test_cli.py index 8d32d9a19..838192e27 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -152,8 +152,8 @@ def test_get_example_manifest_excel( @pytest.mark.parametrize("with_annotations", [True, False]) def test_submit_file_based_manifest( - self, runner, helpers, with_annotations, config: Configuration - ): + self, runner: CliRunner, helpers, with_annotations: bool, config: Configuration + ) -> None: manifest_path = helpers.get_data_path("mock_manifests/bulkrnaseq_test.csv") config.load_config("config_example.yml") config.synapse_master_fileview_id = "syn1234" From 0464b672ad38ddc8199c0c7f28e7803252884673 Mon Sep 17 00:00:00 2001 From: linglp Date: Wed, 3 Apr 2024 23:18:22 -0400 Subject: [PATCH 19/51] add typing --- tests/test_metadata.py | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/tests/test_metadata.py b/tests/test_metadata.py index e28e7f1b1..fd4cc5029 100644 --- a/tests/test_metadata.py +++ b/tests/test_metadata.py @@ -5,6 +5,7 @@ from unittest.mock import Mock, patch from schematic.models.metadata import MetadataModel +from typing import Optional logging.basicConfig(level=logging.DEBUG) logger = logging.getLogger(__name__) @@ -104,17 +105,36 @@ def test_populate_manifest(self, helpers, return_excel, data_model_labels): ids=["data_model_labels-display_label", "data_model_labels-class_label"], ) @pytest.mark.parametrize("validate_component", [None, "BulkRNA-seqAssay"]) - def test_submit_metadata_manifest(self, helpers, file_annotations_upload, restrict_rules, data_model_labels, validate_component, hide_blanks): + def test_submit_metadata_manifest( + self, + helpers, + file_annotations_upload: bool, + restrict_rules: bool, + data_model_labels: str, + hide_blanks: bool, + validate_component: Optional[str], + ) -> None: meta_data_model = metadata_model(helpers, data_model_labels) with patch( "schematic.models.metadata.MetadataModel.validateModelManifest", - return_value=([],[]) + return_value=([], []), ): with patch( "schematic.store.synapse.SynapseStorage.associateMetadataWithFiles", - return_value="mock manifest id" + return_value="mock manifest id", ): - mock_manifest_path = helpers.get_data_path("mock_manifests/bulkrnaseq_test.csv") + mock_manifest_path = helpers.get_data_path( + "mock_manifests/bulkrnaseq_test.csv" + ) data_model_jsonld = helpers.get_data_path("example.model.jsonld") - mock_manifest_id = meta_data_model.submit_metadata_manifest(manifest_path=mock_manifest_path, path_to_json_ld=data_model_jsonld, validate_component = validate_component, dataset_id="mock dataset id", manifest_record_type="file_only", restrict_rules=restrict_rules, file_annotations_upload=file_annotations_upload, hide_blanks=hide_blanks) + mock_manifest_id = meta_data_model.submit_metadata_manifest( + manifest_path=mock_manifest_path, + path_to_json_ld=data_model_jsonld, + validate_component=validate_component, + dataset_id="mock dataset id", + manifest_record_type="file_only", + restrict_rules=restrict_rules, + file_annotations_upload=file_annotations_upload, + hide_blanks=hide_blanks, + ) assert mock_manifest_id == "mock manifest id" From 37d72ac30546f47ba6055f88b05c28045ac84fcf Mon Sep 17 00:00:00 2001 From: linglp Date: Wed, 3 Apr 2024 23:18:37 -0400 Subject: [PATCH 20/51] remove unused import --- tests/test_store.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_store.py b/tests/test_store.py index 6b25aca22..a24fb1c09 100644 --- a/tests/test_store.py +++ b/tests/test_store.py @@ -12,7 +12,6 @@ from synapseclient import EntityViewSchema, Folder from synapseclient.core.exceptions import SynapseHTTPError from synapseclient.entity import File -from synapseutils.copy_functions import changeFileMetaData from schematic.schemas.data_model_parser import DataModelParser from schematic.schemas.data_model_graph import DataModelGraph, DataModelGraphExplorer From 6dceddf128485e068daf81b7e4d6a8119793a034 Mon Sep 17 00:00:00 2001 From: linglp Date: Wed, 3 Apr 2024 23:19:21 -0400 Subject: [PATCH 21/51] add typing --- schematic/models/commands.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/schematic/models/commands.py b/schematic/models/commands.py index 4a9e3d99a..ed0709351 100644 --- a/schematic/models/commands.py +++ b/schematic/models/commands.py @@ -154,7 +154,7 @@ def submit_manifest( data_model_labels, table_column_names, annotation_keys, - file_annotations_upload, + file_annotations_upload: bool, ): """ Running CLI with manifest validation (optional) and submission options. From 1443d68741ad4fd12f80903e5d7d6059b2eafef2 Mon Sep 17 00:00:00 2001 From: linglp Date: Wed, 3 Apr 2024 23:26:07 -0400 Subject: [PATCH 22/51] add more type hinting --- tests/test_store.py | 59 +++++++++++++++++++++++---------------------- 1 file changed, 30 insertions(+), 29 deletions(-) diff --git a/tests/test_store.py b/tests/test_store.py index a24fb1c09..19fd899f4 100644 --- a/tests/test_store.py +++ b/tests/test_store.py @@ -16,6 +16,7 @@ from schematic.schemas.data_model_parser import DataModelParser from schematic.schemas.data_model_graph import DataModelGraph, DataModelGraphExplorer from schematic.schemas.data_model_relationships import DataModelRelationships +from schematic.store.synapse import SynapseStorage from schematic.models.metadata import MetadataModel from schematic.store.base import BaseStorage @@ -921,13 +922,13 @@ def test_upload_manifest_file( def test_upload_manifest_as_csv( self, helpers, - dmge, - synapse_store, - file_annotations_upload, - manifest_record_type, - hideBlanks, - restrict, - ): + dmge: DataModelGraphExplorer, + synapse_store: SynapseStorage, + file_annotations_upload: bool, + manifest_record_type: str, + hideBlanks: bool, + restrict: bool, + ) -> None: with ( patch( "schematic.store.synapse.SynapseStorage.add_annotations_to_entities_files" @@ -970,13 +971,13 @@ def test_upload_manifest_as_csv( def test_upload_manifest_as_table( self, helpers, - synapse_store, - dmge, - file_annotations_upload, - hideBlanks, - restrict, - manifest_record_type, - ): + synapse_store: SynapseStorage, + dmge: DataModelGraphExplorer, + file_annotations_upload: bool, + hideBlanks: bool, + restrict: bool, + manifest_record_type: str, + ) -> None: mock_df = pd.DataFrame() with ( patch( @@ -1028,13 +1029,13 @@ def test_upload_manifest_as_table( def test_upload_manifest_combo( self, helpers, - synapse_store, - dmge, - file_annotations_upload, - hideBlanks, - restrict, - manifest_record_type, - ): + synapse_store: SynapseStorage, + dmge: DataModelGraphExplorer, + file_annotations_upload: bool, + hideBlanks: bool, + restrict: bool, + manifest_record_type: str, + ) -> None: mock_df = pd.DataFrame() manifest_path = helpers.get_data_path("mock_manifests/bulkrnaseq_test.csv") manifest_df = helpers.get_data_frame(manifest_path) @@ -1095,14 +1096,14 @@ def test_upload_manifest_combo( def test_associateMetadataWithFiles( self, helpers, - restrict_rules, - hide_blanks, - synapse_store, - manifest_record_type, - expected, - file_annotations_upload, - dmge, - ): + restrict_rules: bool, + hide_blanks: bool, + synapse_store: SynapseStorage, + manifest_record_type: str, + expected: str, + file_annotations_upload: bool, + dmge: DataModelGraphExplorer, + ) -> None: with ( patch( "schematic.store.synapse.SynapseStorage.upload_manifest_as_csv", From b5ae47655bd0fb6c3581fd9e375e614bac904a36 Mon Sep 17 00:00:00 2001 From: linglp Date: Wed, 3 Apr 2024 23:37:44 -0400 Subject: [PATCH 23/51] organize import --- tests/test_metadata.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_metadata.py b/tests/test_metadata.py index fd4cc5029..7fc9f357d 100644 --- a/tests/test_metadata.py +++ b/tests/test_metadata.py @@ -1,11 +1,11 @@ -import os import logging +import os +from typing import Optional +from unittest.mock import patch import pytest -from unittest.mock import Mock, patch from schematic.models.metadata import MetadataModel -from typing import Optional logging.basicConfig(level=logging.DEBUG) logger = logging.getLogger(__name__) From be1d32ca714fd354b81e566bddd0e4071a06ee59 Mon Sep 17 00:00:00 2001 From: linglp Date: Wed, 3 Apr 2024 23:39:36 -0400 Subject: [PATCH 24/51] remove unused import --- tests/test_store.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/tests/test_store.py b/tests/test_store.py index 19fd899f4..c829e3f45 100644 --- a/tests/test_store.py +++ b/tests/test_store.py @@ -4,26 +4,23 @@ import math import os from time import sleep -from unittest.mock import patch, MagicMock +from unittest.mock import patch import pandas as pd import pytest -from pandas.testing import assert_frame_equal, assert_series_equal +from pandas.testing import assert_frame_equal from synapseclient import EntityViewSchema, Folder from synapseclient.core.exceptions import SynapseHTTPError from synapseclient.entity import File +from schematic.models.metadata import MetadataModel +from schematic.schemas.data_model_graph import (DataModelGraph, + DataModelGraphExplorer) from schematic.schemas.data_model_parser import DataModelParser -from schematic.schemas.data_model_graph import DataModelGraph, DataModelGraphExplorer from schematic.schemas.data_model_relationships import DataModelRelationships -from schematic.store.synapse import SynapseStorage - -from schematic.models.metadata import MetadataModel from schematic.store.base import BaseStorage -from schematic.store.synapse import ( - DatasetFileView, - ManifestDownload, -) +from schematic.store.synapse import (DatasetFileView, ManifestDownload, + SynapseStorage) logging.basicConfig(level=logging.DEBUG) logger = logging.getLogger(__name__) From a59a86f8d996b08b0e74c610626849af906978cf Mon Sep 17 00:00:00 2001 From: linglp Date: Wed, 3 Apr 2024 23:50:15 -0400 Subject: [PATCH 25/51] add type hinting --- tests/test_store.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/tests/test_store.py b/tests/test_store.py index c829e3f45..e1b3a8fe4 100644 --- a/tests/test_store.py +++ b/tests/test_store.py @@ -4,15 +4,16 @@ import math import os from time import sleep +from typing import Generator from unittest.mock import patch import pandas as pd import pytest -from pandas.testing import assert_frame_equal from synapseclient import EntityViewSchema, Folder from synapseclient.core.exceptions import SynapseHTTPError from synapseclient.entity import File +from schematic.configuration.configuration import Configuration from schematic.models.metadata import MetadataModel from schematic.schemas.data_model_graph import (DataModelGraph, DataModelGraphExplorer) @@ -80,7 +81,16 @@ def datasetId(synapse_store, projectId, helpers): @pytest.fixture -def dmge(helpers, config): +def dmge(helpers, config:Configuration) -> Generator[DataModelGraphExplorer, None, None]: + """initiate data model explorer + + Args: + helpers (pytest fixture): fixture + config (Configuration): configuration class + + Yields: + DataModelGraphExplorer + """ # associate org FollowUp metadata with files inputModelLocaiton = helpers.get_data_path(os.path.basename(config.model_location)) data_model_parser = DataModelParser(path_to_data_model=inputModelLocaiton) From 57c986e19ca20fe84ed93851b122a158f0e1fb03 Mon Sep 17 00:00:00 2001 From: linglp Date: Wed, 3 Apr 2024 23:53:37 -0400 Subject: [PATCH 26/51] add assert frame equal import back --- tests/test_store.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_store.py b/tests/test_store.py index e1b3a8fe4..7a1793b82 100644 --- a/tests/test_store.py +++ b/tests/test_store.py @@ -12,6 +12,7 @@ from synapseclient import EntityViewSchema, Folder from synapseclient.core.exceptions import SynapseHTTPError from synapseclient.entity import File +from pandas.testing import assert_frame_equal from schematic.configuration.configuration import Configuration from schematic.models.metadata import MetadataModel From 2279e980c9b77c055570f85825d23bf96aaa8003 Mon Sep 17 00:00:00 2001 From: linglp Date: Wed, 3 Apr 2024 23:56:13 -0400 Subject: [PATCH 27/51] update comment --- tests/test_store.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_store.py b/tests/test_store.py index 7a1793b82..c83f6f875 100644 --- a/tests/test_store.py +++ b/tests/test_store.py @@ -871,7 +871,7 @@ def test_add_annotations_to_entities_files(self, helpers, synapse_store, dmge): file_names_lst = new_df["Filename"].tolist() entity_ids_lst = new_df["entityId"].tolist() - # make sure that entityId and Id columns get added + # test entityId and Id columns get added assert "entityId" and "Id" in new_df.columns assert file_names_lst == [ "Test sub folder/sample_file_one.txt", From d8397fee4e6be8d1291cc02e560a662d58e9b2c1 Mon Sep 17 00:00:00 2001 From: linglp Date: Wed, 3 Apr 2024 23:58:09 -0400 Subject: [PATCH 28/51] add docstring --- tests/test_store.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_store.py b/tests/test_store.py index c83f6f875..ea9334346 100644 --- a/tests/test_store.py +++ b/tests/test_store.py @@ -850,6 +850,8 @@ def test_entity_type_checking(self, synapse_store, entity_id, caplog): class TestManifestUpload: + """Test manifest upload + """ def test_add_annotations_to_entities_files(self, helpers, synapse_store, dmge): with patch( "schematic.store.synapse.SynapseStorage.getFilesInStorageDataset", From f9968f08c853f7b0a92aad1bf4aa5ec748e113c3 Mon Sep 17 00:00:00 2001 From: linglp Date: Wed, 3 Apr 2024 23:59:44 -0400 Subject: [PATCH 29/51] change name of parameter --- tests/test_store.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/test_store.py b/tests/test_store.py index ea9334346..7dfcb5bce 100644 --- a/tests/test_store.py +++ b/tests/test_store.py @@ -926,7 +926,7 @@ def test_upload_manifest_file( assert mock_manifest_synapse_file_id == "mock manifest id" @pytest.mark.parametrize("file_annotations_upload", [True, False]) - @pytest.mark.parametrize("hideBlanks", [True, False]) + @pytest.mark.parametrize("hide_blanks", [True, False]) @pytest.mark.parametrize("restrict", [True, False]) @pytest.mark.parametrize("manifest_record_type", ["entity", "table", "both"]) def test_upload_manifest_as_csv( @@ -936,7 +936,7 @@ def test_upload_manifest_as_csv( synapse_store: SynapseStorage, file_annotations_upload: bool, manifest_record_type: str, - hideBlanks: bool, + hide_blanks: bool, restrict: bool, ) -> None: with ( @@ -962,7 +962,7 @@ def test_upload_manifest_as_csv( restrict=restrict, manifest_record_type=manifest_record_type, file_annotations_upload=file_annotations_upload, - hideBlanks=hideBlanks, + hideBlanks=hide_blanks, component_name="BulkRNA-seqAssay", annotation_keys="class_label", ) @@ -975,7 +975,7 @@ def test_upload_manifest_as_csv( format_manifest_anno_mock.assert_called_once() @pytest.mark.parametrize("file_annotations_upload", [True, False]) - @pytest.mark.parametrize("hideBlanks", [True, False]) + @pytest.mark.parametrize("hide_blanks", [True, False]) @pytest.mark.parametrize("restrict", [True, False]) @pytest.mark.parametrize("manifest_record_type", ["entity", "table", "both"]) def test_upload_manifest_as_table( @@ -984,7 +984,7 @@ def test_upload_manifest_as_table( synapse_store: SynapseStorage, dmge: DataModelGraphExplorer, file_annotations_upload: bool, - hideBlanks: bool, + hide_blanks: bool, restrict: bool, manifest_record_type: str, ) -> None: @@ -1017,7 +1017,7 @@ def test_upload_manifest_as_table( component_name="BulkRNA-seqAssay", restrict=restrict, manifest_record_type=manifest_record_type, - hideBlanks=hideBlanks, + hideBlanks=hide_blanks, table_manipulation="replace", table_column_names="class_label", annotation_keys="class_label", @@ -1042,7 +1042,7 @@ def test_upload_manifest_combo( synapse_store: SynapseStorage, dmge: DataModelGraphExplorer, file_annotations_upload: bool, - hideBlanks: bool, + hide_blanks: bool, restrict: bool, manifest_record_type: str, ) -> None: @@ -1075,7 +1075,7 @@ def test_upload_manifest_combo( component_name="BulkRNA-seqAssay", restrict=restrict, manifest_record_type=manifest_record_type, - hideBlanks=hideBlanks, + hideBlanks=hide_blanks, table_manipulation="replace", table_column_names="class_label", annotation_keys="class_label", From 47e188ce378fd907c6d9e5067ccda001fa66b9be Mon Sep 17 00:00:00 2001 From: linglp Date: Thu, 4 Apr 2024 00:03:29 -0400 Subject: [PATCH 30/51] add docstring and typing --- tests/test_store.py | 36 ++++++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/tests/test_store.py b/tests/test_store.py index 7dfcb5bce..3222837a5 100644 --- a/tests/test_store.py +++ b/tests/test_store.py @@ -16,13 +16,11 @@ from schematic.configuration.configuration import Configuration from schematic.models.metadata import MetadataModel -from schematic.schemas.data_model_graph import (DataModelGraph, - DataModelGraphExplorer) +from schematic.schemas.data_model_graph import DataModelGraph, DataModelGraphExplorer from schematic.schemas.data_model_parser import DataModelParser from schematic.schemas.data_model_relationships import DataModelRelationships from schematic.store.base import BaseStorage -from schematic.store.synapse import (DatasetFileView, ManifestDownload, - SynapseStorage) +from schematic.store.synapse import DatasetFileView, ManifestDownload, SynapseStorage logging.basicConfig(level=logging.DEBUG) logger = logging.getLogger(__name__) @@ -82,7 +80,9 @@ def datasetId(synapse_store, projectId, helpers): @pytest.fixture -def dmge(helpers, config:Configuration) -> Generator[DataModelGraphExplorer, None, None]: +def dmge( + helpers, config: Configuration +) -> Generator[DataModelGraphExplorer, None, None]: """initiate data model explorer Args: @@ -850,9 +850,18 @@ def test_entity_type_checking(self, synapse_store, entity_id, caplog): class TestManifestUpload: - """Test manifest upload - """ - def test_add_annotations_to_entities_files(self, helpers, synapse_store, dmge): + """Test manifest upload""" + + def test_add_annotations_to_entities_files( + self, helpers, synapse_store: SynapseStorage, dmge: DataModelGraphExplorer + ) -> None: + """test adding annotations to entities files + + Args: + helpers (fixture): a pytest fixture + synapse_store (SynapseStorage): mock synapse store + dmge (DataModelGraphExplorer): data model grpah explorer object + """ with patch( "schematic.store.synapse.SynapseStorage.getFilesInStorageDataset", return_value=[ @@ -890,8 +899,15 @@ def test_add_annotations_to_entities_files(self, helpers, synapse_store, dmge): ], ) def test_upload_manifest_file( - self, helpers, synapse_store, mock_manifest_file_path - ): + self, helpers, synapse_store: SynapseStorage, mock_manifest_file_path: str + ) -> None: + """test upload manifest file function + + Args: + helpers (fixture): a pytest fixture + synapse_store (SynapseStorage): mock synapse store + dmge (DataModelGraphExplorer): data model grpah explorer object + """ test_df = pd.DataFrame( { "Filename": { From 2dab282cd70c302a2b0b86610081a280f271cbda Mon Sep 17 00:00:00 2001 From: linglp Date: Thu, 4 Apr 2024 00:04:59 -0400 Subject: [PATCH 31/51] update assert statement --- tests/test_store.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_store.py b/tests/test_store.py index 3222837a5..1d3ad24b5 100644 --- a/tests/test_store.py +++ b/tests/test_store.py @@ -883,7 +883,7 @@ def test_add_annotations_to_entities_files( entity_ids_lst = new_df["entityId"].tolist() # test entityId and Id columns get added - assert "entityId" and "Id" in new_df.columns + assert "entityId" in new_df.columns and "Id" in new_df.columns assert file_names_lst == [ "Test sub folder/sample_file_one.txt", "Test sub folder/sample_file_three.txt", From 720a85d367bff3c501a9bce6ad56f645a3ae3bcd Mon Sep 17 00:00:00 2001 From: linglp Date: Thu, 4 Apr 2024 00:06:52 -0400 Subject: [PATCH 32/51] modify assert statement --- tests/test_store.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_store.py b/tests/test_store.py index 1d3ad24b5..a6b489968 100644 --- a/tests/test_store.py +++ b/tests/test_store.py @@ -883,7 +883,8 @@ def test_add_annotations_to_entities_files( entity_ids_lst = new_df["entityId"].tolist() # test entityId and Id columns get added - assert "entityId" in new_df.columns and "Id" in new_df.columns + assert "entityId" in new_df.columns + assert "Id" in new_df.columns assert file_names_lst == [ "Test sub folder/sample_file_one.txt", "Test sub folder/sample_file_three.txt", From fb805aef2b78413bbf71ca8760bdbd11c3ed9525 Mon Sep 17 00:00:00 2001 From: linglp Date: Thu, 4 Apr 2024 11:07:15 -0400 Subject: [PATCH 33/51] use hide blanks --- tests/test_store.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_store.py b/tests/test_store.py index a6b489968..528171842 100644 --- a/tests/test_store.py +++ b/tests/test_store.py @@ -1050,7 +1050,7 @@ def test_upload_manifest_as_table( update_db_mock.call_count == 2 @pytest.mark.parametrize("file_annotations_upload", [True, False]) - @pytest.mark.parametrize("hideBlanks", [True, False]) + @pytest.mark.parametrize("hide_blanks", [True, False]) @pytest.mark.parametrize("restrict", [True, False]) @pytest.mark.parametrize("manifest_record_type", ["entity", "table", "both"]) def test_upload_manifest_combo( From 1760a385ffb46fc064db728aa84837aa6495c572 Mon Sep 17 00:00:00 2001 From: linglp Date: Thu, 4 Apr 2024 11:14:43 -0400 Subject: [PATCH 34/51] add type hinting --- tests/test_store.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_store.py b/tests/test_store.py index 528171842..c83f97dcf 100644 --- a/tests/test_store.py +++ b/tests/test_store.py @@ -207,7 +207,7 @@ def test_annotation_submission( datasetId, manifest_record_type, config: Configuration, - dmge, + dmge: DataModelGraphExplorer, ): manifest_id = synapse_store.associateMetadataWithFiles( dmge=dmge, @@ -537,7 +537,7 @@ def test_createTable( datasetId, table_column_names, annotation_keys, - dmge, + dmge: DataModelGraphExplorer, ): table_manipulation = None @@ -595,7 +595,7 @@ def test_replaceTable( datasetId, table_column_names, annotation_keys, - dmge, + dmge: DataModelGraphExplorer, ): table_manipulation = "replace" @@ -681,7 +681,7 @@ def test_upsertTable( projectId, datasetId, annotation_keys, - dmge, + dmge: DataModelGraphExplorer, ): table_manipulation = "upsert" From ecd29dc090438d1a35cc7fadcb6a9671ff6c21b9 Mon Sep 17 00:00:00 2001 From: linglp Date: Thu, 4 Apr 2024 11:20:20 -0400 Subject: [PATCH 35/51] change to use assert --- tests/test_store.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/test_store.py b/tests/test_store.py index c83f97dcf..ba2ae9e91 100644 --- a/tests/test_store.py +++ b/tests/test_store.py @@ -1045,9 +1045,9 @@ def test_upload_manifest_as_table( else: add_anno_mock.assert_not_called() # need to set annotations for both table and files - format_manifest_anno_mock.call_count == 2 - set_anno_mock.call_count == 2 - update_db_mock.call_count == 2 + assert format_manifest_anno_mock.call_count == 2 + assert set_anno_mock.call_count == 2 + assert update_db_mock.call_count == 2 @pytest.mark.parametrize("file_annotations_upload", [True, False]) @pytest.mark.parametrize("hide_blanks", [True, False]) @@ -1104,9 +1104,9 @@ def test_upload_manifest_combo( else: add_anno_mock.assert_not_called() # need to set annotations for both table and files - format_manifest_anno_mock.call_count == 2 - set_anno_mock.call_count == 2 - update_db_mock.call_count == 2 + assert format_manifest_anno_mock.call_count == 2 + assert set_anno_mock.call_count == 2 + assert update_db_mock.call_count == 2 @pytest.mark.parametrize( "manifest_record_type,expected", From 32afd22c501df7b08393617c3c059b10ebd073a1 Mon Sep 17 00:00:00 2001 From: linglp Date: Thu, 4 Apr 2024 11:26:55 -0400 Subject: [PATCH 36/51] add type hinting --- tests/test_store.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/tests/test_store.py b/tests/test_store.py index ba2ae9e91..3466e0d8a 100644 --- a/tests/test_store.py +++ b/tests/test_store.py @@ -21,6 +21,7 @@ from schematic.schemas.data_model_relationships import DataModelRelationships from schematic.store.base import BaseStorage from schematic.store.synapse import DatasetFileView, ManifestDownload, SynapseStorage +from conftest import Helpers logging.basicConfig(level=logging.DEBUG) logger = logging.getLogger(__name__) @@ -853,7 +854,10 @@ class TestManifestUpload: """Test manifest upload""" def test_add_annotations_to_entities_files( - self, helpers, synapse_store: SynapseStorage, dmge: DataModelGraphExplorer + self, + helpers: Helpers, + synapse_store: SynapseStorage, + dmge: DataModelGraphExplorer, ) -> None: """test adding annotations to entities files @@ -900,7 +904,10 @@ def test_add_annotations_to_entities_files( ], ) def test_upload_manifest_file( - self, helpers, synapse_store: SynapseStorage, mock_manifest_file_path: str + self, + helpers: Helpers, + synapse_store: SynapseStorage, + mock_manifest_file_path: str, ) -> None: """test upload manifest file function @@ -948,7 +955,7 @@ def test_upload_manifest_file( @pytest.mark.parametrize("manifest_record_type", ["entity", "table", "both"]) def test_upload_manifest_as_csv( self, - helpers, + helpers: Helpers, dmge: DataModelGraphExplorer, synapse_store: SynapseStorage, file_annotations_upload: bool, @@ -997,7 +1004,7 @@ def test_upload_manifest_as_csv( @pytest.mark.parametrize("manifest_record_type", ["entity", "table", "both"]) def test_upload_manifest_as_table( self, - helpers, + helpers: Helpers, synapse_store: SynapseStorage, dmge: DataModelGraphExplorer, file_annotations_upload: bool, @@ -1055,7 +1062,7 @@ def test_upload_manifest_as_table( @pytest.mark.parametrize("manifest_record_type", ["entity", "table", "both"]) def test_upload_manifest_combo( self, - helpers, + helpers: Helpers, synapse_store: SynapseStorage, dmge: DataModelGraphExplorer, file_annotations_upload: bool, @@ -1122,7 +1129,7 @@ def test_upload_manifest_combo( @pytest.mark.parametrize("file_annotations_upload", [True, False]) def test_associateMetadataWithFiles( self, - helpers, + helpers: Helpers, restrict_rules: bool, hide_blanks: bool, synapse_store: SynapseStorage, From 5b6d22834c9097f77ebe40a19a13fa19aa08dece Mon Sep 17 00:00:00 2001 From: linglp Date: Thu, 4 Apr 2024 11:29:31 -0400 Subject: [PATCH 37/51] add type hinting --- tests/test_cli.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/test_cli.py b/tests/test_cli.py index 838192e27..67559830b 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -10,6 +10,7 @@ from schematic.manifest.commands import manifest from schematic.models.commands import model from schematic.configuration.configuration import Configuration +from conftest import Helpers @pytest.fixture @@ -152,7 +153,11 @@ def test_get_example_manifest_excel( @pytest.mark.parametrize("with_annotations", [True, False]) def test_submit_file_based_manifest( - self, runner: CliRunner, helpers, with_annotations: bool, config: Configuration + self, + runner: CliRunner, + helpers: Helpers, + with_annotations: bool, + config: Configuration, ) -> None: manifest_path = helpers.get_data_path("mock_manifests/bulkrnaseq_test.csv") config.load_config("config_example.yml") From 1754e55fdd1bb82e6b4b3c4d3c75aa749486f8ca Mon Sep 17 00:00:00 2001 From: linglp Date: Thu, 4 Apr 2024 11:30:33 -0400 Subject: [PATCH 38/51] add type hinting --- tests/test_metadata.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_metadata.py b/tests/test_metadata.py index 7fc9f357d..a3d153e14 100644 --- a/tests/test_metadata.py +++ b/tests/test_metadata.py @@ -6,6 +6,7 @@ import pytest from schematic.models.metadata import MetadataModel +from conftest import Helpers logging.basicConfig(level=logging.DEBUG) logger = logging.getLogger(__name__) @@ -107,7 +108,7 @@ def test_populate_manifest(self, helpers, return_excel, data_model_labels): @pytest.mark.parametrize("validate_component", [None, "BulkRNA-seqAssay"]) def test_submit_metadata_manifest( self, - helpers, + helpers: Helpers, file_annotations_upload: bool, restrict_rules: bool, data_model_labels: str, From 3ed77316761b18c94f0fe27d713057abc0a35cfb Mon Sep 17 00:00:00 2001 From: linglp Date: Thu, 4 Apr 2024 11:32:40 -0400 Subject: [PATCH 39/51] rename function and variable --- tests/test_store.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_store.py b/tests/test_store.py index 3466e0d8a..0fe3be71b 100644 --- a/tests/test_store.py +++ b/tests/test_store.py @@ -94,8 +94,8 @@ def dmge( DataModelGraphExplorer """ # associate org FollowUp metadata with files - inputModelLocaiton = helpers.get_data_path(os.path.basename(config.model_location)) - data_model_parser = DataModelParser(path_to_data_model=inputModelLocaiton) + input_model_location = helpers.get_data_path(os.path.basename(config.model_location)) + data_model_parser = DataModelParser(path_to_data_model=input_model_location) # Parse Model parsed_data_model = data_model_parser.parse_model() @@ -1127,7 +1127,7 @@ def test_upload_manifest_combo( @pytest.mark.parametrize("restrict_rules", [True, False]) @pytest.mark.parametrize("hide_blanks", [True, False]) @pytest.mark.parametrize("file_annotations_upload", [True, False]) - def test_associateMetadataWithFiles( + def test_associate_metadata_with_files( self, helpers: Helpers, restrict_rules: bool, From 1b280b52935a3c7bba268c26af20ea3ad442b5d8 Mon Sep 17 00:00:00 2001 From: linglp Date: Thu, 4 Apr 2024 11:34:21 -0400 Subject: [PATCH 40/51] remove unused import --- schematic/store/synapse.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/schematic/store/synapse.py b/schematic/store/synapse.py index 47e93a759..9cd48bf22 100644 --- a/schematic/store/synapse.py +++ b/schematic/store/synapse.py @@ -26,7 +26,6 @@ # allows specifying explicit variable types from typing import Dict, List, Tuple, Sequence, Union, Optional - from synapseclient import ( Synapse, File, @@ -47,7 +46,6 @@ SynapseUnmetAccessRestrictions, ) import synapseutils -from synapseutils.copy_functions import changeFileMetaData import uuid From 04a3ca828c2843844e08869e624bf8578d148e30 Mon Sep 17 00:00:00 2001 From: linglp Date: Thu, 4 Apr 2024 11:35:16 -0400 Subject: [PATCH 41/51] add type hinting --- tests/test_store.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/test_store.py b/tests/test_store.py index 0fe3be71b..eb33bdd84 100644 --- a/tests/test_store.py +++ b/tests/test_store.py @@ -82,7 +82,7 @@ def datasetId(synapse_store, projectId, helpers): @pytest.fixture def dmge( - helpers, config: Configuration + helpers: Helpers, config: Configuration ) -> Generator[DataModelGraphExplorer, None, None]: """initiate data model explorer @@ -94,7 +94,9 @@ def dmge( DataModelGraphExplorer """ # associate org FollowUp metadata with files - input_model_location = helpers.get_data_path(os.path.basename(config.model_location)) + input_model_location = helpers.get_data_path( + os.path.basename(config.model_location) + ) data_model_parser = DataModelParser(path_to_data_model=input_model_location) # Parse Model parsed_data_model = data_model_parser.parse_model() From 08da61f1577659f18cc84250130360630be45b13 Mon Sep 17 00:00:00 2001 From: linglp Date: Thu, 4 Apr 2024 12:08:13 -0400 Subject: [PATCH 42/51] fix import statement --- tests/test_cli.py | 2 +- tests/test_metadata.py | 2 +- tests/test_store.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_cli.py b/tests/test_cli.py index 67559830b..7bd43ff3b 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -10,7 +10,7 @@ from schematic.manifest.commands import manifest from schematic.models.commands import model from schematic.configuration.configuration import Configuration -from conftest import Helpers +from tests.conftest import Helpers @pytest.fixture diff --git a/tests/test_metadata.py b/tests/test_metadata.py index a3d153e14..fedfeaab8 100644 --- a/tests/test_metadata.py +++ b/tests/test_metadata.py @@ -6,7 +6,7 @@ import pytest from schematic.models.metadata import MetadataModel -from conftest import Helpers +from tests.conftest import Helpers logging.basicConfig(level=logging.DEBUG) logger = logging.getLogger(__name__) diff --git a/tests/test_store.py b/tests/test_store.py index eb33bdd84..d5fd3f779 100644 --- a/tests/test_store.py +++ b/tests/test_store.py @@ -21,7 +21,7 @@ from schematic.schemas.data_model_relationships import DataModelRelationships from schematic.store.base import BaseStorage from schematic.store.synapse import DatasetFileView, ManifestDownload, SynapseStorage -from conftest import Helpers +from tests.conftest import Helpers logging.basicConfig(level=logging.DEBUG) logger = logging.getLogger(__name__) From 2470869de46171a1dfa9c0698f32f2713cdf9c26 Mon Sep 17 00:00:00 2001 From: linglp Date: Fri, 5 Apr 2024 13:58:29 -0400 Subject: [PATCH 43/51] update test manifest --- tests/data/mock_manifests/bulkrnaseq_test.csv | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/data/mock_manifests/bulkrnaseq_test.csv b/tests/data/mock_manifests/bulkrnaseq_test.csv index 72dd17e9b..cbe43d335 100644 --- a/tests/data/mock_manifests/bulkrnaseq_test.csv +++ b/tests/data/mock_manifests/bulkrnaseq_test.csv @@ -1,4 +1,4 @@ -Filename,Sample ID,File Format,Component,Genome Build,Genome FASTA -Test sub folder/sample_file_one.txt,1,BAM,BulkRNA-seqAssay,GRCh37, -Test sub folder/sample_file_three.txt,2,BAM,BulkRNA-seqAssay,GRCh37, -Test sub folder/sample_file_two.txt,3,BAM,BulkRNA-seqAssay,GRCh37, +Filename,Sample ID,File Format,Component,Genome Build,Genome FASTA,Id,entityId +Test sub folder/sample_file_one.txt,1,BAM,BulkRNA-seqAssay,GRCh37,,123456,syn1224 +Test sub folder/sample_file_three.txt,2,BAM,BulkRNA-seqAssay,GRCh37,,123457,syn1225 +Test sub folder/sample_file_two.txt,3,BAM,BulkRNA-seqAssay,GRCh37,,123458,syn1226 From 85e6a67760b9dd66e1c27f3dcc567c05adc29e71 Mon Sep 17 00:00:00 2001 From: linglp Date: Fri, 5 Apr 2024 18:17:47 -0400 Subject: [PATCH 44/51] modify test of test_add_annotations_to_entities_files function --- tests/test_store.py | 76 +++++++++++++++++++++++++++++++++++++-------- 1 file changed, 63 insertions(+), 13 deletions(-) diff --git a/tests/test_store.py b/tests/test_store.py index d5fd3f779..ec5ed6d9a 100644 --- a/tests/test_store.py +++ b/tests/test_store.py @@ -855,11 +855,65 @@ def test_entity_type_checking(self, synapse_store, entity_id, caplog): class TestManifestUpload: """Test manifest upload""" + @pytest.mark.parametrize( + "original_manifest_df, files_in_dataset, expected_entity_ids, expected_filenames", + [ + # there are new files in dataset folders after a manifest gets generated + # but the expected behavior is to add entity ID to existing "filename" column + ( + { + "Filename": {0: "Test sub folder/sample_file_one.txt"}, + "Sample ID": {0: 1}, + "File Format": {0: "BAM"}, + "Component": {0: "BulkRNA-seqAssay"}, + "Genome Build": {0: "GRCh37"}, + "Genome FASTA": {0: ""}, + "entityId": {0: ""}, + "Id": {0: "mock_id_0"}, + }, + [ + ("syn1224", "Test sub folder/sample_file_one.txt"), + ("syn1225", "Test sub folder/sample_file_two.txt"), + ], + ["syn1224"], + ["Test sub folder/sample_file_one.txt"], + ), + # there's no new files in dataset folder after a manifest gets generated + ( + { + "Filename": { + 0: "Test sub folder/sample_file_one.txt", + 1: "Test sub folder/sample_file_two.txt", + }, + "Sample ID": {0: 1, 1: 2}, + "File Format": {0: "BAM", 1: "BAM"}, + "Component": {0: "BulkRNA-seqAssay", 1: "BulkRNA-seqAssay"}, + "Genome Build": {0: "GRCh37", 1: "GRCh37"}, + "Genome FASTA": {0: "", 1: ""}, + "entityId": {0: "syn1224", 1: "syn1225"}, + "Id": {0: "mock_id_0", 1: "mock_id_1"}, + }, + [ + ("syn1224", "Test sub folder/sample_file_one.txt"), + ("syn1225", "Test sub folder/sample_file_two.txt"), + ], + ["syn1224", "syn1225"], + [ + "Test sub folder/sample_file_one.txt", + "Test sub folder/sample_file_two.txt", + ], + ), + ], + ) def test_add_annotations_to_entities_files( self, helpers: Helpers, synapse_store: SynapseStorage, dmge: DataModelGraphExplorer, + original_manifest_df: dict, + files_in_dataset: str, + expected_filenames: list[str], + expected_entity_ids: list[str], ) -> None: """test adding annotations to entities files @@ -867,17 +921,17 @@ def test_add_annotations_to_entities_files( helpers (fixture): a pytest fixture synapse_store (SynapseStorage): mock synapse store dmge (DataModelGraphExplorer): data model grpah explorer object + original_manifest_df (Dataframe): the dataframe of manifest that you want to submit + files_in_dataset (str): mock entityid and file name returned by getFilesInStorageDataset function + expected_filenames (list(str)): expected list of file names + expected_entity_ids (list(str)): expected list of entity ids """ with patch( "schematic.store.synapse.SynapseStorage.getFilesInStorageDataset", - return_value=[ - ("syn1224", "Test sub folder/sample_file_one.txt"), - ("syn1225", "Test sub folder/sample_file_three.txt"), - ("syn1226", "Test sub folder/sample_file_two.txt"), - ], + return_value=files_in_dataset, ): - manifest_path = helpers.get_data_path("mock_manifests/bulkrnaseq_test.csv") - manifest_df = helpers.get_data_frame(manifest_path) + manifest_df = pd.DataFrame(original_manifest_df) + new_df = synapse_store.add_annotations_to_entities_files( dmge, manifest_df, @@ -891,12 +945,8 @@ def test_add_annotations_to_entities_files( # test entityId and Id columns get added assert "entityId" in new_df.columns assert "Id" in new_df.columns - assert file_names_lst == [ - "Test sub folder/sample_file_one.txt", - "Test sub folder/sample_file_three.txt", - "Test sub folder/sample_file_two.txt", - ] - assert entity_ids_lst == ["syn1224", "syn1225", "syn1226"] + assert file_names_lst == expected_filenames + assert entity_ids_lst == expected_entity_ids @pytest.mark.parametrize( "mock_manifest_file_path", From 5c8d209cc7077c3a073299720a9ea7b4789788ca Mon Sep 17 00:00:00 2001 From: linglp Date: Tue, 9 Apr 2024 12:08:11 -0400 Subject: [PATCH 45/51] remove duplicated test resources and modify test to clean up --- tests/data/mock_manifests/bulkrnaseq_test.csv | 4 --- tests/test_cli.py | 2 +- tests/test_metadata.py | 29 ++++++++++++++++--- tests/test_store.py | 8 ++--- 4 files changed, 30 insertions(+), 13 deletions(-) delete mode 100644 tests/data/mock_manifests/bulkrnaseq_test.csv diff --git a/tests/data/mock_manifests/bulkrnaseq_test.csv b/tests/data/mock_manifests/bulkrnaseq_test.csv deleted file mode 100644 index cbe43d335..000000000 --- a/tests/data/mock_manifests/bulkrnaseq_test.csv +++ /dev/null @@ -1,4 +0,0 @@ -Filename,Sample ID,File Format,Component,Genome Build,Genome FASTA,Id,entityId -Test sub folder/sample_file_one.txt,1,BAM,BulkRNA-seqAssay,GRCh37,,123456,syn1224 -Test sub folder/sample_file_three.txt,2,BAM,BulkRNA-seqAssay,GRCh37,,123457,syn1225 -Test sub folder/sample_file_two.txt,3,BAM,BulkRNA-seqAssay,GRCh37,,123458,syn1226 diff --git a/tests/test_cli.py b/tests/test_cli.py index 7bd43ff3b..1e9e63829 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -159,7 +159,7 @@ def test_submit_file_based_manifest( with_annotations: bool, config: Configuration, ) -> None: - manifest_path = helpers.get_data_path("mock_manifests/bulkrnaseq_test.csv") + manifest_path = helpers.get_data_path("mock_manifests/test_BulkRNAseq.csv") config.load_config("config_example.yml") config.synapse_master_fileview_id = "syn1234" diff --git a/tests/test_metadata.py b/tests/test_metadata.py index fedfeaab8..c3ef1e105 100644 --- a/tests/test_metadata.py +++ b/tests/test_metadata.py @@ -1,6 +1,8 @@ import logging import os -from typing import Optional +import shutil +from typing import Optional, Generator +from pathlib import Path from unittest.mock import patch import pytest @@ -21,6 +23,26 @@ def metadata_model(helpers, data_model_labels): return metadata_model +@pytest.fixture +def test_bulkrnaseq(helpers: Helpers) -> Generator[Path, None, None]: + """create temporary copy of test_BulkRNAseq.csv + This fixture creates a temporary copy of the original 'test_BulkRNAseq.csv' file + After test, the copied file is removed. + Args: + helpers (Helpers): Helpers fixture + + Yields: + Generator[Path, None, None]: temporary file path of the copied version test_BulkRNAseq.csv + """ + # original bulkrnaseq csv + original_test_path = helpers.get_data_path("mock_manifests/test_BulkRNAseq.csv") + # Copy the original CSV file to a temporary directory + temp_csv_path = helpers.get_data_path("mock_manifests/test_BulkRNAseq2.csv") + shutil.copyfile(original_test_path, temp_csv_path) + yield temp_csv_path + # Teardown + if os.path.exists(temp_csv_path): + os.remove(temp_csv_path) class TestMetadataModel: @pytest.mark.parametrize("as_graph", [True, False], ids=["as_graph", "as_list"]) @@ -108,6 +130,7 @@ def test_populate_manifest(self, helpers, return_excel, data_model_labels): @pytest.mark.parametrize("validate_component", [None, "BulkRNA-seqAssay"]) def test_submit_metadata_manifest( self, + test_bulkrnaseq: Path, helpers: Helpers, file_annotations_upload: bool, restrict_rules: bool, @@ -124,9 +147,7 @@ def test_submit_metadata_manifest( "schematic.store.synapse.SynapseStorage.associateMetadataWithFiles", return_value="mock manifest id", ): - mock_manifest_path = helpers.get_data_path( - "mock_manifests/bulkrnaseq_test.csv" - ) + mock_manifest_path = test_bulkrnaseq data_model_jsonld = helpers.get_data_path("example.model.jsonld") mock_manifest_id = meta_data_model.submit_metadata_manifest( manifest_path=mock_manifest_path, diff --git a/tests/test_store.py b/tests/test_store.py index ec5ed6d9a..67af4db3c 100644 --- a/tests/test_store.py +++ b/tests/test_store.py @@ -1028,7 +1028,7 @@ def test_upload_manifest_as_csv( ) as format_manifest_anno_mock, patch.object(synapse_store.syn, "set_annotations"), ): - manifest_path = helpers.get_data_path("mock_manifests/bulkrnaseq_test.csv") + manifest_path = helpers.get_data_path("mock_manifests/test_BulkRNAseq.csv") manifest_df = helpers.get_data_frame(manifest_path) synapse_store.upload_manifest_as_csv( dmge, @@ -1082,7 +1082,7 @@ def test_upload_manifest_as_table( "schematic.store.synapse.SynapseStorage.format_manifest_annotations" ) as format_manifest_anno_mock, ): - manifest_path = helpers.get_data_path("mock_manifests/bulkrnaseq_test.csv") + manifest_path = helpers.get_data_path("mock_manifests/test_BulkRNAseq.csv") manifest_df = helpers.get_data_frame(manifest_path) synapse_store.upload_manifest_as_table( dmge, @@ -1123,7 +1123,7 @@ def test_upload_manifest_combo( manifest_record_type: str, ) -> None: mock_df = pd.DataFrame() - manifest_path = helpers.get_data_path("mock_manifests/bulkrnaseq_test.csv") + manifest_path = helpers.get_data_path("mock_manifests/test_BulkRNAseq.csv") manifest_df = helpers.get_data_frame(manifest_path) with ( patch( @@ -1204,7 +1204,7 @@ def test_associate_metadata_with_files( return_value="mock_id_entities", ), ): - manifest_path = "mock_manifests/bulkrnaseq_test.csv" + manifest_path = "mock_manifests/test_BulkRNAseq.csv" manifest_id = synapse_store.associateMetadataWithFiles( dmge=dmge, metadataManifestPath=helpers.get_data_path(manifest_path), From ae300493d8d1ab5c66e545718d72349cb83e1c33 Mon Sep 17 00:00:00 2001 From: linglp Date: Tue, 9 Apr 2024 12:36:07 -0400 Subject: [PATCH 46/51] fix spacing --- tests/test_metadata.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/test_metadata.py b/tests/test_metadata.py index c3ef1e105..7c1659ae9 100644 --- a/tests/test_metadata.py +++ b/tests/test_metadata.py @@ -23,13 +23,14 @@ def metadata_model(helpers, data_model_labels): return metadata_model + @pytest.fixture def test_bulkrnaseq(helpers: Helpers) -> Generator[Path, None, None]: """create temporary copy of test_BulkRNAseq.csv - This fixture creates a temporary copy of the original 'test_BulkRNAseq.csv' file + This fixture creates a temporary copy of the original 'test_BulkRNAseq.csv' file After test, the copied file is removed. Args: - helpers (Helpers): Helpers fixture + helpers (Helpers): Helpers fixture Yields: Generator[Path, None, None]: temporary file path of the copied version test_BulkRNAseq.csv @@ -44,6 +45,7 @@ def test_bulkrnaseq(helpers: Helpers) -> Generator[Path, None, None]: if os.path.exists(temp_csv_path): os.remove(temp_csv_path) + class TestMetadataModel: @pytest.mark.parametrize("as_graph", [True, False], ids=["as_graph", "as_list"]) @pytest.mark.parametrize( From e6620a9912f888807d7d577c3d8cd4756e8d78df Mon Sep 17 00:00:00 2001 From: linglp Date: Tue, 9 Apr 2024 12:40:00 -0400 Subject: [PATCH 47/51] fix typing --- tests/test_store.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/test_store.py b/tests/test_store.py index 67af4db3c..1807e40d9 100644 --- a/tests/test_store.py +++ b/tests/test_store.py @@ -4,7 +4,7 @@ import math import os from time import sleep -from typing import Generator +from typing import Generator, Any from unittest.mock import patch import pandas as pd @@ -907,10 +907,9 @@ class TestManifestUpload: ) def test_add_annotations_to_entities_files( self, - helpers: Helpers, synapse_store: SynapseStorage, dmge: DataModelGraphExplorer, - original_manifest_df: dict, + original_manifest: dict[str, Any], files_in_dataset: str, expected_filenames: list[str], expected_entity_ids: list[str], @@ -930,7 +929,7 @@ def test_add_annotations_to_entities_files( "schematic.store.synapse.SynapseStorage.getFilesInStorageDataset", return_value=files_in_dataset, ): - manifest_df = pd.DataFrame(original_manifest_df) + manifest_df = pd.DataFrame(original_manifest) new_df = synapse_store.add_annotations_to_entities_files( dmge, From 9d1623271d7672c13a11a6b529433d0eff280291 Mon Sep 17 00:00:00 2001 From: linglp Date: Tue, 9 Apr 2024 13:31:47 -0400 Subject: [PATCH 48/51] update typing, change variable name to avoid confusion --- tests/test_store.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_store.py b/tests/test_store.py index 1807e40d9..70f3dddbb 100644 --- a/tests/test_store.py +++ b/tests/test_store.py @@ -856,7 +856,7 @@ class TestManifestUpload: """Test manifest upload""" @pytest.mark.parametrize( - "original_manifest_df, files_in_dataset, expected_entity_ids, expected_filenames", + "original_manifest, files_in_dataset, expected_entity_ids, expected_filenames", [ # there are new files in dataset folders after a manifest gets generated # but the expected behavior is to add entity ID to existing "filename" column @@ -920,7 +920,7 @@ def test_add_annotations_to_entities_files( helpers (fixture): a pytest fixture synapse_store (SynapseStorage): mock synapse store dmge (DataModelGraphExplorer): data model grpah explorer object - original_manifest_df (Dataframe): the dataframe of manifest that you want to submit + original_manifest (Dictionary): the dataframe of manifest that you want to submit files_in_dataset (str): mock entityid and file name returned by getFilesInStorageDataset function expected_filenames (list(str)): expected list of file names expected_entity_ids (list(str)): expected list of entity ids From 1ef012262d7bc88b75aa2939756b573d34ec3652 Mon Sep 17 00:00:00 2001 From: linglp Date: Tue, 9 Apr 2024 13:59:20 -0400 Subject: [PATCH 49/51] update typing; remove Optional --- schematic/models/metadata.py | 2 +- schematic/store/synapse.py | 8 ++++---- schematic_api/api/routes.py | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/schematic/models/metadata.py b/schematic/models/metadata.py index e651cd76d..759cdce75 100644 --- a/schematic/models/metadata.py +++ b/schematic/models/metadata.py @@ -323,7 +323,7 @@ def submit_metadata_manifest( restrict_rules: bool, access_token: Optional[str] = None, validate_component: Optional[str] = None, - file_annotations_upload: Optional[bool] = True, + file_annotations_upload: bool = True, hide_blanks: bool = False, project_scope: List = None, table_manipulation: str = "replace", diff --git a/schematic/store/synapse.py b/schematic/store/synapse.py index 9cd48bf22..e1250b982 100644 --- a/schematic/store/synapse.py +++ b/schematic/store/synapse.py @@ -1704,7 +1704,7 @@ def upload_manifest_as_table( table_manipulation: str, table_column_names: str, annotation_keys: str, - file_annotations_upload: Optional[bool] = True, + file_annotations_upload: bool = True, ): """Upload manifest to Synapse as a table and csv. Args: @@ -1794,7 +1794,7 @@ def upload_manifest_as_csv( hideBlanks, component_name, annotation_keys: str, - file_annotations_upload: Optional[bool] = True, + file_annotations_upload: bool = True, ): """Upload manifest to Synapse as a csv only. Args: @@ -1855,7 +1855,7 @@ def upload_manifest_combo( table_manipulation, table_column_names: str, annotation_keys: str, - file_annotations_upload: Optional[bool] = True, + file_annotations_upload: bool = True, ): """Upload manifest to Synapse as a table and CSV with entities. Args: @@ -1941,7 +1941,7 @@ def associateMetadataWithFiles( table_manipulation: str = "replace", table_column_names: str = "class_label", annotation_keys: str = "class_label", - file_annotations_upload: Optional[bool] = True, + file_annotations_upload: bool = True, ) -> str: """Associate metadata with files in a storage dataset already on Synapse. Upload metadataManifest in the storage dataset folder on Synapse as well. Return synapseId of the uploaded manifest file. diff --git a/schematic_api/api/routes.py b/schematic_api/api/routes.py index db120f851..5389948ce 100644 --- a/schematic_api/api/routes.py +++ b/schematic_api/api/routes.py @@ -393,7 +393,7 @@ def submit_manifest_route( project_scope=None, table_column_names=None, annotation_keys=None, - file_annotations_upload:Optional[bool]=True, + file_annotations_upload:bool=True, ): # call config_handler() config_handler(asset_view=asset_view) From e3346c08ef072e98c848569f48524d53304ce2cf Mon Sep 17 00:00:00 2001 From: linglp Date: Wed, 10 Apr 2024 15:52:45 -0400 Subject: [PATCH 50/51] update flag to be -no-fa --- schematic/models/commands.py | 2 +- tests/test_cli.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/schematic/models/commands.py b/schematic/models/commands.py index ed0709351..ab933a800 100644 --- a/schematic/models/commands.py +++ b/schematic/models/commands.py @@ -98,7 +98,7 @@ def model(ctx, config): # use as `schematic model ...` ) @click.option( "--file_annotations_upload/--no-file_annotations_upload", - "-fa/--no-fa", + "-fa/-no-fa", default=True, is_flag=True, help=query_dict(model_commands, ("model", "submit", "file_annotations_upload")), diff --git a/tests/test_cli.py b/tests/test_cli.py index 1e9e63829..5384b3bf3 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -166,7 +166,7 @@ def test_submit_file_based_manifest( if with_annotations: annotation_opt = "-fa" else: - annotation_opt = "--no-fa" + annotation_opt = "-no-fa" with patch("schematic.models.metadata.MetadataModel.submit_metadata_manifest"): result = runner.invoke( From cd9d0da516c9d382b364ccbdc5375f592d01c460 Mon Sep 17 00:00:00 2001 From: linglp Date: Wed, 10 Apr 2024 15:53:56 -0400 Subject: [PATCH 51/51] remove unused import --- tests/test_store.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_store.py b/tests/test_store.py index 001b967db..638cf05cf 100644 --- a/tests/test_store.py +++ b/tests/test_store.py @@ -17,7 +17,6 @@ from pandas.testing import assert_frame_equal from schematic.configuration.configuration import Configuration -from schematic.models.metadata import MetadataModel from schematic.schemas.data_model_graph import DataModelGraph, DataModelGraphExplorer from schematic.schemas.data_model_parser import DataModelParser from tests.conftest import Helpers