diff --git a/schematic/models/commands.py b/schematic/models/commands.py index 81a4a3286..ab933a800 100644 --- a/schematic/models/commands.py +++ b/schematic/models/commands.py @@ -96,6 +96,13 @@ def model(ctx, config): # use as `schematic model ...` is_flag=True, help=query_dict(model_commands, ("model", "validate", "restrict_rules")), ) +@click.option( + "--file_annotations_upload/--no-file_annotations_upload", + "-fa/-no-fa", + default=True, + is_flag=True, + help=query_dict(model_commands, ("model", "submit", "file_annotations_upload")), +) @click.option( "-ps", "--project_scope", @@ -147,6 +154,7 @@ def submit_manifest( data_model_labels, table_column_names, annotation_keys, + file_annotations_upload: bool, ): """ Running CLI with manifest validation (optional) and submission options. @@ -173,6 +181,7 @@ def submit_manifest( table_manipulation=table_manipulation, table_column_names=table_column_names, annotation_keys=annotation_keys, + file_annotations_upload=file_annotations_upload, ) if manifest_id: diff --git a/schematic/models/metadata.py b/schematic/models/metadata.py index 963239d8e..b8c0e325a 100644 --- a/schematic/models/metadata.py +++ b/schematic/models/metadata.py @@ -324,6 +324,7 @@ def submit_metadata_manifest( restrict_rules: bool, access_token: Optional[str] = None, validate_component: Optional[str] = None, + file_annotations_upload: bool = True, hide_blanks: bool = False, project_scope: List = None, table_manipulation: str = "replace", @@ -336,6 +337,7 @@ def submit_metadata_manifest( manifest_path: Path to the manifest file, which contains the metadata. dataset_id: Synapse ID of the dataset on Synapse containing the metadata manifest file. validate_component: Component from the schema.org schema based on which the manifest template has been generated. + file_annotations_upload (bool): Default to True. If false, do not add annotations to files. Returns: Manifest ID: If both validation and association were successful. Exceptions: @@ -389,6 +391,7 @@ def submit_metadata_manifest( table_manipulation=table_manipulation, table_column_names=table_column_names, annotation_keys=annotation_keys, + file_annotations_upload=file_annotations_upload, ) restrict_maniest = True @@ -402,6 +405,7 @@ def submit_metadata_manifest( table_manipulation=table_manipulation, table_column_names=table_column_names, annotation_keys=annotation_keys, + file_annotations_upload=file_annotations_upload, ) logger.info(f"No validation errors occured during validation.") @@ -424,6 +428,7 @@ def submit_metadata_manifest( table_manipulation=table_manipulation, table_column_names=table_column_names, annotation_keys=annotation_keys, + file_annotations_upload=file_annotations_upload, ) restrict_maniest = True @@ -437,6 +442,7 @@ def submit_metadata_manifest( table_manipulation=table_manipulation, table_column_names=table_column_names, annotation_keys=annotation_keys, + file_annotations_upload=file_annotations_upload, ) logger.debug( diff --git a/schematic/store/synapse.py b/schematic/store/synapse.py index c05af157f..a15137ae8 100644 --- a/schematic/store/synapse.py +++ b/schematic/store/synapse.py @@ -25,7 +25,6 @@ # allows specifying explicit variable types from typing import Dict, List, Tuple, Sequence, Union, Optional - from synapseclient import ( Synapse, File, @@ -46,7 +45,6 @@ SynapseHTTPError, ) import synapseutils -from synapseutils.copy_functions import changeFileMetaData from schematic_db.rdb.synapse_database import SynapseDatabase @@ -1327,11 +1325,11 @@ def upload_manifest_file( parent=datasetId, name=file_name_new, ) - manifest_synapse_file_id = self.syn.store( manifestSynapseFile, isRestricted=restrict_manifest ).id - changeFileMetaData( + + synapseutils.copy_functions.changeFileMetaData( syn=self.syn, entity=manifest_synapse_file_id, downloadAs=file_name_new ) @@ -1677,11 +1675,11 @@ def add_annotations_to_entities_files( name as upper camelcase, and strip blacklisted characters, display_label will strip blacklisted characters including spaces, to retain display label formatting while ensuring the label is formatted properly for Synapse annotations. Returns: - manifest (pd.DataFrame): modified to add entitiyId as appropriate. + manifest (pd.DataFrame): modified to add entitiyId as appropriate """ - # Expected behavior is to annotate files if `Filename` is present regardless of `-mrt` setting + # Expected behavior is to annotate files if `Filename` is present and if file_annotations_upload is set to True regardless of `-mrt` setting if "filename" in [col.lower() for col in manifest.columns]: # get current list of files and store as dataframe dataset_files = self.getFilesInStorageDataset(datasetId) @@ -1733,6 +1731,7 @@ def upload_manifest_as_table( table_manipulation: str, table_column_names: str, annotation_keys: str, + file_annotations_upload: bool = True, ): """Upload manifest to Synapse as a table and csv. Args: @@ -1752,6 +1751,7 @@ def upload_manifest_as_table( annotation_keys: (str) display_label/class_label (default), Sets labeling syle for annotation keys. class_label will format the display name as upper camelcase, and strip blacklisted characters, display_label will strip blacklisted characters including spaces, to retain display label formatting while ensuring the label is formatted properly for Synapse annotations. + file_annotations_upload (bool): Default to True. If false, do not add annotations to files. Return: manifest_synapse_file_id: SynID of manifest csv uploaded to synapse. """ @@ -1766,15 +1766,16 @@ def upload_manifest_as_table( table_column_names=table_column_names, ) - manifest = self.add_annotations_to_entities_files( - dmge, - manifest, - manifest_record_type, - datasetId, - hideBlanks, - manifest_synapse_table_id, - annotation_keys, - ) + if file_annotations_upload: + manifest = self.add_annotations_to_entities_files( + dmge, + manifest, + manifest_record_type, + datasetId, + hideBlanks, + manifest_synapse_table_id, + annotation_keys, + ) # Load manifest to synapse as a CSV File manifest_synapse_file_id = self.upload_manifest_file( manifest, @@ -1820,6 +1821,7 @@ def upload_manifest_as_csv( hideBlanks, component_name, annotation_keys: str, + file_annotations_upload: bool = True, ): """Upload manifest to Synapse as a csv only. Args: @@ -1833,17 +1835,19 @@ def upload_manifest_as_csv( annotation_keys: (str) display_label/class_label (default), Sets labeling syle for annotation keys. class_label will format the display name as upper camelcase, and strip blacklisted characters, display_label will strip blacklisted characters including spaces, to retain display label formatting while ensuring the label is formatted properly for Synapse annotations. + file_annotations_upload (bool): Default to True. If false, do not add annotations to files. Return: manifest_synapse_file_id (str): SynID of manifest csv uploaded to synapse. """ - manifest = self.add_annotations_to_entities_files( - dmge, - manifest, - manifest_record_type, - datasetId, - hideBlanks, - annotation_keys=annotation_keys, - ) + if file_annotations_upload: + manifest = self.add_annotations_to_entities_files( + dmge, + manifest, + manifest_record_type, + datasetId, + hideBlanks, + annotation_keys=annotation_keys, + ) # Load manifest to synapse as a CSV File manifest_synapse_file_id = self.upload_manifest_file( @@ -1878,6 +1882,7 @@ def upload_manifest_combo( table_manipulation, table_column_names: str, annotation_keys: str, + file_annotations_upload: bool = True, ): """Upload manifest to Synapse as a table and CSV with entities. Args: @@ -1897,6 +1902,7 @@ def upload_manifest_combo( annotation_keys: (str) display_label/class_label (default), Sets labeling syle for annotation keys. class_label will format the display name as upper camelcase, and strip blacklisted characters, display_label will strip blacklisted characters including spaces, to retain display label formatting while ensuring the label is formatted properly for Synapse annotations. + file_annotations_upload (bool): Default to True. If false, do not add annotations to files. Return: manifest_synapse_file_id (str): SynID of manifest csv uploaded to synapse. """ @@ -1910,15 +1916,16 @@ def upload_manifest_combo( table_column_names=table_column_names, ) - manifest = self.add_annotations_to_entities_files( - dmge, - manifest, - manifest_record_type, - datasetId, - hideBlanks, - manifest_synapse_table_id, - annotation_keys=annotation_keys, - ) + if file_annotations_upload: + manifest = self.add_annotations_to_entities_files( + dmge, + manifest, + manifest_record_type, + datasetId, + hideBlanks, + manifest_synapse_table_id, + annotation_keys=annotation_keys, + ) # Load manifest to synapse as a CSV File manifest_synapse_file_id = self.upload_manifest_file( @@ -1961,6 +1968,7 @@ def associateMetadataWithFiles( table_manipulation: str = "replace", table_column_names: str = "class_label", annotation_keys: str = "class_label", + file_annotations_upload: bool = True, ) -> str: """Associate metadata with files in a storage dataset already on Synapse. Upload metadataManifest in the storage dataset folder on Synapse as well. Return synapseId of the uploaded manifest file. @@ -2000,7 +2008,6 @@ def associateMetadataWithFiles( table_name, component_name = self._generate_table_name(manifest) # Upload manifest to synapse based on user input (manifest_record_type) - if manifest_record_type == "file_only": manifest_synapse_file_id = self.upload_manifest_as_csv( dmge, @@ -2012,6 +2019,7 @@ def associateMetadataWithFiles( manifest_record_type=manifest_record_type, component_name=component_name, annotation_keys=annotation_keys, + file_annotations_upload=file_annotations_upload, ) elif manifest_record_type == "table_and_file": manifest_synapse_file_id = self.upload_manifest_as_table( @@ -2027,6 +2035,7 @@ def associateMetadataWithFiles( table_manipulation=table_manipulation, table_column_names=table_column_names, annotation_keys=annotation_keys, + file_annotations_upload=file_annotations_upload, ) elif manifest_record_type == "file_and_entities": manifest_synapse_file_id = self.upload_manifest_as_csv( @@ -2039,6 +2048,7 @@ def associateMetadataWithFiles( manifest_record_type=manifest_record_type, component_name=component_name, annotation_keys=annotation_keys, + file_annotations_upload=file_annotations_upload, ) elif manifest_record_type == "table_file_and_entities": manifest_synapse_file_id = self.upload_manifest_combo( @@ -2054,6 +2064,7 @@ def associateMetadataWithFiles( table_manipulation=table_manipulation, table_column_names=table_column_names, annotation_keys=annotation_keys, + file_annotations_upload=file_annotations_upload, ) else: raise ValueError("Please enter a valid manifest_record_type.") diff --git a/schematic_api/api/openapi/api.yaml b/schematic_api/api/openapi/api.yaml index 50c0e3dc0..15a3540d1 100644 --- a/schematic_api/api/openapi/api.yaml +++ b/schematic_api/api/openapi/api.yaml @@ -442,6 +442,13 @@ paths: enum: ["display_label", "class_label"] default: "class_label" required: false + - in: query + name: file_annotations_upload + schema: + type: boolean + default: true + description: if false, do not add annotations when submitting file-based manifests. + required: false - in: query name: project_scope schema: diff --git a/schematic_api/api/routes.py b/schematic_api/api/routes.py index c20a94a88..5389948ce 100644 --- a/schematic_api/api/routes.py +++ b/schematic_api/api/routes.py @@ -19,6 +19,7 @@ import pandas as pd import json +from typing import Optional from schematic.configuration.configuration import CONFIG from schematic.visualization.attributes_explorer import AttributesExplorer @@ -392,6 +393,7 @@ def submit_manifest_route( project_scope=None, table_column_names=None, annotation_keys=None, + file_annotations_upload:bool=True, ): # call config_handler() config_handler(asset_view=asset_view) @@ -449,6 +451,7 @@ def submit_manifest_route( project_scope=project_scope, table_column_names=table_column_names, annotation_keys=annotation_keys, + file_annotations_upload=file_annotations_upload ) return manifest_id diff --git a/tests/test_cli.py b/tests/test_cli.py index 4631e9a9c..5384b3bf3 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,13 +1,16 @@ import os import pytest +from unittest.mock import patch from click.testing import CliRunner # from schematic import init from schematic.schemas.commands import schema from schematic.manifest.commands import manifest +from schematic.models.commands import model from schematic.configuration.configuration import Configuration +from tests.conftest import Helpers @pytest.fixture @@ -49,10 +52,18 @@ def test_schema_convert_cli(self, runner, helpers): output_path = helpers.get_data_path("example.model.jsonld") - label_type = 'class_label' + label_type = "class_label" result = runner.invoke( - schema, ["convert", data_model_csv_path, "--output_jsonld", output_path, "--data_model_labels", label_type] + schema, + [ + "convert", + data_model_csv_path, + "--output_jsonld", + output_path, + "--data_model_labels", + label_type, + ], ) assert result.exit_code == 0 @@ -139,3 +150,41 @@ def test_get_example_manifest_excel( assert result.exit_code == 0 self.assert_expected_file(result, output_path) + + @pytest.mark.parametrize("with_annotations", [True, False]) + def test_submit_file_based_manifest( + self, + runner: CliRunner, + helpers: Helpers, + with_annotations: bool, + config: Configuration, + ) -> None: + manifest_path = helpers.get_data_path("mock_manifests/test_BulkRNAseq.csv") + config.load_config("config_example.yml") + config.synapse_master_fileview_id = "syn1234" + + if with_annotations: + annotation_opt = "-fa" + else: + annotation_opt = "-no-fa" + + with patch("schematic.models.metadata.MetadataModel.submit_metadata_manifest"): + result = runner.invoke( + model, + [ + "-c", + config.config_path, + "submit", + "-mrt", + "file_only", + "-d", + "syn12345", + "-vc", + "BulkRNA-seqAssay", + "-mp", + manifest_path, + annotation_opt, + ], + ) + + assert result.exit_code == 0 diff --git a/tests/test_metadata.py b/tests/test_metadata.py index 9bc5e5ae5..7c1659ae9 100644 --- a/tests/test_metadata.py +++ b/tests/test_metadata.py @@ -1,9 +1,14 @@ -import os import logging +import os +import shutil +from typing import Optional, Generator +from pathlib import Path +from unittest.mock import patch import pytest from schematic.models.metadata import MetadataModel +from tests.conftest import Helpers logging.basicConfig(level=logging.DEBUG) logger = logging.getLogger(__name__) @@ -19,6 +24,28 @@ def metadata_model(helpers, data_model_labels): return metadata_model +@pytest.fixture +def test_bulkrnaseq(helpers: Helpers) -> Generator[Path, None, None]: + """create temporary copy of test_BulkRNAseq.csv + This fixture creates a temporary copy of the original 'test_BulkRNAseq.csv' file + After test, the copied file is removed. + Args: + helpers (Helpers): Helpers fixture + + Yields: + Generator[Path, None, None]: temporary file path of the copied version test_BulkRNAseq.csv + """ + # original bulkrnaseq csv + original_test_path = helpers.get_data_path("mock_manifests/test_BulkRNAseq.csv") + # Copy the original CSV file to a temporary directory + temp_csv_path = helpers.get_data_path("mock_manifests/test_BulkRNAseq2.csv") + shutil.copyfile(original_test_path, temp_csv_path) + yield temp_csv_path + # Teardown + if os.path.exists(temp_csv_path): + os.remove(temp_csv_path) + + class TestMetadataModel: @pytest.mark.parametrize("as_graph", [True, False], ids=["as_graph", "as_list"]) @pytest.mark.parametrize( @@ -93,3 +120,45 @@ def test_populate_manifest(self, helpers, return_excel, data_model_labels): os.remove(output_path) except: pass + + @pytest.mark.parametrize("file_annotations_upload", [True, False]) + @pytest.mark.parametrize("restrict_rules", [True, False]) + @pytest.mark.parametrize("hide_blanks", [True, False]) + @pytest.mark.parametrize( + "data_model_labels", + ["display_label", "class_label"], + ids=["data_model_labels-display_label", "data_model_labels-class_label"], + ) + @pytest.mark.parametrize("validate_component", [None, "BulkRNA-seqAssay"]) + def test_submit_metadata_manifest( + self, + test_bulkrnaseq: Path, + helpers: Helpers, + file_annotations_upload: bool, + restrict_rules: bool, + data_model_labels: str, + hide_blanks: bool, + validate_component: Optional[str], + ) -> None: + meta_data_model = metadata_model(helpers, data_model_labels) + with patch( + "schematic.models.metadata.MetadataModel.validateModelManifest", + return_value=([], []), + ): + with patch( + "schematic.store.synapse.SynapseStorage.associateMetadataWithFiles", + return_value="mock manifest id", + ): + mock_manifest_path = test_bulkrnaseq + data_model_jsonld = helpers.get_data_path("example.model.jsonld") + mock_manifest_id = meta_data_model.submit_metadata_manifest( + manifest_path=mock_manifest_path, + path_to_json_ld=data_model_jsonld, + validate_component=validate_component, + dataset_id="mock dataset id", + manifest_record_type="file_only", + restrict_rules=restrict_rules, + file_annotations_upload=file_annotations_upload, + hide_blanks=hide_blanks, + ) + assert mock_manifest_id == "mock manifest id" diff --git a/tests/test_store.py b/tests/test_store.py index 60e1eeb54..638cf05cf 100644 --- a/tests/test_store.py +++ b/tests/test_store.py @@ -6,17 +6,20 @@ import math import os from time import sleep +from typing import Generator, Any from unittest.mock import patch import shutil import pandas as pd import pytest -from pandas.testing import assert_frame_equal from synapseclient import EntityViewSchema, Folder from synapseclient.entity import File +from pandas.testing import assert_frame_equal -from schematic.schemas.data_model_parser import DataModelParser +from schematic.configuration.configuration import Configuration from schematic.schemas.data_model_graph import DataModelGraph, DataModelGraphExplorer +from schematic.schemas.data_model_parser import DataModelParser +from tests.conftest import Helpers from schematic.store.base import BaseStorage from schematic.store.synapse import ( @@ -83,6 +86,38 @@ def datasetId(synapse_store, projectId, helpers): yield datasetId +@pytest.fixture +def dmge( + helpers: Helpers, config: Configuration +) -> Generator[DataModelGraphExplorer, None, None]: + """initiate data model explorer + + Args: + helpers (pytest fixture): fixture + config (Configuration): configuration class + + Yields: + DataModelGraphExplorer + """ + # associate org FollowUp metadata with files + input_model_location = helpers.get_data_path( + os.path.basename(config.model_location) + ) + data_model_parser = DataModelParser(path_to_data_model=input_model_location) + # Parse Model + parsed_data_model = data_model_parser.parse_model() + + # Instantiate DataModelGraph + data_model_grapher = DataModelGraph(parsed_data_model) + + # Generate graph + graph_data_model = data_model_grapher.graph + + # Instantiate DataModelGraphExplorer + dmge = DataModelGraphExplorer(graph_data_model) + yield dmge + + def raise_final_error(retry_state): return retry_state.outcome.result() @@ -205,24 +240,8 @@ def test_annotation_submission( datasetId, manifest_record_type, config: Configuration, + dmge: DataModelGraphExplorer, ): - # Upload dataset annotations - - # Instantiate DataModelParser - data_model_parser = DataModelParser(path_to_data_model=config.model_location) - - # Parse Model - parsed_data_model = data_model_parser.parse_model() - - # Instantiate DataModelGraph - data_model_grapher = DataModelGraph(parsed_data_model) - - # Generate graph - graph_data_model = data_model_grapher.graph - - # Instantiate DataModelGraphExplorer - dmge = DataModelGraphExplorer(graph_data_model) - manifest_id = synapse_store.associateMetadataWithFiles( dmge=dmge, metadataManifestPath=helpers.get_data_path(manifest_path), @@ -550,6 +569,7 @@ def test_createTable( datasetId, table_column_names, annotation_keys, + dmge: DataModelGraphExplorer, ): table_manipulation = None @@ -569,25 +589,6 @@ def test_createTable( # associate metadata with files manifest_path = "mock_manifests/table_manifest.csv" - inputModelLocaiton = helpers.get_data_path( - os.path.basename(config.model_location) - ) - - # Instantiate DataModelParser - data_model_parser = DataModelParser(path_to_data_model=inputModelLocaiton) - - # Parse Model - parsed_data_model = data_model_parser.parse_model() - - # Instantiate DataModelGraph - data_model_grapher = DataModelGraph(parsed_data_model) - - # Generate graph - graph_data_model = data_model_grapher.graph - - # Instantiate DataModelGraphExplorer - dmge = DataModelGraphExplorer(graph_data_model) - # updating file view on synapse takes a long time manifestId = synapse_store.associateMetadataWithFiles( dmge=dmge, @@ -607,7 +608,6 @@ def test_createTable( # assert table exists assert table_name in existing_tables.keys() - @pytest.mark.parametrize( "table_column_names", ["display_label", "class_label"], @@ -627,6 +627,7 @@ def test_replaceTable( datasetId, table_column_names, annotation_keys, + dmge: DataModelGraphExplorer, ): table_manipulation = "replace" @@ -647,25 +648,6 @@ def test_replaceTable( not in synapse_store.get_table_info(projectId=projectId).keys() ) - # associate org FollowUp metadata with files - inputModelLocaiton = helpers.get_data_path( - os.path.basename(config.model_location) - ) - # sg = SchemaGenerator(inputModelLocaiton) - - data_model_parser = DataModelParser(path_to_data_model=inputModelLocaiton) - # Parse Model - parsed_data_model = data_model_parser.parse_model() - - # Instantiate DataModelGraph - data_model_grapher = DataModelGraph(parsed_data_model) - - # Generate graph - graph_data_model = data_model_grapher.graph - - # Instantiate DataModelGraphExplorer - dmge = DataModelGraphExplorer(graph_data_model) - # updating file view on synapse takes a long time manifestId = synapse_store.associateMetadataWithFiles( dmge=dmge, @@ -718,7 +700,6 @@ def test_replaceTable( # delete table synapse_store.syn.delete(tableId) - @pytest.mark.parametrize( "annotation_keys", ["display_label", "class_label"], @@ -732,6 +713,7 @@ def test_upsertTable( projectId, datasetId, annotation_keys, + dmge: DataModelGraphExplorer, ): table_manipulation = "upsert" @@ -752,24 +734,6 @@ def test_upsertTable( not in synapse_store.get_table_info(projectId=projectId).keys() ) - # associate org FollowUp metadata with files - inputModelLocaiton = helpers.get_data_path( - os.path.basename(config.model_location) - ) - - data_model_parser = DataModelParser(path_to_data_model=inputModelLocaiton) - # Parse Model - parsed_data_model = data_model_parser.parse_model() - - # Instantiate DataModelGraph - data_model_grapher = DataModelGraph(parsed_data_model) - - # Generate graph - graph_data_model = data_model_grapher.graph - - # Instantiate DataModelGraphExplorer - dmge = DataModelGraphExplorer(graph_data_model) - # updating file view on synapse takes a long time manifestId = synapse_store.associateMetadataWithFiles( dmge=dmge, @@ -915,3 +879,367 @@ def test_entity_type_checking(self, synapse_store, entity_id, caplog): "You are using entity type: folder. Please provide a file ID" in record.message ) + + +class TestManifestUpload: + """Test manifest upload""" + + @pytest.mark.parametrize( + "original_manifest, files_in_dataset, expected_entity_ids, expected_filenames", + [ + # there are new files in dataset folders after a manifest gets generated + # but the expected behavior is to add entity ID to existing "filename" column + ( + { + "Filename": {0: "Test sub folder/sample_file_one.txt"}, + "Sample ID": {0: 1}, + "File Format": {0: "BAM"}, + "Component": {0: "BulkRNA-seqAssay"}, + "Genome Build": {0: "GRCh37"}, + "Genome FASTA": {0: ""}, + "entityId": {0: ""}, + "Id": {0: "mock_id_0"}, + }, + [ + ("syn1224", "Test sub folder/sample_file_one.txt"), + ("syn1225", "Test sub folder/sample_file_two.txt"), + ], + ["syn1224"], + ["Test sub folder/sample_file_one.txt"], + ), + # there's no new files in dataset folder after a manifest gets generated + ( + { + "Filename": { + 0: "Test sub folder/sample_file_one.txt", + 1: "Test sub folder/sample_file_two.txt", + }, + "Sample ID": {0: 1, 1: 2}, + "File Format": {0: "BAM", 1: "BAM"}, + "Component": {0: "BulkRNA-seqAssay", 1: "BulkRNA-seqAssay"}, + "Genome Build": {0: "GRCh37", 1: "GRCh37"}, + "Genome FASTA": {0: "", 1: ""}, + "entityId": {0: "syn1224", 1: "syn1225"}, + "Id": {0: "mock_id_0", 1: "mock_id_1"}, + }, + [ + ("syn1224", "Test sub folder/sample_file_one.txt"), + ("syn1225", "Test sub folder/sample_file_two.txt"), + ], + ["syn1224", "syn1225"], + [ + "Test sub folder/sample_file_one.txt", + "Test sub folder/sample_file_two.txt", + ], + ), + ], + ) + def test_add_annotations_to_entities_files( + self, + synapse_store: SynapseStorage, + dmge: DataModelGraphExplorer, + original_manifest: dict[str, Any], + files_in_dataset: str, + expected_filenames: list[str], + expected_entity_ids: list[str], + ) -> None: + """test adding annotations to entities files + + Args: + helpers (fixture): a pytest fixture + synapse_store (SynapseStorage): mock synapse store + dmge (DataModelGraphExplorer): data model grpah explorer object + original_manifest (Dictionary): the dataframe of manifest that you want to submit + files_in_dataset (str): mock entityid and file name returned by getFilesInStorageDataset function + expected_filenames (list(str)): expected list of file names + expected_entity_ids (list(str)): expected list of entity ids + """ + with patch( + "schematic.store.synapse.SynapseStorage.getFilesInStorageDataset", + return_value=files_in_dataset, + ): + manifest_df = pd.DataFrame(original_manifest) + + new_df = synapse_store.add_annotations_to_entities_files( + dmge, + manifest_df, + manifest_record_type="entity", + datasetId="mock id", + hideBlanks=True, + ) + file_names_lst = new_df["Filename"].tolist() + entity_ids_lst = new_df["entityId"].tolist() + + # test entityId and Id columns get added + assert "entityId" in new_df.columns + assert "Id" in new_df.columns + assert file_names_lst == expected_filenames + assert entity_ids_lst == expected_entity_ids + + @pytest.mark.parametrize( + "mock_manifest_file_path", + [ + "mock_manifests/test_mock_manifest.csv", + "mock_manifests/test_mock_manifest_censored.csv", + ], + ) + def test_upload_manifest_file( + self, + helpers: Helpers, + synapse_store: SynapseStorage, + mock_manifest_file_path: str, + ) -> None: + """test upload manifest file function + + Args: + helpers (fixture): a pytest fixture + synapse_store (SynapseStorage): mock synapse store + dmge (DataModelGraphExplorer): data model grpah explorer object + """ + test_df = pd.DataFrame( + { + "Filename": { + 0: "Test sub folder/sample_file_one.txt", + 1: "Test sub folder/sample_file_three.txt", + 2: "Test sub folder/sample_file_two.txt", + }, + "Sample ID": {0: 1, 1: 2, 2: 3}, + "File Format": {0: "BAM", 1: "BAM", 2: "BAM"}, + "Component": { + 0: "BulkRNA-seqAssay", + 1: "BulkRNA-seqAssay", + 2: "BulkRNA-seqAssay", + }, + "Genome Build": {0: "GRCh37", 1: "GRCh37", 2: "GRCh37"}, + "Genome FASTA": {0: "", 1: "", 2: ""}, + "Id": {0: "mock1", 1: "mock2", 2: "mock3"}, + "entityId": {0: "syn1224", 1: "syn1225", 2: "syn1226"}, + } + ) + with patch("synapseclient.Synapse.store") as syn_store_mock, patch( + "synapseutils.copy_functions.changeFileMetaData" + ): + syn_store_mock.return_value.id = "mock manifest id" + mock_file_path = helpers.get_data_path(mock_manifest_file_path) + mock_manifest_synapse_file_id = synapse_store.upload_manifest_file( + manifest=test_df, + metadataManifestPath=mock_file_path, + datasetId="mock dataset id", + restrict_manifest=True, + ) + assert mock_manifest_synapse_file_id == "mock manifest id" + + @pytest.mark.parametrize("file_annotations_upload", [True, False]) + @pytest.mark.parametrize("hide_blanks", [True, False]) + @pytest.mark.parametrize("restrict", [True, False]) + @pytest.mark.parametrize("manifest_record_type", ["entity", "table", "both"]) + def test_upload_manifest_as_csv( + self, + helpers: Helpers, + dmge: DataModelGraphExplorer, + synapse_store: SynapseStorage, + file_annotations_upload: bool, + manifest_record_type: str, + hide_blanks: bool, + restrict: bool, + ) -> None: + with ( + patch( + "schematic.store.synapse.SynapseStorage.add_annotations_to_entities_files" + ) as add_anno_mock, + patch( + "schematic.store.synapse.SynapseStorage.upload_manifest_file", + return_value="mock manifest id", + ) as upload_manifest_mock, + patch( + "schematic.store.synapse.SynapseStorage.format_manifest_annotations" + ) as format_manifest_anno_mock, + patch.object(synapse_store.syn, "set_annotations"), + ): + manifest_path = helpers.get_data_path("mock_manifests/test_BulkRNAseq.csv") + manifest_df = helpers.get_data_frame(manifest_path) + synapse_store.upload_manifest_as_csv( + dmge, + manifest=manifest_df, + metadataManifestPath=manifest_path, + datasetId="mock synapse id", + restrict=restrict, + manifest_record_type=manifest_record_type, + file_annotations_upload=file_annotations_upload, + hideBlanks=hide_blanks, + component_name="BulkRNA-seqAssay", + annotation_keys="class_label", + ) + if file_annotations_upload: + add_anno_mock.assert_called_once() + else: + add_anno_mock.assert_not_called() + + upload_manifest_mock.assert_called_once() + format_manifest_anno_mock.assert_called_once() + + @pytest.mark.parametrize("file_annotations_upload", [True, False]) + @pytest.mark.parametrize("hide_blanks", [True, False]) + @pytest.mark.parametrize("restrict", [True, False]) + @pytest.mark.parametrize("manifest_record_type", ["entity", "table", "both"]) + def test_upload_manifest_as_table( + self, + helpers: Helpers, + synapse_store: SynapseStorage, + dmge: DataModelGraphExplorer, + file_annotations_upload: bool, + hide_blanks: bool, + restrict: bool, + manifest_record_type: str, + ) -> None: + mock_df = pd.DataFrame() + with ( + patch( + "schematic.store.synapse.SynapseStorage.uploadDB", + return_value=["mock_table_id", mock_df, "mock_table_manifest"], + ) as update_db_mock, + patch( + "schematic.store.synapse.SynapseStorage.add_annotations_to_entities_files" + ) as add_anno_mock, + patch( + "schematic.store.synapse.SynapseStorage.upload_manifest_file", + return_value="mock manifest id", + ), + patch.object(synapse_store.syn, "set_annotations") as set_anno_mock, + patch( + "schematic.store.synapse.SynapseStorage.format_manifest_annotations" + ) as format_manifest_anno_mock, + ): + manifest_path = helpers.get_data_path("mock_manifests/test_BulkRNAseq.csv") + manifest_df = helpers.get_data_frame(manifest_path) + synapse_store.upload_manifest_as_table( + dmge, + manifest=manifest_df, + metadataManifestPath=manifest_path, + datasetId="mock synapse id", + table_name="new table name", + component_name="BulkRNA-seqAssay", + restrict=restrict, + manifest_record_type=manifest_record_type, + hideBlanks=hide_blanks, + table_manipulation="replace", + table_column_names="class_label", + annotation_keys="class_label", + file_annotations_upload=file_annotations_upload, + ) + if file_annotations_upload: + add_anno_mock.assert_called_once() + else: + add_anno_mock.assert_not_called() + # need to set annotations for both table and files + assert format_manifest_anno_mock.call_count == 2 + assert set_anno_mock.call_count == 2 + assert update_db_mock.call_count == 2 + + @pytest.mark.parametrize("file_annotations_upload", [True, False]) + @pytest.mark.parametrize("hide_blanks", [True, False]) + @pytest.mark.parametrize("restrict", [True, False]) + @pytest.mark.parametrize("manifest_record_type", ["entity", "table", "both"]) + def test_upload_manifest_combo( + self, + helpers: Helpers, + synapse_store: SynapseStorage, + dmge: DataModelGraphExplorer, + file_annotations_upload: bool, + hide_blanks: bool, + restrict: bool, + manifest_record_type: str, + ) -> None: + mock_df = pd.DataFrame() + manifest_path = helpers.get_data_path("mock_manifests/test_BulkRNAseq.csv") + manifest_df = helpers.get_data_frame(manifest_path) + with ( + patch( + "schematic.store.synapse.SynapseStorage.uploadDB", + return_value=["mock_table_id", mock_df, "mock_table_manifest"], + ) as update_db_mock, + patch( + "schematic.store.synapse.SynapseStorage.add_annotations_to_entities_files" + ) as add_anno_mock, + patch( + "schematic.store.synapse.SynapseStorage.upload_manifest_file", + return_value="mock manifest id", + ), + patch.object(synapse_store.syn, "set_annotations") as set_anno_mock, + patch( + "schematic.store.synapse.SynapseStorage.format_manifest_annotations" + ) as format_manifest_anno_mock, + ): + synapse_store.upload_manifest_combo( + dmge, + manifest=manifest_df, + metadataManifestPath=manifest_path, + datasetId="mock synapse id", + table_name="new table name", + component_name="BulkRNA-seqAssay", + restrict=restrict, + manifest_record_type=manifest_record_type, + hideBlanks=hide_blanks, + table_manipulation="replace", + table_column_names="class_label", + annotation_keys="class_label", + file_annotations_upload=file_annotations_upload, + ) + + if file_annotations_upload: + add_anno_mock.assert_called_once() + else: + add_anno_mock.assert_not_called() + # need to set annotations for both table and files + assert format_manifest_anno_mock.call_count == 2 + assert set_anno_mock.call_count == 2 + assert update_db_mock.call_count == 2 + + @pytest.mark.parametrize( + "manifest_record_type,expected", + [ + ("file_only", "mock_id_csv"), + ("table_and_file", "mock_id_table"), + ("file_and_entities", "mock_id_csv"), + ("table_file_and_entities", "mock_id_entities"), + ], + ) + @pytest.mark.parametrize("restrict_rules", [True, False]) + @pytest.mark.parametrize("hide_blanks", [True, False]) + @pytest.mark.parametrize("file_annotations_upload", [True, False]) + def test_associate_metadata_with_files( + self, + helpers: Helpers, + restrict_rules: bool, + hide_blanks: bool, + synapse_store: SynapseStorage, + manifest_record_type: str, + expected: str, + file_annotations_upload: bool, + dmge: DataModelGraphExplorer, + ) -> None: + with ( + patch( + "schematic.store.synapse.SynapseStorage.upload_manifest_as_csv", + return_value="mock_id_csv", + ), + patch( + "schematic.store.synapse.SynapseStorage.upload_manifest_as_table", + return_value="mock_id_table", + ), + patch( + "schematic.store.synapse.SynapseStorage.upload_manifest_combo", + return_value="mock_id_entities", + ), + ): + manifest_path = "mock_manifests/test_BulkRNAseq.csv" + manifest_id = synapse_store.associateMetadataWithFiles( + dmge=dmge, + metadataManifestPath=helpers.get_data_path(manifest_path), + datasetId="mock_dataset_id", + hideBlanks=hide_blanks, + restrict_manifest=restrict_rules, + manifest_record_type=manifest_record_type, + file_annotations_upload=file_annotations_upload, + ) + assert manifest_id == expected