From 69c31feb24777807c416bca4e574ef30431820fc Mon Sep 17 00:00:00 2001 From: linglp Date: Thu, 10 Aug 2023 11:07:09 -0400 Subject: [PATCH 1/5] add includeTypes parameter --- schematic/store/synapse.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/schematic/store/synapse.py b/schematic/store/synapse.py index 93da6109f..bec86384a 100644 --- a/schematic/store/synapse.py +++ b/schematic/store/synapse.py @@ -413,7 +413,7 @@ def getFilesInStorageDataset( """ # select all files within a given storage dataset folder (top level folder in a Synapse storage project or folder marked with contentType = 'dataset') - walked_path = walk(self.syn, datasetId) + walked_path = walk(self.syn, datasetId, includeTypes=["folder", "file"]) file_list = [] From d397129ce84f0aab082bc1c8407b9c5287b4cd48 Mon Sep 17 00:00:00 2001 From: linglp Date: Thu, 10 Aug 2023 12:00:18 -0400 Subject: [PATCH 2/5] add test --- tests/test_store.py | 43 +++++++++++++++++++++++++++++++++---------- 1 file changed, 33 insertions(+), 10 deletions(-) diff --git a/tests/test_store.py b/tests/test_store.py index 4005069b2..b0f333725 100644 --- a/tests/test_store.py +++ b/tests/test_store.py @@ -1,21 +1,25 @@ from __future__ import annotations -import os -import math + import logging -import pytest -from time import sleep -from tenacity import Retrying, RetryError, stop_after_attempt, wait_random_exponential +import math +import os +from time import sleep +from unittest.mock import patch import pandas as pd +import pytest from synapseclient import EntityViewSchema, Folder - -from schematic.models.metadata import MetadataModel -from schematic.store.base import BaseStorage -from schematic.store.synapse import SynapseStorage, DatasetFileView, ManifestDownload -from schematic.schemas.generator import SchemaGenerator from synapseclient.core.exceptions import SynapseHTTPError from synapseclient.entity import File +from tenacity import (RetryError, Retrying, stop_after_attempt, + wait_random_exponential)
+ from schematic.configuration.configuration import Configuration +from schematic.models.metadata import MetadataModel +from schematic.schemas.generator import SchemaGenerator +from schematic.store.base import BaseStorage +from schematic.store.synapse import (DatasetFileView, ManifestDownload, + SynapseStorage) logging.basicConfig(level=logging.DEBUG) logger = logging.getLogger(__name__) @@ -232,6 +236,25 @@ def test_getDatasetProject(self, dataset_id, synapse_store): with pytest.raises(PermissionError): synapse_store.getDatasetProject("syn12345678") + + def test_getFilesInStorageDataset(self, synapse_store): + mock_return = [ + ( + ("parent_folder", "syn123"), + [("test_folder", "syn124")], + [("test_file", "syn126")], + ), + ( + (os.path.join("parent_folder", "test_folder"), "syn124"), + [], + [("test_file_2", "syn125")], + ), + ] + expected_return = [('syn126', 'parent_folder/test_file'), ('syn125', 'parent_folder/test_folder/test_file_2')] + with patch('synapseutils.walk_functions._helpWalk', return_value=mock_return): + file_list = synapse_store.getFilesInStorageDataset(datasetId="syn_mock", fileNames=None, fullpath=True) + assert file_list == expected_return + @pytest.mark.parametrize("downloadFile", [True, False]) def test_getDatasetManifest(self, synapse_store, downloadFile): From fffa80f9a1a4d46a15401f57301c8ce36d4c355f Mon Sep 17 00:00:00 2001 From: linglp Date: Thu, 10 Aug 2023 12:52:30 -0400 Subject: [PATCH 3/5] test full_path parameter --- tests/test_store.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/tests/test_store.py b/tests/test_store.py index b0f333725..8d8ff1734 100644 --- a/tests/test_store.py +++ b/tests/test_store.py @@ -237,7 +237,8 @@ def test_getDatasetProject(self, dataset_id, synapse_store): with pytest.raises(PermissionError): synapse_store.getDatasetProject("syn12345678") - def test_getFilesInStorageDataset(self, synapse_store): + @pytest.mark.parametrize("full_path", [True, False]) + def 
test_getFilesInStorageDataset(self, synapse_store, full_path): mock_return = [ ( ("parent_folder", "syn123"), @@ -250,11 +251,14 @@ def test_getFilesInStorageDataset(self, synapse_store): [("test_file_2", "syn125")], ), ] - expected_return = [('syn126', 'parent_folder/test_file'), ('syn125', 'parent_folder/test_folder/test_file_2')] + expected_return_full_path = [('syn126', 'parent_folder/test_file'), ('syn125', 'parent_folder/test_folder/test_file_2')] + expected_return_not_full_path = [('syn126', 'test_file'), ('syn125', 'test_file_2')] with patch('synapseutils.walk_functions._helpWalk', return_value=mock_return): - file_list = synapse_store.getFilesInStorageDataset(datasetId="syn_mock", fileNames=None, fullpath=True) - assert file_list == expected_return - + file_list = synapse_store.getFilesInStorageDataset(datasetId="syn_mock", fileNames=None, fullpath=full_path) + if full_path: + assert file_list == expected_return_full_path + else: + assert file_list == expected_return_not_full_path @pytest.mark.parametrize("downloadFile", [True, False]) def test_getDatasetManifest(self, synapse_store, downloadFile): From 5287c42dc033a146fd427902632e30a3dced02ce Mon Sep 17 00:00:00 2001 From: linglp Date: Tue, 15 Aug 2023 15:06:48 -0400 Subject: [PATCH 4/5] simplify test --- tests/test_store.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/tests/test_store.py b/tests/test_store.py index 8d8ff1734..afaef751f 100644 --- a/tests/test_store.py +++ b/tests/test_store.py @@ -237,8 +237,8 @@ def test_getDatasetProject(self, dataset_id, synapse_store): with pytest.raises(PermissionError): synapse_store.getDatasetProject("syn12345678") - @pytest.mark.parametrize("full_path", [True, False]) - def test_getFilesInStorageDataset(self, synapse_store, full_path): + @pytest.mark.parametrize("full_path,expected", [(True, [('syn126', 'parent_folder/test_file'), ('syn125', 'parent_folder/test_folder/test_file_2')]),(False, [('syn126', 'test_file'), ('syn125', 
'test_file_2')])]) + def test_getFilesInStorageDataset(self, synapse_store, full_path, expected): mock_return = [ ( ("parent_folder", "syn123"), @@ -251,14 +251,9 @@ def test_getFilesInStorageDataset(self, synapse_store, full_path): [("test_file_2", "syn125")], ), ] - expected_return_full_path = [('syn126', 'parent_folder/test_file'), ('syn125', 'parent_folder/test_folder/test_file_2')] - expected_return_not_full_path = [('syn126', 'test_file'), ('syn125', 'test_file_2')] with patch('synapseutils.walk_functions._helpWalk', return_value=mock_return): file_list = synapse_store.getFilesInStorageDataset(datasetId="syn_mock", fileNames=None, fullpath=full_path) - if full_path: - assert file_list == expected_return_full_path - else: - assert file_list == expected_return_not_full_path + assert file_list == expected @pytest.mark.parametrize("downloadFile", [True, False]) def test_getDatasetManifest(self, synapse_store, downloadFile): From a700f48f6a9423be2e7ef5d1b13c5af75963a5bd Mon Sep 17 00:00:00 2001 From: linglp Date: Tue, 15 Aug 2023 15:08:23 -0400 Subject: [PATCH 5/5] update to use synapseutils.walk --- schematic/store/synapse.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/schematic/store/synapse.py b/schematic/store/synapse.py index bec86384a..e2e5772ec 100644 --- a/schematic/store/synapse.py +++ b/schematic/store/synapse.py @@ -35,7 +35,7 @@ from synapseclient.table import CsvFileTable, build_table, Schema from synapseclient.annotations import from_synapse_annotations from synapseclient.core.exceptions import SynapseHTTPError, SynapseAuthenticationError, SynapseUnmetAccessRestrictions -from synapseutils import walk +import synapseutils from synapseutils.copy_functions import changeFileMetaData import uuid @@ -413,7 +413,7 @@ def getFilesInStorageDataset( """ # select all files within a given storage dataset folder (top level folder in a Synapse storage project or folder marked with contentType = 'dataset') - walked_path = walk(self.syn, 
datasetId, includeTypes=["folder", "file"]) + walked_path = synapseutils.walk(self.syn, datasetId, includeTypes=["folder", "file"]) file_list = []