From 4b8b1347c83c78e5b3d8b459abe6e996b187a580 Mon Sep 17 00:00:00 2001 From: Phil Snyder Date: Tue, 23 Jan 2018 14:08:08 -0800 Subject: [PATCH 01/19] Pipeline initialization, view parsing, user confirmation tests --- annotator/Pipeline.py | 11 +++++-- tests/unit/conftest.py | 5 +++ tests/unit/test_Pipeline.py | 64 +++++++++++++++++++++++++++++++++++++ 3 files changed, 78 insertions(+), 2 deletions(-) diff --git a/annotator/Pipeline.py b/annotator/Pipeline.py index b8a9edf..9753975 100644 --- a/annotator/Pipeline.py +++ b/annotator/Pipeline.py @@ -41,7 +41,10 @@ def __init__(self, syn, view=None, meta=None, activeCols=[], Optional. Whether to sort the columns lexicographically in `view` and/or `meta`. Defaults to True. """ - self.syn = syn + if isinstance(syn, sc.Synapse): + self.syn = syn + else: + raise TypeError("syn must be a synapseclient.Synapse object") self.view = view if view is None else self._parseView(view, sortCols) self._entityViewSchema = (self.syn.get(view) if isinstance(view, str) else None) @@ -470,7 +473,7 @@ def _parseView(self, view, sortCols, isMeta=False): """ if isinstance(view, str): return utils.synread(self.syn, view, sortCols=sortCols) - elif isinstance(view, list) and meta: + elif isinstance(view, list) and isMeta: return utils.combineSynapseTabulars(self.syn, view, axis=1) elif isinstance(view, pd.DataFrame): if sortCols: @@ -524,7 +527,11 @@ def _getUserConfirmation(self, message="Proceed anyways? (y) or (n): "): Otherwise asks user to input confirmation again. """ proceed = '' + counter = 0 while not proceed: + counter += 1 + if counter > 3: + return False # after three attempts return False proceed = input(message) if len(proceed) and not proceed[0] in ['Y', 'y', 'N', 'n']: proceed = '' diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index c078421..a546b76 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -4,6 +4,7 @@ import pandas import synapseclient import uuid +import annotator SAMPLE_FILE = "https://raw.githubusercontent.com/Sage-Bionetworks/annotator/master/tests/sampleFile.csv" @@ -124,3 +125,7 @@ def entities(syn, sampleFile, project): 'table_schema': schema, 'entity_view': entity_view_} return ents + +@pytest.fixture(scope='session') +def genericPipeline(syn): + return annotator.Pipeline(syn) diff --git a/tests/unit/test_Pipeline.py b/tests/unit/test_Pipeline.py index e69de29..ada6a09 100644 --- a/tests/unit/test_Pipeline.py +++ b/tests/unit/test_Pipeline.py @@ -0,0 +1,64 @@ +import annotator +import pytest +import pandas + + +class TestInitialization(object): + def test_minimal(self, genericPipeline): + assert isinstance(genericPipeline, annotator.Pipeline) + + def test_minimal_type_error(self): + with pytest.raises(TypeError): + p = annotator.Pipeline(0) + + +class TestViewParsing(object): + def test__parseView_df(self, sampleFile, genericPipeline): + view = genericPipeline._parseView(sampleFile, sortCols=False) + pandas.testing.assert_frame_equal(view, sampleFile) + + def test__parseView_str(self, sampleFile, genericPipeline, entities): + view = genericPipeline._parseView(entities['files'][0].id, + sortCols=False) + pandas.testing.assert_frame_equal(view, sampleFile) + + def test__parseView_list_meta(self, sampleFile, genericPipeline, entities): + view = genericPipeline._parseView( + [f['id'] for f in entities['files']], + sortCols=False, + isMeta=True) + block_shape = sampleFile.shape + assert view.shape == ( + block_shape[0], + len(entities['files'])*block_shape[1]) + + def test__parseView_list_not_meta(self, genericPipeline, entities): + with pytest.raises(TypeError): + view = genericPipeline._parseView( + [f['id'] for f in entities['files']], + sortCols=False, + isMeta=False) + + def test__parseView_sortCols(self, sampleFile, genericPipeline): + view = genericPipeline._parseView(sampleFile, sortCols=True) + assert isinstance(view, pandas.DataFrame) + assert list(sorted(sampleFile.columns)) == list(view.columns) + +class TestConfirmationPrompt(object): + def test_getUserConfirmation_yes(self, genericPipeline, monkeypatch): + inputs = ['yes', 'y', 'YeS'] + monkeypatch.setattr('builtins.input', lambda _: next((i for i in inputs))) + for i in inputs: + assert genericPipeline._getUserConfirmation() + + def test_getUserConfirmation_no(self, genericPipeline, monkeypatch): + inputs = ['no', 'n', 'NO thank you'] + monkeypatch.setattr('builtins.input', lambda _: next((i for i in inputs))) + for i in inputs: + assert not genericPipeline._getUserConfirmation() + + def test_getUserConfirmation_unknown(self, genericPipeline, monkeypatch): + inputs = ['wut', '#$%!#$%', 'Show me what u got'] + monkeypatch.setattr('builtins.input', lambda _: next((i for i in inputs))) + for i in inputs: + assert not genericPipeline._getUserConfirmation() From 5f72af416229fcbfc8d2d356c70c92a25a083572 Mon Sep 17 00:00:00 2001 From: Phil Snyder Date: Mon, 29 Jan 2018 09:49:50 -0800 Subject: [PATCH 02/19] active column tests --- annotator/Pipeline.py | 5 ++-- tests/unit/test_Pipeline.py | 56 +++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 2 deletions(-) diff --git a/annotator/Pipeline.py b/annotator/Pipeline.py index 9753975..4696a54 100644 --- a/annotator/Pipeline.py +++ b/annotator/Pipeline.py @@ -569,7 +569,7 @@ def _validate(self): ", ".join(self.schema.loc[k].value.values))) return warnings - def removeActiveCols(self, activeCols): + def removeActiveCols(self, activeCols, backup=True): """ Remove a column name from `self._activeCols` Parameters @@ -577,7 +577,8 @@ def removeActiveCols(self, activeCols): activeCols : str or list-like Column name(s) to remove. """ - self.backup("removeActiveCols") + if backup: + self.backup("removeActiveCols") if isinstance(activeCols, str): self._activeCols.remove(activeCols) else: # is list-like diff --git a/tests/unit/test_Pipeline.py b/tests/unit/test_Pipeline.py index ada6a09..82df332 100644 --- a/tests/unit/test_Pipeline.py +++ b/tests/unit/test_Pipeline.py @@ -62,3 +62,59 @@ def test_getUserConfirmation_unknown(self, genericPipeline, monkeypatch): monkeypatch.setattr('builtins.input', lambda _: next((i for i in inputs))) for i in inputs: assert not genericPipeline._getUserConfirmation() + +class TestActiveColumns(object): + @pytest.fixture + def pipeline(self, genericPipeline): + genericPipeline._activeCols = ["pizza", "pie"] + genericPipeline._metaActiveCols = ["pizza", "pie"] + return genericPipeline + + def test_removeActiveCols_str(self, pipeline): + pipeline.removeActiveCols("pizza", backup=False) + assert pipeline._activeCols == ["pie"] + + def test_removeActiveCols_list(self, pipeline): + pipeline.removeActiveCols(["pizza", "pie"], backup=False) + assert pipeline._activeCols == [] + + def test_addActiveCols_str(self, pipeline): + pipeline.addActiveCols("marmalade", backup=False) + assert "marmalade" in pipeline._activeCols + + def test_addActiveCols_list(self, pipeline): + pipeline.addActiveCols(["sunday", "funday"], backup=False) + assert pipeline._activeCols == ["pizza", "pie", "sunday", "funday"] + + def test_addActiveCols_dict(self, pipeline): + pipeline.addActiveCols({"creme": "brulee", "chai": "latte"}, backup=False) + assert "creme" in pipeline._activeCols + assert "chai" in pipeline._activeCols + + def test_addActiveCols_dataframe(self, pipeline): + df = pandas.DataFrame({"key": ["chai", "creme"], "value": ["latte", "brulee"]}) + pipeline.addActiveCols(df, backup=False) + assert "creme" in pipeline._activeCols + assert "chai" in pipeline._activeCols + + def test_addActiveCols_str_meta(self, pipeline): + pipeline.addActiveCols("marmalade", isMeta=True, backup=False) + assert "marmalade" in pipeline._metaActiveCols + + def test_addActiveCols_list_meta(self, pipeline): + pipeline.addActiveCols(["sunday", "funday"], isMeta=True, backup=False) + assert pipeline._metaActiveCols == ["pizza", "pie", "sunday", "funday"] + + def test_addActiveCols_dict_meta(self, pipeline): + pipeline.addActiveCols({"creme": "brulee", "chai": "latte"}, + isMeta=True, backup=False) + assert "creme" in pipeline._metaActiveCols + assert "chai" in pipeline._metaActiveCols + + def test_addActiveCols_dataframe_meta(self, pipeline): + df = pandas.DataFrame( + {"key": ["chai", "creme"], "value": ["latte", "brulee"]}) + pipeline.addActiveCols(df, isMeta=True, backup=False) + assert "creme" in pipeline._metaActiveCols + assert "chai" in pipeline._metaActiveCols + From dea0bdf2e2d242dc197447cb0ce2c475c20898fc Mon Sep 17 00:00:00 2001 From: Phil Snyder Date: Wed, 31 Jan 2018 15:11:03 -0800 Subject: [PATCH 03/19] test for adding to scope of view --- tests/unit/conftest.py | 9 +++++---- tests/unit/test_Pipeline.py | 13 ++++++++++++- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index a546b76..8ee92a8 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -107,20 +107,21 @@ def genericTable(syn, project, sampleFile): def entities(syn, sampleFile, project): # store a folder for our entities sample_folder = folder(syn, project) + sample_folder_two = folder(syn, project) # store sample files _file = file_(syn, sample_folder, SAMPLE_FILE, name="file1.csv") _file2 = file_(syn, sample_folder, SAMPLE_FILE, name="file2.csv") - _file3 = file_(syn, sample_folder, SAMPLE_FILE, name="file3.csv") + _file3 = file_(syn, sample_folder_two, SAMPLE_FILE, name="file3.csv") # store a sample metadata file meta = synapseclient.File(path=SAMPLE_META, name='meta', - parent=sample_folder) + parent=project) meta = syn.store(meta) # store a sample table (same values as sample file) schema = table(syn, project, sampleFile) # store a sample file view - entity_view_ = entity_view(syn, project, scopes=sample_folder) + entity_view_ = entity_view(syn, project, scopes=project) ents = {'files': [_file, _file2, _file3], - 'folder': sample_folder, + 'folders': [sample_folder, sample_folder_two], 'meta': meta, 'table_schema': schema, 'entity_view': entity_view_} diff --git a/tests/unit/test_Pipeline.py b/tests/unit/test_Pipeline.py index 82df332..29272fb 100644 --- a/tests/unit/test_Pipeline.py +++ b/tests/unit/test_Pipeline.py @@ -1,6 +1,7 @@ import annotator import pytest import pandas +from . import conftest class TestInitialization(object): @@ -92,7 +93,8 @@ def test_addActiveCols_dict(self, pipeline): assert "chai" in pipeline._activeCols def test_addActiveCols_dataframe(self, pipeline): - df = pandas.DataFrame({"key": ["chai", "creme"], "value": ["latte", "brulee"]}) + df = pandas.DataFrame({"key": ["chai", "creme"], + "value": ["latte", "brulee"]}) pipeline.addActiveCols(df, backup=False) assert "creme" in pipeline._activeCols assert "chai" in pipeline._activeCols @@ -118,3 +120,12 @@ def test_addActiveCols_dataframe_meta(self, pipeline): assert "creme" in pipeline._metaActiveCols assert "chai" in pipeline._metaActiveCols + +class TestScopeModification(object): + def test_addView(self, syn, entities, project): + entity_view = conftest.entity_view(syn, project, entities['folders'][0]) + p = annotator.Pipeline(syn, view=entity_view.id) + p.addView(entities['folders'][1].id) + correctScopeIds = [f.id[3:] for f in entities['folders']] + assert all([i in correctScopeIds for i in p._entityViewSchema['scopeIds']]) + From a2f19bb98e4192bb06d7ec406d8712ce9b36a1c4 Mon Sep 17 00:00:00 2001 From: Phil Snyder Date: Thu, 1 Feb 2018 16:18:56 -0800 Subject: [PATCH 04/19] added default value tests --- tests/unit/test_Pipeline.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/unit/test_Pipeline.py b/tests/unit/test_Pipeline.py index 29272fb..6cbe6a1 100644 --- a/tests/unit/test_Pipeline.py +++ b/tests/unit/test_Pipeline.py @@ -129,3 +129,13 @@ def test_addView(self, syn, entities, project): correctScopeIds = [f.id[3:] for f in entities['folders']] assert all([i in correctScopeIds for i in p._entityViewSchema['scopeIds']]) +class TestDefaultValues(object): + def test_addDefaultValues_preexisting_col(self, genericPipeline, sampleFile): + genericPipeline.view = sampleFile + genericPipeline.addDefaultValues({'favoriteColor': 'purple'}, backup=False) + assert all([v == "purple" for v in genericPipeline.view['favoriteColor']]) + + def test_addDefaultValues_new_col(self, genericPipeline, sampleFile): + genericPipeline.view = sampleFile + genericPipeline.addDefaultValues({'favoriteCheese': 'Stinky Bishop'}, backup=False) + assert all([v == "Stinky Bishop" for v in genericPipeline.view['favoriteCheese']]) From dbcba54cf09788523f487cf81d932a74530f2cd1 Mon Sep 17 00:00:00 2001 From: Phil Snyder Date: Thu, 1 Feb 2018 16:45:28 -0800 Subject: [PATCH 05/19] added add fileFormat column tests. --- tests/unit/test_Pipeline.py | 33 +++++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/tests/unit/test_Pipeline.py b/tests/unit/test_Pipeline.py index 6cbe6a1..a400c0d 100644 --- a/tests/unit/test_Pipeline.py +++ b/tests/unit/test_Pipeline.py @@ -123,19 +123,36 @@ def test_addActiveCols_dataframe_meta(self, pipeline): class TestScopeModification(object): def test_addView(self, syn, entities, project): - entity_view = conftest.entity_view(syn, project, entities['folders'][0]) + entity_view = conftest.entity_view(syn, project, entities["folders"][0]) p = annotator.Pipeline(syn, view=entity_view.id) - p.addView(entities['folders'][1].id) - correctScopeIds = [f.id[3:] for f in entities['folders']] - assert all([i in correctScopeIds for i in p._entityViewSchema['scopeIds']]) + p.addView(entities["folders"][1].id) + correctScopeIds = [f.id[3:] for f in entities["folders"]] + assert all([i in correctScopeIds + for i in p._entityViewSchema["scopeIds"]]) class TestDefaultValues(object): def test_addDefaultValues_preexisting_col(self, genericPipeline, sampleFile): genericPipeline.view = sampleFile - genericPipeline.addDefaultValues({'favoriteColor': 'purple'}, backup=False) - assert all([v == "purple" for v in genericPipeline.view['favoriteColor']]) + genericPipeline.addDefaultValues( + {"favoriteColor": "purple"}, backup=False) + assert all([v == "purple" + for v in genericPipeline.view["favoriteColor"]]) def test_addDefaultValues_new_col(self, genericPipeline, sampleFile): genericPipeline.view = sampleFile - genericPipeline.addDefaultValues({'favoriteCheese': 'Stinky Bishop'}, backup=False) - assert all([v == "Stinky Bishop" for v in genericPipeline.view['favoriteCheese']]) + genericPipeline.addDefaultValues( + {"favoriteCheese": "Stinky Bishop"}, backup=False) + assert all([v == "Stinky Bishop" + for v in genericPipeline.view["favoriteCheese"]]) + +class TestFileFormatColumn(object): + @pytest.fixture + def sampleView(self): + return pandas.DataFrame({"name": ["celery_man.gif", "fastq.fastq", + "unknown", "slim.fastq.gz", "slim.tar.gz"]}) + + def test_addFileFormatCol(self, genericPipeline, sampleView): + genericPipeline.view = sampleView + genericPipeline.addFileFormatCol() + assert genericPipeline.view["fileFormat"] == \ + ["gif", "fastq", None, "fastq", "tar"] From 17187174a61e91f74b083f9a011db7803df08b9e Mon Sep 17 00:00:00 2001 From: Phil Snyder Date: Thu, 1 Feb 2018 16:45:28 -0800 Subject: [PATCH 06/19] added add fileFormat column tests. --- tests/unit/test_Pipeline.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/test_Pipeline.py b/tests/unit/test_Pipeline.py index a400c0d..520f1f1 100644 --- a/tests/unit/test_Pipeline.py +++ b/tests/unit/test_Pipeline.py @@ -149,10 +149,10 @@ class TestFileFormatColumn(object): @pytest.fixture def sampleView(self): return pandas.DataFrame({"name": ["celery_man.gif", "fastq.fastq", - "unknown", "slim.fastq.gz", "slim.tar.gz"]}) + "unknown", "slim.fastq.gz", "slim.tar.gz", None]}) def test_addFileFormatCol(self, genericPipeline, sampleView): genericPipeline.view = sampleView genericPipeline.addFileFormatCol() assert genericPipeline.view["fileFormat"] == \ - ["gif", "fastq", None, "fastq", "tar"] + ["gif", "fastq", None, "fastq", "tar", None] From f15950f71740ce45a5bc37728cf395fdf038db8a Mon Sep 17 00:00:00 2001 From: Phil Snyder Date: Wed, 7 Feb 2018 11:35:21 -0800 Subject: [PATCH 07/19] test addLinks --- tests/unit/test_Pipeline.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tests/unit/test_Pipeline.py b/tests/unit/test_Pipeline.py index 520f1f1..0ef9374 100644 --- a/tests/unit/test_Pipeline.py +++ b/tests/unit/test_Pipeline.py @@ -156,3 +156,26 @@ def test_addFileFormatCol(self, genericPipeline, sampleView): genericPipeline.addFileFormatCol() assert genericPipeline.view["fileFormat"] == \ ["gif", "fastq", None, "fastq", "tar", None] + +class TestLinks(object): + @pytest.fixture + def pipeline(self, genericPipeline, genericEntityView, sampleMetadata): + genericPipeline.view = genericEntityView + genericPipeline._meta = sampleMetadata + return genericPipeline + + def test_addLinks(self, pipeline): + links = {'name': 'mexico', 'createdBy': 'serbia'} + pipeline.addLinks(links=links, append=False, backup=False) + assert pipeline.links == links + links2 = {'name': 'mexico'} + pipeline.addLinks(links=links2, append=False, backup=False) + assert pipeline.links == links2 + links3 = {'createdBy': 'serbia'} + pipeline.addLinks(links=links3, append=True, backup=False) + assert pipeline.links == links + with pytest.raises(TypeError): + pipeline.addLinks(links="hello", append=False, backup=False) + with pytest.raises(AttributeError): + pipeline.view = None + pipeline.addLinks() From 669100731d81bb4f956a1e187c4214923d39c479 Mon Sep 17 00:00:00 2001 From: Phil Snyder Date: Thu, 8 Feb 2018 12:11:06 -0800 Subject: [PATCH 08/19] test addFileFormatColumn fix. --- tests/unit/test_Pipeline.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/unit/test_Pipeline.py b/tests/unit/test_Pipeline.py index 0ef9374..46196af 100644 --- a/tests/unit/test_Pipeline.py +++ b/tests/unit/test_Pipeline.py @@ -154,8 +154,9 @@ def sampleView(self): def test_addFileFormatCol(self, genericPipeline, sampleView): genericPipeline.view = sampleView genericPipeline.addFileFormatCol() - assert genericPipeline.view["fileFormat"] == \ - ["gif", "fastq", None, "fastq", "tar", None] + assert all([i == j for i, j in + zip(genericPipeline.view["fileFormat"], + ["gif", "fastq", None, "fastq", "tar", None])]) class TestLinks(object): @pytest.fixture From ccb15c0a4e85207fab1b53f8372b8cc2f8339906 Mon Sep 17 00:00:00 2001 From: Phil Snyder Date: Thu, 8 Feb 2018 12:28:06 -0800 Subject: [PATCH 09/19] test isValidKeyCol --- tests/unit/test_Pipeline.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tests/unit/test_Pipeline.py b/tests/unit/test_Pipeline.py index 46196af..720fb98 100644 --- a/tests/unit/test_Pipeline.py +++ b/tests/unit/test_Pipeline.py @@ -180,3 +180,22 @@ def test_addLinks(self, pipeline): with pytest.raises(AttributeError): pipeline.view = None pipeline.addLinks() + + +class TestKey(object): + @pytest.fixture(scope='class') + def pipeline(self, genericPipeline, sampleFile, sampleMetadata): + genericPipeline.view = sampleFile + genericPipeline._meta = sampleMetadata + return genericPipeline + + def test_isValidKeyPair(self, pipeline): + # no overlap + assert not pipeline.isValidKeyPair("name", "mexico") + # complete overlap (view is subset of meta) + pipeline.view["mexico_view"] = list( + pipeline._meta["mexico"][:len(pipeline.view)]) + assert pipeline.isValidKeyPair("mexico_view", "mexico") + # partial overlap + pipeline.view.loc[0, 'mexico_view'] = None + assert not pipeline.isValidKeyPair("mexico_view", "mexico") From 8fe28d21e660e0f3b62cbe0d3731f1d0c7693c1c Mon Sep 17 00:00:00 2001 From: Phil Snyder Date: Thu, 8 Feb 2018 14:25:32 -0800 Subject: [PATCH 10/19] test substituteColumnValues --- tests/unit/test_Pipeline.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/unit/test_Pipeline.py b/tests/unit/test_Pipeline.py index 720fb98..b2dd1e7 100644 --- a/tests/unit/test_Pipeline.py +++ b/tests/unit/test_Pipeline.py @@ -199,3 +199,14 @@ def test_isValidKeyPair(self, pipeline): # partial overlap pipeline.view.loc[0, 'mexico_view'] = None assert not pipeline.isValidKeyPair("mexico_view", "mexico") + +class TestMisc(object): + @pytest.fixture(scope='class') + def pipeline(self, genericPipeline, sampleFile): + genericPipeline.view = sampleFile + return genericPipeline + + def test_substituteColumnValues(self, pipeline): + pipeline.substituteColumnValues( + "name", {"phil": "Phillip", "tom": "Tomothy"}) + assert pipeline.view["name"].values == ["Phillip", "Tomothy"] From c792d26479df3df702ad97803685a7d709f90553 Mon Sep 17 00:00:00 2001 From: Phil Snyder Date: Thu, 8 Feb 2018 14:49:24 -0800 Subject: [PATCH 11/19] test removeActiveCols and some pep8 --- tests/unit/test_Pipeline.py | 37 +++++++++++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/tests/unit/test_Pipeline.py b/tests/unit/test_Pipeline.py index b2dd1e7..fdf6b9e 100644 --- a/tests/unit/test_Pipeline.py +++ b/tests/unit/test_Pipeline.py @@ -45,25 +45,30 @@ def test__parseView_sortCols(self, sampleFile, genericPipeline): assert isinstance(view, pandas.DataFrame) assert list(sorted(sampleFile.columns)) == list(view.columns) + class TestConfirmationPrompt(object): def test_getUserConfirmation_yes(self, genericPipeline, monkeypatch): inputs = ['yes', 'y', 'YeS'] - monkeypatch.setattr('builtins.input', lambda _: next((i for i in inputs))) + monkeypatch.setattr('builtins.input', + lambda _: next((i for i in inputs))) for i in inputs: assert genericPipeline._getUserConfirmation() def test_getUserConfirmation_no(self, genericPipeline, monkeypatch): inputs = ['no', 'n', 'NO thank you'] - monkeypatch.setattr('builtins.input', lambda _: next((i for i in inputs))) + monkeypatch.setattr('builtins.input', + lambda _: next((i for i in inputs))) for i in inputs: assert not genericPipeline._getUserConfirmation() def test_getUserConfirmation_unknown(self, genericPipeline, monkeypatch): inputs = ['wut', '#$%!#$%', 'Show me what u got'] - monkeypatch.setattr('builtins.input', lambda _: next((i for i in inputs))) + monkeypatch.setattr('builtins.input', + lambda _: next((i for i in inputs))) for i in inputs: assert not genericPipeline._getUserConfirmation() + class TestActiveColumns(object): @pytest.fixture def pipeline(self, genericPipeline): @@ -88,7 +93,8 @@ def test_addActiveCols_list(self, pipeline): assert pipeline._activeCols == ["pizza", "pie", "sunday", "funday"] def test_addActiveCols_dict(self, pipeline): - pipeline.addActiveCols({"creme": "brulee", "chai": "latte"}, backup=False) + pipeline.addActiveCols( + {"creme": "brulee", "chai": "latte"}, backup=False) assert "creme" in pipeline._activeCols assert "chai" in pipeline._activeCols @@ -109,7 +115,7 @@ def test_addActiveCols_list_meta(self, pipeline): def test_addActiveCols_dict_meta(self, pipeline): pipeline.addActiveCols({"creme": "brulee", "chai": "latte"}, - isMeta=True, backup=False) + isMeta=True, backup=False) assert "creme" in pipeline._metaActiveCols assert "chai" in pipeline._metaActiveCols @@ -120,16 +126,27 @@ def test_addActiveCols_dataframe_meta(self, pipeline): assert "creme" in pipeline._metaActiveCols assert "chai" in pipeline._metaActiveCols + def test_removeActiveCols_str(self, pipeline): + pipeline.removeActiveCols("pizza", backup=False) + assert "pizza" not in pipeline._activeCols + + def test_removeActiveCols_list(self, pipeline): + pipeline.removeActiveCols(["pizza", "pie"], backup=False) + assert ("pizza" not in pipeline._activeCols and + "pie" not in pipeline._activeCols) + class TestScopeModification(object): def test_addView(self, syn, entities, project): - entity_view = conftest.entity_view(syn, project, entities["folders"][0]) + entity_view = conftest.entity_view( + syn, project, entities["folders"][0]) p = annotator.Pipeline(syn, view=entity_view.id) p.addView(entities["folders"][1].id) correctScopeIds = [f.id[3:] for f in entities["folders"]] assert all([i in correctScopeIds for i in p._entityViewSchema["scopeIds"]]) + class TestDefaultValues(object): def test_addDefaultValues_preexisting_col(self, genericPipeline, sampleFile): genericPipeline.view = sampleFile @@ -145,11 +162,13 @@ def test_addDefaultValues_new_col(self, genericPipeline, sampleFile): assert all([v == "Stinky Bishop" for v in genericPipeline.view["favoriteCheese"]]) + class TestFileFormatColumn(object): @pytest.fixture def sampleView(self): - return pandas.DataFrame({"name": ["celery_man.gif", "fastq.fastq", - "unknown", "slim.fastq.gz", "slim.tar.gz", None]}) + return pandas.DataFrame( + {"name": ["celery_man.gif", "fastq.fastq", "unknown", + "slim.fastq.gz", "slim.tar.gz", None]}) def test_addFileFormatCol(self, genericPipeline, sampleView): genericPipeline.view = sampleView @@ -158,6 +177,7 @@ def test_addFileFormatCol(self, genericPipeline, sampleView): zip(genericPipeline.view["fileFormat"], ["gif", "fastq", None, "fastq", "tar", None])]) + class TestLinks(object): @pytest.fixture def pipeline(self, genericPipeline, genericEntityView, sampleMetadata): @@ -200,6 +220,7 @@ def test_isValidKeyPair(self, pipeline): pipeline.view.loc[0, 'mexico_view'] = None assert not pipeline.isValidKeyPair("mexico_view", "mexico") + class TestMisc(object): @pytest.fixture(scope='class') def pipeline(self, genericPipeline, sampleFile): From b08b1a8e06d6c5a66fe27a9b728b2e8e2726b113 Mon Sep 17 00:00:00 2001 From: Phil Snyder Date: Mon, 12 Feb 2018 15:41:20 -0800 Subject: [PATCH 12/19] test publish --- tests/unit/test_Pipeline.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/tests/unit/test_Pipeline.py b/tests/unit/test_Pipeline.py index fdf6b9e..1c2b670 100644 --- a/tests/unit/test_Pipeline.py +++ b/tests/unit/test_Pipeline.py @@ -231,3 +231,32 @@ def test_substituteColumnValues(self, pipeline): pipeline.substituteColumnValues( "name", {"phil": "Phillip", "tom": "Tomothy"}) assert pipeline.view["name"].values == ["Phillip", "Tomothy"] + + +class TestPublish(object): + @pytest.fixture(scope='class') + def pipeline(self, genericPipeline, sampleFile): + genericPipeline.view = sampleFile + return genericPipeline + + def test_publish_no_view_or_schema(self, pipeline, genericPipeline): + with pytest.raises(AttributeError): + # no view or schema set + genericPipeline.publish(validate=False) + with pytest.raises(AttributeError): + # view set but no schema set + pipeline.publish(validate=False) + + def test_publish(self, syn, entities, genericPipeline): + q = syn.tableQuery("select * from {}".format( + entities['entity_view'].id)) + genericPipeline.view = q.asDataFrame() + genericPipeline._entityViewSchema = entities['entity_view'] + genericPipeline.view['type'] = 'updatedFile' + genericPipeline.publish(validate=False) + published_view = syn.tableQuery("select * from {}".format( + entities['entity_view'].id)).asDataFrame() + pandas.testing.assert_frame_equal( + genericPipeline.view, + published_view, + check_like=True) From ea836649f1f20904d4ea161c018267bd1990f23e Mon Sep 17 00:00:00 2001 From: Phil Snyder Date: Tue, 13 Feb 2018 11:24:33 -0800 Subject: [PATCH 13/19] test createFileView --- tests/unit/conftest.py | 10 ++++++---- tests/unit/test_Pipeline.py | 26 ++++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index 8ee92a8..24fb812 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -109,7 +109,8 @@ def entities(syn, sampleFile, project): sample_folder = folder(syn, project) sample_folder_two = folder(syn, project) # store sample files - _file = file_(syn, sample_folder, SAMPLE_FILE, name="file1.csv") + _file = file_(syn, sample_folder, SAMPLE_FILE, name="file1.csv", + annotations={'preexistingAnnotation': 'yes'}) _file2 = file_(syn, sample_folder, SAMPLE_FILE, name="file2.csv") _file3 = file_(syn, sample_folder_two, SAMPLE_FILE, name="file3.csv") # store a sample metadata file @@ -120,12 +121,13 @@ def entities(syn, sampleFile, project): schema = table(syn, project, sampleFile) # store a sample file view entity_view_ = entity_view(syn, project, scopes=project) - ents = {'files': [_file, _file2, _file3], + entities = {'files': [_file, _file2, _file3], 'folders': [sample_folder, sample_folder_two], 'meta': meta, 'table_schema': schema, - 'entity_view': entity_view_} - return ents + 'entity_view': entity_view_, + 'project': project} + return entities @pytest.fixture(scope='session') def genericPipeline(syn): diff --git a/tests/unit/test_Pipeline.py b/tests/unit/test_Pipeline.py index 1c2b670..6cb3845 100644 --- a/tests/unit/test_Pipeline.py +++ b/tests/unit/test_Pipeline.py @@ -260,3 +260,29 @@ def test_publish(self, syn, entities, genericPipeline): genericPipeline.view, published_view, check_like=True) + +class TestCreateFileView(object): + @pytest.fixture(scope='class') + def pipeline(self, genericPipeline, entities): + additionalCols = {"coffee": "black", "cheese": None} + genericPipeline.createFileView( + name="--Test--", + parent=entities['project'].id, + scope=entities['project'].id, + addCols=additionalCols) + return genericPipeline + + def test_createFileView(self, pipeline): + published_view = syn.tableQuery("select * from {}".format( + pipeline._entityViewSchema.id)).asDataFrame() + # check default value propogated locally but not globally + assert all([v == "black" for v in pipeline.view["coffee"]]) + assert all([pandas.isnull(v) for v in published_view["coffee"]]) + # check that columns with unspecified values were created + assert "cheese" in pipeline.view + assert "cheese" in published_view + # check that preexisting annotations were included + assert any([pd.notnull(v) for v in + pipeline.view['preexistingAnnotation']]) + assert any([pd.notnull(v) for v in + published_view['preexistingAnnotation']]) From 691a21921851ca8cdaa933e3e6d88fa318f9a4c6 Mon Sep 17 00:00:00 2001 From: Phil Snyder Date: Tue, 13 Feb 2018 13:35:58 -0800 Subject: [PATCH 14/19] conftest file upload refactoring --- tests/unit/conftest.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index 24fb812..75ee15b 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -42,17 +42,16 @@ def project(syn): syn.delete(project) -def file_(syn, parent, path, annotations=None, **kwargs): +def file_(syn, parent, path, **kwargs): if 'name' not in kwargs: name = str(uuid.uuid4()) else: name = kwargs.pop('name') - file_ = synapseclient.File(path=path, name=name, - parent=parent, **kwargs) - if annotations: - for a in annotations.items(): - key, value = a - file_[key] = value + file_ = synapseclient.File( + path=path, + name=name, + parent=parent, + **kwargs) file_ = syn.store(file_) return file_ @@ -110,13 +109,14 @@ def entities(syn, sampleFile, project): sample_folder_two = folder(syn, project) # store sample files _file = file_(syn, sample_folder, SAMPLE_FILE, name="file1.csv", - annotations={'preexistingAnnotation': 'yes'}) - _file2 = file_(syn, sample_folder, SAMPLE_FILE, name="file2.csv") - _file3 = file_(syn, sample_folder_two, SAMPLE_FILE, name="file3.csv") + annotations={'preexistingAnnotation': 'yes'}, + synapseStore=False) + _file2 = file_(syn, sample_folder, SAMPLE_FILE, name="file2.csv", + synapseStore=False) + _file3 = file_(syn, sample_folder_two, SAMPLE_FILE, name="file3.csv", + synapseStore=False) # store a sample metadata file - meta = synapseclient.File(path=SAMPLE_META, name='meta', - parent=project) - meta = syn.store(meta) + meta = file_(syn, project, SAMPLE_META, name='meta', synapseStore=False) # store a sample table (same values as sample file) schema = table(syn, project, sampleFile) # store a sample file view From b28b076f09b13c12e1514709a7e1694ba0a420a9 Mon Sep 17 00:00:00 2001 From: Phil Snyder Date: Tue, 13 Feb 2018 14:04:15 -0800 Subject: [PATCH 15/19] some additional fixes as a result of the previous conftest function call modification --- tests/unit/conftest.py | 1 - tests/unit/test_utils.py | 9 ++++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index 75ee15b..339b68a 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -109,7 +109,6 @@ def entities(syn, sampleFile, project): sample_folder_two = folder(syn, project) # store sample files _file = file_(syn, sample_folder, SAMPLE_FILE, name="file1.csv", - annotations={'preexistingAnnotation': 'yes'}, synapseStore=False) _file2 = file_(syn, sample_folder, SAMPLE_FILE, name="file2.csv", synapseStore=False) diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index 8a21b14..b41b954 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -113,11 +113,14 @@ def scopeFolders(self, syn, project): folder_two = conftest.folder(syn, project) folder_three = conftest.folder(syn, project) file_one = conftest.file_(syn, folder_one, conftest.SAMPLE_FILE, - {'color': 'red'}) + annotations={'color': 'red'}, + synapseStore=False) file_two = conftest.file_(syn, folder_two, conftest.SAMPLE_FILE, - {'pizza': 'pineapple'}) + annotations={'pizza': 'pineapple'}, + synapseStore=False) file_three = conftest.file_(syn, folder_three, conftest.SAMPLE_FILE, - {'cookie': 'monster'}) + annotations={'cookie': 'monster'}, + synapseStore=False) return {1: folder_one, 2: folder_two, 3: folder_three} """ From 1bd9c4196f191c8587d83c0e63670169002bd47a Mon Sep 17 00:00:00 2001 From: Phil Snyder Date: Wed, 14 Feb 2018 12:18:24 -0800 Subject: [PATCH 16/19] test transfer links --- tests/unit/conftest.py | 3 ++- tests/unit/test_Pipeline.py | 18 ++++++++++++++++-- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index 339b68a..8a8a3bf 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -119,7 +119,8 @@ def entities(syn, sampleFile, project): # store a sample table (same values as sample file) schema = table(syn, project, sampleFile) # store a sample file view - entity_view_ = entity_view(syn, project, scopes=project) + entity_view_ = entity_view(syn, project, + scopes=[sample_folder, sample_folder_two]) entities = {'files': [_file, _file2, _file3], 'folders': [sample_folder, sample_folder_two], 'meta': meta, diff --git a/tests/unit/test_Pipeline.py b/tests/unit/test_Pipeline.py index 6cb3845..c5ba43f 100644 --- a/tests/unit/test_Pipeline.py +++ b/tests/unit/test_Pipeline.py @@ -180,8 +180,10 @@ def test_addFileFormatCol(self, genericPipeline, sampleView): class TestLinks(object): @pytest.fixture - def pipeline(self, genericPipeline, genericEntityView, sampleMetadata): - genericPipeline.view = genericEntityView + def pipeline(self, syn, genericPipeline, entities, sampleMetadata): + view = syn.tableQuery("select * from {}".format( + entities['entity_view'].id)).asDataFrame() + genericPipeline.view = view genericPipeline._meta = sampleMetadata return genericPipeline @@ -201,6 +203,18 @@ def test_addLinks(self, pipeline): pipeline.view = None pipeline.addLinks() + def test_transferLinks(self, pipeline): + print(pipeline.view) + pipeline.view['id'] = [1,2,3] + pipeline.view['spanishWords'] = None + pipeline.view['serbianWords'] = None + pipeline.keyCol = 'id' + pipeline.links = {'spanishWords': 'mexico', 'serbianWords': 'serbia'} + pipeline.transferLinks() + assert all(pipeline.view['spanishWords'] == ['quien', 'que', 'donde']) + assert all(pipeline.view['serbianWords'] == ['ко', 'Шта', 'где']) + assert 'id' not in pipeline.view # by default we drop the `on` column + class TestKey(object): @pytest.fixture(scope='class') From 33a320f631e8acdb781d85a5c091b43875d5f8b1 Mon Sep 17 00:00:00 2001 From: Phil Snyder Date: Thu, 15 Feb 2018 11:12:38 -0800 Subject: [PATCH 17/19] cleanup --- tests/unit/test_Pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_Pipeline.py b/tests/unit/test_Pipeline.py index c5ba43f..9413f8c 100644 --- a/tests/unit/test_Pipeline.py +++ b/tests/unit/test_Pipeline.py @@ -204,7 +204,6 @@ def test_addLinks(self, pipeline): pipeline.addLinks() def test_transferLinks(self, pipeline): - print(pipeline.view) pipeline.view['id'] = [1,2,3] pipeline.view['spanishWords'] = None pipeline.view['serbianWords'] = None @@ -275,6 +274,7 @@ def test_publish(self, syn, entities, genericPipeline): published_view, check_like=True) + class TestCreateFileView(object): @pytest.fixture(scope='class') def pipeline(self, genericPipeline, entities): From 4ad5550d23b7b96746a3a50dea44fa2ec0eaa327 Mon Sep 17 00:00:00 2001 From: Phil Snyder Date: Thu, 15 Feb 2018 12:48:12 -0800 Subject: [PATCH 18/19] test backup and a bug fix for the backup function --- annotator/Pipeline.py | 8 +++++--- tests/unit/test_Pipeline.py | 39 +++++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 3 deletions(-) diff --git a/annotator/Pipeline.py b/annotator/Pipeline.py index 4696a54..be13a81 100644 --- a/annotator/Pipeline.py +++ b/annotator/Pipeline.py @@ -65,11 +65,13 @@ def __init__(self, syn, view=None, meta=None, activeCols=[], self.links = links if isinstance(links, dict) else None self._backup = [] - def backup(self, message): + def backup(self, message=None): """ Backup the state of `self` and store in `self._backup` """ self._backup.append((Pipeline( - self.syn, self.view, self._meta, self._activeCols, - self._metaActiveCols, self.links, self._sortCols), message)) + syn=self.syn, view=self.view, meta=self._meta, + activeCols=self._activeCols, metaActiveCols=self._metaActiveCols, + links=self.links, sortCols=self._sortCols, schema=self.schema), + message)) if len(self._backup) > self.BACKUP_LENGTH: self._backup = self._backup[1:] diff --git a/tests/unit/test_Pipeline.py b/tests/unit/test_Pipeline.py index 9413f8c..cff45c9 100644 --- a/tests/unit/test_Pipeline.py +++ b/tests/unit/test_Pipeline.py @@ -300,3 +300,42 @@ def test_createFileView(self, pipeline): pipeline.view['preexistingAnnotation']]) assert any([pd.notnull(v) for v in published_view['preexistingAnnotation']]) + +class TestBackup(object): + def pipeline(self, genericPipeline): + return genericPipeline + + def test_backup_empty(self, syn): + genericPipeline = annotator.Pipeline(syn) + genericPipeline.backup() + assert len(genericPipeline._backup) == 1 + assert genericPipeline._backup[0][0].syn == genericPipeline.syn + + def test_backup_full(self, syn, sampleFile, sampleMetadata): + genericPipeline = annotator.Pipeline(syn) + cols = ['name'] + links = {'name': 'mexico'} + schema = pandas.DataFrame({'a': [1]}) + genericPipeline.view = sampleFile + genericPipeline._meta = sampleMetadata + genericPipeline._activeCols = cols + genericPipeline._metaActiveCols = cols + genericPipeline.links = links + genericPipeline.schema = schema + genericPipeline.backup() + assert len(genericPipeline._backup) == 1 + pandas.testing.assert_frame_equal( + genericPipeline._backup[0][0].view, + sampleFile, + check_like=True) + pandas.testing.assert_frame_equal( + genericPipeline._backup[0][0]._meta, + sampleMetadata, + check_like=True) + assert genericPipeline._backup[0][0]._activeCols == cols + assert genericPipeline._backup[0][0]._metaActiveCols == cols + assert genericPipeline._backup[0][0].links == links + pandas.testing.assert_frame_equal( + genericPipeline._backup[0][0].schema, + schema, + check_like=True) From d5fac3233d10f81e2929d3abfb19354a2c35ef45 Mon Sep 17 00:00:00 2001 From: Phil Snyder Date: Thu, 15 Feb 2018 13:00:51 -0800 Subject: [PATCH 19/19] pep8 --- tests/unit/test_Pipeline.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/unit/test_Pipeline.py b/tests/unit/test_Pipeline.py index cff45c9..7f9e5f4 100644 --- a/tests/unit/test_Pipeline.py +++ b/tests/unit/test_Pipeline.py @@ -204,7 +204,7 @@ def test_addLinks(self, pipeline): pipeline.addLinks() def test_transferLinks(self, pipeline): - pipeline.view['id'] = [1,2,3] + pipeline.view['id'] = [1, 2, 3] pipeline.view['spanishWords'] = None pipeline.view['serbianWords'] = None pipeline.keyCol = 'id' @@ -212,7 +212,7 @@ def test_transferLinks(self, pipeline): pipeline.transferLinks() assert all(pipeline.view['spanishWords'] == ['quien', 'que', 'donde']) assert all(pipeline.view['serbianWords'] == ['ко', 'Шта', 'где']) - assert 'id' not in pipeline.view # by default we drop the `on` column + assert 'id' not in pipeline.view # by default we drop the `on` column class TestKey(object): @@ -301,6 +301,7 @@ def test_createFileView(self, pipeline): assert any([pd.notnull(v) for v in published_view['preexistingAnnotation']]) + class TestBackup(object): def pipeline(self, genericPipeline): return genericPipeline