From 8aaafb2c0f42e0b4947a06bb15c944e414907975 Mon Sep 17 00:00:00 2001
From: Addison Schiller
Date: Mon, 13 Nov 2017 10:56:39 -0500
Subject: [PATCH 1/3] Look for Dataverse renamed files on upload

Dataverse 'ingests' certain file types and renames them in the process.
During upload, when WaterButler tries to find the correct metadata to
return, it 500s because it was not looking for the renamed file.
---
 tests/providers/dataverse/fixtures.py              |  9 +++
 .../dataverse/fixtures/root_provider.json          | 16 +++++
 tests/providers/dataverse/test_metadata.py         | 27 ++++++++
 tests/providers/dataverse/test_utils.py            | 55 +++++++++++++++
 waterbutler/providers/dataverse/metadata.py        | 15 ++++
 waterbutler/providers/dataverse/provider.py        |  3 +-
 waterbutler/providers/dataverse/utils.py           | 68 +++++++++++++++++++
 7 files changed, 192 insertions(+), 1 deletion(-)
 create mode 100644 tests/providers/dataverse/test_utils.py
 create mode 100644 waterbutler/providers/dataverse/utils.py

diff --git a/tests/providers/dataverse/fixtures.py b/tests/providers/dataverse/fixtures.py
index 94eef05bb..a3bb06181 100644
--- a/tests/providers/dataverse/fixtures.py
+++ b/tests/providers/dataverse/fixtures.py
@@ -31,6 +31,7 @@ def settings():
         'name': 'A look at wizards',
     }
 
+
 @pytest.fixture
 def native_file_metadata():
     with open(os.path.join(os.path.dirname(__file__), 'fixtures/root_provider.json'), 'r') as fp:
@@ -65,12 +66,20 @@ def dataset_metadata_object():
         'Dataset Test Version'
     )
 
+
 @pytest.fixture
 def file_metadata_object():
     with open(os.path.join(os.path.dirname(__file__), 'fixtures/root_provider.json'), 'r') as fp:
         return DataverseFileMetadata(json.load(fp)['native_file_metadata']['datafile'], 'latest')
 
 
+@pytest.fixture
+def csv_file_metadata_object():
+    with open(os.path.join(os.path.dirname(__file__), 'fixtures/root_provider.json'), 'r') as fp:
+        return DataverseFileMetadata(json.load(fp)['csv_native_file_metadata']['datafile'],
+                                     'latest')
+
+
 @pytest.fixture
 def revision_metadata_object():
     return DataverseRevision('Test Dataset Verision')
diff --git a/tests/providers/dataverse/fixtures/root_provider.json b/tests/providers/dataverse/fixtures/root_provider.json
index 3fd461287..850753ee5 100644
--- a/tests/providers/dataverse/fixtures/root_provider.json
+++ b/tests/providers/dataverse/fixtures/root_provider.json
@@ -258,6 +258,22 @@
         "label":"thefile.txt",
         "version":1
     },
+    "csv_native_file_metadata":{
+        "datafile":{
+            "contentType":"text/tab-separated-values",
+            "description":"",
+            "filename":"%2Fusr%2Flocal%2Fglassfish4%2Fglassfish%2Fdomains%2Fdomain1%2Ffiles%2F10.5072%2FFK2%2F232XYH%2F14c7a73d734-8383551cc713",
+            "id":20,
+            "md5":"6b50249f91258397fc5cb7d5a4127e15",
+            "name":"thefile.tab",
+            "originalFormatLabel":"Comma Separated Values",
+            "originalFileFormat": "text/csv"
+        },
+        "datasetVersionId":5,
+        "description":"",
+        "label":"thefile.tab",
+        "version":1
+    },
     "checksum_mismatch_dataset_metadata":{
         "data":{
             "createTime":"2015-04-02T13:21:59Z",
diff --git a/tests/providers/dataverse/test_metadata.py b/tests/providers/dataverse/test_metadata.py
index ccb139087..fd31b8419 100644
--- a/tests/providers/dataverse/test_metadata.py
+++ b/tests/providers/dataverse/test_metadata.py
@@ -2,9 +2,11 @@
 from tests.providers.dataverse.fixtures import (
     dataset_metadata_object,
     revision_metadata_object,
+    csv_file_metadata_object,
     file_metadata_object
 )
 
+
 class TestDatasetMetadata:
 
     def test_dataset_metadata(self, dataset_metadata_object):
@@ -45,6 +47,7 @@ def test_file_metadata(self, file_metadata_object):
         assert not file_metadata_object.created_utc
         assert file_metadata_object.content_type == 'text/plain; charset=US-ASCII'
         assert file_metadata_object.etag == 'latest::20'
+        assert file_metadata_object.original_name == 'thefile.txt'
         assert file_metadata_object.extra == {
             'fileId': '20',
             'datasetVersion': 'latest',
@@ -53,3 +56,27 @@ def test_file_metadata(self, file_metadata_object):
                 'md5': '6b50249f91258397fc5cb7d5a4127e15',
             },
         }
+
+    def test_csv_file_metadata(self, csv_file_metadata_object):
+        assert csv_file_metadata_object.is_file
+        assert not csv_file_metadata_object.is_folder
+        assert csv_file_metadata_object.provider == 'dataverse'
+        assert csv_file_metadata_object.kind == 'file'
+        assert csv_file_metadata_object.file_id == '20'
+        assert csv_file_metadata_object.name == 'thefile.tab'
+        assert csv_file_metadata_object.path == '/20'
+        assert csv_file_metadata_object.materialized_path == '/thefile.tab'
+        assert not csv_file_metadata_object.size
+        assert not csv_file_metadata_object.modified
+        assert not csv_file_metadata_object.created_utc
+        assert csv_file_metadata_object.content_type == 'text/tab-separated-values'
+        assert csv_file_metadata_object.etag == 'latest::20'
+        assert csv_file_metadata_object.original_name == 'thefile.csv'
+        assert csv_file_metadata_object.extra == {
+            'fileId': '20',
+            'datasetVersion': 'latest',
+            'hasPublishedVersion': False,
+            'hashes': {
+                'md5': '6b50249f91258397fc5cb7d5a4127e15',
+            },
+        }
diff --git a/tests/providers/dataverse/test_utils.py b/tests/providers/dataverse/test_utils.py
new file mode 100644
index 000000000..1d4846b74
--- /dev/null
+++ b/tests/providers/dataverse/test_utils.py
@@ -0,0 +1,55 @@
+import pytest
+
+from waterbutler.providers.dataverse import utils as dv_utils
+
+
+@pytest.fixture
+def format_dict():
+    return {
+        'xlsx': {
+            'originalFileFormat': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
+            'originalFormatLabel': 'MS Excel (XLSX)',
+            'contentType': 'text/tab-separated-values',
+
+        },
+        'RData': {
+            'originalFileFormat': 'application/x-rlang-transport',
+            'originalFormatLabel': 'R Data',
+            'contentType': 'text/tab-separated-values'
+
+        },
+        'sav': {
+            'originalFileFormat': 'application/x-spss-sav',
+            'originalFormatLabel': 'SPSS SAV',
+            'contentType': 'text/tab-separated-values'
+        },
+        'dta': {
+            'originalFileFormat': 'application/x-stata',
+            'originalFormatLabel': 'Stata Binary',
+            'contentType': 'text/tab-separated-values'
+
+        },
+        'por': {
+            'originalFileFormat': 'application/x-spss-por',
+            'originalFormatLabel': 'SPSS Portable',
+            'contentType': 'text/tab-separated-values'
+
+        },
+        'csv': {
+            'originalFileFormat': 'text/csv',
+            'originalFormatLabel': 'Comma Separated Values',
+            'contentType': 'text/tab-separated-values'
+        }
+    }
+
+
+class TestUtils:
+
+    def test_original_ext_from_raw_metadata(self, format_dict):
+        for key in format_dict:
+            assert key == dv_utils.original_ext_from_raw_metadata(format_dict[key])
+
+    def test_original_ext_from_raw_metadata_none_case(self, format_dict):
+        for key in format_dict:
+            format_dict[key]['originalFormatLabel'] = 'blarg'
+            assert dv_utils.original_ext_from_raw_metadata(format_dict[key]) is None
diff --git a/waterbutler/providers/dataverse/metadata.py b/waterbutler/providers/dataverse/metadata.py
index 125325dbf..5047a6130 100644
--- a/waterbutler/providers/dataverse/metadata.py
+++ b/waterbutler/providers/dataverse/metadata.py
@@ -1,4 +1,5 @@
 from waterbutler.core import metadata
+from waterbutler.providers.dataverse import utils as dv_utils
 
 
 class BaseDataverseMetadata(metadata.BaseMetadata):
@@ -26,6 +27,20 @@ def file_id(self):
     def name(self):
         return self.raw.get('name', None) or self.raw.get('filename', None)
 
+    @property
+    def original_name(self):
+        """ Dataverse 'ingests' some file types. This changes their extension.
+        This property will look through the metadata to try to determine the original
+        name of the file.
+        """
+
+        ext = dv_utils.original_ext_from_raw_metadata(self.raw)
+        if ext is None:
+            return self.name
+        else:
+            name = self.name[:self.name.rfind('.')]
+            return name + '.{}'.format(ext)
+
     @property
     def path(self):
         return self.build_path(self.file_id)
diff --git a/waterbutler/providers/dataverse/provider.py b/waterbutler/providers/dataverse/provider.py
index eddaed0b9..82587eda1 100644
--- a/waterbutler/providers/dataverse/provider.py
+++ b/waterbutler/providers/dataverse/provider.py
@@ -178,7 +178,8 @@ async def upload(self, stream, path, **kwargs):
         # Find appropriate version of file
         metadata = await self._get_data('latest')
         files = metadata if isinstance(metadata, list) else []
-        file_metadata = next(file for file in files if file.name == path.name)
+        file_metadata = next(file for file in files if file.name == path.name or
+                             file.original_name == path.name)
 
         if stream.writers['md5'].hexdigest != file_metadata.extra['hashes']['md5']:
             raise exceptions.UploadChecksumMismatchError()
diff --git a/waterbutler/providers/dataverse/utils.py b/waterbutler/providers/dataverse/utils.py
new file mode 100644
index 000000000..f2d7651c8
--- /dev/null
+++ b/waterbutler/providers/dataverse/utils.py
@@ -0,0 +1,68 @@
+ORIGINAL_FORMATS = {
+    'xlsx': {
+        'original_format': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
+        'original_label': 'MS Excel (XLSX)',
+        'content_type': 'text/tab-separated-values',
+
+    },
+    # Rdata can come in a few different forms, so just list all of them here
+    'RData': {
+        'original_format': 'application/x-rlang-transport',
+        'original_label': 'R Data',
+        'content_type': 'text/tab-separated-values'
+
+    },
+    'rdata': {
+        'original_format': 'application/x-rlang-transport',
+        'original_label': 'R Data',
+        'content_type': 'text/tab-separated-values'
+
+    },
+    'Rdata': {
+        'original_format': 'application/x-rlang-transport',
+        'original_label': 'R Data',
+        'content_type': 'text/tab-separated-values'
+
+    },
+    'sav': {
+        'original_format': 'application/x-spss-sav',
+        'original_label': 'SPSS SAV',
+        'content_type': 'text/tab-separated-values'
+    },
+    'dta': {
+        'original_format': 'application/x-stata',
+        'original_label': 'Stata Binary',
+        'content_type': 'text/tab-separated-values'
+
+    },
+    'por': {
+        'original_format': 'application/x-spss-por',
+        'original_label': 'SPSS Portable',
+        'content_type': 'text/tab-separated-values'
+
+    },
+    'csv': {
+        'original_format': 'text/csv',
+        'original_label': 'Comma Separated Values',
+        'content_type': 'text/tab-separated-values'
+    }
+}
+
+
+def original_ext_from_raw_metadata(data):
+    """Use the raw metadata to figure out the original extension."""
+    label = data.get('originalFormatLabel', None)
+    file_format = data.get('originalFileFormat', None)
+    content_type = data.get('contentType', None)
+
+    if not label or not file_format or not content_type:
+        return None
+
+    for key in ORIGINAL_FORMATS:
+        if (label == ORIGINAL_FORMATS[key]['original_label'] and
+                file_format == ORIGINAL_FORMATS[key]['original_format'] and
+                content_type == ORIGINAL_FORMATS[key]['content_type']):
+
+            return key
+
+    return None

From c80368466b22d692f1028e899398e5d4873a84b8 Mon Sep 17 00:00:00 2001
From: Addison Schiller
Date: Tue, 21 Nov 2017 13:43:35 -0500
Subject: [PATCH 2/3] Response to CR

---
 tests/providers/dataverse/test_metadata.py    |  6 ++-
 tests/providers/dataverse/test_provider.py    | 27 +++++++++++
 tests/providers/dataverse/test_utils.py       |  6 +--
 waterbutler/providers/dataverse/exceptions.py | 15 ++++++
 waterbutler/providers/dataverse/metadata.py   | 19 ++++----
 waterbutler/providers/dataverse/provider.py   | 17 +++++--
 waterbutler/providers/dataverse/utils.py      | 46 ++++++++-----------
 7 files changed, 90 insertions(+), 46 deletions(-)
 create mode 100644 waterbutler/providers/dataverse/exceptions.py

diff --git a/tests/providers/dataverse/test_metadata.py b/tests/providers/dataverse/test_metadata.py
index fd31b8419..44ff6a384 100644
--- a/tests/providers/dataverse/test_metadata.py
+++ b/tests/providers/dataverse/test_metadata.py
@@ -47,7 +47,7 @@ def test_file_metadata(self, file_metadata_object):
         assert not file_metadata_object.created_utc
         assert file_metadata_object.content_type == 'text/plain; charset=US-ASCII'
         assert file_metadata_object.etag == 'latest::20'
-        assert file_metadata_object.original_name == 'thefile.txt'
+        assert file_metadata_object.original_names == ['thefile.txt']
         assert file_metadata_object.extra == {
             'fileId': '20',
             'datasetVersion': 'latest',
@@ -71,7 +71,9 @@ def test_csv_file_metadata(self, csv_file_metadata_object):
         assert not csv_file_metadata_object.created_utc
         assert csv_file_metadata_object.content_type == 'text/tab-separated-values'
         assert csv_file_metadata_object.etag == 'latest::20'
-        assert csv_file_metadata_object.original_name == 'thefile.csv'
+        names = csv_file_metadata_object.original_names
+        assert 'thefile.csv' in names
+        assert 'thefile.CSV' in names
         assert csv_file_metadata_object.extra == {
             'fileId': '20',
             'datasetVersion': 'latest',
diff --git a/tests/providers/dataverse/test_provider.py b/tests/providers/dataverse/test_provider.py
index 141abe45c..01e4c6164 100644
--- a/tests/providers/dataverse/test_provider.py
+++ b/tests/providers/dataverse/test_provider.py
@@ -11,6 +11,7 @@
 from waterbutler.core.path import WaterButlerPath
 from waterbutler.providers.dataverse import settings as dvs
 from waterbutler.providers.dataverse import DataverseProvider
+from waterbutler.providers.dataverse.exceptions import DataverseIngestionLockError
 from waterbutler.providers.dataverse.metadata import DataverseFileMetadata, DataverseRevision
 
 from tests.providers.dataverse.fixtures import (
@@ -235,6 +236,32 @@ async def test_upload_create(self, provider, file_stream, native_file_metadata,
         assert aiohttpretty.has_call(method='GET', uri=latest_url)
         assert aiohttpretty.has_call(method='GET', uri=latest_published_url)
 
+    @pytest.mark.asyncio
+    @pytest.mark.aiohttpretty
+    async def test_upload_ingestion_exception(self, provider, file_stream, native_file_metadata,
+                                              empty_native_dataset_metadata, native_dataset_metadata):
+        path = WaterButlerPath('/thefile.txt')
+        url = provider.build_url(dvs.EDIT_MEDIA_BASE_URL, 'study', provider.doi)
+        aiohttpretty.register_uri('POST', url, status=400, body=b'something dataset lock: Ingest')
+
+        with pytest.raises(DataverseIngestionLockError):
+            await provider.upload(file_stream, path)
+
+        assert aiohttpretty.has_call(method='POST', uri=url)
+
+    @pytest.mark.asyncio
+    @pytest.mark.aiohttpretty
+    async def test_upload_random_exception(self, provider, file_stream, native_file_metadata,
+                                           empty_native_dataset_metadata, native_dataset_metadata):
+        path = WaterButlerPath('/thefile.txt')
+        url = provider.build_url(dvs.EDIT_MEDIA_BASE_URL, 'study', provider.doi)
+        aiohttpretty.register_uri('POST', url, status=400, body=b'something something error')
+
+        with pytest.raises(exceptions.UploadError):
+            await provider.upload(file_stream, path)
+
+        assert aiohttpretty.has_call(method='POST', uri=url)
+
     @pytest.mark.asyncio
     @pytest.mark.aiohttpretty
     async def test_upload_updates(self, provider,
diff --git a/tests/providers/dataverse/test_utils.py b/tests/providers/dataverse/test_utils.py
index 1d4846b74..f7c572b83 100644
--- a/tests/providers/dataverse/test_utils.py
+++ b/tests/providers/dataverse/test_utils.py
@@ -10,13 +10,11 @@ def format_dict():
             'originalFileFormat': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
             'originalFormatLabel': 'MS Excel (XLSX)',
             'contentType': 'text/tab-separated-values',
-
         },
         'RData': {
             'originalFileFormat': 'application/x-rlang-transport',
             'originalFormatLabel': 'R Data',
             'contentType': 'text/tab-separated-values'
-
         },
         'sav': {
             'originalFileFormat': 'application/x-spss-sav',
@@ -27,13 +25,11 @@ def format_dict():
             'originalFileFormat': 'application/x-stata',
             'originalFormatLabel': 'Stata Binary',
             'contentType': 'text/tab-separated-values'
-
         },
         'por': {
             'originalFileFormat': 'application/x-spss-por',
             'originalFormatLabel': 'SPSS Portable',
             'contentType': 'text/tab-separated-values'
-
         },
        'csv': {
             'originalFileFormat': 'text/csv',
@@ -47,7 +43,7 @@ class TestUtils:
 
     def test_original_ext_from_raw_metadata(self, format_dict):
         for key in format_dict:
-            assert key == dv_utils.original_ext_from_raw_metadata(format_dict[key])
+            assert key in dv_utils.original_ext_from_raw_metadata(format_dict[key])
 
     def test_original_ext_from_raw_metadata_none_case(self, format_dict):
         for key in format_dict:
diff --git a/waterbutler/providers/dataverse/exceptions.py b/waterbutler/providers/dataverse/exceptions.py
new file mode 100644
index 000000000..b41364b7f
--- /dev/null
+++ b/waterbutler/providers/dataverse/exceptions.py
@@ -0,0 +1,15 @@
+from http import HTTPStatus
+
+from waterbutler.core.exceptions import UploadError
+
+
+class DataverseIngestionLockError(UploadError):
+    def __init__(self, message, code=HTTPStatus.BAD_REQUEST):
+        """The ``message`` argument is ignored; it exists because children of
+        ``WaterButlerError`` must be instantiable with a single argument. See
+        :class:`waterbutler.core.exceptions.WaterButlerError` for details.
+        """
+        super().__init__(
+            'Some uploads to Dataverse will lock uploading for a time. Please wait'
+            ' a few seconds and try again.',
+            code=code)
diff --git a/waterbutler/providers/dataverse/metadata.py b/waterbutler/providers/dataverse/metadata.py
index 5047a6130..beafe3f6b 100644
--- a/waterbutler/providers/dataverse/metadata.py
+++ b/waterbutler/providers/dataverse/metadata.py
@@ -28,18 +28,21 @@ def name(self):
         return self.raw.get('name', None) or self.raw.get('filename', None)
 
     @property
-    def original_name(self):
+    def original_names(self):
         """ Dataverse 'ingests' some file types. This changes their extension.
-        This property will look through the metadata to try to determine the original
-        name of the file.
+        This property will look through the metadata to try to determine possible
+        original names of the file.
         """
 
-        ext = dv_utils.original_ext_from_raw_metadata(self.raw)
-        if ext is None:
-            return self.name
+        extensions = dv_utils.original_ext_from_raw_metadata(self.raw)
+        if extensions is None:
+            return [self.name]
         else:
-            name = self.name[:self.name.rfind('.')]
-            return name + '.{}'.format(ext)
+            names = []
+            for ext in extensions:
+                name = self.name[:self.name.rfind('.')]
+                names.append(name + '.{}'.format(ext))
+            return names
 
     @property
     def path(self):
diff --git a/waterbutler/providers/dataverse/provider.py b/waterbutler/providers/dataverse/provider.py
index 82587eda1..30d6165fa 100644
--- a/waterbutler/providers/dataverse/provider.py
+++ b/waterbutler/providers/dataverse/provider.py
@@ -11,6 +11,7 @@
 from waterbutler.providers.dataverse import settings
 from waterbutler.providers.dataverse.metadata import DataverseRevision
 from waterbutler.providers.dataverse.metadata import DataverseDatasetMetadata
+from waterbutler.providers.dataverse.exceptions import DataverseIngestionLockError
 
 
 class DataverseProvider(provider.BaseProvider):
@@ -170,16 +171,26 @@ async def upload(self, stream, path, **kwargs):
             headers=dv_headers,
             auth=(self.token, ),
             data=file_stream,
-            expects=(201, ),
+            expects=(201, 400,),
             throws=exceptions.UploadError
         )
+
+        if resp.status == 400:
+            data = await resp.read()
+            data = data.decode('utf-8')
+
+            if 'dataset lock: Ingest' in data:
+                raise DataverseIngestionLockError({'response': data})
+            else:
+                raise (await exceptions.exception_from_response(resp,
+                                                                error=exceptions.UploadError))
         await resp.release()
 
         # Find appropriate version of file
         metadata = await self._get_data('latest')
         files = metadata if isinstance(metadata, list) else []
-        file_metadata = next(file for file in files if file.name == path.name or
-                             file.original_name == path.name)
+        file_metadata = next(file for file in files if (file.name == path.name or
+                                                        path.name in file.original_names))
 
         if stream.writers['md5'].hexdigest != file_metadata.extra['hashes']['md5']:
             raise exceptions.UploadChecksumMismatchError()
diff --git a/waterbutler/providers/dataverse/utils.py b/waterbutler/providers/dataverse/utils.py
index f2d7651c8..4abdb871e 100644
--- a/waterbutler/providers/dataverse/utils.py
+++ b/waterbutler/providers/dataverse/utils.py
@@ -1,56 +1,46 @@
 ORIGINAL_FORMATS = {
-    'xlsx': {
-        'original_format': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
-        'original_label': 'MS Excel (XLSX)',
-        'content_type': 'text/tab-separated-values',
 
-    },
-    # Rdata can come in a few different forms, so just list all of them here
     'RData': {
         'original_format': 'application/x-rlang-transport',
         'original_label': 'R Data',
-        'content_type': 'text/tab-separated-values'
-
-    },
-    'rdata': {
-        'original_format': 'application/x-rlang-transport',
-        'original_label': 'R Data',
-        'content_type': 'text/tab-separated-values'
-
-    },
-    'Rdata': {
-        'original_format': 'application/x-rlang-transport',
-        'original_label': 'R Data',
-        'content_type': 'text/tab-separated-values'
-
+        'content_type': 'text/tab-separated-values',
+        'all_extensions': ['rdata', 'Rdata', 'RData']
     },
     'sav': {
         'original_format': 'application/x-spss-sav',
         'original_label': 'SPSS SAV',
-        'content_type': 'text/tab-separated-values'
+        'content_type': 'text/tab-separated-values',
+        'all_extensions': ['sav']
     },
     'dta': {
         'original_format': 'application/x-stata',
         'original_label': 'Stata Binary',
-        'content_type': 'text/tab-separated-values'
-
+        'content_type': 'text/tab-separated-values',
+        'all_extensions': ['dta']
     },
     'por': {
         'original_format': 'application/x-spss-por',
         'original_label': 'SPSS Portable',
-        'content_type': 'text/tab-separated-values'
-
+        'content_type': 'text/tab-separated-values',
+        'all_extensions': ['por']
     },
     'csv': {
         'original_format': 'text/csv',
         'original_label': 'Comma Separated Values',
-        'content_type': 'text/tab-separated-values'
+        'content_type': 'text/tab-separated-values',
+        'all_extensions': ['csv', 'CSV']
+    },
+    'xlsx': {
+        'original_format': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
+        'original_label': 'MS Excel (XLSX)',
+        'content_type': 'text/tab-separated-values',
+        'all_extensions': ['xlsx']
     }
 }
 
 
 def original_ext_from_raw_metadata(data):
-    """Use the raw metadata to figure out the original extension."""
+    """Use the raw metadata to figure out possible original extensions."""
     label = data.get('originalFormatLabel', None)
     file_format = data.get('originalFileFormat', None)
     content_type = data.get('contentType', None)
@@ -63,6 +53,6 @@ def original_ext_from_raw_metadata(data):
                 file_format == ORIGINAL_FORMATS[key]['original_format'] and
                 content_type == ORIGINAL_FORMATS[key]['content_type']):
 
-            return key
+            return ORIGINAL_FORMATS[key]['all_extensions']
 
     return None

From 9f0cdbbefe4791b8e487390a0784d51b6da260dd Mon Sep 17 00:00:00 2001
From: Addison Schiller
Date: Thu, 30 Nov 2017 10:02:24 -0500
Subject: [PATCH 3/3] Use .items over dict lookups

---
 tests/providers/dataverse/test_utils.py  |  6 +++---
 waterbutler/providers/dataverse/utils.py | 10 +++++-----
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/tests/providers/dataverse/test_utils.py b/tests/providers/dataverse/test_utils.py
index f7c572b83..0566d5e14 100644
--- a/tests/providers/dataverse/test_utils.py
+++ b/tests/providers/dataverse/test_utils.py
@@ -46,6 +46,6 @@ def test_original_ext_from_raw_metadata(self, format_dict):
             assert key in dv_utils.original_ext_from_raw_metadata(format_dict[key])
 
     def test_original_ext_from_raw_metadata_none_case(self, format_dict):
-        for key in format_dict:
-            format_dict[key]['originalFormatLabel'] = 'blarg'
-            assert dv_utils.original_ext_from_raw_metadata(format_dict[key]) is None
+        for key, ext in format_dict.items():
+            ext['originalFormatLabel'] = 'blarg'
+            assert dv_utils.original_ext_from_raw_metadata(ext) is None
diff --git a/waterbutler/providers/dataverse/utils.py b/waterbutler/providers/dataverse/utils.py
index 4abdb871e..e6db4148a 100644
--- a/waterbutler/providers/dataverse/utils.py
+++ b/waterbutler/providers/dataverse/utils.py
@@ -48,11 +48,11 @@ def original_ext_from_raw_metadata(data):
     if not label or not file_format or not content_type:
         return None
 
-    for key in ORIGINAL_FORMATS:
-        if (label == ORIGINAL_FORMATS[key]['original_label'] and
-                file_format == ORIGINAL_FORMATS[key]['original_format'] and
-                content_type == ORIGINAL_FORMATS[key]['content_type']):
+    for key, ext in ORIGINAL_FORMATS.items():
+        if (label == ext['original_label'] and
+                file_format == ext['original_format'] and
+                content_type == ext['content_type']):
 
-            return ORIGINAL_FORMATS[key]['all_extensions']
+            return ext['all_extensions']
 
     return None
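
Editor's note: taken together, the three patches map Dataverse's 'original format' metadata to the extensions a file may have had before ingestion, expose that on file metadata as original_name (later original_names), and consult it when upload() searches for the freshly uploaded file. Below is a minimal, self-contained sketch of that end-to-end behavior for readers who want to try it outside WaterButler. The trimmed ORIGINAL_FORMATS table, the IngestedFile class, and the find_uploaded_file() helper are hypothetical stand-ins, not the provider's DataverseFileMetadata or upload() code, and only two format entries are reproduced.

# Editor's sketch -- NOT WaterButler code. 'IngestedFile' and 'find_uploaded_file'
# are hypothetical stand-ins for DataverseFileMetadata and the lookup inside
# DataverseProvider.upload(); only two entries of the real table are shown.

ORIGINAL_FORMATS = {
    'csv': {
        'original_format': 'text/csv',
        'original_label': 'Comma Separated Values',
        'content_type': 'text/tab-separated-values',
        'all_extensions': ['csv', 'CSV'],
    },
    'sav': {
        'original_format': 'application/x-spss-sav',
        'original_label': 'SPSS SAV',
        'content_type': 'text/tab-separated-values',
        'all_extensions': ['sav'],
    },
}


def original_ext_from_raw_metadata(data):
    """Return the possible original extensions of an ingested file, or None."""
    label = data.get('originalFormatLabel')
    file_format = data.get('originalFileFormat')
    content_type = data.get('contentType')
    if not label or not file_format or not content_type:
        return None
    for fmt in ORIGINAL_FORMATS.values():
        if (label == fmt['original_label'] and
                file_format == fmt['original_format'] and
                content_type == fmt['content_type']):
            return fmt['all_extensions']
    return None


class IngestedFile:
    """Stand-in for DataverseFileMetadata: knows its ingested and original names."""

    def __init__(self, raw):
        self.raw = raw
        self.name = raw['filename']  # name AFTER ingestion, e.g. 'thefile.tab'

    @property
    def original_names(self):
        extensions = original_ext_from_raw_metadata(self.raw)
        if extensions is None:
            return [self.name]
        stem = self.name[:self.name.rfind('.')]
        return ['{}.{}'.format(stem, ext) for ext in extensions]


def find_uploaded_file(files, uploaded_name):
    """Mirror of the upload() lookup: match the ingested name or an original name."""
    return next(f for f in files if f.name == uploaded_name or
                uploaded_name in f.original_names)


if __name__ == '__main__':
    ingested = IngestedFile({
        'filename': 'thefile.tab',
        'contentType': 'text/tab-separated-values',
        'originalFormatLabel': 'Comma Separated Values',
        'originalFileFormat': 'text/csv',
    })
    # The user uploaded 'thefile.csv'; Dataverse ingested it as 'thefile.tab'.
    # Matching only on f.name would raise StopIteration (the 500 from patch 1).
    match = find_uploaded_file([ingested], 'thefile.csv')
    print(match.name, match.original_names)  # thefile.tab ['thefile.csv', 'thefile.CSV']

Running the sketch shows the uploaded 'thefile.csv' resolving to the ingested 'thefile.tab' metadata instead of failing the lookup, which is the 500 described in the first commit message.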