From e919457d2113ad88b1364c3dc6689d38067d5d91 Mon Sep 17 00:00:00 2001 From: Justin Simpson Date: Thu, 3 Aug 2017 21:04:35 -0700 Subject: [PATCH] Adding tests for package.extract_file fixes #221 This commit adds 4 tests and fixes to make the tests pass. There are 2 specific bugs that this commit fixes: 1) when calling extract_file with a relative_path (i.e request to extract a single file, not an aip) on an uncompressed aip, a single file is now returned. Previously the entire aip was returned 2) when calling extract_file with a relative_path and the requested file does not exist, an error is raised. --- .../locations/fixtures/package.json | 20 +++++++- .../locations/fixtures/working_bag.7z | Bin 0 -> 595 bytes storage_service/locations/models/package.py | 32 ++++++++----- .../locations/tests/test_package.py | 43 ++++++++++++++++++ 4 files changed, 83 insertions(+), 12 deletions(-) create mode 100644 storage_service/locations/fixtures/working_bag.7z diff --git a/storage_service/locations/fixtures/package.json b/storage_service/locations/fixtures/package.json index 01dec828a..c01ddff29 100644 --- a/storage_service/locations/fixtures/package.json +++ b/storage_service/locations/fixtures/package.json @@ -114,7 +114,25 @@ "status": "Uploaded", "misc_attributes": "{}" } -},{ +}, +{ + "pk": 7, + "model": "locations.package", + "fields": { + "uuid": "88deec53-c7dc-4828-865c-7356386e9399", + "description": "Small bagged 7zipped package", + "origin_pipeline": null, + "current_location": "615103f0-0ee0-4a12-ba17-43192d1143ea", + "current_path": "working_bag.7z", + "pointer_file_location": "", + "pointer_file_path": "", + "size": 595, + "package_type": "AIP", + "status": "Uploaded", + "misc_attributes": "{}" + } +}, +{ "pk": 1, "model": "locations.file", "fields": { diff --git a/storage_service/locations/fixtures/working_bag.7z b/storage_service/locations/fixtures/working_bag.7z new file mode 100644 index 0000000000000000000000000000000000000000..4556d1579852f0d4196a909a7fd23ef511cb0d25 GIT binary patch literal 595 zcmV-Z0<8Tvdc3bE8~_8hfX8_d0ssI20000Z000000001ve-m;-T4*^jL0KkKS!-`> zN&o;(Uw{A*Km-5vKOoNlKj+@yFaZf_5j{z$(-UPjQM71{G&Io0iJ+=}hK&^Tz^AAT zfB*r5RX-w3nAFCnq|?+4o+tn|l3dGj(xegQH{gM6YFiy~Uzofk0-?TOC&+bZ=dEjN ztEatlTN6^{vaY)OTj36gm1O8P2rL{bLk2EEg>6aTg)9n0&})ep z!VEA`@@htq_%+n+8?}gtj%)-X0g2&(kh^CUS!HdBH&O^0kJdMP%UNrh+nz&SE~f|@ z8syu9^T;ZO=BA>cxNMBoakrl)K;t^lB$SEFjWkYiF@VS<8l*l~j!4JUQ{ZwW$&$O* z6a}0nJk?WO^hDNHw>% zB>(^bfinlL57jwjme(ZH{<4>P&agw-SinDq=ex&H70z~flQ6~Jzb`y~acK7km}g-K z@33c6)wkB^*0)k3t-^tfbG_?xdOp@lw28qndXE*uPaLsH9`{%F2YIrFtQu?wpp4n& zJr-1rSMwm{IGjg$?lp-UjJ8JAG~W3m92;q&Eo=$E5C8xSf?Ns#4Xo7?007!W20Z`( literal 0 HcmV?d00001 diff --git a/storage_service/locations/models/package.py b/storage_service/locations/models/package.py index 2780c3e1b..e6091a622 100644 --- a/storage_service/locations/models/package.py +++ b/storage_service/locations/models/package.py @@ -484,11 +484,12 @@ def extract_file(self, relative_path='', extract_path=None): Returns path to the extracted file and a temp dir that needs to be deleted. """ - if extract_path is None: - ss_internal = Location.active.get(purpose=Location.STORAGE_SERVICE_INTERNAL) - extract_path = tempfile.mkdtemp(dir=ss_internal.full_path) + ss_internal = Location.active.get(purpose=Location.STORAGE_SERVICE_INTERNAL) full_path = self.fetch_local_path() + if extract_path is None: + extract_path = tempfile.mkdtemp(dir=ss_internal.full_path) + # The basename is the base directory containing a package # like an AIP inside the compressed file. try: @@ -527,17 +528,26 @@ def extract_file(self, relative_path='', extract_path=None): if relative_path: command.append(relative_path) LOGGER.info('Extracting file with: %s to %s', command, output_path) - rc = subprocess.call(command) - LOGGER.debug('Extract file RC: %s', rc) - if rc: + rc = subprocess.check_output(command) + if 'No files extracted' in rc: raise StorageException(_('Extraction error')) else: - LOGGER.info('Copying AIP from: %s to %s', full_path, output_path) - shutil.copytree(full_path, output_path) + if relative_path: + #copy only one file out of aip + head, tail = os.path.split(full_path) + src = os.path.join(head, relative_path) + os.mkdir(os.path.join(extract_path, basename)) + shutil.copy(src, output_path) + else: + src = full_path + shutil.copytree(full_path, output_path) + + LOGGER.info('Copying from: %s to %s', src, output_path) + - if not relative_path: - self.local_path_location = ss_internal - self.local_path = output_path + # if not relative_path: + # self.local_path_location = ss_internal + # self.local_path = output_path return (output_path, extract_path) diff --git a/storage_service/locations/tests/test_package.py b/storage_service/locations/tests/test_package.py index df5baaa1d..ec24d890f 100644 --- a/storage_service/locations/tests/test_package.py +++ b/storage_service/locations/tests/test_package.py @@ -1,4 +1,7 @@ import os +import pytest +import shutil +import tempfile import vcr from django.test import TestCase @@ -25,6 +28,11 @@ def setUp(self): # Arkivum space points at fixtures directory models.Space.objects.filter(uuid='6fb34c82-4222-425e-b0ea-30acfd31f52e').update(path=FIXTURES_DIR) + self.tmp_dir = tempfile.mkdtemp() + + def tearDown(self): + shutil.rmtree(self.tmp_dir) + def test_parsing_mets_data(self): mets_data = self.package._parse_mets(prefix=self.mets_path) assert mets_data['transfer_uuid'] == 'de1b31fa-97dd-48e0-8417-03be78359531' @@ -134,3 +142,38 @@ def test_fixity_force_local(self): assert failures == [] assert message == '' assert timestamp is None + + def test_extract_file_aip_from_uncompressed_aip(self): + """ It should return an aip """ + package = models.Package.objects.get(uuid='0d4e739b-bf60-4b87-bc20-67a379b28cea') + basedir = package.get_base_directory() + output_path, extract_path = package.extract_file(extract_path=self.tmp_dir) + assert output_path==os.path.join(self.tmp_dir, basedir) + assert os.path.join(output_path, 'manifest-md5.txt') + + def test_extract_file_file_from_uncompressed_aip(self): + """ It should return a single file from an uncompressed aip """ + package = models.Package.objects.get(uuid='0d4e739b-bf60-4b87-bc20-67a379b28cea') + basedir = package.get_base_directory() + output_path, extract_path = package.extract_file(relative_path='working_bag/manifest-md5.txt', extract_path=self.tmp_dir) + assert output_path == os.path.join(self.tmp_dir, basedir, 'manifest-md5.txt') + assert os.path.isfile(output_path) + + def test_extract_file_file_from_compressed_aip(self): + """ It should return a single file from a 7zip compressed aip """ + package = models.Package.objects.get(uuid='88deec53-c7dc-4828-865c-7356386e9399') + basedir = package.get_base_directory() + output_path, extract_path = package.extract_file(relative_path='working_bag/manifest-md5.txt', extract_path=self.tmp_dir) + assert output_path == os.path.join(extract_path, basedir, 'manifest-md5.txt') + assert os.path.isfile(output_path) + + def test_extract_file_file_does_not_exist_compressed(self): + """ It should raise an error because the requested file does not exist""" + package = models.Package.objects.get(uuid='88deec53-c7dc-4828-865c-7356386e9399') + with pytest.raises(Exception) as e_info: + output_path, extract_path = package.extract_file(relative_path='working_bag/manifest-sha512.txt', extract_path=self.tmp_dir) + + assert e_info.value.message == 'Extraction error' + + +