Skip to content

Commit

Permalink
Adding tests for package.extract_file
Browse files Browse the repository at this point in the history
fixes #221

This commit adds 4 tests and fixes to make the tests pass.

There are 2 specific bugs that this commit fixes:

1) when calling extract_file with a relative_path (i.e request
to extract a single file, not an aip)  on an
uncompressed aip, a single file is now returned.  Previously the
entire aip was returned

2) when calling extract_file with a relative_path and the requested
file does not exist, an error is raised.
  • Loading branch information
jhsimpson committed Sep 5, 2017
1 parent 66a8036 commit 1d1f7bb
Show file tree
Hide file tree
Showing 4 changed files with 82 additions and 11 deletions.
20 changes: 19 additions & 1 deletion storage_service/locations/fixtures/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,25 @@
"status": "Uploaded",
"misc_attributes": "{}"
}
},{
},
{
"pk": 7,
"model": "locations.package",
"fields": {
"uuid": "88deec53-c7dc-4828-865c-7356386e9399",
"description": "Small bagged 7zipped package",
"origin_pipeline": null,
"current_location": "615103f0-0ee0-4a12-ba17-43192d1143ea",
"current_path": "working_bag.7z",
"pointer_file_location": "",
"pointer_file_path": "",
"size": 595,
"package_type": "AIP",
"status": "Uploaded",
"misc_attributes": "{}"
}
},
{
"pk": 1,
"model": "locations.file",
"fields": {
Expand Down
Binary file added storage_service/locations/fixtures/working_bag.7z
Binary file not shown.
25 changes: 15 additions & 10 deletions storage_service/locations/models/package.py
Original file line number Diff line number Diff line change
Expand Up @@ -510,10 +510,11 @@ def extract_file(self, relative_path='', extract_path=None):
Returns path to the extracted file and a temp dir that needs to be
deleted.
"""
ss_internal = Location.active.get(purpose=Location.STORAGE_SERVICE_INTERNAL)
full_path = self.fetch_local_path()

if extract_path is None:
ss_internal = Location.active.get(purpose=Location.STORAGE_SERVICE_INTERNAL)
extract_path = tempfile.mkdtemp(dir=ss_internal.full_path)
full_path = self.fetch_local_path()

# The basename is the base directory containing a package
# like an AIP inside the compressed file.
Expand Down Expand Up @@ -553,17 +554,21 @@ def extract_file(self, relative_path='', extract_path=None):
if relative_path:
command.append(relative_path)
LOGGER.info('Extracting file with: %s to %s', command, output_path)
rc = subprocess.call(command)
LOGGER.debug('Extract file RC: %s', rc)
if rc:
rc = subprocess.check_output(command)
if 'No files extracted' in rc:
raise StorageException(_('Extraction error'))
else:
LOGGER.info('Copying AIP from: %s to %s', full_path, output_path)
shutil.copytree(full_path, output_path)
if relative_path:
# copy only one file out of aip
head, tail = os.path.split(full_path)
src = os.path.join(head, relative_path)
os.mkdir(os.path.join(extract_path, basename))
shutil.copy(src, output_path)
else:
src = full_path
shutil.copytree(full_path, output_path)

if not relative_path:
self.local_path_location = ss_internal
self.local_path = output_path
LOGGER.info('Copying from: %s to %s', src, output_path)

return (output_path, extract_path)

Expand Down
48 changes: 48 additions & 0 deletions storage_service/locations/tests/test_package.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
import os
import pytest
import shutil
import tempfile
import vcr

from django.test import TestCase
Expand All @@ -25,6 +28,11 @@ def setUp(self):
# Arkivum space points at fixtures directory
models.Space.objects.filter(uuid='6fb34c82-4222-425e-b0ea-30acfd31f52e').update(path=FIXTURES_DIR)

self.tmp_dir = tempfile.mkdtemp()

def tearDown(self):
shutil.rmtree(self.tmp_dir)

def test_parsing_mets_data(self):
mets_data = self.package._parse_mets(prefix=self.mets_path)
assert mets_data['transfer_uuid'] == 'de1b31fa-97dd-48e0-8417-03be78359531'
Expand Down Expand Up @@ -134,3 +142,43 @@ def test_fixity_force_local(self):
assert failures == []
assert message == ''
assert timestamp is None

def test_extract_file_aip_from_uncompressed_aip(self):
""" It should return an aip """
package = models.Package.objects.get(uuid='0d4e739b-bf60-4b87-bc20-67a379b28cea')
basedir = package.get_base_directory()
output_path, extract_path = package.extract_file(extract_path=self.tmp_dir)
assert output_path == os.path.join(self.tmp_dir, basedir)
assert os.path.join(output_path, 'manifest-md5.txt')

def test_extract_file_file_from_uncompressed_aip(self):
""" It should return a single file from an uncompressed aip """
package = models.Package.objects.get(uuid='0d4e739b-bf60-4b87-bc20-67a379b28cea')
basedir = package.get_base_directory()
output_path, extract_path = package.extract_file(relative_path='working_bag/manifest-md5.txt', extract_path=self.tmp_dir)
assert output_path == os.path.join(self.tmp_dir, basedir, 'manifest-md5.txt')
assert os.path.isfile(output_path)

def test_extract_file_file_from_compressed_aip(self):
""" It should return a single file from a 7zip compressed aip """
package = models.Package.objects.get(uuid='88deec53-c7dc-4828-865c-7356386e9399')
basedir = package.get_base_directory()
output_path, extract_path = package.extract_file(relative_path='working_bag/manifest-md5.txt', extract_path=self.tmp_dir)
assert output_path == os.path.join(extract_path, basedir, 'manifest-md5.txt')
assert os.path.isfile(output_path)

def test_extract_file_file_does_not_exist_compressed(self):
""" It should raise an error because the requested file does not exist"""
package = models.Package.objects.get(uuid='88deec53-c7dc-4828-865c-7356386e9399')
with pytest.raises(Exception) as e_info:
output_path, extract_path = package.extract_file(relative_path='working_bag/manifest-sha512.txt', extract_path=self.tmp_dir)

assert e_info.value.message == 'Extraction error'

def test_extract_file_aip_from_compressed_aip(self):
""" It should return an aip """
package = models.Package.objects.get(uuid='88deec53-c7dc-4828-865c-7356386e9399')
basedir = package.get_base_directory()
output_path, extract_path = package.extract_file(extract_path=self.tmp_dir)
assert output_path == os.path.join(self.tmp_dir, basedir)
assert os.path.join(output_path, 'manifest-md5.txt')

0 comments on commit 1d1f7bb

Please sign in to comment.