Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Prototype: Create TAR space in storage service #574

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 96 additions & 0 deletions storage_service/common/tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
from __future__ import absolute_import
from __future__ import unicode_literals
from collections import namedtuple
import os
import shutil
import subprocess
import tarfile

from six import StringIO
import mock
Expand All @@ -18,6 +23,9 @@
COMPRESS_ORDER_ONE = "1"
COMPRESS_ORDER_TWO = "2"

ExTarCase = namedtuple("ExTarCase", "path isdir raises expected")
CrTarCase = namedtuple("CrTarCase", "path isfile istar raises expected")


@pytest.mark.parametrize(
"pronom,algorithm,compression",
Expand Down Expand Up @@ -290,3 +298,91 @@ def test_package_is_file(package_path, is_file):
see in the storage service.
"""
assert utils.package_is_file(package_path) == is_file


@pytest.mark.parametrize(
"path, will_be_dir, sp_raises, expected",
[
ExTarCase(path="/a/b/c", isdir=True, raises=False, expected="success"),
ExTarCase(path="/a/b/d", isdir=False, raises=True, expected="fail"),
ExTarCase(path="/a/b/c", isdir=True, raises=True, expected="fail"),
],
)
def test_extract_tar(mocker, path, will_be_dir, sp_raises, expected):
if sp_raises:
mocker.patch.object(subprocess, "check_output", side_effect=OSError("gotcha!"))
else:
mocker.patch.object(subprocess, "check_output")
mocker.patch.object(os, "rename")
mocker.patch.object(os, "remove")
if will_be_dir:
mocker.patch.object(os.path, "isdir", return_value=True)
else:
mocker.patch.object(os.path, "isdir", return_value=False)
tarpath_ext = "{}.tar".format(path)
dirname = os.path.dirname(tarpath_ext)
if expected == "success":
ret = utils.extract_tar(path)
assert ret is None
os.remove.assert_called_once_with(tarpath_ext)
else:
with pytest.raises(utils.TARException) as excinfo:
ret = utils.extract_tar(path)
assert "Failed to extract {}: gotcha!".format(path) == str(excinfo.value)
os.rename.assert_any_call(tarpath_ext, path)
assert not os.remove.called
os.rename.assert_any_call(path, tarpath_ext)
subprocess.check_output.assert_called_once_with(
["tar", "-xf", tarpath_ext, "-C", dirname]
)


@pytest.mark.parametrize(
"path, will_be_file, will_be_tar, sp_raises, expected",
[
CrTarCase(
path="/a/b/c", isfile=True, istar=True, raises=False, expected="success"
),
CrTarCase(
path="/a/b/c/", isfile=True, istar=True, raises=False, expected="success"
),
CrTarCase(
path="/a/b/c", isfile=True, istar=False, raises=False, expected="fail"
),
CrTarCase(
path="/a/b/c", isfile=False, istar=True, raises=False, expected="fail"
),
CrTarCase(
path="/a/b/c", isfile=False, istar=False, raises=True, expected="fail"
),
],
)
def test_create_tar(mocker, path, will_be_file, will_be_tar, sp_raises, expected):
if sp_raises:
mocker.patch.object(subprocess, "check_output", side_effect=OSError("gotcha!"))
else:
mocker.patch.object(subprocess, "check_output")
mocker.patch.object(os.path, "isfile", return_value=will_be_file)
mocker.patch.object(tarfile, "is_tarfile", return_value=will_be_tar)
mocker.patch.object(os, "rename")
mocker.patch.object(shutil, "rmtree")
fixed_path = path.rstrip("/")
tarpath = "{}.tar".format(fixed_path)
if expected == "success":
ret = utils.create_tar(path)
shutil.rmtree.assert_called_once_with(fixed_path)
os.rename.assert_called_once_with(tarpath, fixed_path)
tarfile.is_tarfile.assert_any_call(fixed_path)
assert ret is None
else:
with pytest.raises(utils.TARException) as excinfo:
ret = utils.create_tar(path)
assert "Failed to create a tarfile at {} for dir at {}".format(
tarpath, fixed_path
) == str(excinfo.value)
assert not shutil.rmtree.called
assert not os.rename.called
if not sp_raises:
os.path.isfile.assert_called_once_with(tarpath)
if will_be_file:
tarfile.is_tarfile.assert_any_call(tarpath)
92 changes: 90 additions & 2 deletions storage_service/common/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import os
import shutil
import subprocess
import tarfile
import uuid

import scandir
Expand Down Expand Up @@ -76,7 +77,9 @@
COMPRESS_EXTENSION_GZIP,
)

PACKAGE_EXTENSIONS = (".tar",) + COMPRESS_EXTENSIONS
TAR_EXTENSION = ".tar"

PACKAGE_EXTENSIONS = (TAR_EXTENSION,) + COMPRESS_EXTENSIONS

COMPRESS_PROGRAM_7Z = "7-Zip"
COMPRESS_PROGRAM_TAR = "tar"
Expand Down Expand Up @@ -348,7 +351,7 @@ def get_compress_command(compression, extract_path, basename, full_path):
`compressed_filename` is the full path to the compressed file
"""
if compression in (COMPRESSION_TAR, COMPRESSION_TAR_BZIP2, COMPRESSION_TAR_GZIP):
compressed_filename = os.path.join(extract_path, basename + ".tar")
compressed_filename = os.path.join(extract_path, basename + TAR_EXTENSION)
relative_path = os.path.dirname(full_path)
algo = ""
if compression == COMPRESSION_TAR_BZIP2:
Expand Down Expand Up @@ -553,6 +556,91 @@ def set_compression_transforms(aip, compression, transform_order):
return version, extension, program_name


# ########### TAR Packaging ############


class TARException(Exception):
pass


def _abort_create_tar(path, tarpath):
fail_msg = _(
"Failed to create a tarfile at %(tarpath)s for dir at %(path)s"
% {"tarpath": tarpath, "path": path}
)
LOGGER.error(fail_msg)
raise TARException(fail_msg)


def create_tar(path, extension=False):
"""Create a tarfile from the directory at ``path`` and overwrite
``path`` with that tarfile.

:param path: Path to directory or file to tar (str)
:param extension: Flag indicating whether to add .tar extension (bool)
"""
path = path.rstrip("/")
tarpath = "{}{}".format(path, TAR_EXTENSION)
changedir = os.path.dirname(tarpath)
source = os.path.basename(path)
cmd = ["tar", "-C", changedir, "-cf", tarpath, source]
LOGGER.info(
"creating archive of %s at %s, relative to %s", source, tarpath, changedir
)
try:
subprocess.check_output(cmd)
except (OSError, subprocess.CalledProcessError):
_abort_create_tar(path, tarpath)

# Providing the TAR is successfully created then remove the original.
if os.path.isfile(tarpath) and tarfile.is_tarfile(tarpath):
try:
shutil.rmtree(path)
except OSError:
# Remove a file-path as We're likely packaging a file, e.g. 7z.
os.remove(path)
if not extension:
os.rename(tarpath, path)
else:
_abort_create_tar(path, tarpath)

try:
if extension:
assert tarfile.is_tarfile(tarpath)
assert not os.path.exists(path)
else:
assert tarfile.is_tarfile(path)
assert not os.path.exists(tarpath)
except AssertionError:
_abort_create_tar(path, tarpath)


def _abort_extract_tar(tarpath, newtarpath, err):
fail_msg = _(
"Failed to extract %(tarpath)s: %(error)s" % {"tarpath": tarpath, "error": err}
)
LOGGER.error(fail_msg)
os.rename(newtarpath, tarpath)
raise TARException(fail_msg)


def extract_tar(tarpath):
"""Extract tarfile at ``path`` to a directory at ``path``.

:param tarpath: Path to tarfile to extract (str)
"""
newtarpath = tarpath
newtarpath = "{}{}".format(tarpath, TAR_EXTENSION)
os.rename(tarpath, newtarpath)
changedir = os.path.dirname(newtarpath)
cmd = ["tar", "-xf", newtarpath, "-C", changedir]
try:
subprocess.check_output(cmd)
except (OSError, subprocess.CalledProcessError) as err:
_abort_extract_tar(tarpath, newtarpath, err)
os.remove(newtarpath)


# ########### OTHER ############


Expand Down
5 changes: 5 additions & 0 deletions storage_service/locations/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,11 @@
"form": forms.NFSForm,
"fields": ["manually_mounted", "remote_name", "remote_path", "version"],
},
models.Space.OFFLINE_REPLICA_STAGING: {
"model": models.OfflineReplicaStaging,
"form": forms.OfflineReplicaStagingForm,
"fields": [],
},
models.Space.PIPELINE_LOCAL_FS: {
"model": models.PipelineLocalFS,
"form": forms.PipelineLocalFSForm,
Expand Down
6 changes: 6 additions & 0 deletions storage_service/locations/forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,12 @@ class Meta:
)


class OfflineReplicaStagingForm(forms.ModelForm):
class Meta:
model = models.OfflineReplicaStaging
fields = ()


class LocationForm(forms.ModelForm):
default = forms.BooleanField(
required=False, label=_("Set as global default location for its purpose")
Expand Down
67 changes: 67 additions & 0 deletions storage_service/locations/migrations/0028_offline_replica_space.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# -*- coding: utf-8 -*-

"""Migration to add an Offline Replica Staging Space to the Storage Service."""

from __future__ import absolute_import, unicode_literals

from django.db import migrations, models
import django.db.models.deletion


class Migration(migrations.Migration):
"""Entry point for OfflineReplicaStaging Space migration."""

dependencies = [("locations", "0027_update_default_transfer_source_description")]
operations = [
migrations.CreateModel(
name="OfflineReplicaStaging",
fields=[
(
"id",
models.AutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
],
options={
"verbose_name": "Write-Only Replica Staging on Local Filesystem",
},
),
migrations.AlterField(
model_name="space",
name="access_protocol",
field=models.CharField(
choices=[
(b"ARKIVUM", "Arkivum"),
(b"DV", "Dataverse"),
(b"DC", "DuraCloud"),
(b"DSPACE", "DSpace via SWORD2 API"),
(b"DSPC_RST", "DSpace via REST API"),
(b"FEDORA", "FEDORA via SWORD2"),
(b"GPG", "GPG encryption on Local Filesystem"),
(b"FS", "Local Filesystem"),
(b"LOM", "LOCKSS-o-matic"),
(b"NFS", "NFS"),
(b"PIPE_FS", "Pipeline Local Filesystem"),
(b"REPLICA", "Write-Only Replica Staging on Local Filesystem"),
(b"SWIFT", "Swift"),
(b"S3", "S3"),
],
help_text="How the space can be accessed.",
max_length=8,
verbose_name="Access protocol",
),
),
migrations.AddField(
model_name="offlinereplicastaging",
name="space",
field=models.OneToOneField(
on_delete=django.db.models.deletion.CASCADE,
to="locations.Space",
to_field=b"uuid",
),
),
]
1 change: 1 addition & 0 deletions storage_service/locations/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,5 +40,6 @@ class StorageException(Exception):
from .lockssomatic import Lockssomatic
from .nfs import NFS
from .pipeline_local import PipelineLocalFS
from .replica_staging import OfflineReplicaStaging
from .swift import Swift
from .s3 import S3
Loading