Skip to content

Commit

Permalink
Create TAR space in storage service
Browse files Browse the repository at this point in the history
  • Loading branch information
ross-spencer committed Mar 8, 2021
1 parent 1726937 commit 500d543
Show file tree
Hide file tree
Showing 9 changed files with 221 additions and 1 deletion.
1 change: 1 addition & 0 deletions storage_service/locations/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,4 +108,5 @@
"bucket",
],
},
models.Space.TAR: {"model": models.TAR, "form": forms.TARForm, "fields": []},
}
6 changes: 6 additions & 0 deletions storage_service/locations/forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,12 @@ class Meta:
)


class TARForm(forms.ModelForm):
    """Model form for the ``TAR`` space model; it exposes no model fields."""

    class Meta:
        model = models.TAR
        # Empty tuple: none of the model's fields are included in the form.
        fields = ()


class LocationForm(forms.ModelForm):
default = forms.BooleanField(
required=False, label=_("Set as global default location for its purpose")
Expand Down
67 changes: 67 additions & 0 deletions storage_service/locations/migrations/0027_tape_archive_space.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# -*- coding: utf-8 -*-

"""Migration to add a Tape Archive Space to the Storage Service."""

from __future__ import absolute_import, unicode_literals

from django.db import migrations, models
import django.db.models.deletion


class Migration(migrations.Migration):
    """Entry point for Tape Archive Space migration.

    Creates the ``TAR`` model, adds ``TAR`` to the choices of
    ``Space.access_protocol`` and links ``TAR`` to ``Space`` through a
    one-to-one ``space`` field.
    """

    # Must be applied after the previous migration of the ``locations`` app.
    dependencies = [("locations", "0026_update_package_status")]
    operations = [
        # 1. Create the TAR model with only its auto-generated primary key.
        migrations.CreateModel(
            name="TAR",
            fields=[
                (
                    "id",
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
            ],
            options={
                "verbose_name": "Tape Archive (TAR) on Local Filesystem",
            },
        ),
        # 2. Redeclare Space.access_protocol so that its choices include the
        #    new "TAR" entry (the last item in the list).
        # NOTE(review): the byte-string choice keys look like Python 2-era
        # migration output -- confirm before regenerating this migration.
        migrations.AlterField(
            model_name="space",
            name="access_protocol",
            field=models.CharField(
                choices=[
                    (b"ARKIVUM", "Arkivum"),
                    (b"DV", "Dataverse"),
                    (b"DC", "DuraCloud"),
                    (b"DSPACE", "DSpace via SWORD2 API"),
                    (b"DSPC_RST", "DSpace via REST API"),
                    (b"FEDORA", "FEDORA via SWORD2"),
                    (b"GPG", "GPG encryption on Local Filesystem"),
                    (b"FS", "Local Filesystem"),
                    (b"LOM", "LOCKSS-o-matic"),
                    (b"NFS", "NFS"),
                    (b"PIPE_FS", "Pipeline Local Filesystem"),
                    (b"SWIFT", "Swift"),
                    (b"S3", "S3"),
                    (b"TAR", "Tape Archive (TAR) on Local Filesystem"),
                ],
                help_text="How the space can be accessed.",
                max_length=8,
                verbose_name="Access protocol",
            ),
        ),
        # 3. Attach each TAR row to exactly one Space, referenced by the
        #    Space's ``uuid`` field; deleting the Space cascades to the TAR row.
        migrations.AddField(
            model_name="tar",
            name="space",
            field=models.OneToOneField(
                on_delete=django.db.models.deletion.CASCADE,
                to="locations.Space",
                to_field=b"uuid",
            ),
        ),
    ]
1 change: 1 addition & 0 deletions storage_service/locations/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,4 @@ class StorageException(Exception):
from .pipeline_local import PipelineLocalFS
from .swift import Swift
from .s3 import S3
from .tape_archive import TAR
Empty file.
85 changes: 85 additions & 0 deletions storage_service/locations/models/location_helpers/helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# -*- coding: utf-8 -*-

from __future__ import absolute_import

import logging
import os
import shutil
import subprocess
import tarfile

from django.utils.translation import ugettext_lazy as _


LOGGER = logging.getLogger(__name__)


class TARException(Exception):
    """Raised when a tarfile cannot be created or extracted."""


def _abort_create_tar(path, tarpath):
    """Log a tarfile-creation failure and raise ``TARException``.

    :param path: Path of the directory (or file) that was being packaged.
    :param tarpath: Path at which the tarfile was to be written.
    :raises TARException: Always, carrying the logged failure message.
    """
    # Interpolate *after* the translation lookup: formatting the string
    # before passing it to ``_`` would make the lookup key depend on the
    # paths, so no catalogue entry could ever match it.
    fail_msg = _(
        "Failed to create a tarfile at %(tarpath)s for dir at %(path)s"
    ) % {"tarpath": tarpath, "path": path}
    LOGGER.error(fail_msg)
    raise TARException(fail_msg)


def create_tar(path):
    """Create a tarfile from the directory at ``path`` and overwrite
    ``path`` with that tarfile.

    The archive is first written next to the source as ``<path>.tar`` using
    the system ``tar`` command. Once it is confirmed to be a valid tarfile the
    source is removed and the archive is renamed to ``path``.

    :param path: Path of the directory (or single file) to package. Trailing
        slashes are ignored.
    :raises TARException: If the ``tar`` command fails, if no valid tarfile
        was produced, or if the final state on disk is not as expected.
    """
    path = path.rstrip("/")
    tarpath = "{}.tar".format(path)
    changedir = os.path.dirname(tarpath)
    source = os.path.basename(path)
    # ``-C changedir`` makes the archive member names relative to the parent
    # directory, i.e. the archive contains a single top-level ``source`` entry.
    cmd = ["tar", "-C", changedir, "-cf", tarpath, source]
    LOGGER.info(
        "creating archive of %s at %s, relative to %s", source, tarpath, changedir
    )
    try:
        subprocess.check_output(cmd)
    except (OSError, subprocess.CalledProcessError):
        _abort_create_tar(path, tarpath)

    # Only remove the original once a valid tarfile is known to exist.
    if not (os.path.isfile(tarpath) and tarfile.is_tarfile(tarpath)):
        _abort_create_tar(path, tarpath)
    try:
        shutil.rmtree(path)
    except OSError:
        # Remove a file-path as we're likely packaging a file, e.g. 7z.
        os.remove(path)
    os.rename(tarpath, path)

    # Verify the end state with explicit checks rather than ``assert``, which
    # is stripped when Python runs with optimisation enabled (``-O``).
    if not tarfile.is_tarfile(path) or os.path.exists(tarpath):
        _abort_create_tar(path, tarpath)


def _abort_extract_tar(tarpath, newtarpath, err):
    """Log a tarfile-extraction failure, undo the rename and raise.

    :param tarpath: Original path of the tarfile.
    :param newtarpath: Path the tarfile was renamed to before extraction.
    :param err: The error that caused the extraction to fail.
    :raises TARException: Always, carrying the logged failure message.
    """
    # Interpolate *after* the translation lookup: formatting the string
    # before passing it to ``_`` would make the lookup key depend on the
    # path and error, so no catalogue entry could ever match it.
    fail_msg = _("Failed to extract %(tarpath)s: %(error)s") % {
        "tarpath": tarpath,
        "error": err,
    }
    LOGGER.error(fail_msg)
    # Put the tarfile back at its original location before raising.
    os.rename(newtarpath, tarpath)
    raise TARException(fail_msg)


def extract_tar(tarpath):
    """Unpack the tarfile stored at ``tarpath`` into its parent directory.

    The tarfile is first moved aside to ``<tarpath>.tar``, its contents are
    extracted with the system ``tar`` command into the directory that holds
    it, and the moved-aside tarfile is then deleted. On failure the helper
    ``_abort_extract_tar`` moves the tarfile back and raises ``TARException``.

    :param tarpath: Path of the tarfile to extract.
    """
    renamed_tar = "{}.tar".format(tarpath)
    os.rename(tarpath, renamed_tar)
    parent_dir = os.path.dirname(renamed_tar)
    try:
        subprocess.check_output(["tar", "-xf", renamed_tar, "-C", parent_dir])
    except (OSError, subprocess.CalledProcessError) as err:
        _abort_extract_tar(tarpath, renamed_tar, err)
    # TODO: GPG treats this differently because it only ever expects to
    # TAR a directory but we actually want to TAR file-types as well.
    os.remove(renamed_tar)
18 changes: 17 additions & 1 deletion storage_service/locations/models/package.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,16 @@ def is_encrypted(self, local_path):
is_file = os.path.isfile(local_path)
return space_is_encr and is_file

def is_packaged(self, local_path):
    """Report whether the package at ``local_path`` is stored packaged.

    A package counts as packaged when the child space of its current
    location has a truthy ``packaged_space`` attribute and ``local_path``
    is a regular file.
    """
    child_space = self.current_location.space.get_child_space()
    # Spaces that do not declare ``packaged_space`` are treated as unpackaged.
    in_packaged_space = getattr(child_space, "packaged_space", False)
    path_is_file = os.path.isfile(local_path)
    return in_packaged_space and path_is_file

@property
def is_compressed(self):
""" Determines whether or not the package is a compressed file. """
Expand Down Expand Up @@ -327,9 +337,15 @@ def fetch_local_path(self):
:returns: Local path to this package.
"""

local_path = self.get_local_path()
if local_path and not self.is_encrypted(local_path):
if (
local_path
and not self.is_encrypted(local_path)
and not self.is_packaged(local_path)
):
return local_path

# Not locally accessible, so copy to SS internal temp dir
ss_internal = Location.active.get(purpose=Location.STORAGE_SERVICE_INTERNAL)
temp_dir = tempfile.mkdtemp(dir=ss_internal.full_path)
Expand Down
2 changes: 2 additions & 0 deletions storage_service/locations/models/space.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ class Space(models.Model):
SWIFT = "SWIFT"
GPG = "GPG"
S3 = "S3"
TAR = "TAR"
# These will not be displayed in the Space Create GUI (see locations/forms.py)
BETA_PROTOCOLS = {}
OBJECT_STORAGE = {DATAVERSE, DSPACE, DSPACE_REST, DURACLOUD, SWIFT, S3}
Expand All @@ -164,6 +165,7 @@ class Space(models.Model):
(PIPELINE_LOCAL_FS, _("Pipeline Local Filesystem")),
(SWIFT, _("Swift")),
(S3, _("S3")),
(TAR, _("Tape Archive (TAR) on Local Filesystem")),
)
access_protocol = models.CharField(
max_length=8,
Expand Down
42 changes: 42 additions & 0 deletions storage_service/locations/models/tape_archive.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# -*- coding: utf-8 -*-

"""Tape Archive Space created for the Norwegian Health Archive."""

from __future__ import absolute_import


from django.db import models
from django.utils.translation import ugettext_lazy as _

from .location import Location
from .location_helpers.helpers import create_tar, extract_tar


class TAR(models.Model):
    """Space for storing packages as a Tape Archive File."""

    # Package will use this attribute to determine whether the Space
    # is for storing Tape Archive objects.
    packaged_space = True

    space = models.OneToOneField("Space", to_field="uuid", on_delete=models.CASCADE)

    class Meta:
        verbose_name = _("Tape Archive (TAR) on Local Filesystem")
        app_label = _("locations")

    # Location purposes that may be configured for this kind of space.
    ALLOWED_LOCATION_PURPOSE = [Location.AIP_STORAGE, Location.REPLICATOR]

    def move_to_storage_service(self, src_path, dest_path, dest_space):
        """Move ``src_path`` to ``dest_path`` and unpack the tarfile there.

        :param src_path: Path of the stored (tarred) package.
        :param dest_path: Path the package is moved to and extracted at.
        :param dest_space: Destination space; not used by this method.
        """
        self.space.create_local_directory(dest_path)
        self.space.move_rsync(src_path, dest_path, try_mv_local=True)
        extract_tar(dest_path)

    def move_from_storage_service(self, src_path, dest_path, package=None):
        """Move ``src_path`` to ``dest_path`` and package it as a tarfile.

        :param src_path: Path of the package to store.
        :param dest_path: Path the package is moved to and tarred at.
        :param package: Optional package object; when omitted the pointer
            file check is skipped.
        """
        self.space.create_local_directory(dest_path)
        self.space.move_rsync(src_path, dest_path)
        create_tar(dest_path)
        # ``package`` defaults to None, so guard before dereferencing it.
        if package is not None and package.should_have_pointer_file():
            # TODO: update the pointer file to represent the TAR packaging.
            pass

0 comments on commit 500d543

Please sign in to comment.