class TARForm(forms.ModelForm):
    """Model form for the ``TAR`` space model.

    The form declares an empty field tuple, so it presents no
    protocol-specific inputs of its own.
    """

    class Meta:
        # No TAR-specific settings are collected from the user.
        fields = ()
        model = models.TAR
LOGGER = logging.getLogger(__name__)


class TARException(Exception):
    """Raised when a tarfile cannot be created or extracted."""


def _abort_create_tar(path, tarpath):
    """Log a tarfile-creation failure and raise ``TARException``.

    :param path: Path of the directory (or file) that was being archived.
    :param tarpath: Path the tarfile was being written to.
    :raises TARException: Always.
    """
    # Interpolate *after* the gettext call: the translation lookup must see
    # the constant template, not the already-formatted message.
    fail_msg = _("Failed to create a tarfile at %(tarpath)s for dir at %(path)s") % {
        "tarpath": tarpath,
        "path": path,
    }
    LOGGER.error(fail_msg)
    raise TARException(fail_msg)


def create_tar(path):
    """Create a tarfile from the directory at ``path`` and overwrite
    ``path`` with that tarfile.

    The archive is first written next to the source as ``<path>.tar``; once
    it is confirmed to be a valid tarfile the source is removed and the
    archive is renamed to ``path``. A plain file at ``path`` (e.g. a 7z
    package) is handled the same way.

    :param path: Directory or file to package; trailing slashes are ignored.
    :raises TARException: If the ``tar`` command fails or the result cannot
        be verified as a tarfile located at ``path``.
    """
    path = path.rstrip("/")
    tarpath = "{}.tar".format(path)
    # ``dirname`` is empty for a bare relative name; ``tar -C ""`` would fail.
    changedir = os.path.dirname(tarpath) or os.curdir
    source = os.path.basename(path)
    cmd = ["tar", "-C", changedir, "-cf", tarpath, source]
    LOGGER.info(
        "creating archive of %s at %s, relative to %s", source, tarpath, changedir
    )
    try:
        subprocess.check_output(cmd)
    except (OSError, subprocess.CalledProcessError):
        _abort_create_tar(path, tarpath)

    # Only remove the original once the TAR has been successfully created.
    if not (os.path.isfile(tarpath) and tarfile.is_tarfile(tarpath)):
        _abort_create_tar(path, tarpath)

    # Choose the removal call by inspecting the path rather than by catching
    # OSError from rmtree, so a genuine rmtree failure on a directory is
    # reported as-is instead of being masked by a failing os.remove.
    if os.path.isdir(path) and not os.path.islink(path):
        shutil.rmtree(path)
    else:
        # We're likely packaging a single file, e.g. 7z.
        os.remove(path)
    os.rename(tarpath, path)

    # Explicit checks instead of ``assert`` so they survive ``python -O``.
    if not tarfile.is_tarfile(path) or os.path.exists(tarpath):
        _abort_create_tar(path, tarpath)


def _abort_extract_tar(tarpath, newtarpath, err):
    """Log an extraction failure, restore the tarfile and raise.

    :param tarpath: Original location of the tarfile.
    :param newtarpath: Temporary ``.tar`` name the tarfile was moved to.
    :param err: The exception that caused extraction to fail.
    :raises TARException: Always, after renaming ``newtarpath`` back to
        ``tarpath``.
    """
    # Interpolate after translation (see ``_abort_create_tar``).
    fail_msg = _("Failed to extract %(tarpath)s: %(error)s") % {
        "tarpath": tarpath,
        "error": err,
    }
    LOGGER.error(fail_msg)
    os.rename(newtarpath, tarpath)
    raise TARException(fail_msg)


def extract_tar(tarpath):
    """Extract the tarfile at ``tarpath`` into its parent directory.

    The tarfile is first renamed to ``<tarpath>.tar`` so its name is free for
    the extracted content, then removed once extraction succeeds. This
    presumably relies on the archive's top-level entry being named like
    ``tarpath``'s basename, as archives written by ``create_tar`` are.

    :param tarpath: Path of the tarfile to extract.
    :raises TARException: If the ``tar`` command fails; the tarfile is moved
        back to ``tarpath`` first.
    """
    newtarpath = "{}.tar".format(tarpath)
    os.rename(tarpath, newtarpath)
    # ``dirname`` is empty for a bare relative name; ``tar -C ""`` would fail.
    changedir = os.path.dirname(newtarpath) or os.curdir
    cmd = ["tar", "-xf", newtarpath, "-C", changedir]
    try:
        subprocess.check_output(cmd)
    except (OSError, subprocess.CalledProcessError) as err:
        _abort_extract_tar(tarpath, newtarpath, err)
    # TODO: GPG treats this differently because it only ever expects to
    # TAR a directory but we actually want to TAR file-types as well.
    os.remove(newtarpath)
class TAR(models.Model):
    """Space for storing packages as a Tape Archive File."""

    # Package will use this attribute to determine whether the Space
    # is for storing Tape Archive objects.
    packaged_space = True

    space = models.OneToOneField("Space", to_field="uuid", on_delete=models.CASCADE)

    class Meta:
        verbose_name = _("Tape Archive (TAR) on Local Filesystem")
        app_label = _("locations")

    ALLOWED_LOCATION_PURPOSE = [Location.AIP_STORAGE, Location.REPLICATOR]

    def move_to_storage_service(self, src_path, dest_path, dest_space):
        """Moves src_path to dest_space.staging_path/dest_path.

        After the transfer, the tarfile at ``dest_path`` is extracted in
        place via ``extract_tar``.

        :param src_path: Source path inside this space.
        :param dest_path: Destination path to move to and then extract.
        :param dest_space: Destination space (not used by this method).
        """
        self.space.create_local_directory(dest_path)
        self.space.move_rsync(src_path, dest_path, try_mv_local=True)
        extract_tar(dest_path)

    def move_from_storage_service(self, src_path, dest_path, package=None):
        """Moves self.staging_path/src_path to dest_path.

        After the transfer, the content at ``dest_path`` is replaced by a
        tarfile of itself via ``create_tar``.

        :param src_path: Source path to move from.
        :param dest_path: Destination path to move to and then package.
        :param package: Optional package being stored; may be ``None``.
        """
        self.space.create_local_directory(dest_path)
        self.space.move_rsync(src_path, dest_path)
        create_tar(dest_path)
        # ``package`` defaults to None, so guard before dereferencing it.
        if package is not None and package.should_have_pointer_file():
            # TODO: Update the pointer file to represent the TAR packaging.
            pass