diff --git a/storage_service/common/utils.py b/storage_service/common/utils.py index f52d16234..e25e1df4e 100644 --- a/storage_service/common/utils.py +++ b/storage_service/common/utils.py @@ -77,7 +77,9 @@ COMPRESS_EXTENSION_GZIP, ) -PACKAGE_EXTENSIONS = (".tar",) + COMPRESS_EXTENSIONS +TAR_EXTENSION = ".tar" + +PACKAGE_EXTENSIONS = (TAR_EXTENSION,) + COMPRESS_EXTENSIONS COMPRESS_PROGRAM_7Z = "7-Zip" COMPRESS_PROGRAM_TAR = "tar" @@ -349,7 +351,7 @@ def get_compress_command(compression, extract_path, basename, full_path): `compressed_filename` is the full path to the compressed file """ if compression in (COMPRESSION_TAR, COMPRESSION_TAR_BZIP2, COMPRESSION_TAR_GZIP): - compressed_filename = os.path.join(extract_path, basename + ".tar") + compressed_filename = os.path.join(extract_path, basename + TAR_EXTENSION) relative_path = os.path.dirname(full_path) algo = "" if compression == COMPRESSION_TAR_BZIP2: @@ -570,12 +572,15 @@ def _abort_create_tar(path, tarpath): raise TARException(fail_msg) -def create_tar(path): +def create_tar(path, extension=False): """Create a tarfile from the directory at ``path`` and overwrite ``path`` with that tarfile. + + :param path: Path to directory or file to tar (str) + :param extension: Flag indicating whether to add .tar extension (bool) """ path = path.rstrip("/") - tarpath = "{}.tar".format(path) + tarpath = "{}{}".format(path, TAR_EXTENSION) changedir = os.path.dirname(tarpath) source = os.path.basename(path) cmd = ["tar", "-C", changedir, "-cf", tarpath, source] @@ -594,12 +599,18 @@ def create_tar(path): except OSError: # Remove a file-path as We're likely packaging a file, e.g. 7z. 
os.remove(path) - os.rename(tarpath, path) + if not extension: + os.rename(tarpath, path) else: _abort_create_tar(path, tarpath) + try: - assert tarfile.is_tarfile(path) - assert not os.path.exists(tarpath) + if extension: + assert tarfile.is_tarfile(tarpath) + assert not os.path.exists(path) + else: + assert tarfile.is_tarfile(path) + assert not os.path.exists(tarpath) except AssertionError: _abort_create_tar(path, tarpath) @@ -614,8 +625,12 @@ def _abort_extract_tar(tarpath, newtarpath, err): def extract_tar(tarpath): - """Extract tarfile at ``path`` to a directory at ``path``.""" - newtarpath = "{}.tar".format(tarpath) + """Extract tarfile at ``path`` to a directory at ``path``. + + :param tarpath: Path to tarfile to extract (str) + """ + newtarpath = tarpath + newtarpath = "{}{}".format(tarpath, TAR_EXTENSION) os.rename(tarpath, newtarpath) changedir = os.path.dirname(newtarpath) cmd = ["tar", "-xf", newtarpath, "-C", changedir] @@ -623,8 +638,6 @@ def extract_tar(tarpath): subprocess.check_output(cmd) except (OSError, subprocess.CalledProcessError) as err: _abort_extract_tar(tarpath, newtarpath, err) - # TODO: GPG treats this differently because it only ever expects to - # TAR a directory but we actually want to TAR file-types as well. 
os.remove(newtarpath) diff --git a/storage_service/locations/constants.py b/storage_service/locations/constants.py index bba4bb6ed..d07eaba04 100644 --- a/storage_service/locations/constants.py +++ b/storage_service/locations/constants.py @@ -74,6 +74,11 @@ "form": forms.NFSForm, "fields": ["manually_mounted", "remote_name", "remote_path", "version"], }, + models.Space.OFFLINE_REPLICA_STAGING: { + "model": models.OfflineReplicaStaging, + "form": forms.OfflineReplicaStagingForm, + "fields": [], + }, models.Space.PIPELINE_LOCAL_FS: { "model": models.PipelineLocalFS, "form": forms.PipelineLocalFSForm, @@ -108,5 +113,4 @@ "bucket", ], }, - models.Space.TAR: {"model": models.TAR, "form": forms.TARForm, "fields": []}, } diff --git a/storage_service/locations/forms.py b/storage_service/locations/forms.py index 3ee16587e..7b1522a0d 100644 --- a/storage_service/locations/forms.py +++ b/storage_service/locations/forms.py @@ -249,9 +249,9 @@ class Meta: ) -class TARForm(forms.ModelForm): +class OfflineReplicaStagingForm(forms.ModelForm): class Meta: - model = models.TAR + model = models.OfflineReplicaStaging fields = () diff --git a/storage_service/locations/migrations/0027_tape_archive_space.py b/storage_service/locations/migrations/0028_offline_replica_space.py similarity index 79% rename from storage_service/locations/migrations/0027_tape_archive_space.py rename to storage_service/locations/migrations/0028_offline_replica_space.py index 4cbbb3f3e..bedadbe85 100644 --- a/storage_service/locations/migrations/0027_tape_archive_space.py +++ b/storage_service/locations/migrations/0028_offline_replica_space.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -"""Migration to add a Tape Archive Space to the Storage Service.""" +"""Migration to add an Offline Replica Staging Space to the Storage Service.""" from __future__ import absolute_import, unicode_literals @@ -9,12 +9,12 @@ class Migration(migrations.Migration): - """Entry point for Tape Archive Space migration.""" + """Entry point 
for OfflineReplicaStaging Space migration.""" - dependencies = [("locations", "0026_update_package_status")] + dependencies = [("locations", "0027_update_default_transfer_source_description")] operations = [ migrations.CreateModel( - name="TAR", + name="OfflineReplicaStaging", fields=[ ( "id", @@ -27,7 +27,7 @@ class Migration(migrations.Migration): ), ], options={ - "verbose_name": "Tape Archive (TAR) on Local Filesystem", + "verbose_name": "Write-Only Replica Staging on Local Filesystem", }, ), migrations.AlterField( @@ -46,9 +46,9 @@ class Migration(migrations.Migration): (b"LOM", "LOCKSS-o-matic"), (b"NFS", "NFS"), (b"PIPE_FS", "Pipeline Local Filesystem"), + (b"REPLICA", "Write-Only Replica Staging on Local Filesystem"), (b"SWIFT", "Swift"), (b"S3", "S3"), - (b"TAR", "Tape Archive (TAR) on Local Filesystem"), ], help_text="How the space can be accessed.", max_length=8, @@ -56,7 +56,7 @@ class Migration(migrations.Migration): ), ), migrations.AddField( - model_name="tar", + model_name="offlinereplicastaging", name="space", field=models.OneToOneField( on_delete=django.db.models.deletion.CASCADE, diff --git a/storage_service/locations/models/__init__.py b/storage_service/locations/models/__init__.py index 376cc129a..8022f5320 100644 --- a/storage_service/locations/models/__init__.py +++ b/storage_service/locations/models/__init__.py @@ -40,6 +40,6 @@ class StorageException(Exception): from .lockssomatic import Lockssomatic from .nfs import NFS from .pipeline_local import PipelineLocalFS +from .replica_staging import OfflineReplicaStaging from .swift import Swift from .s3 import S3 -from .tape_archive import TAR diff --git a/storage_service/locations/models/replica_staging.py b/storage_service/locations/models/replica_staging.py new file mode 100644 index 000000000..6ad6f1d15 --- /dev/null +++ b/storage_service/locations/models/replica_staging.py @@ -0,0 +1,71 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import +import logging +import os + +from 
django.db import models
+from django.utils.translation import ugettext_lazy as _
+
+from common import utils
+
+from .location import Location
+
+LOGGER = logging.getLogger(__name__)
+
+
+class OfflineReplicaStaging(models.Model):
+    """Space for storing packages for write-only offline replication.
+
+    Uncompressed packages in this Space will be packaged as a tarball
+    prior to storing.
+    """
+
+    packaged_space = True
+
+    space = models.OneToOneField("Space", to_field="uuid", on_delete=models.CASCADE)
+
+    class Meta:
+        verbose_name = _("Write-Only Replica Staging on Local Filesystem")
+        app_label = _("locations")
+
+    ALLOWED_LOCATION_PURPOSE = [Location.REPLICATOR]
+
+    def browse(self, path):
+        raise NotImplementedError(
+            _("Write-Only Offline Staging does not implement browse")
+        )
+
+    def delete_path(self, delete_path):
+        raise NotImplementedError(
+            _("Write-Only Offline Staging does not implement deletion")
+        )
+
+    def move_to_storage_service(self, src_path, dest_path, dest_space):
+        """Always raise NotImplementedError: fetching is not implemented."""
+        raise NotImplementedError(
+            _("Write-Only Offline Staging does not implement fetching packages")
+        )
+
+    def move_from_storage_service(self, src_path, dest_path, package=None):
+        """Move self.staging_path/src_path to dest_path.
+
+        Content that is not yet packaged is stored as a tarball instead, so
+        ``package`` is required in practice despite its ``None`` default.
+        """
+        self.space.create_local_directory(dest_path)
+        if not package.is_packaged(src_path):
+            return self._store_tar_replica(src_path, dest_path, package)
+        self.space.move_rsync(src_path, dest_path)
+
+    def _store_tar_replica(self, src_path, dest_path, package):
+        """Tar src_path, move the tarball to dest_path plus the TAR extension
+        and set ``package.current_path``; utils.TARException is not caught.
+        """
+        tar_src_path = src_path.rstrip("/") + utils.TAR_EXTENSION
+        tar_dest_path = dest_path.rstrip("/") + utils.TAR_EXTENSION
+        utils.create_tar(src_path, extension=True)
+        package.current_path = tar_dest_path
+        self.space.move_rsync(tar_src_path, tar_dest_path)
+
+        # Cleanup empty directory created by space.create_local_directory.
+        os.rmdir(dest_path)
diff --git a/storage_service/locations/models/space.py b/storage_service/locations/models/space.py index abc9ec2ef..4dbab68ed 100644 --- a/storage_service/locations/models/space.py +++ b/storage_service/locations/models/space.py @@ -143,11 +143,11 @@ class Space(models.Model): LOCAL_FILESYSTEM = "FS" LOM = "LOM" NFS = "NFS" + OFFLINE_REPLICA_STAGING = "REPLICA" PIPELINE_LOCAL_FS = "PIPE_FS" SWIFT = "SWIFT" GPG = "GPG" S3 = "S3" - TAR = "TAR" # These will not be displayed in the Space Create GUI (see locations/forms.py) BETA_PROTOCOLS = {} OBJECT_STORAGE = {DATAVERSE, DSPACE, DSPACE_REST, DURACLOUD, SWIFT, S3} @@ -162,10 +162,10 @@ class Space(models.Model): (LOCAL_FILESYSTEM, _("Local Filesystem")), (LOM, _("LOCKSS-o-matic")), (NFS, _("NFS")), + (OFFLINE_REPLICA_STAGING, _("Write-Only Replica Staging on Local Filesystem")), (PIPELINE_LOCAL_FS, _("Pipeline Local Filesystem")), (SWIFT, _("Swift")), (S3, _("S3")), - (TAR, _("Tape Archive (TAR) on Local Filesystem")), ) access_protocol = models.CharField(
max_length=8, diff --git a/storage_service/locations/models/tape_archive.py b/storage_service/locations/models/tape_archive.py deleted file mode 100644 index 937ef5a37..000000000 --- a/storage_service/locations/models/tape_archive.py +++ /dev/null @@ -1,42 +0,0 @@ -# -*- coding: utf-8 -*- - -"""Tape Archive Space created for the Norwegian Health Archive.""" - -from __future__ import absolute_import - -from django.db import models -from django.utils.translation import ugettext_lazy as _ - -from common import utils - -from .location import Location - - -class TAR(models.Model): - """Space for storing packages as a Tape Archive File.""" - - # Package will use this attribute to determine whether the Space - # is for storing Tape Archive objects. - packaged_space = True - - space = models.OneToOneField("Space", to_field="uuid", on_delete=models.CASCADE) - - class Meta: - verbose_name = _("Tape Archive (TAR) on Local Filesystem") - app_label = _("locations") - - ALLOWED_LOCATION_PURPOSE = [Location.AIP_STORAGE, Location.REPLICATOR] - - def move_to_storage_service(self, src_path, dest_path, dest_space): - """ Moves src_path to dest_space.staging_path/dest_path. """ - self.space.create_local_directory(dest_path) - self.space.move_rsync(src_path, dest_path, try_mv_local=True) - utils.extract_tar(dest_path) - - def move_from_storage_service(self, src_path, dest_path, package=None): - """ Moves self.staging_path/src_path to dest_path. """ - self.space.create_local_directory(dest_path) - self.space.move_rsync(src_path, dest_path) - utils.create_tar(dest_path) - if package.should_have_pointer_file(): - """Update the pointer file to represent the TAR packaging."""