diff --git a/CHANGES/4777.bugfix b/CHANGES/4777.bugfix new file mode 100644 index 0000000000..3480e07441 --- /dev/null +++ b/CHANGES/4777.bugfix @@ -0,0 +1 @@ +Fix a bug in import/export that could result in a division-by-zero during import. diff --git a/pulpcore/app/serializers/exporter.py b/pulpcore/app/serializers/exporter.py index e92d0a7555..0030b34e19 100644 --- a/pulpcore/app/serializers/exporter.py +++ b/pulpcore/app/serializers/exporter.py @@ -1,6 +1,6 @@ import os -import re from gettext import gettext as _ +import re from rest_framework import serializers from rest_framework.validators import UniqueValidator @@ -19,6 +19,16 @@ from pulpcore.constants import FS_EXPORT_CHOICES, FS_EXPORT_METHODS +def parse_human_readable_file_size(size: str): + # based on https://stackoverflow.com/a/42865957/2002471 + units = {"B": 1, "KB": 2**10, "MB": 2**20, "GB": 2**30, "TB": 2**40} + size = size.upper() + if not re.match(r" ", size): + size = re.sub(r"([KMGT]?B)", r" \1", size) + number, unit = [string.strip() for string in size.split()] + return int(float(number) * units[unit]) + + class ExporterSerializer(ModelSerializer): """ Base serializer for Exporters. @@ -208,23 +218,13 @@ def validate(self, data): ) return super().validate(data) - @staticmethod - def _parse_size(size): + def validate_chunk_size(self, chunk_size): try: - # based on https://stackoverflow.com/a/42865957/2002471 - units = {"B": 1, "KB": 2**10, "MB": 2**20, "GB": 2**30, "TB": 2**40} - size = size.upper() - if not re.match(r" ", size): - size = re.sub(r"([KMGT]?B)", r" \1", size) - number, unit = [string.strip() for string in size.split()] - return int(float(number) * units[unit]) + the_size = parse_human_readable_file_size(chunk_size) except ValueError: raise serializers.ValidationError( - _("chunk_size '{}' is not valid (valid units are B/KB/MB/GB/TB)").format(size) + _("chunk_size '{}' is not valid (valid units are B/KB/MB/GB/TB)").format(chunk_size) ) - - def validate_chunk_size(self, chunk_size): - the_size = self._parse_size(chunk_size) if the_size <= 0: raise serializers.ValidationError( _("Chunk size {} is not greater than zero!").format(the_size) diff --git a/pulpcore/app/tasks/export.py b/pulpcore/app/tasks/export.py index ce56e0f1ab..57e23caef9 100644 --- a/pulpcore/app/tasks/export.py +++ b/pulpcore/app/tasks/export.py @@ -422,12 +422,10 @@ def pulp_export(exporter_pk, params): os.remove(pathname) raise # compute the hashes - global_hash = hasher() paths = sorted([str(Path(p)) for p in glob(tarfile_fp + ".*")]) for a_file in paths: - a_hash = compute_file_hash(a_file, hasher=hasher(), cumulative_hash=global_hash) + a_hash = compute_file_hash(a_file, hasher=hasher()) rslts[a_file] = a_hash - tarfile_hash = global_hash.hexdigest() else: # write into the file @@ -450,23 +448,20 @@ def pulp_export(exporter_pk, params): # write outputfile/hash info to a file 'next to' the output file(s) output_file_info_path = tarfile_fp.replace(".tar", "-toc.json") with open(output_file_info_path, "w") as outfile: - if the_export.validated_chunk_size: - chunk_size = the_export.validated_chunk_size - else: - chunk_size = 0 - chunk_toc = { + table_of_contents = { "meta": { - "chunk_size": chunk_size, - "file": os.path.basename(tarfile_fp), - "global_hash": tarfile_hash, "checksum_type": checksum_type, }, "files": {}, } + + if the_export.validated_chunk_size: + table_of_contents["meta"]["chunk_size"] = the_export.validated_chunk_size + # Build a toc with just filenames (not the path on the exporter-machine) for a_path in rslts.keys(): - chunk_toc["files"][os.path.basename(a_path)] = rslts[a_path] - json.dump(chunk_toc, outfile) + table_of_contents["files"][os.path.basename(a_path)] = rslts[a_path] + json.dump(table_of_contents, outfile) # store toc info toc_hash = compute_file_hash(output_file_info_path) diff --git a/pulpcore/app/tasks/importer.py b/pulpcore/app/tasks/importer.py index 50a3aaf3ea..d26316fbeb 100644 --- a/pulpcore/app/tasks/importer.py +++ b/pulpcore/app/tasks/importer.py @@ -76,12 +76,17 @@ def __init__(self, toc_path): raise ValidationError(_("Missing 'files' or 'meta' keys in table-of-contents!")) toc_dir = os.path.dirname(toc_path) - self.chunk_size = int(self.toc["meta"]["chunk_size"]) # sorting-by-filename is REALLY IMPORTANT here # keys are of the form .00...NN, # and must be reassembled IN ORDER self.chunk_names = sorted(self.toc["files"].keys()) self.chunk_paths = [os.path.join(toc_dir, chunk_name) for chunk_name in self.chunk_names] + self.chunk_size = int(self.toc["meta"].get("chunk_size", 0)) + if not self.chunk_size: + assert ( + len(self.toc["files"]) == 1 + ), "chunk_size must exist and be non-zero if more than one chunk exists" + self.chunk_size = os.path.getsize(self.chunk_paths[0]) def __enter__(self): assert not hasattr(self, "chunks"), "ChunkedFile is not reentrant."