From 3e1b61ea62864e8bbe7eec11d57961ab4c61be2c Mon Sep 17 00:00:00 2001 From: Dennis Kliban Date: Thu, 30 Nov 2023 13:43:41 -0500 Subject: [PATCH] Fixed repository version repair with S3 This patch also adds repair API tests for S3 storage backend. fixes: #4776 fixes: #4806 --- CHANGES/4776.bugfix | 2 + CHANGES/4806.bugfix | 2 + pulpcore/app/tasks/repository.py | 18 ++++----- .../api/using_plugin/test_repair.py | 38 +++++++++++++------ 4 files changed, 39 insertions(+), 21 deletions(-) create mode 100644 CHANGES/4776.bugfix create mode 100644 CHANGES/4806.bugfix diff --git a/CHANGES/4776.bugfix b/CHANGES/4776.bugfix new file mode 100644 index 00000000000..d42e0b6edde --- /dev/null +++ b/CHANGES/4776.bugfix @@ -0,0 +1,2 @@ +Fixed a bug where the repository version repair operation would accidently try to look for a file +in the default domain. diff --git a/CHANGES/4806.bugfix b/CHANGES/4806.bugfix new file mode 100644 index 00000000000..6ddeec28bbd --- /dev/null +++ b/CHANGES/4806.bugfix @@ -0,0 +1,2 @@ +Fixed a bug where the repository version repair operation would always report a digest mismatch when +using S3 storage. diff --git a/pulpcore/app/tasks/repository.py b/pulpcore/app/tasks/repository.py index f949d07d290..6fd6e596986 100644 --- a/pulpcore/app/tasks/repository.py +++ b/pulpcore/app/tasks/repository.py @@ -5,6 +5,7 @@ import hashlib from asgiref.sync import sync_to_async +from botocore.exceptions import ClientError from django.db import transaction from rest_framework.serializers import ValidationError @@ -96,15 +97,14 @@ async def _repair_ca(content_artifact, repaired=None): def _verify_artifact(artifact): - try: - # verify files digest - hasher = hashlib.sha256() - with artifact.file as fp: - for chunk in fp.chunks(CHUNK_SIZE): - hasher.update(chunk) - return hasher.hexdigest() == artifact.sha256 - except FileNotFoundError: - return False + domain = artifact.pulp_domain + storage = domain.get_storage() + hasher = hashlib.sha256() + with storage.open(artifact.file.name) as fp: + for chunk in fp.chunks(CHUNK_SIZE): + hasher.update(chunk) + + return hasher.hexdigest() == artifact.sha256 async def _repair_artifacts_for_content(subset=None, verify_checksums=True): diff --git a/pulpcore/tests/functional/api/using_plugin/test_repair.py b/pulpcore/tests/functional/api/using_plugin/test_repair.py index bc6a43ae298..0db61ea367d 100644 --- a/pulpcore/tests/functional/api/using_plugin/test_repair.py +++ b/pulpcore/tests/functional/api/using_plugin/test_repair.py @@ -1,6 +1,7 @@ import pytest import os +from django.core.files.storage import default_storage from random import sample from pulpcore.client.pulpcore import Repair @@ -14,6 +15,7 @@ SUPPORTED_STORAGE_FRAMEWORKS = [ "django.core.files.storage.FileSystemStorage", "pulpcore.app.models.storage.FileSystem", + "storages.backends.s3boto3.S3Boto3Storage", ] pytestmark = pytest.mark.skipif( @@ -42,18 +44,30 @@ def repository_with_corrupted_artifacts( # STEP 2: sample artifacts that will be modified on the filesystem later on content1, content2 = sample(get_files_in_manifest(remote.url), 2) - # Modify one artifact on disk. - artifact1_path = os.path.join( - settings.MEDIA_ROOT, artifacts_api_client.list(sha256=content1[1]).results[0].file - ) - with open(artifact1_path, "r+b") as f: - f.write(b"$a bit rot") - - # Delete another one from disk. - artifact2_path = os.path.join( - settings.MEDIA_ROOT, artifacts_api_client.list(sha256=content2[1]).results[0].file - ) - os.remove(artifact2_path) + if settings.DEFAULT_FILE_STORAGE == "pulpcore.app.models.storage.FileSystem": + # Modify one artifact on disk + artifact1_path = os.path.join( + settings.MEDIA_ROOT, artifacts_api_client.list(sha256=content1[1]).results[0].file + ) + with open(artifact1_path, "r+b") as f: + f.write(b"$a bit rot") + + # Delete another one from disk. + artifact2_path = os.path.join( + settings.MEDIA_ROOT, artifacts_api_client.list(sha256=content2[1]).results[0].file + ) + os.remove(artifact2_path) + elif settings.DEFAULT_FILE_STORAGE == "storages.backends.s3boto3.S3Boto3Storage": + # Modify one object in S3 + storage = default_storage + path1 = artifacts_api_client.list(sha256=content1[1]).results[0].file + s3_obj = storage.connection.Object(storage.bucket_name, path1) + s3_obj.put() + + # Delete an object from S3 + path2 = artifacts_api_client.list(sha256=content2[1]).results[0].file + s3_obj = storage.connection.Object(storage.bucket_name, path2) + s3_obj.delete() return repo