diff --git a/dandiapi/analytics/tasks/__init__.py b/dandiapi/analytics/tasks/__init__.py index 2f3037c0c..fe0c3cdc7 100644 --- a/dandiapi/analytics/tasks/__init__.py +++ b/dandiapi/analytics/tasks/__init__.py @@ -1,7 +1,6 @@ from __future__ import annotations from collections import Counter -from pathlib import Path from typing import TYPE_CHECKING from celery.app import shared_task @@ -78,7 +77,7 @@ def process_s3_log_file_task(bucket: LogBucket, s3_log_key: str) -> None: # short circuit if the log file has already been processed. note that this doesn't guarantee # exactly once processing, that's what the unique constraint on ProcessedS3Log is for. - if ProcessedS3Log.objects.filter(name=Path(s3_log_key).name, embargoed=embargoed).exists(): + if ProcessedS3Log.objects.filter(name=s3_log_key.split('/')[-1], embargoed=embargoed).exists(): return s3 = get_boto_client(get_storage() if not embargoed else get_embargo_storage()) @@ -94,7 +93,7 @@ def process_s3_log_file_task(bucket: LogBucket, s3_log_key: str) -> None: with transaction.atomic(): try: - log = ProcessedS3Log(name=Path(s3_log_key).name, embargoed=embargoed) + log = ProcessedS3Log(name=s3_log_key.split('/')[-1], embargoed=embargoed) # disable constraint validation checking so duplicate errors can be detected and # ignored. the rest of the full_clean errors should still be raised. log.full_clean(validate_constraints=False) diff --git a/dandiapi/api/tests/test_asset.py b/dandiapi/api/tests/test_asset.py index f1f18ccb3..715c5fc82 100644 --- a/dandiapi/api/tests/test_asset.py +++ b/dandiapi/api/tests/test_asset.py @@ -1,7 +1,6 @@ from __future__ import annotations import json -import os.path from uuid import uuid4 from django.conf import settings @@ -1351,7 +1350,7 @@ def test_asset_download(api_client, storage, version, asset): download = requests.get(download_url, timeout=5) cd_header = download.headers.get('Content-Disposition') - assert cd_header == f'attachment; filename="{os.path.basename(asset.path)}"' + assert cd_header == f'attachment; filename="{asset.path.split("/")[-1]}"' with asset.blob.blob.file.open('rb') as reader: assert download.content == reader.read() @@ -1397,7 +1396,7 @@ def test_asset_download_embargo( download = requests.get(download_url, timeout=5) cd_header = download.headers.get('Content-Disposition') - assert cd_header == f'attachment; filename="{os.path.basename(asset.path)}"' + assert cd_header == f'attachment; filename="{asset.path.split("/")[-1]}"' with asset.embargoed_blob.blob.file.open('rb') as reader: assert download.content == reader.read() @@ -1432,7 +1431,7 @@ def test_asset_direct_download(api_client, storage, version, asset): download = requests.get(download_url, timeout=5) cd_header = download.headers.get('Content-Disposition') - assert cd_header == f'attachment; filename="{os.path.basename(asset.path)}"' + assert cd_header == f'attachment; filename="{asset.path.split("/")[-1]}"' with asset.blob.blob.file.open('rb') as reader: assert download.content == reader.read() @@ -1464,7 +1463,7 @@ def test_asset_direct_download_head(api_client, storage, version, asset): download = requests.get(download_url, timeout=5) cd_header = download.headers.get('Content-Disposition') - assert cd_header == f'attachment; filename="{os.path.basename(asset.path)}"' + assert cd_header == f'attachment; filename="{asset.path.split("/")[-1]}"' with asset.blob.blob.file.open('rb') as reader: assert download.content == reader.read() diff --git a/dandiapi/api/tests/test_upload.py b/dandiapi/api/tests/test_upload.py index 6bd3abf49..ee0be5e30 100644 --- a/dandiapi/api/tests/test_upload.py +++ b/dandiapi/api/tests/test_upload.py @@ -1,6 +1,5 @@ from __future__ import annotations -import os import uuid from django.core.files.base import ContentFile @@ -538,7 +537,7 @@ def test_upload_validate_wrong_size(api_client, user, upload): api_client.force_authenticate(user=user) wrong_content = b'not 100 bytes' - upload.blob.save(os.path.basename(upload.blob.name), ContentFile(wrong_content)) + upload.blob.save(upload.blob.name.split('/')[-1], ContentFile(wrong_content)) resp = api_client.post(f'/api/uploads/{upload.upload_id}/validate/') assert resp.status_code == 400 diff --git a/dandiapi/api/views/asset.py b/dandiapi/api/views/asset.py index 8bb97b2cb..34d35e8f1 100644 --- a/dandiapi/api/views/asset.py +++ b/dandiapi/api/views/asset.py @@ -22,7 +22,6 @@ # This should only be used for type interrogation, never instantiation MinioStorage = type('FakeMinioStorage', (), {}) -import os.path from typing import TYPE_CHECKING from django.conf import settings @@ -146,7 +145,7 @@ def download(self, request, *args, **kwargs): serializer.is_valid(raise_exception=True) content_disposition = serializer.validated_data['content_disposition'] content_type = asset.metadata.get('encodingFormat', 'application/octet-stream') - asset_basename = os.path.basename(asset.path) + asset_basename = asset.path.split('/')[-1] if content_disposition == 'attachment': return HttpResponseRedirect( diff --git a/dandiapi/zarr/models.py b/dandiapi/zarr/models.py index be4291ac2..50cb8a571 100644 --- a/dandiapi/zarr/models.py +++ b/dandiapi/zarr/models.py @@ -1,7 +1,6 @@ from __future__ import annotations import logging -from typing import TYPE_CHECKING from urllib.parse import urlparse, urlunparse from uuid import uuid4 @@ -13,9 +12,6 @@ from dandiapi.api.models import Dandiset from dandiapi.api.storage import get_embargo_storage, get_storage -if TYPE_CHECKING: - from pathlib import Path - logger = logging.getLogger(name=__name__) @@ -103,7 +99,7 @@ class ZarrArchive(BaseZarrArchive): storage = get_storage() dandiset = models.ForeignKey(Dandiset, related_name='zarr_archives', on_delete=models.CASCADE) - def s3_path(self, zarr_path: str | Path): + def s3_path(self, zarr_path: str) -> str: """Generate a full S3 object path from a path in this zarr_archive.""" return ( f'{settings.DANDI_DANDISETS_BUCKET_PREFIX}{settings.DANDI_ZARR_PREFIX_NAME}/' @@ -117,7 +113,7 @@ class EmbargoedZarrArchive(BaseZarrArchive): Dandiset, related_name='embargoed_zarr_archives', on_delete=models.CASCADE ) - def s3_path(self, zarr_path: str | Path): + def s3_path(self, zarr_path: str) -> str: """Generate a full S3 object path from a path in this zarr_archive.""" return ( f'{settings.DANDI_DANDISETS_EMBARGO_BUCKET_PREFIX}{settings.DANDI_ZARR_PREFIX_NAME}/' diff --git a/dandiapi/zarr/tests/test_zarr.py b/dandiapi/zarr/tests/test_zarr.py index dfbc651d4..93c91b728 100644 --- a/dandiapi/zarr/tests/test_zarr.py +++ b/dandiapi/zarr/tests/test_zarr.py @@ -237,7 +237,7 @@ def test_zarr_rest_delete_file( f'/api/zarr/{zarr_archive.zarr_id}/files/', [{'path': str(zarr_file.path)}] ) assert resp.status_code == 204 - assert not zarr_archive.storage.exists(zarr_archive.s3_path(zarr_file.path)) + assert not zarr_archive.storage.exists(zarr_archive.s3_path(str(zarr_file.path))) # Assert zarr is back in pending state zarr_archive.refresh_from_db() @@ -336,7 +336,7 @@ def test_zarr_rest_delete_multiple_files( # Assert not found for file in zarr_files: - assert not zarr_archive.storage.exists(zarr_archive.s3_path(file)) + assert not zarr_archive.storage.exists(zarr_archive.s3_path(str(file.path))) ingest_zarr_archive(zarr_archive.zarr_id) zarr_archive.refresh_from_db() @@ -369,7 +369,7 @@ def test_zarr_rest_delete_missing_file( assert resp.json() == [ f'File test-prefix/test-zarr/{zarr_archive.zarr_id}/does/not/exist does not exist.' ] - assert zarr_archive.storage.exists(zarr_archive.s3_path(zarr_file.path)) + assert zarr_archive.storage.exists(zarr_archive.s3_path(str(zarr_file.path))) # Ingest zarr_archive.status = ZarrArchiveStatus.UPLOADED diff --git a/dandiapi/zarr/views/__init__.py b/dandiapi/zarr/views/__init__.py index 8c5a60585..61a1057c6 100644 --- a/dandiapi/zarr/views/__init__.py +++ b/dandiapi/zarr/views/__init__.py @@ -1,7 +1,6 @@ from __future__ import annotations import logging -from pathlib import Path from typing import TYPE_CHECKING from django.db import IntegrityError, transaction @@ -196,17 +195,18 @@ def files(self, request, zarr_id: str): serializer.is_valid(raise_exception=True) # The root path for this zarr in s3 - base_path = Path(zarr_archive.s3_path('')) + # This will contain a trailing slash, due to the empty string argument + base_path = zarr_archive.s3_path('') # Retrieve and join query params limit = serializer.validated_data['limit'] download = serializer.validated_data['download'] - raw_prefix = serializer.validated_data['prefix'] - full_prefix = str(base_path / raw_prefix) + raw_prefix = serializer.validated_data['prefix'].lstrip('/') + full_prefix = (base_path + raw_prefix).rstrip('/') - _after = serializer.validated_data['after'] - after = str(base_path / _after) if _after else '' + raw_after = serializer.validated_data['after'].lstrip('/') + after = (base_path + raw_after).rstrip('/') if raw_after else '' # Handle head request redirects if request.method == 'HEAD': @@ -232,7 +232,7 @@ def files(self, request, zarr_id: str): # Map/filter listing results = [ { - 'Key': str(Path(obj['Key']).relative_to(base_path)), + 'Key': obj['Key'].removeprefix(base_path), 'LastModified': obj['LastModified'], 'ETag': obj['ETag'].strip('"'), 'Size': obj['Size'],