Skip to content

Commit

Permalink
Merge pull request #1782 from dandi/filesystem-url
Browse files Browse the repository at this point in the history
Don't use filesystem APIs to manipulate URLs
  • Loading branch information
brianhelba authored Feb 9, 2024
2 parents 1e7419f + c527d4b commit fa82455
Show file tree
Hide file tree
Showing 7 changed files with 20 additions and 28 deletions.
5 changes: 2 additions & 3 deletions dandiapi/analytics/tasks/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from __future__ import annotations

from collections import Counter
from pathlib import Path
from typing import TYPE_CHECKING

from celery.app import shared_task
Expand Down Expand Up @@ -78,7 +77,7 @@ def process_s3_log_file_task(bucket: LogBucket, s3_log_key: str) -> None:

# short circuit if the log file has already been processed. note that this doesn't guarantee
# exactly once processing, that's what the unique constraint on ProcessedS3Log is for.
if ProcessedS3Log.objects.filter(name=Path(s3_log_key).name, embargoed=embargoed).exists():
if ProcessedS3Log.objects.filter(name=s3_log_key.split('/')[-1], embargoed=embargoed).exists():
return

s3 = get_boto_client(get_storage() if not embargoed else get_embargo_storage())
Expand All @@ -94,7 +93,7 @@ def process_s3_log_file_task(bucket: LogBucket, s3_log_key: str) -> None:

with transaction.atomic():
try:
log = ProcessedS3Log(name=Path(s3_log_key).name, embargoed=embargoed)
log = ProcessedS3Log(name=s3_log_key.split('/')[-1], embargoed=embargoed)
# disable constraint validation checking so duplicate errors can be detected and
# ignored. the rest of the full_clean errors should still be raised.
log.full_clean(validate_constraints=False)
Expand Down
9 changes: 4 additions & 5 deletions dandiapi/api/tests/test_asset.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from __future__ import annotations

import json
import os.path
from uuid import uuid4

from django.conf import settings
Expand Down Expand Up @@ -1351,7 +1350,7 @@ def test_asset_download(api_client, storage, version, asset):
download = requests.get(download_url, timeout=5)
cd_header = download.headers.get('Content-Disposition')

assert cd_header == f'attachment; filename="{os.path.basename(asset.path)}"'
assert cd_header == f'attachment; filename="{asset.path.split("/")[-1]}"'

with asset.blob.blob.file.open('rb') as reader:
assert download.content == reader.read()
Expand Down Expand Up @@ -1397,7 +1396,7 @@ def test_asset_download_embargo(
download = requests.get(download_url, timeout=5)
cd_header = download.headers.get('Content-Disposition')

assert cd_header == f'attachment; filename="{os.path.basename(asset.path)}"'
assert cd_header == f'attachment; filename="{asset.path.split("/")[-1]}"'

with asset.embargoed_blob.blob.file.open('rb') as reader:
assert download.content == reader.read()
Expand Down Expand Up @@ -1432,7 +1431,7 @@ def test_asset_direct_download(api_client, storage, version, asset):
download = requests.get(download_url, timeout=5)
cd_header = download.headers.get('Content-Disposition')

assert cd_header == f'attachment; filename="{os.path.basename(asset.path)}"'
assert cd_header == f'attachment; filename="{asset.path.split("/")[-1]}"'

with asset.blob.blob.file.open('rb') as reader:
assert download.content == reader.read()
Expand Down Expand Up @@ -1464,7 +1463,7 @@ def test_asset_direct_download_head(api_client, storage, version, asset):
download = requests.get(download_url, timeout=5)
cd_header = download.headers.get('Content-Disposition')

assert cd_header == f'attachment; filename="{os.path.basename(asset.path)}"'
assert cd_header == f'attachment; filename="{asset.path.split("/")[-1]}"'

with asset.blob.blob.file.open('rb') as reader:
assert download.content == reader.read()
Expand Down
3 changes: 1 addition & 2 deletions dandiapi/api/tests/test_upload.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from __future__ import annotations

import os
import uuid

from django.core.files.base import ContentFile
Expand Down Expand Up @@ -538,7 +537,7 @@ def test_upload_validate_wrong_size(api_client, user, upload):
api_client.force_authenticate(user=user)

wrong_content = b'not 100 bytes'
upload.blob.save(os.path.basename(upload.blob.name), ContentFile(wrong_content))
upload.blob.save(upload.blob.name.split('/')[-1], ContentFile(wrong_content))

resp = api_client.post(f'/api/uploads/{upload.upload_id}/validate/')
assert resp.status_code == 400
Expand Down
3 changes: 1 addition & 2 deletions dandiapi/api/views/asset.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
# This should only be used for type interrogation, never instantiation
MinioStorage = type('FakeMinioStorage', (), {})

import os.path
from typing import TYPE_CHECKING

from django.conf import settings
Expand Down Expand Up @@ -146,7 +145,7 @@ def download(self, request, *args, **kwargs):
serializer.is_valid(raise_exception=True)
content_disposition = serializer.validated_data['content_disposition']
content_type = asset.metadata.get('encodingFormat', 'application/octet-stream')
asset_basename = os.path.basename(asset.path)
asset_basename = asset.path.split('/')[-1]

if content_disposition == 'attachment':
return HttpResponseRedirect(
Expand Down
8 changes: 2 additions & 6 deletions dandiapi/zarr/models.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from __future__ import annotations

import logging
from typing import TYPE_CHECKING
from urllib.parse import urlparse, urlunparse
from uuid import uuid4

Expand All @@ -13,9 +12,6 @@
from dandiapi.api.models import Dandiset
from dandiapi.api.storage import get_embargo_storage, get_storage

if TYPE_CHECKING:
from pathlib import Path

logger = logging.getLogger(name=__name__)


Expand Down Expand Up @@ -103,7 +99,7 @@ class ZarrArchive(BaseZarrArchive):
storage = get_storage()
dandiset = models.ForeignKey(Dandiset, related_name='zarr_archives', on_delete=models.CASCADE)

def s3_path(self, zarr_path: str | Path):
def s3_path(self, zarr_path: str) -> str:
"""Generate a full S3 object path from a path in this zarr_archive."""
return (
f'{settings.DANDI_DANDISETS_BUCKET_PREFIX}{settings.DANDI_ZARR_PREFIX_NAME}/'
Expand All @@ -117,7 +113,7 @@ class EmbargoedZarrArchive(BaseZarrArchive):
Dandiset, related_name='embargoed_zarr_archives', on_delete=models.CASCADE
)

def s3_path(self, zarr_path: str | Path):
def s3_path(self, zarr_path: str) -> str:
"""Generate a full S3 object path from a path in this zarr_archive."""
return (
f'{settings.DANDI_DANDISETS_EMBARGO_BUCKET_PREFIX}{settings.DANDI_ZARR_PREFIX_NAME}/'
Expand Down
6 changes: 3 additions & 3 deletions dandiapi/zarr/tests/test_zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ def test_zarr_rest_delete_file(
f'/api/zarr/{zarr_archive.zarr_id}/files/', [{'path': str(zarr_file.path)}]
)
assert resp.status_code == 204
assert not zarr_archive.storage.exists(zarr_archive.s3_path(zarr_file.path))
assert not zarr_archive.storage.exists(zarr_archive.s3_path(str(zarr_file.path)))

# Assert zarr is back in pending state
zarr_archive.refresh_from_db()
Expand Down Expand Up @@ -336,7 +336,7 @@ def test_zarr_rest_delete_multiple_files(

# Assert not found
for file in zarr_files:
assert not zarr_archive.storage.exists(zarr_archive.s3_path(file))
assert not zarr_archive.storage.exists(zarr_archive.s3_path(str(file.path)))

ingest_zarr_archive(zarr_archive.zarr_id)
zarr_archive.refresh_from_db()
Expand Down Expand Up @@ -369,7 +369,7 @@ def test_zarr_rest_delete_missing_file(
assert resp.json() == [
f'File test-prefix/test-zarr/{zarr_archive.zarr_id}/does/not/exist does not exist.'
]
assert zarr_archive.storage.exists(zarr_archive.s3_path(zarr_file.path))
assert zarr_archive.storage.exists(zarr_archive.s3_path(str(zarr_file.path)))

# Ingest
zarr_archive.status = ZarrArchiveStatus.UPLOADED
Expand Down
14 changes: 7 additions & 7 deletions dandiapi/zarr/views/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from __future__ import annotations

import logging
from pathlib import Path
from typing import TYPE_CHECKING

from django.db import IntegrityError, transaction
Expand Down Expand Up @@ -196,17 +195,18 @@ def files(self, request, zarr_id: str):
serializer.is_valid(raise_exception=True)

# The root path for this zarr in s3
base_path = Path(zarr_archive.s3_path(''))
# This will contain a trailing slash, due to the empty string argument
base_path = zarr_archive.s3_path('')

# Retrieve and join query params
limit = serializer.validated_data['limit']
download = serializer.validated_data['download']

raw_prefix = serializer.validated_data['prefix']
full_prefix = str(base_path / raw_prefix)
raw_prefix = serializer.validated_data['prefix'].lstrip('/')
full_prefix = (base_path + raw_prefix).rstrip('/')

_after = serializer.validated_data['after']
after = str(base_path / _after) if _after else ''
raw_after = serializer.validated_data['after'].lstrip('/')
after = (base_path + raw_after).rstrip('/') if raw_after else ''

# Handle head request redirects
if request.method == 'HEAD':
Expand All @@ -232,7 +232,7 @@ def files(self, request, zarr_id: str):
# Map/filter listing
results = [
{
'Key': str(Path(obj['Key']).relative_to(base_path)),
'Key': obj['Key'].removeprefix(base_path),
'LastModified': obj['LastModified'],
'ETag': obj['ETag'].strip('"'),
'Size': obj['Size'],
Expand Down

0 comments on commit fa82455

Please sign in to comment.