Skip to content

Commit

Permalink
Merge pull request #1638 from dandi/dynamic-metadata
Browse files Browse the repository at this point in the history
  • Loading branch information
jjnesbitt authored Jul 19, 2023
2 parents 97e8742 + 4e25fad commit cc833d3
Show file tree
Hide file tree
Showing 19 changed files with 186 additions and 139 deletions.
9 changes: 0 additions & 9 deletions DEVELOPMENT.md
Original file line number Diff line number Diff line change
Expand Up @@ -134,15 +134,6 @@ In API endpoint calls, add the `Authorization` HTTP header with a value of

For frequent deployment administration tasks, `django-extensions` provides a convenient way to write and run scripts that execute in the Django context.

### refresh_metadata

```
python manage.py refresh_metadata
```

This will save all `Version`s and `Asset`s, forcing them to recompute their metadata.
This is useful any time changes are made to the precomputed metadata in `_populate_metadata`.

### create_dev_dandiset

```
Expand Down
26 changes: 0 additions & 26 deletions dandiapi/api/management/commands/refresh_metadata.py

This file was deleted.

16 changes: 12 additions & 4 deletions dandiapi/api/manifests.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from rest_framework.renderers import JSONRenderer
import yaml

from dandiapi.api.models import AssetBlob, Version
from dandiapi.api.models import Asset, AssetBlob, Version
from dandiapi.api.storage import create_s3_storage


Expand Down Expand Up @@ -85,9 +85,11 @@ def write_dandiset_jsonld(version: Version):


def write_assets_jsonld(version: Version):
# Use full metadata when writing externally
assets_metadata = (asset.full_metadata for asset in version.assets.iterator())
with streaming_file_upload(assets_jsonld_path(version)) as stream:
stream.write('[')
for i, obj in enumerate(version.assets.values_list('metadata', flat=True).iterator()):
for i, obj in enumerate(assets_metadata):
if i > 0:
stream.write(',')
stream.write(JSONRenderer().render(obj).decode())
Expand Down Expand Up @@ -116,11 +118,17 @@ def _yaml_dump_sequence_from_generator(stream, generator):
def write_assets_yaml(version: Version):
with streaming_file_upload(assets_yaml_path(version)) as stream:
_yaml_dump_sequence_from_generator(
stream, version.assets.values_list('metadata', flat=True).order_by('created').iterator()
stream,
# Use full metadata when writing externally
(asset.full_metadata for asset in version.assets.order_by('created').iterator()),
)


def write_collection_jsonld(version: Version):
asset_ids = [
Asset.dandi_asset_id(asset_id)
for asset_id in version.assets.values_list('asset_id', flat=True)
]
with streaming_file_upload(collection_jsonld_path(version)) as stream:
stream.write(
JSONRenderer()
Expand All @@ -129,7 +137,7 @@ def write_collection_jsonld(version: Version):
'@context': version.metadata['@context'],
'id': version.metadata['id'],
'@type': 'prov:Collection',
'hasMember': list(version.assets.values_list('metadata__id', flat=True)),
'hasMember': asset_ids,
},
)
.decode()
Expand Down
22 changes: 22 additions & 0 deletions dandiapi/api/migrations/0042_asset_remove_computed_fields.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Generated by Django 4.1.1 on 2023-06-27 17:29

from django.db import migrations, models

from dandiapi.api.models.asset import ASSET_COMPUTED_FIELDS


def remove_unpublished_asset_fields(apps, schema_editor):
    """Remove the computed metadata keys from unpublished assets.

    Only assets that are unpublished and whose metadata holds at least one of
    the keys in ``ASSET_COMPUTED_FIELDS`` are selected. The key list is
    subtracted from the ``metadata`` column through an ``F()`` expression, so
    the change is applied by the database instead of saving each asset.

    ``apps`` is the historical app registry supplied by ``RunPython``;
    ``schema_editor`` is unused.
    """
    asset_model = apps.get_model('api', 'Asset')
    assets_with_computed_keys = asset_model.objects.filter(
        published=False,
        metadata__has_any_keys=ASSET_COMPUTED_FIELDS,
    )
    stripped_metadata = models.F('metadata') - ASSET_COMPUTED_FIELDS
    assets_with_computed_keys.update(metadata=stripped_metadata)


class Migration(migrations.Migration):
    """Data migration that runs ``remove_unpublished_asset_fields``."""

    dependencies = [('api', '0041_assetblob_download_count_and_more')]

    # No reverse function is passed to RunPython, so this step cannot be unapplied.
    operations = [migrations.RunPython(remove_unpublished_asset_fields)]
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Generated by Django 4.1.1 on 2023-06-27 17:50

from django.db import migrations, models


# Metadata keys referenced by the constraint below. Both branches of the
# constraint use the same list, so it is spelled out once.
_COMPUTED_METADATA_KEYS = [
    'id',
    'path',
    'identifier',
    'contentUrl',
    'contentSize',
    'digest',
    'datePublished',
    'publishedBy',
]

# Unpublished assets: metadata must not contain any of the keys.
_UNPUBLISHED_WITHOUT_KEYS = models.Q(
    ('published', False),
    models.Q(
        ('metadata__has_any_keys', list(_COMPUTED_METADATA_KEYS)),
        _negated=True,
    ),
)

# Published assets: metadata must contain all of the keys.
_PUBLISHED_WITH_KEYS = models.Q(
    ('published', True),
    ('metadata__has_keys', list(_COMPUTED_METADATA_KEYS)),
)


class Migration(migrations.Migration):
    """Add the ``asset_metadata_no_computed_keys_or_published`` check constraint."""

    dependencies = [('api', '0042_asset_remove_computed_fields')]

    operations = [
        migrations.AddConstraint(
            model_name='asset',
            constraint=models.CheckConstraint(
                # A row is valid when it satisfies either branch.
                check=models.Q(
                    _UNPUBLISHED_WITHOUT_KEYS,
                    _PUBLISHED_WITH_KEYS,
                    _connector='OR',
                ),
                name='asset_metadata_no_computed_keys_or_published',
            ),
        ),
    ]
55 changes: 29 additions & 26 deletions dandiapi/api/models/asset.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,16 @@

ASSET_CHARS_REGEX = r'[A-z0-9(),&\s#+~_=-]'
ASSET_PATH_REGEX = fr'^({ASSET_CHARS_REGEX}?\/?\.?{ASSET_CHARS_REGEX})+$'
ASSET_COMPUTED_FIELDS = [
'id',
'path',
'identifier',
'contentUrl',
'contentSize',
'digest',
'datePublished',
'publishedBy',
]


def validate_asset_path(path: str):
Expand Down Expand Up @@ -167,6 +177,15 @@ class Meta:
models.CheckConstraint(
name='asset_path_no_leading_slash', check=~Q(path__startswith='/')
),
# If the asset is published, its metadata must contain every computed field.
# Otherwise, its metadata must contain none of the computed fields.
models.CheckConstraint(
name='asset_metadata_no_computed_keys_or_published',
check=(
(Q(published=False) & ~Q(metadata__has_any_keys=ASSET_COMPUTED_FIELDS))
| (Q(published=True) & Q(metadata__has_keys=ASSET_COMPUTED_FIELDS))
),
),
]

@property
Expand Down Expand Up @@ -249,24 +268,22 @@ def is_different_from(

return False

def _populate_metadata(self):
@staticmethod
def dandi_asset_id(asset_id: str | uuid.UUID):
return f'dandiasset:{asset_id}'

@property
def full_metadata(self):
download_url = settings.DANDI_API_URL + reverse(
'asset-download',
kwargs={'asset_id': str(self.asset_id)},
)
if self.is_blob:
s3_url = self.blob.s3_url
elif self.is_embargoed_blob:
s3_url = self.embargoed_blob.s3_url
else:
s3_url = self.zarr.s3_url

metadata = {
**self.metadata,
'id': f'dandiasset:{self.asset_id}',
'id': self.dandi_asset_id(self.asset_id),
'path': self.path,
'identifier': str(self.asset_id),
'contentUrl': [download_url, s3_url],
'contentUrl': [download_url, self.s3_url],
'contentSize': self.size,
'digest': self.digest,
}
Expand All @@ -284,29 +301,15 @@ def published_metadata(self):
now = datetime.datetime.now(datetime.timezone.utc)
# Inject the publishedBy and datePublished fields
return {
**self.metadata,
**self.full_metadata,
'publishedBy': self.published_by(now),
'datePublished': now.isoformat(),
}

def save(self, *args, **kwargs):
self.metadata = self._populate_metadata()
super().save(*args, **kwargs)

@classmethod
def strip_metadata(cls, metadata):
"""Strip away computed fields from a metadata dict."""
computed_fields = [
'id',
'path',
'identifier',
'contentUrl',
'contentSize',
'digest',
'datePublished',
'publishedBy',
]
return {key: metadata[key] for key in metadata if key not in computed_fields}
return {key: metadata[key] for key in metadata if key not in ASSET_COMPUTED_FIELDS}

def __str__(self) -> str:
return self.path
Expand Down
11 changes: 0 additions & 11 deletions dandiapi/api/services/asset/metadata.py

This file was deleted.

14 changes: 5 additions & 9 deletions dandiapi/api/services/metadata/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,12 +64,7 @@ def validate_asset_metadata(*, asset: Asset) -> bool:

updated_asset = Asset.objects.filter(
id=asset.id, status=asset_state, metadata=asset.metadata, published=False
).update(
status=asset.status,
validation_errors=asset.validation_errors,
# include metadata in update since we're bypassing .save()
metadata=asset._populate_metadata(),
)
).update(status=asset.status, validation_errors=asset.validation_errors)
if updated_asset:
# Update modified timestamps on all draft versions this asset belongs to
asset.versions.filter(version='draft').update(modified=timezone.now())
Expand All @@ -84,9 +79,10 @@ def version_aggregate_assets_summary(version: Version) -> None:
raise VersionHasBeenPublished()

version.metadata['assetsSummary'] = aggregate_assets_summary(
version.assets.filter(status=Asset.Status.VALID)
.values_list('metadata', flat=True)
.iterator()
(
asset.full_metadata
for asset in version.assets.filter(status=Asset.Status.VALID).iterator()
)
)

Version.objects.filter(id=version.id, version='draft').update(
Expand Down
2 changes: 2 additions & 0 deletions dandiapi/api/services/publish/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,8 @@ def _publish_dandiset(dandiset_id: int) -> None:
for draft_asset in draft_assets.iterator():
publish_asset(asset=draft_asset)

# Every asset in new_version is published, so its stored metadata already contains the
# computed fields, and there is no need to use `.full_metadata`
new_version.metadata['assetsSummary'] = aggregate_assets_summary(
new_version.assets.values_list('metadata', flat=True).iterator()
)
Expand Down
Loading

0 comments on commit cc833d3

Please sign in to comment.