From ff5af87f7bc36d3a4c5df9178bb0bcc45f515b76 Mon Sep 17 00:00:00 2001 From: Wei Ouyang Date: Thu, 31 Oct 2024 10:32:28 -0700 Subject: [PATCH] Use parent artifact config to store zenodo token (#706) Use parent artifacts to store zenodo token --- docs/artifact-manager.md | 22 ++++++++++- hypha/artifact.py | 82 ++++++++++++++++++++++------------------ hypha/server.py | 16 -------- tests/conftest.py | 2 - tests/test_artifact.py | 21 +++++++++- 5 files changed, 84 insertions(+), 59 deletions(-) diff --git a/docs/artifact-manager.md b/docs/artifact-manager.md index 133f57e5..5d968b29 100644 --- a/docs/artifact-manager.md +++ b/docs/artifact-manager.md @@ -213,7 +213,7 @@ print("Valid dataset committed.") ## API References -### `create(prefix: str, manifest: dict, permissions: dict=None, config: dict=None, stage: bool = False, orphan: bool = False) -> None` +### `create(prefix: str, manifest: dict, permissions: dict=None, config: dict=None, stage: bool = False, orphan: bool = False, publish_to: str = None) -> None` Creates a new artifact or collection with the specified manifest. The artifact is staged until committed. For collections, the `collection` field should be an empty list. @@ -231,10 +231,11 @@ Creates a new artifact or collection with the specified manifest. The artifact i - `collection_schema`: Optional. A JSON schema that defines the structure of child artifacts in the collection. This schema is used to validate child artifacts when they are created or edited. If a child artifact does not conform to the schema, the creation or edit operation will fail. - `summary_fields`: Optional. A list of fields to include in the summary for each child artifact when calling `list(prefix)`. If not specified, the default summary fields (`id`, `type`, `name`) are used. To include all the fields in the summary, add `"*"` to the list. 
If you want to include internal fields such as `.created_at`, `.last_modified`, or other download/view statistics such as `.download_count`, you can also specify them individually in the `summary_fields`. If you want to include all fields, you can add `".*"` to the list. - `id_parts`: Optional. A dictionary of id name parts to be used in generating the id for child artifacts. For example: `{"animals": ["dog", "cat", ...], "colors": ["red", "blue", ...]}`. This can be used for creating child artifacts with auto-generated ids based on the id parts. For example, when calling `create`, you can specify the prefix as `collections/my-collection/{colors}-{animals}`, and the id will be generated based on the id parts, e.g., `collections/my-collection/red-dog`. - + - `archives`: Optional. Configurations for the public archive servers such as Zenodo for the collection. For example, `"archives": {"sandbox_zenodo": {"access_token": "your sandbox zenodo token"}, "zenodo": {"access_token": "your zenodo token"}}`. This is used for publishing artifacts to Zenodo. - `permissions`: Optional. A dictionary containing user permissions. For example `{"*": "r+"}` gives read and create access to everyone, `{"@": "rw+"}` allows all authenticated users to read/write/create, and `{"user_id_1": "r+"}` grants read and create permissions to a specific user. You can also set permissions for specific operations, such as `{"user_id_1": ["read", "create"]}`. See detailed explanation about permissions below. - `stage`: Optional. A boolean flag to stage the artifact. Default is `False`. If it's set to `True`, the artifact will be staged and not committed. You will need to call `commit()` to finalize the artifact. - `orphan`: Optional. A boolean flag to create the artifact without a parent collection. Default is `False`. If `True`, the artifact will not be associated with any collection. 
This is mainly used for creating top-level collections, and making sure the artifact is not associated with any parent collection (with inheritance of permissions). To create an orphan artifact, you will need write permissions on the workspace. +- `publish_to`: Optional. A string specifying the target platform to publish the artifact to. Supported values are `zenodo` and `sandbox_zenodo`. If set, the artifact will be published to the specified platform. The access token for that platform is read from the `archives` entry in the parent collection's `config`, so the artifact must be created under a collection that defines it. The artifact must have a valid Zenodo metadata schema to be published. **Note 1: If you set `stage=True`, you must call `commit()` to finalize the artifact.** @@ -550,6 +551,23 @@ await artifact_manager.reset_stats(prefix="collections/dataset-gallery/example-d --- +### `publish(prefix: str, to: str = None) -> None` + +Publishes the artifact to a specified platform, such as Zenodo. The artifact must have a valid Zenodo metadata schema to be published. + +**Parameters:** + +- `prefix`: The path of the artifact. It can be a prefix relative to the current workspace (e.g., `"collections/dataset-gallery/example-dataset"`) or an absolute prefix with the workspace id (e.g., `"/my_workspace_id/collections/dataset-gallery/example-dataset"`). +- `to`: Optional, the target platform to publish the artifact to. Supported values are `zenodo` and `sandbox_zenodo`. This parameter should be the same as the `publish_to` parameter in the `create` function or left empty to use the platform specified in the artifact's metadata. 
+ +**Example:** + +```python +await artifact_manager.publish(prefix="collections/dataset-gallery/example-dataset", to="sandbox_zenodo") +``` + +--- + ## Example Usage of API Functions Here's a simple sequence of API function calls to manage a dataset gallery: diff --git a/hypha/artifact.py b/hypha/artifact.py index 6c8d245a..fb95cf8b 100644 --- a/hypha/artifact.py +++ b/hypha/artifact.py @@ -21,6 +21,7 @@ or_, ) from hypha.utils import remove_objects_async, list_objects_async, safe_join +from hypha.utils.zenodo import ZenodoClient, Deposition from botocore.exceptions import ClientError from sqlalchemy import update from sqlalchemy.ext.declarative import declarative_base @@ -39,7 +40,6 @@ ApplicationArtifact, WorkspaceInfo, ) -from hypha.utils.zenodo import ZenodoClient, Deposition from hypha_rpc.utils import ObjectProxy from jsonschema import validate from typing import Union, List @@ -142,8 +142,6 @@ def __init__( self, store, s3_controller, - zenodo_token=None, - sandbox_zenodo_token=None, workspace_bucket="hypha-workspaces", ): """Set up controller with SQLAlchemy database and S3 for file storage.""" @@ -154,17 +152,6 @@ def __init__( self.s3_controller = s3_controller self.workspace_bucket = workspace_bucket self.store = store - if zenodo_token: - self.zenodo_client = ZenodoClient(zenodo_token, "https://zenodo.org") - else: - self.zenodo_client = None - - if sandbox_zenodo_token: - self.sandbox_zenodo_client = ZenodoClient( - sandbox_zenodo_token, "https://sandbox.zenodo.org" - ) - else: - self.sandbox_zenodo_client = None router = APIRouter() @router.get("/{workspace}/artifacts/{prefix:path}") @@ -254,8 +241,6 @@ async def get_artifact( self.store.set_artifact_manager(self) artifact_service = self.get_artifact_service() - if self.zenodo_client or self.sandbox_zenodo_client: - artifact_service["publish"] = self.publish self.store.register_public_service(artifact_service) self.store.register_router(router) @@ -550,6 +535,31 @@ async def 
def _get_zenodo_client(self, parent_artifact, publish_to):
    """Build a Zenodo client from the parent artifact's archive settings.

    The access token is looked up under
    ``parent_artifact.config["archives"][publish_to]["access_token"]``.

    Args:
        parent_artifact: Object whose ``config`` attribute is a dict (or
            ``None``) that may contain an ``"archives"`` mapping.
        publish_to: Target archive, either ``"zenodo"`` or
            ``"sandbox_zenodo"``.

    Returns:
        A ``ZenodoClient`` created with the configured token and the base
        URL that matches ``publish_to``.

    Raises:
        ValueError: If ``publish_to`` is not a supported target, if no
            parent artifact is given, or if no access token is configured
            for the requested target.
    """
    # Supported targets: human-readable label (for error messages) and URL.
    targets = {
        "zenodo": ("Zenodo", "https://zenodo.org"),
        "sandbox_zenodo": ("Sandbox Zenodo", "https://sandbox.zenodo.org"),
    }
    target = targets.get(publish_to) if isinstance(publish_to, str) else None
    if target is None:
        raise ValueError(f"Publishing to '{publish_to}' is not supported.")
    label, base_url = target

    if parent_artifact is None:
        raise ValueError(
            "A parent artifact is required to look up the archive access token."
        )

    # The config, its "archives" entry and the per-target entry can each be
    # absent or None; treat every such case as "token not configured"
    # instead of failing with TypeError/AttributeError on a None value.
    config = parent_artifact.config or {}
    archives = config.get("archives") or {}
    token = (archives.get(publish_to) or {}).get("access_token")
    if not token:
        raise ValueError(f"{label} access token is not configured.")
    return ZenodoClient(token, base_url)
+ session = await self._get_session() + try: + async with session.begin(): + parent_artifact = await self._get_artifact(session, ws, parent_prefix) + zenodo_client = self._get_zenodo_client(parent_artifact, to) + if not parent_artifact: + raise ValueError( + f"Parent artifact under prefix '{parent_prefix}' not found." + ) + except Exception as e: + raise e + finally: + await session.close() + config = artifact.config or {} if config and "zenodo" in config: # Infer the target Zenodo instance from the existing config @@ -1493,15 +1510,6 @@ async def publish( ), "Cannot publish to Sandbox Zenodo from Zenodo." to = "zenodo" - if to == "zenodo": - zenodo_client = self.zenodo_client - assert zenodo_client, "Zenodo access token is not configured." - elif to == "sandbox_zenodo": - zenodo_client = self.sandbox_zenodo_client - assert zenodo_client, "Sandbox Zenodo access token is not configured." - else: - raise ValueError(f"Publishing to '{to}' is not supported.") - if "zenodo" in config: deposition_id = config["zenodo"]["id"] deposition_info = await zenodo_client.load_deposition(deposition_id) diff --git a/hypha/server.py b/hypha/server.py index 60b0d0df..0ff892e0 100644 --- a/hypha/server.py +++ b/hypha/server.py @@ -103,8 +103,6 @@ def start_builtin_services( artifact_manager = ArtifactController( store, s3_controller=s3_controller, - zenodo_token=args.zenodo_token, - sandbox_zenodo_token=args.sandbox_zenodo_token, workspace_bucket=args.workspace_bucket, ) @@ -417,20 +415,6 @@ def get_argparser(add_help=True): help="enable S3 proxy for serving pre-signed URLs", ) - parser.add_argument( - "--zenodo-token", - type=str, - default=None, - help="Access token for zenodo: https://zenodo.org", - ) - - parser.add_argument( - "--sandbox-zenodo-token", - type=str, - default=None, - help="Access token for sandbox version of zenodo: https://sandbox.zenodo.org", - ) - return parser diff --git a/tests/conftest.py b/tests/conftest.py index 475f1db3..2edf78e4 100644 --- 
a/tests/conftest.py +++ b/tests/conftest.py @@ -171,8 +171,6 @@ def fastapi_server_fixture(minio_server): f"--access-key-id={MINIO_ROOT_USER}", f"--secret-access-key={MINIO_ROOT_PASSWORD}", f"--endpoint-url-public={MINIO_SERVER_URL_PUBLIC}", - f"--zenodo-token={os.environ.get('ZENODO_TOKEN', '')}", - f"--sandbox-zenodo-token={os.environ.get('SANDBOX_ZENODO_TOKEN', '')}", "--enable-s3-proxy", f"--workspace-bucket=my-workspaces", "--s3-admin-type=minio", diff --git a/tests/test_artifact.py b/tests/test_artifact.py index 5ca1df4a..2351dfdf 100644 --- a/tests/test_artifact.py +++ b/tests/test_artifact.py @@ -1,7 +1,7 @@ """Test Artifact services.""" import pytest import requests -import time +import os from hypha_rpc import connect_to_server from . import SERVER_URL, find_item @@ -241,6 +241,24 @@ async def test_publish_artifact(minio_server, fastapi_server): """Test publishing an artifact.""" api = await connect_to_server({"name": "test-client", "server_url": SERVER_URL}) artifact_manager = await api.get_service("public/artifact-manager") + # Create a collection for testing publishing + collection_manifest = { + "name": "Publish Test Collection", + "description": "A collection to test publishing", + "type": "collection", + } + + access_token = os.environ.get("SANDBOX_ZENODO_TOKEN") + assert access_token, "Please set SANDBOX_ZENODO_TOKEN environment variable" + collection_config = {"archives": {"sandbox_zenodo": {"access_token": access_token}}} + await artifact_manager.create( + prefix="test-collection", + manifest=collection_manifest, + config=collection_config, + stage=False, + orphan=True, + ) + # Create an artifact (orphan) dataset_manifest = { "name": "Test Dataset", @@ -252,7 +270,6 @@ async def test_publish_artifact(minio_server, fastapi_server): prefix="test-collection/{zenodo_id}", manifest=dataset_manifest, stage=True, - orphan=True, publish_to="sandbox_zenodo", ) # add files