Skip to content

Commit

Permalink
Merge pull request #1395 from Sage-Bionetworks/develop-annotations-op…
Browse files Browse the repository at this point in the history
…tional

feat: Added `file-annotations-upload` parameter when submitting a manifest
  • Loading branch information
linglp authored Apr 10, 2024
2 parents 00b28f2 + cd9d0da commit 77fbaf7
Show file tree
Hide file tree
Showing 8 changed files with 595 additions and 113 deletions.
9 changes: 9 additions & 0 deletions schematic/models/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,13 @@ def model(ctx, config): # use as `schematic model ...`
is_flag=True,
help=query_dict(model_commands, ("model", "validate", "restrict_rules")),
)
@click.option(
"--file_annotations_upload/--no-file_annotations_upload",
"-fa/-no-fa",
default=True,
is_flag=True,
help=query_dict(model_commands, ("model", "submit", "file_annotations_upload")),
)
@click.option(
"-ps",
"--project_scope",
Expand Down Expand Up @@ -147,6 +154,7 @@ def submit_manifest(
data_model_labels,
table_column_names,
annotation_keys,
file_annotations_upload: bool,
):
"""
Running CLI with manifest validation (optional) and submission options.
Expand All @@ -173,6 +181,7 @@ def submit_manifest(
table_manipulation=table_manipulation,
table_column_names=table_column_names,
annotation_keys=annotation_keys,
file_annotations_upload=file_annotations_upload,
)

if manifest_id:
Expand Down
6 changes: 6 additions & 0 deletions schematic/models/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,7 @@ def submit_metadata_manifest(
restrict_rules: bool,
access_token: Optional[str] = None,
validate_component: Optional[str] = None,
file_annotations_upload: bool = True,
hide_blanks: bool = False,
project_scope: List = None,
table_manipulation: str = "replace",
Expand All @@ -336,6 +337,7 @@ def submit_metadata_manifest(
manifest_path: Path to the manifest file, which contains the metadata.
dataset_id: Synapse ID of the dataset on Synapse containing the metadata manifest file.
validate_component: Component from the schema.org schema based on which the manifest template has been generated.
file_annotations_upload (bool): Defaults to True. If False, annotations are not added to files.
Returns:
Manifest ID: If both validation and association were successful.
Exceptions:
Expand Down Expand Up @@ -389,6 +391,7 @@ def submit_metadata_manifest(
table_manipulation=table_manipulation,
table_column_names=table_column_names,
annotation_keys=annotation_keys,
file_annotations_upload=file_annotations_upload,
)
restrict_maniest = True

Expand All @@ -402,6 +405,7 @@ def submit_metadata_manifest(
table_manipulation=table_manipulation,
table_column_names=table_column_names,
annotation_keys=annotation_keys,
file_annotations_upload=file_annotations_upload,
)

logger.info(f"No validation errors occured during validation.")
Expand All @@ -424,6 +428,7 @@ def submit_metadata_manifest(
table_manipulation=table_manipulation,
table_column_names=table_column_names,
annotation_keys=annotation_keys,
file_annotations_upload=file_annotations_upload,
)
restrict_maniest = True

Expand All @@ -437,6 +442,7 @@ def submit_metadata_manifest(
table_manipulation=table_manipulation,
table_column_names=table_column_names,
annotation_keys=annotation_keys,
file_annotations_upload=file_annotations_upload,
)

logger.debug(
Expand Down
77 changes: 44 additions & 33 deletions schematic/store/synapse.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
# allows specifying explicit variable types
from typing import Dict, List, Tuple, Sequence, Union, Optional


from synapseclient import (
Synapse,
File,
Expand All @@ -46,7 +45,6 @@
SynapseHTTPError,
)
import synapseutils
from synapseutils.copy_functions import changeFileMetaData

from schematic_db.rdb.synapse_database import SynapseDatabase

Expand Down Expand Up @@ -1327,11 +1325,11 @@ def upload_manifest_file(
parent=datasetId,
name=file_name_new,
)

manifest_synapse_file_id = self.syn.store(
manifestSynapseFile, isRestricted=restrict_manifest
).id
changeFileMetaData(

synapseutils.copy_functions.changeFileMetaData(
syn=self.syn, entity=manifest_synapse_file_id, downloadAs=file_name_new
)

Expand Down Expand Up @@ -1677,11 +1675,11 @@ def add_annotations_to_entities_files(
name as upper camelcase, and strip blacklisted characters, display_label will strip blacklisted characters including spaces, to retain
display label formatting while ensuring the label is formatted properly for Synapse annotations.
Returns:
manifest (pd.DataFrame): modified to add entityId as appropriate.
manifest (pd.DataFrame): modified to add entityId as appropriate
"""

# Expected behavior is to annotate files if `Filename` is present regardless of `-mrt` setting
# Expected behavior is to annotate files if `Filename` is present and file_annotations_upload is set to True, regardless of the `-mrt` setting
if "filename" in [col.lower() for col in manifest.columns]:
# get current list of files and store as dataframe
dataset_files = self.getFilesInStorageDataset(datasetId)
Expand Down Expand Up @@ -1733,6 +1731,7 @@ def upload_manifest_as_table(
table_manipulation: str,
table_column_names: str,
annotation_keys: str,
file_annotations_upload: bool = True,
):
"""Upload manifest to Synapse as a table and csv.
Args:
Expand All @@ -1752,6 +1751,7 @@ def upload_manifest_as_table(
annotation_keys: (str) display_label/class_label (default), Sets labeling style for annotation keys. class_label will format the display
name as upper camelcase, and strip blacklisted characters, display_label will strip blacklisted characters including spaces, to retain
display label formatting while ensuring the label is formatted properly for Synapse annotations.
file_annotations_upload (bool): Defaults to True. If False, annotations are not added to files.
Return:
manifest_synapse_file_id: SynID of manifest csv uploaded to synapse.
"""
Expand All @@ -1766,15 +1766,16 @@ def upload_manifest_as_table(
table_column_names=table_column_names,
)

manifest = self.add_annotations_to_entities_files(
dmge,
manifest,
manifest_record_type,
datasetId,
hideBlanks,
manifest_synapse_table_id,
annotation_keys,
)
if file_annotations_upload:
manifest = self.add_annotations_to_entities_files(
dmge,
manifest,
manifest_record_type,
datasetId,
hideBlanks,
manifest_synapse_table_id,
annotation_keys,
)
# Load manifest to synapse as a CSV File
manifest_synapse_file_id = self.upload_manifest_file(
manifest,
Expand Down Expand Up @@ -1820,6 +1821,7 @@ def upload_manifest_as_csv(
hideBlanks,
component_name,
annotation_keys: str,
file_annotations_upload: bool = True,
):
"""Upload manifest to Synapse as a csv only.
Args:
Expand All @@ -1833,17 +1835,19 @@ def upload_manifest_as_csv(
annotation_keys: (str) display_label/class_label (default), Sets labeling style for annotation keys. class_label will format the display
name as upper camelcase, and strip blacklisted characters, display_label will strip blacklisted characters including spaces, to retain
display label formatting while ensuring the label is formatted properly for Synapse annotations.
file_annotations_upload (bool): Defaults to True. If False, annotations are not added to files.
Return:
manifest_synapse_file_id (str): SynID of manifest csv uploaded to synapse.
"""
manifest = self.add_annotations_to_entities_files(
dmge,
manifest,
manifest_record_type,
datasetId,
hideBlanks,
annotation_keys=annotation_keys,
)
if file_annotations_upload:
manifest = self.add_annotations_to_entities_files(
dmge,
manifest,
manifest_record_type,
datasetId,
hideBlanks,
annotation_keys=annotation_keys,
)

# Load manifest to synapse as a CSV File
manifest_synapse_file_id = self.upload_manifest_file(
Expand Down Expand Up @@ -1878,6 +1882,7 @@ def upload_manifest_combo(
table_manipulation,
table_column_names: str,
annotation_keys: str,
file_annotations_upload: bool = True,
):
"""Upload manifest to Synapse as a table and CSV with entities.
Args:
Expand All @@ -1897,6 +1902,7 @@ def upload_manifest_combo(
annotation_keys: (str) display_label/class_label (default), Sets labeling style for annotation keys. class_label will format the display
name as upper camelcase, and strip blacklisted characters, display_label will strip blacklisted characters including spaces, to retain
display label formatting while ensuring the label is formatted properly for Synapse annotations.
file_annotations_upload (bool): Defaults to True. If False, annotations are not added to files.
Return:
manifest_synapse_file_id (str): SynID of manifest csv uploaded to synapse.
"""
Expand All @@ -1910,15 +1916,16 @@ def upload_manifest_combo(
table_column_names=table_column_names,
)

manifest = self.add_annotations_to_entities_files(
dmge,
manifest,
manifest_record_type,
datasetId,
hideBlanks,
manifest_synapse_table_id,
annotation_keys=annotation_keys,
)
if file_annotations_upload:
manifest = self.add_annotations_to_entities_files(
dmge,
manifest,
manifest_record_type,
datasetId,
hideBlanks,
manifest_synapse_table_id,
annotation_keys=annotation_keys,
)

# Load manifest to synapse as a CSV File
manifest_synapse_file_id = self.upload_manifest_file(
Expand Down Expand Up @@ -1961,6 +1968,7 @@ def associateMetadataWithFiles(
table_manipulation: str = "replace",
table_column_names: str = "class_label",
annotation_keys: str = "class_label",
file_annotations_upload: bool = True,
) -> str:
"""Associate metadata with files in a storage dataset already on Synapse.
Upload metadataManifest in the storage dataset folder on Synapse as well. Return synapseId of the uploaded manifest file.
Expand Down Expand Up @@ -2000,7 +2008,6 @@ def associateMetadataWithFiles(
table_name, component_name = self._generate_table_name(manifest)

# Upload manifest to synapse based on user input (manifest_record_type)

if manifest_record_type == "file_only":
manifest_synapse_file_id = self.upload_manifest_as_csv(
dmge,
Expand All @@ -2012,6 +2019,7 @@ def associateMetadataWithFiles(
manifest_record_type=manifest_record_type,
component_name=component_name,
annotation_keys=annotation_keys,
file_annotations_upload=file_annotations_upload,
)
elif manifest_record_type == "table_and_file":
manifest_synapse_file_id = self.upload_manifest_as_table(
Expand All @@ -2027,6 +2035,7 @@ def associateMetadataWithFiles(
table_manipulation=table_manipulation,
table_column_names=table_column_names,
annotation_keys=annotation_keys,
file_annotations_upload=file_annotations_upload,
)
elif manifest_record_type == "file_and_entities":
manifest_synapse_file_id = self.upload_manifest_as_csv(
Expand All @@ -2039,6 +2048,7 @@ def associateMetadataWithFiles(
manifest_record_type=manifest_record_type,
component_name=component_name,
annotation_keys=annotation_keys,
file_annotations_upload=file_annotations_upload,
)
elif manifest_record_type == "table_file_and_entities":
manifest_synapse_file_id = self.upload_manifest_combo(
Expand All @@ -2054,6 +2064,7 @@ def associateMetadataWithFiles(
table_manipulation=table_manipulation,
table_column_names=table_column_names,
annotation_keys=annotation_keys,
file_annotations_upload=file_annotations_upload,
)
else:
raise ValueError("Please enter a valid manifest_record_type.")
Expand Down
7 changes: 7 additions & 0 deletions schematic_api/api/openapi/api.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -442,6 +442,13 @@ paths:
enum: ["display_label", "class_label"]
default: "class_label"
required: false
- in: query
name: file_annotations_upload
schema:
type: boolean
default: true
description: if false, do not add annotations when submitting file-based manifests.
required: false
- in: query
name: project_scope
schema:
Expand Down
3 changes: 3 additions & 0 deletions schematic_api/api/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

import pandas as pd
import json
from typing import Optional

from schematic.configuration.configuration import CONFIG
from schematic.visualization.attributes_explorer import AttributesExplorer
Expand Down Expand Up @@ -392,6 +393,7 @@ def submit_manifest_route(
project_scope=None,
table_column_names=None,
annotation_keys=None,
file_annotations_upload:bool=True,
):
# call config_handler()
config_handler(asset_view=asset_view)
Expand Down Expand Up @@ -449,6 +451,7 @@ def submit_manifest_route(
project_scope=project_scope,
table_column_names=table_column_names,
annotation_keys=annotation_keys,
file_annotations_upload=file_annotations_upload
)

return manifest_id
Expand Down
Loading

0 comments on commit 77fbaf7

Please sign in to comment.