Fds 1843 use graph #1425

Closed
afwillia wants to merge 49 commits into develop from FDS-1843-use-graph

Changes from 47 commits

Commits (49)
1c67b4a
Add data_model_graph_pickle to generator
afwillia Apr 11, 2024
a986b39
Add data_model_graph_pickle to metadata.py
afwillia Apr 11, 2024
0390b69
Add data_model_graph to attributes_explorer
afwillia Apr 11, 2024
37bbf74
Add data_model_graph_pickle to tangled_tree
afwillia Apr 11, 2024
4596560
Merge branch 'develop' into FDS-1843-use-graph
afwillia May 7, 2024
b5dd8e4
Add data_model_graph argument to ManifestGenerator.create_manifests
afwillia May 8, 2024
c8da900
use pickle.load to read pickle file
afwillia May 8, 2024
601b5d0
Add data model graph pickle to attributes_explorer
afwillia May 8, 2024
c6d01de
Add data model graph pickle to tangled_tree
afwillia May 8, 2024
77979c7
Merge branch 'develop' into FDS-1843-use-graph
afwillia Jun 5, 2024
9907b4e
Fix mix-up between parsed data model and graph data model
afwillia Jun 6, 2024
481ebd3
Add data model pickle parsing to tangled_tree
afwillia Jun 6, 2024
f63231a
Add data_model_pickle file to tests
afwillia Jun 6, 2024
f3fdc41
Add data model pickle file
afwillia Jun 6, 2024
032771f
run black
afwillia Jun 6, 2024
bf0e9ba
fix a couple sonarcloud issues with the graph_data_model variable
afwillia Jun 6, 2024
bc59218
Add data_model_graph_pickle to metadata.py and tests
afwillia Jun 6, 2024
5973a75
run black on generator.py
afwillia Jun 6, 2024
bf1a449
remove type check from networkx
afwillia Jun 6, 2024
69bb182
fix pylint issues
afwillia Jun 6, 2024
d93aa9d
remove pickle from metadata test because it wasn't created with data_…
afwillia Jun 7, 2024
aef4f62
create two metadata_model objects with different data_model_label gra…
afwillia Jun 7, 2024
08b2ae5
add display_label graph pickle
afwillia Jun 7, 2024
a088d63
Merge branch 'develop' into FDS-1843-use-graph
afwillia Jun 26, 2024
68672c3
Add multiple combinations of jsonld and pickle to test_viz attribute_…
afwillia Jun 28, 2024
de07c55
Fix missing variables in tangled_tree when jsonld and pickle are supp…
afwillia Jun 28, 2024
77f5ec4
Add combinations of jsonld and pickle to tangled_tree tests
afwillia Jun 28, 2024
5934ed5
add graph_data_model argument description
afwillia Jun 28, 2024
51313bd
add data_model_graph_pickle description to MetadataModel
afwillia Jun 28, 2024
e4e1368
run black on test_viz.py
afwillia Aug 26, 2024
9a78a18
Update schematic/manifest/generator.py
afwillia Aug 26, 2024
05b4fe0
Add read_pickle function to read a binary pickle file
afwillia Aug 26, 2024
0964501
Use read_pickle instead of importing pickle and opening file
afwillia Aug 26, 2024
697cc3e
Add note for pickle files that don't fit in memory. Not sure if this …
afwillia Aug 26, 2024
774dc91
Use read_pickle to load the pickle file in metadata.py
afwillia Aug 26, 2024
bb7c450
read_pickle instead of import pickle in attributes_explorer
afwillia Aug 26, 2024
8f5bc1a
set self.graph_data_model and update it if data_model_grapher is not …
afwillia Aug 26, 2024
35c13dd
use if ... is not None instead of if not ...
afwillia Aug 26, 2024
f8c14d4
use if ... is not None instead of if not ...
afwillia Aug 26, 2024
13c9ffc
add an input to read_pickle
afwillia Aug 26, 2024
9205d6d
run black on io_utils.py
afwillia Aug 27, 2024
172e1b1
use read_pickle to read pickle file. Set default data_model_labels
afwillia Aug 27, 2024
d4d3a30
set default data_model_labels
afwillia Aug 27, 2024
730b227
alter logic for setting graph_data_model from pickle or DataModelGrap…
afwillia Aug 27, 2024
795e263
fix pickle reading logic in tangled tree
afwillia Aug 27, 2024
0ddb552
fix logic checking for None parameters in attributes_explorer.py
afwillia Aug 27, 2024
537af6f
import "import pickle" should be placed before "from schematic import…
afwillia Aug 27, 2024
23b7e1d
update docstring and use if ... is not None instead of if not ...
afwillia Aug 27, 2024
fa5e308
use if ... is None instead of if ... is not None
afwillia Aug 27, 2024
28 changes: 21 additions & 7 deletions schematic/manifest/generator.py
@@ -21,6 +21,7 @@
build_service_account_creds,
)
from schematic.utils.df_utils import update_df, load_df
from schematic.utils.io_utils import read_pickle
from schematic.utils.schema_utils import extract_component_validation_rules
from schematic.utils.validate_utils import rule_in_rule_list
from schematic.utils.schema_utils import DisplayLabelType
@@ -1642,11 +1643,15 @@ def create_manifests(
title: Optional[str] = None,
strict: Optional[bool] = True,
use_annotations: Optional[bool] = False,
graph_data_model: Optional[nx.MultiDiGraph] = None,
data_model_graph_pickle: Optional[str] = None,
) -> Union[List[str], List[pd.DataFrame]]:
"""Create multiple manifests

Args:
path_to_data_model (str): str path to data model
data_model_graph_pickle (str): filepath to a data model graph stored as a pickle file
graph_data_model (nx.MultiDiGraph): a networkx MultiDiGraph object
data_types (list): a list of data types
access_token (str, optional): synapse access token. Required when getting an existing manifest. Defaults to None.
dataset_ids (list, optional): a list of dataset ids when generating an existing manifest. Defaults to None.
@@ -1677,16 +1682,25 @@
"Please check your submission and try again."
)

data_model_parser = DataModelParser(path_to_data_model=path_to_data_model)
if not graph_data_model:
@andrewelamb (Contributor) commented on Aug 27, 2024 (resolved):

You might want this to be `if graph_data_model is None`; I'm not sure how nx.MultiDiGraph handles truthiness.

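To illustrate the reviewer's point: networkx graphs fall back to __len__ for truthiness, so an empty MultiDiGraph is falsy and `if not graph_data_model:` would silently discard it. A minimal sketch, not part of the diff:

import networkx as nx

graph = nx.MultiDiGraph()  # an explicitly supplied, but still empty, graph

print(not graph)      # True  -- bool() falls back to __len__(), i.e. the node count
print(graph is None)  # False -- a graph object was passed in

# With `if not graph_data_model:` the empty graph above would be ignored and the
# data model re-parsed from disk; `if graph_data_model is None:` only rebuilds
# the graph when no graph argument was given at all.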
if data_model_graph_pickle:
"""What if pickle file does not fit in memory?"""
graph_data_model = read_pickle(data_model_graph_pickle)
else:
data_model_parser = DataModelParser(
path_to_data_model=path_to_data_model
)

# Parse Model
parsed_data_model = data_model_parser.parse_model()
# Parse Model
parsed_data_model = data_model_parser.parse_model()

# Instantiate DataModelGraph
data_model_grapher = DataModelGraph(parsed_data_model, data_model_labels)
# Instantiate DataModelGraph
data_model_grapher = DataModelGraph(
parsed_data_model, data_model_labels
)

# Generate graph
graph_data_model = data_model_grapher.graph
# Generate graph
graph_data_model = data_model_grapher.graph

# Gather all returned result urls
all_results = []
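The hunk above interleaves the old and new bodies of create_manifests; condensed, the new precedence is: use a supplied graph object, otherwise load the pickle, otherwise parse the JSON-LD. A standalone sketch of that decision, written as an illustration rather than code taken from the PR:

from typing import Optional

import networkx as nx

from schematic.schemas.data_model_graph import DataModelGraph
from schematic.schemas.data_model_parser import DataModelParser
from schematic.utils.io_utils import read_pickle


def resolve_graph(
    path_to_data_model: str,
    data_model_labels: str = "class_label",
    graph_data_model: Optional[nx.MultiDiGraph] = None,
    data_model_graph_pickle: Optional[str] = None,
) -> nx.MultiDiGraph:
    """Hypothetical helper mirroring the precedence in the diff above."""
    if graph_data_model is not None:
        # An in-memory graph wins outright.
        return graph_data_model
    if data_model_graph_pickle is not None:
        # Next preference: a pre-built graph serialized to disk.
        return read_pickle(data_model_graph_pickle)
    # Last resort: parse the JSON-LD model and build the graph from scratch.
    parsed_data_model = DataModelParser(
        path_to_data_model=path_to_data_model
    ).parse_model()
    return DataModelGraph(parsed_data_model, data_model_labels).graph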
26 changes: 18 additions & 8 deletions schematic/models/metadata.py
@@ -17,6 +17,7 @@
from schematic.store.synapse import SynapseStorage

from schematic.utils.df_utils import load_df
from schematic.utils.io_utils import read_pickle

from schematic.models.validate_manifest import validate_all
from opentelemetry import trace
@@ -42,12 +43,14 @@ def __init__(
inputMModelLocation: str,
inputMModelLocationType: str,
data_model_labels: str,
data_model_graph_pickle: Optional[str] = None,
) -> None:
"""Instantiates a MetadataModel object.

Args:
inputMModelLocation: local path, uri, synapse entity id (e.g. gs://, syn123, /User/x/…); present location
inputMModelLocationType: specifier to indicate where the metadata model resource can be found (e.g. 'local' if file/JSON-LD is on local machine)
data_model_graph_pickle: filepath to a data model graph stored as pickle file.
"""
# extract extension of 'inputMModelLocation'
# ensure that it is necessarily pointing to a '.jsonld' file
@@ -60,17 +63,24 @@
self.inputMModelLocation = inputMModelLocation
self.path_to_json_ld = inputMModelLocation

data_model_parser = DataModelParser(path_to_data_model=self.inputMModelLocation)
# Parse Model
parsed_data_model = data_model_parser.parse_model()
# Use graph, if provided. Otherwise parse data model for graph.
if data_model_graph_pickle:
self.graph_data_model = read_pickle(data_model_graph_pickle)
self.dmge = DataModelGraphExplorer(self.graph_data_model)
else:
data_model_parser = DataModelParser(
path_to_data_model=self.inputMModelLocation
)
# Parse Model
parsed_data_model = data_model_parser.parse_model()

# Instantiate DataModelGraph
data_model_grapher = DataModelGraph(parsed_data_model, data_model_labels)
# Instantiate DataModelGraph
data_model_grapher = DataModelGraph(parsed_data_model, data_model_labels)

# Generate graph
self.graph_data_model = data_model_grapher.graph
# Generate graph
self.graph_data_model = data_model_grapher.graph

self.dmge = DataModelGraphExplorer(self.graph_data_model)
self.dmge = DataModelGraphExplorer(self.graph_data_model)

# check if the type of MModel file is "local"
# currently, the application only supports reading from local JSON-LD files
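A hedged usage sketch of the new MetadataModel parameter, reusing the example file names from this PR's test fixtures; passing data_model_graph_pickle takes the pickle branch above and skips the parse and graph-building steps:

from schematic.models.metadata import MetadataModel

# Paths mirror tests/test_metadata.py in this PR; adjust for your own model.
metadata_model = MetadataModel(
    inputMModelLocation="tests/data/example.model.jsonld",
    inputMModelLocationType="local",
    data_model_labels="class_label",
    data_model_graph_pickle="tests/data/example.model.pickle",
)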
8 changes: 8 additions & 0 deletions schematic/utils/io_utils.py
@@ -3,6 +3,7 @@
from typing import Any
import json
import urllib.request
import pickle
from schematic import LOADER


@@ -40,3 +41,10 @@ def load_schemaorg() -> Any:
data_path = "data_models/schema_org.model.jsonld"
schema_org_path = LOADER.filename(data_path)
return load_json(schema_org_path)


def read_pickle(file_path: str) -> Any:
"""Read pickle file"""
with open(file_path, "rb") as fle:
data = pickle.load(fle)
return data
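A minimal usage sketch of the new helper; the path comes from the test fixtures added in this PR, and the security caveat is general pickle behaviour rather than anything the diff states:

from schematic.utils.io_utils import read_pickle

# Load a pickled data model graph produced by a trusted schematic run.
# pickle.load can execute arbitrary code, so never point this at untrusted files.
graph_data_model = read_pickle("tests/data/example.model.pickle")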
26 changes: 16 additions & 10 deletions schematic/visualization/attributes_explorer.py
@@ -5,12 +5,13 @@
from typing import Optional, no_type_check
import numpy as np
import pandas as pd
import networkx as nx # type: ignore

from schematic.schemas.data_model_parser import DataModelParser
from schematic.schemas.data_model_graph import DataModelGraph, DataModelGraphExplorer
from schematic.schemas.data_model_json_schema import DataModelJSONSchema
from schematic.utils.schema_utils import DisplayLabelType
from schematic.utils.io_utils import load_json
from schematic.utils.io_utils import load_json, read_pickle

logger = logging.getLogger(__name__)

@@ -22,34 +23,39 @@ class AttributesExplorer:
def __init__(
self,
path_to_jsonld: str,
data_model_labels: DisplayLabelType,
data_model_labels: DisplayLabelType = "class_label",
data_model_grapher: Optional[DataModelGraph] = None,
data_model_graph_explorer: Optional[DataModelGraphExplorer] = None,
parsed_data_model: Optional[dict] = None,
graph_data_model: Optional[nx.MultiDiGraph] = None,
data_model_graph_pickle: Optional[str] = None,
) -> None:
self.path_to_jsonld = path_to_jsonld

self.jsonld = load_json(self.path_to_jsonld)
if graph_data_model is not None:
self.graph_data_model = graph_data_model
elif data_model_graph_pickle is not None:
self.graph_data_model = read_pickle(data_model_graph_pickle)

# Parse Model
if not parsed_data_model:
if parsed_data_model is None:
data_model_parser = DataModelParser(
path_to_data_model=self.path_to_jsonld,
)
parsed_data_model = data_model_parser.parse_model()

# Instantiate DataModelGraph
if not data_model_grapher:
if data_model_grapher is None:
data_model_grapher = DataModelGraph(parsed_data_model, data_model_labels)

# Generate graph
self.graph_data_model = data_model_grapher.graph
# Generate graph
self.graph_data_model = data_model_grapher.graph

# Instantiate Data Model Graph Explorer
if not data_model_graph_explorer:
self.dmge = DataModelGraphExplorer(self.graph_data_model)
else:
if data_model_graph_explorer is not None:
self.dmge = data_model_graph_explorer
else:
self.dmge = DataModelGraphExplorer(self.graph_data_model)

# Instantiate Data Model Json Schema
self.data_model_js = DataModelJSONSchema(
34 changes: 22 additions & 12 deletions schematic/visualization/tangled_tree.py
@@ -18,7 +18,7 @@
from schematic.visualization.attributes_explorer import AttributesExplorer
from schematic.schemas.data_model_parser import DataModelParser
from schematic.schemas.data_model_graph import DataModelGraph, DataModelGraphExplorer
from schematic.utils.io_utils import load_json
from schematic.utils.io_utils import load_json, read_pickle
from schematic.utils.schema_utils import DisplayLabelType


@@ -43,14 +43,15 @@ class Node(TypedDict):
children: list[str]


class TangledTree: # pylint: disable=too-many-instance-attributes
class TangledTree:  # pylint: disable=too-many-instance-attributes, too-many-arguments
"""Tangled tree class"""

def __init__(
self,
path_to_json_ld: str,
figure_type: FigureType,
data_model_labels: DisplayLabelType,
data_model_labels: DisplayLabelType = "class_label",
data_model_graph_pickle: Optional[str] = None,
) -> None:
# Load jsonld
self.path_to_json_ld = path_to_json_ld
@@ -59,19 +60,26 @@ def __init__(
# Parse schema name
self.schema_name = path.basename(self.path_to_json_ld).split(".model.jsonld")[0]

parsed_data_model = None

# Instantiate Data Model Parser
data_model_parser = DataModelParser(
path_to_data_model=self.path_to_json_ld,
)
if data_model_graph_pickle is None:
data_model_parser = DataModelParser(
path_to_data_model=self.path_to_json_ld,
)

# Parse Model
parsed_data_model = data_model_parser.parse_model()
# Parse Model
parsed_data_model = data_model_parser.parse_model()

# Instantiate DataModelGraph
data_model_grapher = DataModelGraph(parsed_data_model, data_model_labels)
# Instantiate DataModelGraph
data_model_grapher = DataModelGraph(parsed_data_model, data_model_labels)

# Generate graph
self.graph_data_model = data_model_grapher.graph
# Generate graph
self.graph_data_model = data_model_grapher.graph

else:
self.graph_data_model = read_pickle(data_model_graph_pickle)
data_model_grapher = self.graph_data_model

# Instantiate Data Model Graph Explorer
self.dmge = DataModelGraphExplorer(self.graph_data_model)
@@ -91,6 +99,8 @@ def __init__(
data_model_grapher=data_model_grapher,
data_model_graph_explorer=self.dmge,
parsed_data_model=parsed_data_model,
graph_data_model=self.graph_data_model,
data_model_graph_pickle=data_model_graph_pickle,
)

# Create output paths.
Binary file added tests/data/example.display.label.model.pickle
Binary file not shown.
Binary file added tests/data/example.model.pickle
Binary file not shown.
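The fixture files above are binary, so the diff cannot show how they were produced. One plausible way to regenerate such a pickle, reusing the parse and graph calls that appear elsewhere in this diff (an assumption, not something the PR documents):

import pickle

from schematic.schemas.data_model_parser import DataModelParser
from schematic.schemas.data_model_graph import DataModelGraph

# Parse the JSON-LD model and build its graph, as the diffed modules do.
parsed_data_model = DataModelParser(
    path_to_data_model="tests/data/example.model.jsonld"
).parse_model()
graph_data_model = DataModelGraph(parsed_data_model, "class_label").graph

# Serialize the graph so it can later be passed via data_model_graph_pickle.
with open("tests/data/example.model.pickle", "wb") as fle:
    pickle.dump(graph_data_model, fle)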
19 changes: 18 additions & 1 deletion tests/test_metadata.py
@@ -20,6 +20,20 @@ def metadata_model(helpers, data_model_labels):
inputMModelLocation=helpers.get_data_path("example.model.jsonld"),
data_model_labels=data_model_labels,
inputMModelLocationType="local",
data_model_graph_pickle=helpers.get_data_path("example.model.pickle"),
)

return metadata_model


def metadata_model_display(helpers, data_model_labels):
metadata_model = MetadataModel(
inputMModelLocation=helpers.get_data_path("example.model.jsonld"),
data_model_labels=data_model_labels,
inputMModelLocationType="local",
data_model_graph_pickle=helpers.get_data_path(
"example.display.label.model.pickle"
),
)

return metadata_model
@@ -34,7 +48,10 @@ class TestMetadataModel:
)
def test_get_component_requirements(self, helpers, as_graph, data_model_labels):
# Instantiate MetadataModel
meta_data_model = metadata_model(helpers, data_model_labels)
if data_model_labels == "class_label":
meta_data_model = metadata_model(helpers, data_model_labels)
else:
meta_data_model = metadata_model_display(helpers, data_model_labels)

if data_model_labels == "display_label":
source_component = "BulkRNAseqAssay"
59 changes: 46 additions & 13 deletions tests/test_viz.py
@@ -13,30 +13,63 @@
logger = logging.getLogger(__name__)


@pytest.fixture
def attributes_explorer(helpers):
@pytest.fixture(
params=[
("example.model.jsonld", "example.model.pickle"),
("example.model.jsonld", ""),
pytest.param(("", ""), marks=pytest.mark.xfail),
pytest.param(("", "example.model.pickle"), marks=pytest.mark.xfail),
]
)
def attributes_explorer(request, helpers):
A collaborator commented:

Take a look at this doc here: https://www.draconianoverlord.com/2017/11/28/using-given/when/then-for-tests.html/

Personally, I am a huge fan of giving each test a very explicit purpose. The way I do this is to clearly state a set of criteria the test is expected to follow in a:

# GIVEN some initial setup (Explain what the setup is doing)

# WHEN I apply some action after the initial setup (The functions you are testing)

# THEN I expect the output/behavior to look like (Add your assertions)

I added some tests like this in: #1472 if you wanted some examples

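A minimal illustration of the GIVEN/WHEN/THEN layout the reviewer suggests, applied to the fixture under review; the test name and assertions are hypothetical and not part of this PR:

def test_attributes_explorer_builds_graph_from_pickle(helpers):
    # GIVEN a JSON-LD data model and a pickled graph built from it
    path_to_jsonld = helpers.get_data_path("example.model.jsonld")
    path_to_graph = helpers.get_data_path("example.model.pickle")

    # WHEN an AttributesExplorer is constructed from the pickled graph
    explorer = AttributesExplorer(
        path_to_jsonld,
        data_model_labels="class_label",
        data_model_graph_pickle=path_to_graph,
    )

    # THEN it exposes a usable graph and graph explorer
    assert explorer.graph_data_model is not None
    assert explorer.dmge is not None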
# Get JSONLD file path
path_to_jsonld = helpers.get_data_path("example.model.jsonld")
param1, param2 = request.param
path_to_jsonld = helpers.get_data_path(param1)
path_to_graph = helpers.get_data_path(param2)

# Initialize AttributesExplorer
attributes_explorer = AttributesExplorer(
path_to_jsonld,
data_model_labels="class_label",
)
if param2 != "":
attributes_explorer = AttributesExplorer(
path_to_jsonld,
data_model_graph_pickle=path_to_graph,
data_model_labels="class_label",
)
else:
attributes_explorer = AttributesExplorer(
path_to_jsonld,
data_model_labels="class_label",
)
yield attributes_explorer


@pytest.fixture
def tangled_tree(helpers):
@pytest.fixture(
params=[
("example.model.jsonld", "example.model.pickle"),
("example.model.jsonld", ""),
pytest.param(("", ""), marks=pytest.mark.xfail),
pytest.param(("", "example.model.pickle"), marks=pytest.mark.xfail),
]
)
def tangled_tree(helpers, request):
figure_type = "component"

# Get JSONLD file path
path_to_jsonld = helpers.get_data_path("example.model.jsonld")
param1, param2 = request.param
path_to_jsonld = helpers.get_data_path(param1)
path_to_graph = helpers.get_data_path(param2)

# Initialize TangledTree
tangled_tree = TangledTree(
path_to_jsonld, figure_type, data_model_labels="class_label"
)
if param2 == "":
tangled_tree = TangledTree(
path_to_jsonld, figure_type, data_model_labels="class_label"
)
else:
tangled_tree = TangledTree(
path_to_jsonld,
figure_type,
data_model_labels="class_label",
data_model_graph_pickle=path_to_graph,
)
yield tangled_tree

