Skip to content

Commit

Permalink
Merge pull request #1430 from Sage-Bionetworks/develop-tracing
Browse files: browse the repository at this point in the history
feat: added tracing to schematic code base for manifest generation and manifest submission
  • Loading branch information
linglp authored Jun 20, 2024
2 parents 925e174 + 9229884 commit 64aba90
Show file tree
Hide file tree
Showing 9 changed files with 3,083 additions and 2,866 deletions.
84 changes: 83 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,11 @@ Flask = {version = "2.1.3", optional = true}
Flask-Cors = {version = "^3.0.10", optional = true}
uWSGI = {version = "^2.0.21", optional = true}
Jinja2 = {version = ">2.11.3", optional = true}
jaeger-client = {version = "^4.8.0", optional = true}
flask-opentracing = {version="^2.0.0", optional = true}

[tool.poetry.extras]
api = ["connexion", "Flask", "Flask-Cors", "Jinja2", "pyopenssl"]
api = ["connexion", "Flask", "Flask-Cors", "Jinja2", "pyopenssl", "jaeger-client", "flask-opentracing"]
aws = ["uWSGI"]


Expand Down
13 changes: 13 additions & 0 deletions schematic/manifest/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,10 @@
from schematic.configuration.configuration import CONFIG
from schematic.utils.google_api_utils import export_manifest_drive_service

from opentelemetry import trace

logger = logging.getLogger(__name__)
tracer = trace.get_tracer("Schematic")


class ManifestGenerator(object):
Expand Down Expand Up @@ -1289,6 +1291,7 @@ def _gather_all_fields(self, fields, json_schema):
)
return required_metadata_fields

@tracer.start_as_current_span("ManifestGenerator::get_empty_manifest")
def get_empty_manifest(
self,
strict: Optional[bool],
Expand Down Expand Up @@ -1334,6 +1337,7 @@ def _get_missing_columns(self, headers_1: list, headers_2: list) -> list:
"""
return set(headers_1) - set(headers_2)

@tracer.start_as_current_span("ManifestGenerator::set_dataframe_by_url")
def set_dataframe_by_url(
self,
manifest_url: str,
Expand Down Expand Up @@ -1425,6 +1429,7 @@ def map_annotation_names_to_display_names(
# Use the above dictionary to rename columns in question
return annotations.rename(columns=label_map)

@tracer.start_as_current_span("ManifestGenerator::get_manifest_with_annotations")
def get_manifest_with_annotations(
self, annotations: pd.DataFrame, strict: Optional[bool] = None
) -> Tuple[ps.Spreadsheet, pd.DataFrame]:
Expand Down Expand Up @@ -1465,6 +1470,7 @@ def get_manifest_with_annotations(

return manifest_url, manifest_df

@tracer.start_as_current_span("ManifestGenerator::export_sheet_to_excel")
def export_sheet_to_excel(
self, title: str = None, manifest_url: str = None, output_location: str = None
) -> str:
Expand Down Expand Up @@ -1514,6 +1520,7 @@ def export_sheet_to_excel(

return output_excel_file_path

@tracer.start_as_current_span("ManifestGenerator::_handle_output_format_logic")
def _handle_output_format_logic(
self,
output_format: str = None,
Expand Down Expand Up @@ -1570,6 +1577,7 @@ def _handle_output_format_logic(
return dataframe

@staticmethod
@tracer.start_as_current_span("ManifestGenerator::create_single_manifest")
def create_single_manifest(
path_to_data_model: str,
graph_data_model: nx.MultiDiGraph,
Expand Down Expand Up @@ -1623,6 +1631,7 @@ def create_single_manifest(
return result

@staticmethod
@tracer.start_as_current_span("ManifestGenerator::create_manifests")
def create_manifests(
path_to_data_model: str,
data_types: list,
Expand Down Expand Up @@ -1751,6 +1760,7 @@ def create_manifests(

return all_results

@tracer.start_as_current_span("ManifestGenerator::get_manifest")
def get_manifest(
self,
dataset_id: str = None,
Expand Down Expand Up @@ -1998,6 +2008,9 @@ def _format_new_excel_column(self, worksheet, new_column_index: int, col: str):
)
return worksheet

@tracer.start_as_current_span(
"ManifestGenerator::populate_existing_excel_spreadsheet"
)
def populate_existing_excel_spreadsheet(
self, existing_excel_path: str = None, additional_df: pd.DataFrame = None
):
Expand Down
4 changes: 4 additions & 0 deletions schematic/models/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,12 @@
from schematic.utils.df_utils import load_df

from schematic.models.validate_manifest import validate_all
from opentelemetry import trace

logger = logging.getLogger(__name__)

tracer = trace.get_tracer("Schematic")


class MetadataModel(object):
"""Metadata model wrapper around schema.org specification graph.
Expand Down Expand Up @@ -317,6 +320,7 @@ def populateModelManifest(
manifestPath, emptyManifestURL, return_excel=return_excel, title=title
)

@tracer.start_as_current_span("MetadataModel::submit_metadata_manifest")
def submit_metadata_manifest( # pylint: disable=too-many-arguments, too-many-locals
self,
manifest_path: str,
Expand Down
5 changes: 5 additions & 0 deletions schematic/schemas/data_model_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import networkx as nx # type: ignore
import graphviz # type: ignore
from opentelemetry import trace

from schematic.schemas.data_model_edges import DataModelEdges
from schematic.schemas.data_model_nodes import DataModelNodes
Expand All @@ -20,8 +21,11 @@
from schematic.utils.viz_utils import visualize
from schematic.utils.validate_utils import rule_in_rule_list

logger = logging.getLogger(__name__)


logger = logging.getLogger(__name__)
tracer = trace.get_tracer("Schematic")


class DataModelGraphMeta: # pylint: disable=too-few-public-methods
Expand Down Expand Up @@ -85,6 +89,7 @@ def __init__(
)
self.graph = self.generate_data_model_graph()

@tracer.start_as_current_span("DataModelGraph::generate_data_model_graph")
def generate_data_model_graph(self) -> nx.MultiDiGraph:
"""
Generate NetworkX Graph from the Relationships/attributes dictionary, the graph is built
Expand Down
8 changes: 7 additions & 1 deletion schematic/schemas/data_model_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import logging
import pandas as pd
from opentelemetry import trace

from schematic.utils.df_utils import load_df
from schematic.utils.io_utils import load_json
Expand All @@ -14,7 +15,9 @@

from schematic import LOADER

logger = logging.getLogger("Synapse storage")
logger = logging.getLogger("Schemas")

tracer = trace.get_tracer("Schematic")


class DataModelParser:
Expand Down Expand Up @@ -84,6 +87,7 @@ def parse_base_model(self) -> dict:
base_model = jsonld_parser.parse_jsonld_model(base_model_path)
return base_model

@tracer.start_as_current_span("DataModelParser::parse_model")
def parse_model(self) -> dict[str, dict[str, Any]]:
"""Given a data model type, instantiate and call the appropriate data model parser.
Returns:
Expand Down Expand Up @@ -230,6 +234,7 @@ def gather_csv_attributes_relationships(
)
return attr_rel_dictionary

@tracer.start_as_current_span("Schemas::DataModelCSVParser::parse_csv_model")
def parse_csv_model(
self,
path_to_data_model: str,
Expand Down Expand Up @@ -529,6 +534,7 @@ def gather_jsonld_attributes_relationships(self, model_jsonld: list[dict]) -> di
)
return attr_rel_dictionary

@tracer.start_as_current_span("Schemas::DataModelJSONLDParser::parse_jsonld_model")
def parse_jsonld_model(
self,
path_to_data_model: str,
Expand Down
Loading

0 comments on commit 64aba90

Please sign in to comment.