From 963181f0f3b99a1edd521e090315db950f3ce40d Mon Sep 17 00:00:00 2001 From: Binh Vu Date: Sat, 13 Apr 2024 16:02:03 -0700 Subject: [PATCH] When importing a dataset, if readable labels are missing for nodes/edges in the semantic descriptions, users can generate default ones via the `--add-missing-readable-label` flag. --- CHANGELOG.md | 1 + sand/commands/load.py | 65 +++++++++++++++++++++++++++++++++++++++++-- sand/container.py | 1 + 3 files changed, 64 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index afb92a0..a3772bf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ - SAND Entity Editor UI, when users link new cell, it will automatically do an initial search using the cell as the query to save users time from re-entering the same information. - SAND Entity Editor UI, we can apply search results to multiple cells at once. - Users can export the linked entities +- When importing a dataset, if readable labels are missing for nodes/edges in the semantic descriptions, users can generate default ones via the `--add-missing-readable-label` flag. 
## [4.1.0] - 2024-04-13 diff --git a/sand/commands/load.py b/sand/commands/load.py index f0fdd6a..639ba88 100644 --- a/sand/commands/load.py +++ b/sand/commands/load.py @@ -1,14 +1,17 @@ from __future__ import annotations from pathlib import Path -from typing import List, Tuple +from typing import List, Optional, Tuple import click +from dependency_injector.wiring import Provide, inject from sm.dataset import Dataset, Example, FullTable from sm.misc.funcs import import_func -from sm.outputs.semantic_model import DataNode +from sm.outputs.semantic_model import ClassNode, DataNode from tqdm.auto import tqdm +from sand.container import use_container +from sand.helpers.dependency_injection import use_auto_inject from sand.models import ( ContextPage, Link, @@ -20,10 +23,16 @@ ) from sand.models import db as dbconn from sand.models import init_db +from sand.models.ontology import OntClassAR, OntPropertyAR @click.command(name="load") @click.option("-d", "--db", required=True, help="smc database file") +@click.option( + "-c", + "--config", + help="Path to the configuration file", +) @click.option("-p", "--project", default="default", help="Project name") @click.option( "--dataset", @@ -37,7 +46,19 @@ type=int, help="Number of tables to load (negative number or zero to load all)", ) -def load_dataset(db: str, project: str, dataset: str, n_tables: int): +@click.option( + "--add-missing-readable-label", + is_flag=True, + help="Attempt to add readable label for nodes and edges that don't have them", +) +def load_dataset( + db: str, + config: Optional[str], + project: str, + dataset: str, + n_tables: int, + add_missing_readable_label: bool, +): """Load a dataset into a project""" init_db(db) @@ -50,6 +71,44 @@ def load_dataset(db: str, project: str, dataset: str, n_tables: int): if n_tables > 0: examples = examples[:n_tables] + with use_container(config) as container: + with use_auto_inject(container): + import_examples( + project, + examples, + add_missing_readable_label, + ) 
+ + +@inject +def import_examples( + project: str, + examples: list[Example[FullTable]], + add_missing_readable_label: bool, + ontclass_ar: OntClassAR = Provide["classes"], + ontprop_ar: OntPropertyAR = Provide["properties"], +): + if add_missing_readable_label: + for ex in tqdm(examples, "add missing readable labels"): + for sm in ex.sms: + for n in sm.iter_nodes(): + if isinstance(n, ClassNode): + if n.readable_label is None: + n.readable_label = ( + tmp.readable_label + if (tmp := ontclass_ar.get_by_uri(n.abs_uri)) + is not None + else None + ) + for e in sm.iter_edges(): + if e.readable_label is None: + if e.readable_label is None: + e.readable_label = ( + tmp.readable_label + if (tmp := ontprop_ar.get_by_uri(e.abs_uri)) is not None + else None + ) + with dbconn: p = Project.get(name=project) for e in tqdm(examples, desc="Loading examples"): diff --git a/sand/container.py b/sand/container.py index bd9d8cf..ca0a585 100644 --- a/sand/container.py +++ b/sand/container.py @@ -67,6 +67,7 @@ def use_container(config_file: Optional[Path | str] = None): "sand.serializer", "sand.helpers", "sand.app", + "sand.commands.load", ], modules=["sand.extensions.export.drepr.main"], )