diff --git a/deepsearch/artifacts/artifact_manager.py b/deepsearch/artifacts/artifact_manager.py
index 5b2e90d0..da00267b 100644
--- a/deepsearch/artifacts/artifact_manager.py
+++ b/deepsearch/artifacts/artifact_manager.py
@@ -1,3 +1,7 @@
+import logging
+
+logger = logging.getLogger("root.artifacts")
+
 import json
 import os
 import shutil
@@ -40,6 +44,7 @@ def get_index_path(self) -> Path:
     def get_artifact_path_in_cache(self, artifact_name: str) -> Path:
         artifact_path = self._cache_path / artifact_name
         if not artifact_path.exists():
+            logger.error(f'Artifact "{artifact_name}" not in cache')
             raise FileNotFoundError(f'Artifact "{artifact_name}" not in cache')
         return artifact_path
 
@@ -52,13 +57,18 @@ def download_artifact_to_cache(
     ) -> None:
         artifact_path = self._cache_path / artifact_name
         if artifact_path.exists():
+            logger.info(f"Artifact already in cache using {hit_strategy=}")
             if hit_strategy == self.HitStrategy.RAISE:
+                logger.error(f'Artifact "{artifact_name}" already in cache')
                 raise ValueError(f'Artifact "{artifact_name}" already in cache')
             elif hit_strategy == self.HitStrategy.PASS:
+                logger.info("Skipped artifact")
                 return
             elif hit_strategy == self.HitStrategy.OVERWRITE:
+                logger.info("Overwriting artifact")
                 shutil.rmtree(artifact_path)
             else:
+                logger.error(f'Unexpected value "{hit_strategy=}"')
                 raise RuntimeError(f'Unexcpected value "{hit_strategy=}"')
 
         artifact_path.mkdir(exist_ok=False)
@@ -70,6 +80,7 @@ def download_artifact_to_cache(
         download_url = artifact_meta[ARTF_META_URL_FIELD]
 
         with tempfile.TemporaryDirectory() as temp_dir:
+            logger.info("Downloading artifact to temporary directory")
             download_path = self._download_file(
                 artifact_name=artifact_name,
                 download_url=download_url,
@@ -108,6 +119,7 @@ def _download_file(
         with_progress_bar: bool,
     ) -> Path:
         response = requests.get(download_url, stream=True)
+        logger.info(f"{response.status_code} response from {download_url}")
         response.raise_for_status()
 
         dl_filename = None
@@ -123,10 +135,13 @@ def _download_file(
                     dl_filename = "=".join(split_param[1:]).strip().strip("'\"")
                     break
 
-        # otherwise, use name from URL:
-        if dl_filename is None:
+        if dl_filename:
+            logger.info(f"Resolved filename from response header {dl_filename}")
+        else:
+            # otherwise, use name from URL:
             parsed_url = urlparse(download_url)
             dl_filename = Path(parsed_url.path).name
+            logger.info(f"Resolved filename from URL {dl_filename}")
 
         total_size = int(response.headers.get("content-length", 0))
         block_size = 1024  # 1 KB
@@ -168,13 +183,16 @@ def _finalize_download(
             attempt_unpack = True
 
         if attempt_unpack:
+            logger.info("Unpacking archive and moving to destination")
             shutil.unpack_archive(dl_path_str, target_path)
         else:
+            logger.info("Moving archive to destination")
             shutil.move(dl_path_str, target_path / "")
 
     def _get_artifact_meta(self, artifact_name: str) -> Dict:
         file_path = self._index_path / artifact_name / ARTF_META_FILENAME
         if not file_path.exists():
+            logger.error(f'File "{file_path}" does not exist')
             raise FileNotFoundError(f'File "{file_path}" does not exist')
         with open(file_path, "r") as file:
             meta_info = json.load(file)
diff --git a/deepsearch/artifacts/cli/main.py b/deepsearch/artifacts/cli/main.py
index bceab192..c7654ba9 100644
--- a/deepsearch/artifacts/cli/main.py
+++ b/deepsearch/artifacts/cli/main.py
@@ -1,3 +1,6 @@
+import logging
+
+logger = logging.getLogger("root.artifacts")
 import typer
 
 from deepsearch.artifacts.artifact_manager import (
@@ -36,6 +39,7 @@ def list_index(
     index: str = INDEX_OPTION,
 ):
+    logger.info("Listing artifacts in index")
     artf_mgr = ArtifactManager(index=index)
     artifacts = artf_mgr.get_artifacts_in_index()
     for artf in artifacts:
@@ -47,6 +51,7 @@ def list_index(
 def list_cache(
     cache: str = CACHE_OPTION,
 ):
+    logger.info("Listing artifacts in cache")
     artf_mgr = ArtifactManager(cache=cache)
     artifacts = artf_mgr.get_artifacts_in_cache()
     for artf in artifacts:
@@ -56,6 +61,7 @@ def list_cache(
 @app.command(help="Show cache path")
 @cli_handler()
 def locate_default_cache():
+    logger.info("Resolving cache path")
     artf_mgr = ArtifactManager()
     path_str = str(artf_mgr.get_cache_path().resolve())
     typer.echo(path_str)
@@ -83,6 +89,7 @@ def download(
     unpack: bool = typer.Option(True),
     progress_bar: bool = typer.Option(True),
 ):
+    logger.info(f"Attempting to download {artifact_name=} from {index=}")
     artf_mgr = ArtifactManager(index=index, cache=cache)
     artf_mgr.download_artifact_to_cache(
         artifact_name=artifact_name,
@@ -101,6 +108,7 @@ def download_all(
     unpack: bool = typer.Option(True),
     progress_bar: bool = typer.Option(True),
 ):
+    logger.info(f"Attempting to download all artifacts from {index=}")
     artf_mgr = ArtifactManager(index=index, cache=cache)
     for artf_name in artf_mgr.get_artifacts_in_index():
         artf_mgr.download_artifact_to_cache(
diff --git a/deepsearch/cli.py b/deepsearch/cli.py
index 00ed5fb5..4e9243aa 100644
--- a/deepsearch/cli.py
+++ b/deepsearch/cli.py
@@ -1,3 +1,39 @@
+import logging
+from pathlib import Path
+from typing import List
+
+from deepsearch.core.client.settings_manager import settings_mgr
+
+
+def setup_logger():
+    # Set up the shared "root" logger
+    log_target_file, log_to_console = settings_mgr.get_logging_conf()
+
+    p = Path(log_target_file)
+    if not p.is_dir():
+        p.mkdir(parents=True)
+
+    p = p.joinpath("deepsearch.log")
+
+    handlers: List[logging.Handler] = [
+        logging.FileHandler(p),
+    ]
+    if log_to_console:
+        handlers.append(logging.StreamHandler())
+    formatter = logging.Formatter(
+        "%(asctime)s %(name)s — %(levelname)s — %(module)s:%(funcName)s:%(lineno)d — %(message)s"
+    )
+    logger = logging.getLogger("root")
+    logger.setLevel(logging.DEBUG)
+    for h in handlers:
+        h.setFormatter(formatter)
+        logger.addHandler(h)
+
+    return logger
+
+
+logger = setup_logger()
+
 from deepsearch.artifacts.cli.main import app as artifacts_app
 from deepsearch.core.cli.main import app
 from deepsearch.core.cli.plugins import get_cli_groups
@@ -16,7 +52,7 @@
 for group in get_cli_groups():
     app.add_typer(group)
 
-
+logger.info("Root module finished initialization")
 
 if __name__ == "__main__":
     app()
diff --git a/deepsearch/core/cli/main.py b/deepsearch/core/cli/main.py
index 96129e21..0c35fe12 100644
--- a/deepsearch/core/cli/main.py
+++ b/deepsearch/core/cli/main.py
@@ -1,3 +1,8 @@
+import logging
+
+logger = logging.getLogger("root.core")
+
+
 import typer
 
 import deepsearch as ds
@@ -13,10 +18,12 @@
 )
 app.add_typer(profile_app, name="profile", help="Manage profile configuration")
 app.add_typer(login_app, name="login", help=MSG_LOGIN_DEPRECATION)
+logger.info("Core module finished initialization")
 
 
 @app.command(name="version", help=f"Print the client and server version")
 def get_version():
+    logger.info("Getting DeepSearch version")
     versions = ds.version()
     typer.echo(f"Client: {versions.client}")
     typer.echo(f"Server: {versions.server}")
diff --git a/deepsearch/core/cli/plugins.py b/deepsearch/core/cli/plugins.py
index 8b82554d..82a9396a 100644
--- a/deepsearch/core/cli/plugins.py
+++ b/deepsearch/core/cli/plugins.py
@@ -1,11 +1,16 @@
+import logging
 from typing import List
 
+logger = logging.getLogger("root.core.plugins")
+
 import pluggy
 import typer
 
 deepsearch_cli_hookspec = pluggy.HookspecMarker("deepsearch_cli")
 deepsearch_cli_hookimpl = pluggy.HookimplMarker("deepsearch_cli")
 
+logger.info("Plugins module initialized")
+
 
 class DeepsearchCliPlugin:
     @deepsearch_cli_hookspec
@@ -15,10 +20,12 @@ def deepsearch_cli_add_group(self) -> typer.Typer:
 
         :return: A typer.Typer instance with a name set.
         """
+        logger.error("Feature not implemented")
         raise NotImplementedError
 
 
 def get_cli_plugin_manager():
+    logger.info("Getting CLI plugin manager")
     manager = pluggy.PluginManager("deepsearch_cli")
 
     manager.add_hookspecs(DeepsearchCliPlugin)
@@ -34,6 +41,7 @@ def get_cli_groups() -> List[typer.Typer]:
 
     for app in apps:
         if not app.info.name:
+            logger.error(f"All registered apps must have names, but {app} doesn't")
             raise ValueError(f"All registered apps must have names, but {app} doesn't")
 
     return apps
diff --git a/deepsearch/core/client/settings.py b/deepsearch/core/client/settings.py
index ee09283f..249d1aa1 100644
--- a/deepsearch/core/client/settings.py
+++ b/deepsearch/core/client/settings.py
@@ -4,6 +4,7 @@
 from pathlib import Path
 from typing import Dict, Optional, Union
 
+import platformdirs
 from pydantic import BaseSettings, SecretStr
 
 
@@ -56,6 +57,8 @@ class MainSettings(DumpableSettings):
 
     profile: Optional[str] = None  # None only when profiles not yet iniitialized
     show_cli_stack_traces: bool = False
+    log_file: str = platformdirs.user_log_dir("DeepSearch", "IBM")
+    log_to_console: bool = False
 
     class Config:
         env_prefix = "DEEPSEARCH_"
diff --git a/deepsearch/core/client/settings_manager.py b/deepsearch/core/client/settings_manager.py
index 0cce3a07..bb6fcb89 100644
--- a/deepsearch/core/client/settings_manager.py
+++ b/deepsearch/core/client/settings_manager.py
@@ -97,6 +97,9 @@ def _migrate_legacy_config(self) -> None:
     def _get_profile_path(self, profile_name: str) -> Path:
         return self._profile_root_path / f"{profile_name}.env"
 
+    def get_logging_conf(self) -> tuple[str, bool]:
+        return self._main_settings.log_file, self._main_settings.log_to_console
+
     def get_all_profile_settings(self) -> Dict[str, ProfileSettings]:
         return {k: self._profile_cache[k].settings for k in self._profile_cache}
 
diff --git a/deepsearch/cps/cli/data_indices_typer.py b/deepsearch/cps/cli/data_indices_typer.py
index ade0f569..4a9d3b7e 100644
--- a/deepsearch/cps/cli/data_indices_typer.py
+++ b/deepsearch/cps/cli/data_indices_typer.py
@@ -1,3 +1,7 @@
+import logging
+
+logger = logging.getLogger("root.cps.data_indices")
+
 import json
 from enum import Enum
 from pathlib import Path
@@ -41,6 +45,7 @@ def list(
     output: OutputEnum = OutputOption,
 ):
     api = CpsApi.from_env()
+    logger.info(f"Listing data indices in project {proj_key}")
 
     try:
         indices = api.data_indices.list(proj_key=proj_key)
@@ -55,6 +60,7 @@
             for index in indices
         ]
     except ValueError as e:
+        logger.error(e)
         print(f"Error occurred: {e}")
 
     cli_output(results, output, headers="keys")
@@ -75,6 +81,7 @@ def create(
     ),
 ):
     api = CpsApi.from_env()
+    logger.info(f"Creating data index in project {proj_key}, {name=}, {desc=}, {type=}")
 
     try:
         api.data_indices.create(
@@ -85,6 +92,7 @@
         )
         typer.echo("Data Index Created.")
     except ValueError as e:
+        logger.error(e)
         typer.echo(f"Error occurred: {e}")
         typer.echo(ERROR_MSG)
         raise typer.Abort()
@@ -98,19 +106,23 @@ def delete_data_index(
     index_key: str = INDEX_KEY,
 ):
     api = CpsApi.from_env()
+    logger.info(f"Deleting data index from project {proj_key}, {index_key=}")
     delete = typer.confirm("Are you sure you want to delete this data index?")
     coords = ElasticProjectDataCollectionSource(proj_key=proj_key, index_key=index_key)
 
     if not delete:
         typer.echo("Cancelling delete operation.")
+        logger.info("Cancelling delete operation.")
         raise typer.Abort()
     elif delete:
         # get confirmation token
         try:
             api.data_indices.delete(coords)
             typer.echo("Deleted!")
+            logger.info("Index deleted")
         except ApiException as e:
+            logger.error(e)
             typer.echo(f"Error occurred: {e}")
             typer.echo(ERROR_MSG)
             raise typer.Abort()
@@ -121,7 +133,7 @@ def get_urls(path: Path) -> List[str]:
     """
     Returns list of url from input file.
     """
-
+    logger.info(f"Getting url list from {path}")
     lines = path.read_text()
     urls = [line.strip() for line in lines.split("\n") if line.strip() != ""]
     return urls
@@ -142,6 +154,9 @@ def upload_files(
 
     api = CpsApi.from_env()
 
+    logger.info(
+        f"Uploading files/urls to {proj_key=} in {index_key=}. {url=} {local_file=}"
+    )
     urls = None
     if url is not None:
         p = Path(url)
@@ -164,6 +179,9 @@
         local_file=local_file,
         s3_coordinates=cos_coordinates,
     )
+    # TODO: this looks buggy; "urls" is computed above but never used, only the
+    # unprocessed "url" is passed on
+    logger.info(f"Uploading to {coords=}")
 
 
 @app.command(
@@ -181,6 +199,9 @@ def add_attachment(
     """
     Add attachment to a index item
     """
     api = CpsApi.from_env()
+    logger.info(
+        f"Adding attachment to index item {proj_key=} {index_key=}, {index_item_id=}, {attachment_key=} {attachment_path=}"
+    )
 
     # get indices of the project
     indices = api.data_indices.list(proj_key)
@@ -196,13 +217,16 @@
                 attachment_path=attachment_path,
                 attachment_key=attachment_key,
             )
+            logger.info("Attachment added successfully.")
             typer.echo("Attachment added successfully.")
         except ValueError as e:
+            logger.error(e)
             typer.echo(f"Error occurred: {e}")
             typer.echo(ERROR_MSG)
             raise typer.Abort()
         return
     else:
+        logger.error("Index key not found")
         typer.echo("Index key not found")
         raise typer.Abort()
 
diff --git a/deepsearch/cps/cli/elastic_data.py b/deepsearch/cps/cli/elastic_data.py
index c841c2a7..c00a6c34 100644
--- a/deepsearch/cps/cli/elastic_data.py
+++ b/deepsearch/cps/cli/elastic_data.py
@@ -1,3 +1,7 @@
+import logging
+
+logger = logging.getLogger("root.cps.elastic_data")
+
 import typer
 
 from deepsearch.core.cli.utils import cli_handler
@@ -15,6 +19,7 @@ def list(
     output: OutputEnum = OutputOption,
 ):
     api = CpsApi.from_env()
+    logger.info(f"Listing elastic data collections {domain=}")
     collections = api.elastic.list(domain=domain)
 
     results = [
diff --git a/deepsearch/cps/cli/kgs.py b/deepsearch/cps/cli/kgs.py
index 5d22cf4b..f7ce8cc9 100644
--- a/deepsearch/cps/cli/kgs.py
+++ b/deepsearch/cps/cli/kgs.py
@@ -1,3 +1,7 @@
+import logging
+
+logger = logging.getLogger("root.cps.kgs")
+
 from typing import Dict, List, Optional
 
 import typer
@@ -24,6 +28,7 @@ def list_knowledge_graphs(
     output: OutputEnum = OutputOption,
 ):
     api = CpsApi.from_env()
+    logger.info(f"Listing KGs in project {proj_key}")
     kgs = api.knowledge_graphs.list(project=proj_key)
     results = [{"key": kg.key, "name": kg.name} for kg in kgs]
     # TODO: augment with topology details
@@ -37,6 +42,7 @@ def list_flavours(
     output: OutputEnum = OutputOption,
 ):
     api = CpsApi.from_env()
+    logger.info(f"Listing KG flavours for project {proj_key}")
 
     flavours = api.knowledge_graphs.list_flavours(proj_key)
 
@@ -62,8 +68,12 @@ def save_snapshot_of_data_flow(
     ),
     snapshot_name: Optional[str] = typer.Option(None, "--snapshot-name"),
 ):
+    logger.info(
+        f"Saving snapshot from assembled data-set {proj_key=} {kg_key=} {snapshot_name=}"
+    )
 
     if len(flavour_names) == 0:
+        logger.error("At least one flavour is required (--flavour-name)")
         raise typer.BadParameter(
             "At least one flavour is required", param_hint="--flavour-name"
         )
@@ -78,11 +88,15 @@
         flavour = next((f for f in all_flavours if f.name == name), None)
 
         if flavour is None:
+            logger.error(f"Unknown flavour {name!r} (--flavour-name)")
             raise typer.BadParameter(
                 f"Unknown flavour {name!r}", param_hint="--flavour-name"
             )
 
         if flavour.backend in flavours:
+            logger.error(
+                f"A flavour for backend {flavour.backend!r} has already been set: {flavours[flavour.backend]!r} (--flavour-name)"
+            )
             raise typer.BadParameter(
                 f"A flavour for backend {flavour.backend!r} has already been set: {flavours[flavour.backend]!r}",
                 param_hint="--flavour-name",
@@ -93,6 +107,7 @@
 
     kg = api.knowledge_graphs.get(proj_key, kg_key)
     if kg is None:
+        logger.error(f"Unknown Knowledge Graph {kg_key!r} in project {proj_key!r} (-k)")
         raise typer.BadParameter(
             f"Unknown Knowledge Graph {kg_key!r} in project {proj_key!r}",
             param_hint="-k",
@@ -121,8 +136,10 @@ def download_knowledge_graph(
     output: OutputEnum = OutputOption,
 ):
     api = CpsApi.from_env()
+    logger.info(f"Downloading KG {kg_key!r} from project {proj_key!r}")
     kg = api.knowledge_graphs.get(project=proj_key, key=kg_key)
     if kg is None:
+        logger.error(f"Unknown Knowledge Graph {kg_key!r} in project {proj_key!r}")
         raise typer.BadParameter(
             f"Unknown Knowledge Graph {kg_key!r} in project {proj_key!r}"
         )
diff --git a/deepsearch/cps/cli/main.py b/deepsearch/cps/cli/main.py
index a7b4b0f3..d557f9bc 100644
--- a/deepsearch/cps/cli/main.py
+++ b/deepsearch/cps/cli/main.py
@@ -1,3 +1,7 @@
+import logging
+
+logger = logging.getLogger("root.cps")
+
 import typer
 
 from .data_indices_typer import app as data_indices_app
@@ -10,6 +14,7 @@
 app.add_typer(elastic_app, name="elastic-data", help="Manage Elastic data collections")
 app.add_typer(kgs_app, name="kgs", help="Manage CPS KGs")
 app.add_typer(data_indices_app, name="data-indices", help="Manage CPS data indices")
+logger.info("CPS module initialized")
 
 
 if __name__ == "__main__":
diff --git a/deepsearch/cps/cli/projects.py b/deepsearch/cps/cli/projects.py
index 7b0e6eb7..fdd81904 100644
--- a/deepsearch/cps/cli/projects.py
+++ b/deepsearch/cps/cli/projects.py
@@ -1,3 +1,7 @@
+import logging
+
+logger = logging.getLogger("root.cps.projects")
+
 import typer
 
 from deepsearch.core.cli.utils import cli_handler
@@ -14,6 +18,7 @@ def list(
     output: OutputEnum = OutputOption,
 ):
     api = CpsApi.from_env()
+    logger.info("Listing projects")
     projects = api.projects.list()
 
     results = [{"key": proj.key, "name": proj.name} for proj in projects]
@@ -27,6 +32,7 @@ def create(
     output: OutputEnum = OutputOption,
 ):
     api = CpsApi.from_env()
+    logger.info(f"Creating project {proj_name}")
     proj = api.projects.create(name=proj_name)
 
     results = [{"key": proj.key, "name": proj.name}]
@@ -41,6 +47,7 @@ def assign_user(
     role: RoleEnum = typer.Argument(RoleEnum.viewer),
 ):
     api = CpsApi.from_env()
+    logger.info(f"Assigning {username} to {proj_key=}")
     project = api.projects.get(key=proj_key)
     if project is not None:
         api.projects.assign_user(
@@ -49,6 +56,7 @@
             role=role,
         )
     else:
+        logger.error("Project not found")
         print("Project not found")
         raise typer.Exit(code=1)
 
@@ -59,10 +67,12 @@ def remove(
     proj_key: str,
 ):
     api = CpsApi.from_env()
+    logger.info(f"Removing project {proj_key}")
     project = api.projects.get(key=proj_key)
     if project is not None:
         api.projects.remove(project=project)
     else:
+        logger.error("Project not found")
         print("Project not found")
         raise typer.Exit(code=1)
diff --git a/deepsearch/model/examples/dummy_nlp_annotator/model.py b/deepsearch/model/examples/dummy_nlp_annotator/model.py
index abc3ff89..df5b26b6 100644
--- a/deepsearch/model/examples/dummy_nlp_annotator/model.py
+++ b/deepsearch/model/examples/dummy_nlp_annotator/model.py
@@ -1,3 +1,8 @@
+# Models can use the toolkit's loggers by requesting a child of the root logger, e.g. "root.model.<name>"
+import logging
+
+logger = logging.getLogger("root.model.DummyNLP")
+
 from typing import List, Optional
 
 from fastapi import HTTPException, status
@@ -17,6 +22,7 @@
 
 class DummyNLPAnnotator(BaseNLPModel):
     def __init__(self) -> None:
+        logger.info("Initializing DummyNLPAnnotator")
         super().__init__()
 
         self._config = NLPConfig(
diff --git a/deepsearch/model/kinds/nlp/controller.py b/deepsearch/model/kinds/nlp/controller.py
index c98de4b2..27f37bce 100644
--- a/deepsearch/model/kinds/nlp/controller.py
+++ b/deepsearch/model/kinds/nlp/controller.py
@@ -1,3 +1,7 @@
+import logging
+
+logger = logging.getLogger("root.model")
+
 from fastapi import HTTPException, status
 
 from deepsearch.model.base.controller import BaseController
@@ -25,6 +29,7 @@ class NLPController(BaseController):
     def __init__(self, model: BaseNLPModel):
         self._model = model
+        logger.info("NLPController initialized")
 
     def get_info(self) -> NLPInfoOutput:
         cfg = self._model.get_nlp_config()
@@ -44,12 +49,15 @@
         return NLPInfoOutput(definitions=definitions)
 
     def get_kind(self) -> str:
+        logger.info("NLPController returning kind")
         return Kind.NLPModel
 
     def _get_model(self) -> BaseDSModel:
+        logger.info("NLPController returning model")
         return self._model
 
     def dispatch_predict(self, spec: CtrlPredInput) -> CtrlPredOutput:
+        logger.info("NLPController dispatching predict")
         cfg = self._model.get_nlp_config()
         type_ok = True
 
@@ -59,6 +67,7 @@
             and isinstance(spec.findEntities, FindEntitiesText)
             and (type_ok := (spec.findEntities.objectType in cfg.supported_types))
         ):
+            logger.info("NLPModel annotating batched entities")
             entities = self._model.annotate_batched_entities(
                 object_type=spec.findEntities.objectType,
                 items=spec.findEntities.texts,
@@ -70,6 +79,7 @@
             and isinstance(spec.findRelationships, FindRelationshipsText)
             and (type_ok := (spec.findRelationships.objectType in cfg.supported_types))
         ):
+            logger.info("NLPModel annotating batched relationships")
             relationships = self._model.annotate_batched_relationships(
                 object_type=spec.findRelationships.objectType,
                 items=spec.findRelationships.texts,
@@ -86,6 +96,7 @@
                 entities = [{}] * len(spec.findProperties.texts)
             else:
                 entities = spec.findProperties.entities
+            logger.info("NLPModel annotating batched properties")
             properties = self._model.annotate_batched_properties(
                 object_type=spec.findProperties.objectType,
                 items=spec.findProperties.texts,
@@ -94,11 +105,13 @@
             )
             return NLPPropsCtrlPredOutput(properties=properties)
         elif not type_ok:
+            logger.error("Requested object type not supported by model")
             raise HTTPException(
                 status_code=status.HTTP_400_BAD_REQUEST,
                 detail="Requested object type not supported by model",
             )
         else:
+            logger.error("Unexpected spec type")
             raise HTTPException(
                 status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                 detail="Unexpected spec type",
type", diff --git a/deepsearch/model/kinds/qagen/controller.py b/deepsearch/model/kinds/qagen/controller.py index d0be4836..994b0cf4 100644 --- a/deepsearch/model/kinds/qagen/controller.py +++ b/deepsearch/model/kinds/qagen/controller.py @@ -1,5 +1,12 @@ +from __future__ import annotations + +import logging + from fastapi import HTTPException, status +logger = logging.getLogger("root.model") + + from deepsearch.model.base.controller import BaseController from deepsearch.model.base.model import BaseDSModel from deepsearch.model.base.types import Kind, ModelInfoOutputDefsSpec @@ -16,6 +23,7 @@ class QAGenController(BaseController): def __init__(self, model: BaseQAGenerator): self._model = model + logger.info("QAGenController Initialized") def get_info(self) -> QAGenInfoOutput: spec = ModelInfoOutputDefsSpec( @@ -30,21 +38,26 @@ def get_info(self) -> QAGenInfoOutput: return QAGenInfoOutput(definitions=definitions) def _get_model(self) -> BaseDSModel: + logger.info("QAGenController return model") return self._model def get_kind(self) -> str: + logger.info("QAGenController return kind") return Kind.QAGenModel def dispatch_predict(self, spec: CtrlPredInput) -> CtrlPredOutput: + logger.info("QAGenController Dispatching predict") if isinstance(spec, QAGenReqSpec): gen_answers = spec.generateAnswers answers = self._model.generate_answers( [(c, q) for c, q in zip(gen_answers.contexts, gen_answers.questions)] ) + logger.info("QAGenController returning predictions") return QAGenCtrlPredOutput( answers=answers, ) else: + logger.error("Unexpected spec type") raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="Unexpected spec type", diff --git a/deepsearch/model/server/model_app.py b/deepsearch/model/server/model_app.py index 3f8a69f9..db552c53 100644 --- a/deepsearch/model/server/model_app.py +++ b/deepsearch/model/server/model_app.py @@ -1,3 +1,12 @@ +import logging + +from deepsearch.core.cli.main import logger as dummy_import + +# TODO this does not work because this doesn't run from the regular typer path therefore +# the root logger does not get instantiated in deepsearch/cli.py +# Fixed by the dirty import to the typer app above +logger = logging.getLogger("root.model") + import asyncio import logging import os @@ -20,26 +29,36 @@ from deepsearch.model.server.controller_factory import ControllerFactory from deepsearch.model.server.inference_types import AppModelInfoOutput, AppPredInput -logger = logging.getLogger("cps-fastapi") - class ModelApp: def __init__(self, settings: Settings): + logger.info("Configuring fast API Instance") self._settings = settings + logger.info("Started fast API Instance") self.app = FastAPI() self._controllers: Dict[str, BaseController] = {} self._contr_factory = ControllerFactory() + logger.info(f"Instantiated Base controller successfully") @self.app.on_event("startup") async def startup_event(): + # TODO this is probably a bad ideia, do our own logging # do some initialization here + _loggers_to_overwrite = [ + logging.getLogger("uvicorn"), + logging.getLogger("uvicorn.access"), + ] + for _logger in _loggers_to_overwrite: + [_logger.addHandler(handler) for handler in logger.parent.handlers] + RunVar("_default_thread_limiter").set(CapacityLimiter(1)) @self.app.exception_handler(RequestValidationError) async def validation_exception_handler( request: Request, exc: RequestValidationError ): + logger.error(exc) return JSONResponse( content=jsonable_encoder({"errors": exc.errors()}), status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, @@ -79,6 
             )
             deadline_ts = float(deadline.timestamp())
             if deadline_ts < curr_time:
+                logger.error("Requested deadline lies in the past")
                 raise HTTPException(
                     status_code=status.HTTP_400_BAD_REQUEST,
                     detail="Requested deadline lies in the past",
@@ -86,6 +107,9 @@
 
             expected_completion_ts = curr_time + controller.get_model_exec_time()
             if deadline_ts < expected_completion_ts:
+                logger.error(
+                    "Expected completion time lies beyond requested deadline"
+                )
                 raise HTTPException(
                     status_code=status.HTTP_400_BAD_REQUEST,
                     detail="Expected completion time lies beyond requested deadline",
@@ -117,10 +141,12 @@
                     "X-Request-Reject-Time": str(time.time()),
                 }
                 if isinstance(e, asyncio.TimeoutError):
+                    logger.error("Request timed out, too many requests")
                     raise HTTPException(
                         status_code=status.HTTP_429_TOO_MANY_REQUESTS, headers=headers
                     )
                 else:
+                    logger.error(e.detail)
                     raise HTTPException(
                         status_code=e.status_code, detail=e.detail, headers=headers
                     )
@@ -157,6 +183,7 @@ def _auth(self, header_api_key: str = Security(APIKeyHeader(name="Authorization"
             .strip()
         )
         if request_api_key != self._settings.api_key.get_secret_value():
+            logger.error("Invalid API key")
             raise HTTPException(
                 status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid API key"
             )
@@ -164,6 +191,7 @@ def _get_controller(self, model_name: str) -> BaseController:
         controller = self._controllers.get(model_name)
         if controller is None:
+            logger.error(f"Invalid model '{model_name}'")
             raise HTTPException(
                 status_code=status.HTTP_404_NOT_FOUND,
                 detail=f"Invalid model '{model_name}'",
@@ -188,6 +216,7 @@ def register_model(
         self._validate_controller_kind(controller=contr, model=model)
         key = name or contr.get_model_name()
         self._controllers[key] = contr
+        logger.info(f"Registered {model=} under {name=} with {contr=}")
 
     def run(self, host: str = "127.0.0.1", port: int = 8000, **kwargs) -> None:
         uvicorn.run(self.app, host=host, port=port, **kwargs)
@@ -196,12 +225,14 @@ def _validate_controller_kind(
         self, controller: BaseController, model: BaseDSModel
     ) -> None:
         if controller.get_kind() != model.get_config().kind:
+            logger.error("Controller kind does not match model")
             raise RuntimeError("Controller kind does not match model")
 
     def _validate_request_kind(
         self, request: AppPredInput, controller: BaseController
     ) -> None:
         if request.kind != controller.get_kind():
+            logger.error("Request kind does not match controller")
             raise HTTPException(
                 status_code=status.HTTP_400_BAD_REQUEST,
                 detail="Request kind does not match controller",
diff --git a/deepsearch/query/cli/main.py b/deepsearch/query/cli/main.py
index 2ad020cb..1fdfe1ad 100644
--- a/deepsearch/query/cli/main.py
+++ b/deepsearch/query/cli/main.py
@@ -1,3 +1,7 @@
+import logging
+
+logger = logging.getLogger("root.query")
+
 import json
 import typing
 from pathlib import Path
@@ -46,6 +50,7 @@ def query_raw(
     output: OutputEnum = OutputOption,
 ):
     api = CpsApi.from_env()
+    logger.info(f"Launching a raw flow query {input_file=}")
     query_flow = json.loads(input_file.read_text())
     results = api.queries.run(query_flow)
 
@@ -67,8 +72,10 @@ def query_wf(
     output: OutputEnum = OutputOption,
 ):
     api = CpsApi.from_env()
+    logger.info(f"Launching CPS KG workflow query {input_file=}, {proj_key=} {kg_key=}")
     kg = api.knowledge_graphs.get(proj_key, kg_key)
     if kg is None:
+        logger.error(f"Kg with proj_key={proj_key} and kg_key={kg_key} not found.")
         raise typer.BadParameter(
             f"Kg with proj_key={proj_key} and kg_key={kg_key} not found."
         )
@@ -90,8 +97,12 @@ def query_fts(
     output: OutputEnum = OutputOption,
 ):
     api = CpsApi.from_env()
+    logger.info(
+        f"Launching a KG full text search {proj_key=} {kg_key=}, with {search_query=}"
+    )
     kg = api.knowledge_graphs.get(proj_key, kg_key)
     if kg is None:
+        logger.error(f"Kg with proj_key={proj_key} and kg_key={kg_key} not found.")
         raise typer.BadParameter(
             f"Kg with proj_key={proj_key} and kg_key={kg_key} not found."
         )
@@ -113,6 +124,9 @@ def query_data(
     output: OutputEnum = OutputOption,
 ):
     api = CpsApi.from_env()
+    logger.info(
+        f"Launching a DeepSearch data query {proj_key=} {instance=} {index=}, {search_query=}"
+    )
 
     coords: Resource
     if proj_key is not None and instance is None:
@@ -120,6 +134,7 @@
     elif instance is not None and proj_key is None:
         coords = ElasticDataCollectionSource(elastic_id=instance, index_key=index)
     else:
+        logger.error("Only one of proj-key+index or instance+index can be defined")
         raise typer.BadParameter(
             "Only of proj-key+index or instance+index can be defined"
         )
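
Note on the logging pattern this patch relies on: deepsearch/cli.py configures a single
ancestor logger named "root" (a logger *named* "root", distinct from Python's actual
root logger), and every patched module just requests a dotted child such as
"root.artifacts" or "root.cps.kgs"; records then propagate up to the ancestor's
handlers. A minimal, self-contained sketch of that behaviour follows, with a
StreamHandler standing in for the patch's FileHandler (the format string is copied
from setup_logger; the output line is only an example):

    import logging

    # One-time setup, mirroring setup_logger() in deepsearch/cli.py.
    ancestor = logging.getLogger("root")  # a logger named "root", not the real root logger
    ancestor.setLevel(logging.DEBUG)
    handler = logging.StreamHandler()
    handler.setFormatter(
        logging.Formatter(
            "%(asctime)s %(name)s — %(levelname)s — %(module)s:%(funcName)s:%(lineno)d — %(message)s"
        )
    )
    ancestor.addHandler(handler)

    # What each patched module does: request a dotted child, no handler wiring needed.
    logger = logging.getLogger("root.artifacts")
    logger.info("Listing artifacts in index")
    # Emits something like:
    # 2023-08-01 12:00:00,000 root.artifacts — INFO — sketch:<module>:18 — Listing artifacts in index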