From 90add9aa037b7939e1756711ce2a8c86fee6c34b Mon Sep 17 00:00:00 2001 From: Paul Yang Date: Fri, 19 Jul 2024 11:27:03 -0700 Subject: [PATCH] /* PR_START p--query-resolution-perf 13 */ Add logging framework for convenient / efficient logging of objects. --- .../mf_logging/logger_configuration.py | 65 +++++++++++ .../mf_logging/mf_logger.py | 103 ++++++++++++++++++ .../mf_logging/pretty_printing_logger.py | 42 +++++++ 3 files changed, 210 insertions(+) create mode 100644 metricflow-semantics/metricflow_semantics/mf_logging/logger_configuration.py create mode 100644 metricflow-semantics/metricflow_semantics/mf_logging/mf_logger.py create mode 100644 metricflow-semantics/metricflow_semantics/mf_logging/pretty_printing_logger.py diff --git a/metricflow-semantics/metricflow_semantics/mf_logging/logger_configuration.py b/metricflow-semantics/metricflow_semantics/mf_logging/logger_configuration.py new file mode 100644 index 0000000000..0257ca7bd2 --- /dev/null +++ b/metricflow-semantics/metricflow_semantics/mf_logging/logger_configuration.py @@ -0,0 +1,65 @@ +from __future__ import annotations + +import inspect +import threading + +from metricflow_semantics.mf_logging.mf_logger import MetricFlowLogger, MetricFlowLoggerFactory +from metricflow_semantics.mf_logging.pretty_printing_logger import PrettyPrintingLoggerFactory + + +class MetricFlowLoggerConfiguration: + """Global object used to configure logger object creation in MF. + + The factory can be configured to use other implementations to realize different behaviors like passing JSON to the + `extra` argument. By default, it pretty-formats the objects and appends them to the message. + """ + + _logger: MetricFlowLoggerFactory = PrettyPrintingLoggerFactory() + _state_lock = threading.Lock() + + @classmethod + def set_logger_factory(cls, logger_factory: MetricFlowLoggerFactory) -> None: + """Set the factory to be used for creating logger objects in the `get_logger` method.""" + with cls._state_lock: + cls._logger = logger_factory + + @classmethod + def get_logger(cls, module_name: str) -> MetricFlowLogger: + """Get the logger using the module where the call was made. + + This is a replacement for the standard `logging.getLogger` pattern. + + e.g. + import logging + logger = logging.getLogger(__name__) + + -> + + import ... + logger = MetricFlowLoggerConfiguration.get_module_logger() + """ + with cls._state_lock: + return cls._logger.get_logger(module_name) + + +def mf_get_logger() -> MetricFlowLogger: + """Get the logger using the module where the call was made. + + This is a replacement for the standard `logging.getLogger` pattern. + + e.g. + import logging + logger = logging.getLogger(__name__) + + -> + + import ... + logger = mf_get_logger() + """ + caller_frame = inspect.stack()[1] + caller_module = inspect.getmodule(caller_frame[0]) + + if caller_module is None: + raise RuntimeError(f"Unable to get module for {caller_frame[0]=}") + + return MetricFlowLoggerConfiguration.get_logger(caller_module.__name__) diff --git a/metricflow-semantics/metricflow_semantics/mf_logging/mf_logger.py b/metricflow-semantics/metricflow_semantics/mf_logging/mf_logger.py new file mode 100644 index 0000000000..e9d78135f8 --- /dev/null +++ b/metricflow-semantics/metricflow_semantics/mf_logging/mf_logger.py @@ -0,0 +1,103 @@ +from __future__ import annotations + +import logging +from abc import ABC, abstractmethod + + +class MetricFlowLogger(ABC): + """Logger backed by the Python standard-library logger to conveniently and efficiently log objects. + + In MF, there is a need to log complex objects like DAGs and paths for debugging. Previously, the pattern for logging + those objects used a pretty-formatting method like: + + logger = logging.getLogger(__name__) + ... + logger.debug(mf_pformat_many("Found matching path.", {"matching_path": matching_path, "source_nodes": source_nodes})) + + -> + + Found matching path. + matching_path: [1, 2, 3] + source_nodes: ["a", "b", "c"] + + However, there were a few issues: + + * The `mf_pformat_many()` call was evaluated regardless of whether the given logging level was enabled. The + formatting of complex objects was found to be significant in performance profiling. + * Since the message is the only object passed to the logger, it was not reasonable to convert the objects to a + different form (e.g. JSON) for different backends. + * Logging calls were verbose, and string keys like "matching_path" were not amenable to auto-complete in the IDE. + + The class is used in a new approach that uses a custom logging class that takes keyword arguments that are only + converted to strings when the corresponding logging level is enabled. The logger is created via a configurable + factory that can be used to implement different ways of logging those objects (e.g. JSON). + + The new pattern addresses the above issues and looks like: + + logger = mf_get_logger() + ... + logger.debug("Found matching path.", matching_path=matching_path, source_nodes=source_nodes) + + A signature for the logging calls that allows non-named arguments and uses the variable names as keys in the output + was considered for reduced verbosity: + + logger.debug("Found matching path.", matching_path, source_nodes) + + This approach was prototyped using the `varname` library to resolve the variable name, but it was found to be too + slow (~0.7s for 1000 calls). + """ + + @abstractmethod + def log_implementation(self, level_int: int, message: str, stack_level: int, **kwargs) -> None: # type: ignore[no-untyped-def] + """Appropriately log the message and the appropriate representation of `kwargs`. + + An example of the representation of `kwargs` might be the string form of the objects on separate lines. + The `stack_level` argument should be passed to the Python-standard-library log-call so that the correct file + and line number are used for the source of the log line, not this library. + """ + raise NotImplementedError + + def _safely_log(self, level_int: int, message: str, **kwargs) -> None: # type: ignore[no-untyped-def] + """Calls `.log_implementation` with a try-except so that bugs do not cause a fatal exit.""" + if not self.standard_library_logger.isEnabledFor(level_int): + return + try: + self.log_implementation(level_int, message, stack_level=4, **kwargs) + except Exception: + logging.exception( + f"Got an exception while using logger instance {self}. " + f"Since there might be an issue with the logger instance, this message is logged using the root logger." + ) + + def debug(self, message: str, **kwargs) -> None: # type: ignore[no-untyped-def] # noqa: D102 + self._safely_log(logging.DEBUG, message, **kwargs) + + def info(self, message: str, **kwargs) -> None: # type: ignore[no-untyped-def] # noqa: D102 + self._safely_log(logging.INFO, message, **kwargs) + + def warning(self, message: str, **kwargs) -> None: # type: ignore[no-untyped-def] # noqa: D102 + self._safely_log(logging.WARNING, message, **kwargs) + + def error(self, message: str, **kwargs) -> None: # type: ignore[no-untyped-def] # noqa: D102 + self._safely_log(logging.ERROR, message, **kwargs) + + def critical(self, message: str, **kwargs) -> None: # type: ignore[no-untyped-def] # noqa: D102 + self._safely_log(logging.CRITICAL, message, **kwargs) + + def fatal(self, message: str, **kwargs) -> None: # type: ignore[no-untyped-def] # noqa: D102 + self._safely_log(logging.FATAL, message, **kwargs) + + @property + @abstractmethod + def standard_library_logger(self) -> logging.Logger: + """Return the Python-standard-library logger that this uses.""" + raise NotImplementedError + + +class MetricFlowLoggerFactory(ABC): + """Interface for creating logger instances.""" + + @abstractmethod + def get_logger(self, module_name: str) -> MetricFlowLogger: + """Create a logger for the given module. Similar to the standard `logging.getLogger(__name__)`.""" + raise NotImplementedError diff --git a/metricflow-semantics/metricflow_semantics/mf_logging/pretty_printing_logger.py b/metricflow-semantics/metricflow_semantics/mf_logging/pretty_printing_logger.py new file mode 100644 index 0000000000..e60ff3cb35 --- /dev/null +++ b/metricflow-semantics/metricflow_semantics/mf_logging/pretty_printing_logger.py @@ -0,0 +1,42 @@ +from __future__ import annotations + +import logging + +from typing_extensions import override + +from metricflow_semantics.mf_logging.mf_logger import MetricFlowLogger, MetricFlowLoggerFactory +from metricflow_semantics.mf_logging.pretty_print import mf_pformat_many + + +class PrettyPrintingLogger(MetricFlowLogger): + """Logger that pretty-prints the objects passed as keyword arguments. + + e.g. + logger.debug("Found matching path.", matching_path=matching_path, source_nodes=source_nodes) + + -> + + Found matching path. + matching_path: [1, 2, 3] + source_nodes: ["a", "b", "c"] + """ + + def __init__(self, python_logger: logging.Logger) -> None: # noqa: D107 + self._standard_library_logger = python_logger + + @override + def log_implementation(self, level_int: int, message: str, stack_level: int, **kwargs) -> None: # type: ignore[no-untyped-def] + self._standard_library_logger.log(level_int, mf_pformat_many(message, kwargs), stacklevel=stack_level) + + @property + @override + def standard_library_logger(self) -> logging.Logger: + return self._standard_library_logger + + +class PrettyPrintingLoggerFactory(MetricFlowLoggerFactory): + """Factory for creating `PrettyPrintingLogger`.""" + + @override + def get_logger(self, module_name: str) -> MetricFlowLogger: + return PrettyPrintingLogger(logging.getLogger(module_name))