diff --git a/ddtrace/llmobs/_evaluators/ragas/base.py b/ddtrace/llmobs/_evaluators/ragas/base.py index 2bdd5575d2..23aa4cd3ca 100644 --- a/ddtrace/llmobs/_evaluators/ragas/base.py +++ b/ddtrace/llmobs/_evaluators/ragas/base.py @@ -17,7 +17,7 @@ logger = get_logger(__name__) -class MiniRagas: +class RagasDependencies: """ A helper class to store instances of ragas classes and functions that may or may not exist in a user's environment. @@ -105,8 +105,8 @@ def __init__(self, llmobs_service): self.ragas_version = "unknown" telemetry_state = "ok" try: - self.mini_ragas = MiniRagas() - self.ragas_version = self.mini_ragas.ragas_version + self.ragas_dependencies = RagasDependencies() + self.ragas_version = self.ragas_dependencies.ragas_version except ImportError as e: telemetry_state = "fail_import_error" raise NotImplementedError("Failed to load dependencies for `{}` evaluator".format(self.LABEL)) from e @@ -127,7 +127,7 @@ def __init__(self, llmobs_service): tags=( ("evaluator_label", self.LABEL), ("state", telemetry_state), - ("ragas_version", self.ragas_version), + ("evaluator_version", self.ragas_version), ), ) if telemetry_state != "ok": @@ -135,7 +135,7 @@ def __init__(self, llmobs_service): level=TELEMETRY_LOG_LEVEL.ERROR, message="Failed to import Ragas dependencies", stack_trace=traceback.format_exc(), - tags={"ragas_version": self.ragas_version}, + tags={"evaluator_version": self.ragas_version}, ) def run_and_submit_evaluation(self, span_event: dict): @@ -149,7 +149,7 @@ def run_and_submit_evaluation(self, span_event: dict): tags=( ("evaluator_label", self.LABEL), ("state", score_result_or_failure if isinstance(score_result_or_failure, str) else "success"), - ("ragas_version", self.ragas_version), + ("evaluator_version", self.ragas_version), ), ) if isinstance(score_result_or_failure, float): @@ -162,7 +162,7 @@ def run_and_submit_evaluation(self, span_event: dict): ) def evaluate(self, span_event: dict) -> Tuple[Union[float, str], Optional[dict]]: - raise NotImplementedError("evaluate method must be implemented by individual ragas metrics") + raise NotImplementedError("evaluate method must be implemented by individual evaluators") def _extract_evaluation_inputs_from_span(self, span_event: dict) -> Optional[dict]: """ @@ -184,9 +184,7 @@ def _extract_evaluation_inputs_from_span(self, span_event: dict) -> Optional[dic prompt = meta_input.get("prompt") if prompt is None: - logger.debug( - "Failed to extract `prompt` from span for ragas evaluation", - ) + logger.debug("Failed to extract `prompt` from span for ragas evaluation") return None prompt_variables = prompt.get("variables") diff --git a/ddtrace/llmobs/_evaluators/ragas/faithfulness.py b/ddtrace/llmobs/_evaluators/ragas/faithfulness.py index 0ebd8eaa2e..98725b1f27 100644 --- a/ddtrace/llmobs/_evaluators/ragas/faithfulness.py +++ b/ddtrace/llmobs/_evaluators/ragas/faithfulness.py @@ -46,13 +46,13 @@ def __init__(self, llmobs_service): """ super().__init__(llmobs_service) self.ragas_faithfulness_instance = self._get_faithfulness_instance() - self.llm_output_parser_for_generated_statements = self.mini_ragas.RagasoutputParser( - pydantic_object=self.mini_ragas.StatementsAnswers + self.llm_output_parser_for_generated_statements = self.ragas_dependencies.RagasoutputParser( + pydantic_object=self.ragas_dependencies.StatementsAnswers ) - self.llm_output_parser_for_faithfulness_score = self.mini_ragas.RagasoutputParser( - pydantic_object=self.mini_ragas.StatementFaithfulnessAnswers + self.llm_output_parser_for_faithfulness_score = self.ragas_dependencies.RagasoutputParser( + pydantic_object=self.ragas_dependencies.StatementFaithfulnessAnswers ) - self.split_answer_into_sentences = self.mini_ragas.get_segmenter( + self.split_answer_into_sentences = self.ragas_dependencies.get_segmenter( language=self.ragas_faithfulness_instance.nli_statements_message.language, clean=False ) @@ -62,11 +62,11 @@ def _get_faithfulness_instance(self) -> Optional[object]: ragas evaluator is updated with the latest ragas faithfulness instance AND has an non-null llm """ - if self.mini_ragas.faithfulness is None: + if self.ragas_dependencies.faithfulness is None: return None - ragas_faithfulness_instance = self.mini_ragas.faithfulness + ragas_faithfulness_instance = self.ragas_dependencies.faithfulness if not ragas_faithfulness_instance.llm: - ragas_faithfulness_instance.llm = self.mini_ragas.llm_factory() + ragas_faithfulness_instance.llm = self.ragas_dependencies.llm_factory() return ragas_faithfulness_instance def evaluate(self, span_event: dict) -> Tuple[Union[float, str], Optional[dict]]: @@ -199,9 +199,9 @@ def _create_verdicts(self, context: str, statements: List[str]): return None # collapse multiple generations into a single faithfulness list - faithfulness_list = self.mini_ragas.ensembler.from_discrete(raw_faithfulness_list, "verdict") + faithfulness_list = self.ragas_dependencies.ensembler.from_discrete(raw_faithfulness_list, "verdict") try: - return self.mini_ragas.StatementFaithfulnessAnswers.parse_obj(faithfulness_list) + return self.ragas_dependencies.StatementFaithfulnessAnswers.parse_obj(faithfulness_list) except Exception as e: logger.debug("Failed to parse faithfulness_list", exc_info=e) return None