Skip to content

Commit

Permalink
chore: rename llm-based metrics
Browse files Browse the repository at this point in the history
  • Loading branch information
paopa committed Feb 21, 2025
1 parent b24d278 commit 72a794e
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 14 deletions.
8 changes: 4 additions & 4 deletions wren-ai-service/eval/metrics/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
from .context_relevancy import ContextualRelevancyMetric
from .faithfulness import FaithfulnessMetric
from .llm import (
-QuestionCoherenceJudge,
-ReasoningValidityJudge,
+QuestionToReasoningJudge,
+ReasoningToSqlJudge,
SqlSemanticsJudge,
)
from .spider.exact_match import ExactMatchAccuracy
Expand All @@ -22,7 +22,7 @@
"FaithfulnessMetric",
"ExactMatchAccuracy",
"ExecutionAccuracy",
-"QuestionCoherenceJudge",
-"ReasoningValidityJudge",
+"QuestionToReasoningJudge",
+"ReasoningToSqlJudge",
"SqlSemanticsJudge",
]
8 changes: 4 additions & 4 deletions wren-ai-service/eval/metrics/llm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def format(response: dict) -> EvalResult:
return EvalResult.model_validate_json(reply)


-class QuestionCoherenceJudge(BaseMetric):
+class QuestionToReasoningJudge(BaseMetric):
_system_prompt = """
You are an expert evaluator. Your task is to analyze the reasoning provided for a given question and determine if it makes sense.
Provide a score in the range 0.0~1.0 and a detailed explanation for your evaluation.
Expand Down Expand Up @@ -74,10 +74,10 @@ def is_successful(self):

@property
def __name__(self):
-return "QuestionCoherenceJudge"
+return "QuestionToReasoningJudge"


-class ReasoningValidityJudge(BaseMetric):
+class ReasoningToSqlJudge(BaseMetric):
_system_prompt = """
You are an expert evaluator. Your task is to analyze the reasoning provided for a given SQL query and determine if it makes sense.
Provide a score in the range 0.0~1.0 and a detailed explanation for your evaluation.
Expand Down Expand Up @@ -122,7 +122,7 @@ def is_successful(self):

@property
def __name__(self):
-return "ReasoningValidityJudge"
+return "ReasoningToSqlJudge"


class SqlSemanticsJudge(BaseMetric):
Expand Down
12 changes: 6 additions & 6 deletions wren-ai-service/eval/pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@
ExactMatchAccuracy,
ExecutionAccuracy,
FaithfulnessMetric,
-QuestionCoherenceJudge,
-ReasoningValidityJudge,
+QuestionToReasoningJudge,
+ReasoningToSqlJudge,
SqlSemanticsJudge,
)
from eval.utils import (
Expand Down Expand Up @@ -311,8 +311,8 @@ def metrics(
# this is for spider dataset, rn we temporarily disable it
ExactMatchAccuracy(),
ExecutionAccuracy(),
-QuestionCoherenceJudge(**component),
-ReasoningValidityJudge(**component),
+QuestionToReasoningJudge(**component),
+ReasoningToSqlJudge(**component),
SqlSemanticsJudge(**component),
],
"post_metrics": [],
Expand Down Expand Up @@ -433,8 +433,8 @@ def metrics(
# this is for spider dataset, rn we temporarily disable it
ExactMatchAccuracy(),
ExecutionAccuracy(),
-QuestionCoherenceJudge(**component),
-ReasoningValidityJudge(**component),
+QuestionToReasoningJudge(**component),
+ReasoningToSqlJudge(**component),
SqlSemanticsJudge(**component),
],
"post_metrics": [],
Expand Down

0 comments on commit 72a794e

Please sign in to comment.