Skip to content

Commit

Permalink
chore: rename llm-based metrics
Browse files Browse the repository at this point in the history
  • Loading branch information
paopa committed Feb 21, 2025
1 parent b24d278 commit 72a794e
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 14 deletions.
8 changes: 4 additions & 4 deletions wren-ai-service/eval/metrics/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
from .context_relevancy import ContextualRelevancyMetric
from .faithfulness import FaithfulnessMetric
from .llm import (
-QuestionCoherenceJudge,
-ReasoningValidityJudge,
+QuestionToReasoningJudge,
+ReasoningToSqlJudge,
SqlSemanticsJudge,
)
from .spider.exact_match import ExactMatchAccuracy
Expand All @@ -22,7 +22,7 @@
"FaithfulnessMetric",
"ExactMatchAccuracy",
"ExecutionAccuracy",
-"QuestionCoherenceJudge",
-"ReasoningValidityJudge",
+"QuestionToReasoningJudge",
+"ReasoningToSqlJudge",
"SqlSemanticsJudge",
]
8 changes: 4 additions & 4 deletions wren-ai-service/eval/metrics/llm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def format(response: dict) -> EvalResult:
return EvalResult.model_validate_json(reply)


-class QuestionCoherenceJudge(BaseMetric):
+class QuestionToReasoningJudge(BaseMetric):
_system_prompt = """
You are an expert evaluator. Your task is to analyze the reasoning provided for a given question and determine if it makes sense.
Provide a score in the range 0.0~1.0 and a detailed explanation for your evaluation.
Expand Down Expand Up @@ -74,10 +74,10 @@ def is_successful(self):

@property
def __name__(self):
-return "QuestionCoherenceJudge"
+return "QuestionToReasoningJudge"


-class ReasoningValidityJudge(BaseMetric):
+class ReasoningToSqlJudge(BaseMetric):
_system_prompt = """
You are an expert evaluator. Your task is to analyze the reasoning provided for a given SQL query and determine if it makes sense.
Provide a score in the range 0.0~1.0 and a detailed explanation for your evaluation.
Expand Down Expand Up @@ -122,7 +122,7 @@ def is_successful(self):

@property
def __name__(self):
-return "ReasoningValidityJudge"
+return "ReasoningToSqlJudge"


class SqlSemanticsJudge(BaseMetric):
Expand Down
12 changes: 6 additions & 6 deletions wren-ai-service/eval/pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@
ExactMatchAccuracy,
ExecutionAccuracy,
FaithfulnessMetric,
-QuestionCoherenceJudge,
-ReasoningValidityJudge,
+QuestionToReasoningJudge,
+ReasoningToSqlJudge,
SqlSemanticsJudge,
)
from eval.utils import (
Expand Down Expand Up @@ -311,8 +311,8 @@ def metrics(
# this is for spider dataset, rn we temporarily disable it
ExactMatchAccuracy(),
ExecutionAccuracy(),
-QuestionCoherenceJudge(**component),
-ReasoningValidityJudge(**component),
+QuestionToReasoningJudge(**component),
+ReasoningToSqlJudge(**component),
SqlSemanticsJudge(**component),
],
"post_metrics": [],
Expand Down Expand Up @@ -433,8 +433,8 @@ def metrics(
# this is for spider dataset, rn we temporarily disable it
ExactMatchAccuracy(),
ExecutionAccuracy(),
-QuestionCoherenceJudge(**component),
-ReasoningValidityJudge(**component),
+QuestionToReasoningJudge(**component),
+ReasoningToSqlJudge(**component),
SqlSemanticsJudge(**component),
],
"post_metrics": [],
Expand Down

0 comments on commit 72a794e

Please sign in to comment.