Skip to content

Commit

Permalink
fix: template parameter format
Browse files (browse the repository at this point in the history)
  • Loading branch information
paopa committed Feb 21, 2025
1 parent 37f606a commit b24d278
Showing 1 changed file with 13 additions and 10 deletions.
23 changes: 13 additions & 10 deletions wren-ai-service/eval/metrics/llm/__init__.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -31,14 +31,15 @@ def format(response: dict) -> EvalResult:

class QuestionCoherenceJudge(BaseMetric):
  _system_prompt = """
- You are an expert evaluator. Your task is to analyze the reasoning provided for a given question and determine if it makes sense. Provide a score and a detailed explanation for your evaluation.
+ You are an expert evaluator. Your task is to analyze the reasoning provided for a given question and determine if it makes sense.
+ Provide a score in the range 0.0~1.0 and a detailed explanation for your evaluation.
  """
  _test_case_prompt = """
  Question:
- {question}
+ {{ question }}
  Reasoning:
- {reasoning}
+ {{ reasoning }}
  """

def __init__(self, llm_provider: LLMProvider, **_):
Expand Down Expand Up @@ -78,14 +79,15 @@ def __name__(self):

class ReasoningValidityJudge(BaseMetric):
  _system_prompt = """
- You are an expert evaluator. Your task is to analyze the reasoning provided for a given SQL query and determine if it makes sense. Provide a score and a detailed explanation for your evaluation.
+ You are an expert evaluator. Your task is to analyze the reasoning provided for a given SQL query and determine if it makes sense.
+ Provide a score in the range 0.0~1.0 and a detailed explanation for your evaluation.
  """
  _test_case_prompt = """
  Actual Output:
- {actual_output}
+ {{ actual_output }}
  Reasoning:
- {reasoning}
+ {{ reasoning }}
  """

def __init__(self, llm_provider: LLMProvider, **_):
Expand Down Expand Up @@ -125,14 +127,15 @@ def __name__(self):

class SqlSemanticsJudge(BaseMetric):
  _system_prompt = """
- You are an expert evaluator. Your task is to analyze the actual SQL query and the expected SQL query and determine if they are semantically equivalent. Provide a score and a detailed explanation for your evaluation.
+ You are an expert evaluator. Your task is to analyze the actual SQL query and the expected SQL query and determine if they are semantically equivalent.
+ Provide a score in the range 0.0~1.0 and a detailed explanation for your evaluation.
  """
  _test_case_prompt = """
  Actual SQL:
- {actual_sql}
+ {{ actual_sql }}
  Expected SQL:
- {expected_sql}
+ {{ expected_sql }}
  """

def __init__(self, llm_provider: LLMProvider, **_):
Expand Down

0 comments on commit b24d278

Please sign in to comment.