
Commit 68f277c
Merge pull request #17 from confident-ai/main
Merge from main
Anindyadeep authored Nov 28, 2023
2 parents e58437a + bb2d41d
Showing 13 changed files with 156 additions and 942 deletions.
2 changes: 1 addition & 1 deletion deepeval/_version.py
@@ -1 +1 @@
-__version__: str = "0.20.23"
+__version__: str = "0.20.24"
2 changes: 1 addition & 1 deletion deepeval/evaluator.py
@@ -60,7 +60,7 @@ def execute_test(

        test_run_manager.get_test_run().add_llm_test_case(
            test_case=test_case,
-            metrics=[metric],
+            metric=metric,
            run_duration=run_duration,
            index=count,
        )
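This hunk sits inside execute_test: each test case is now recorded with the single metric being measured rather than a one-element list. A hedged reconstruction of the surrounding loop, for orientation only (everything outside the add_llm_test_case call, including the iteration and timing code, is an assumption, not part of the diff):

    import time

    for count, test_case in enumerate(test_cases):  # assumed iteration
        start = time.perf_counter()                 # assumed timing
        metric.measure(test_case)                   # assumed call site
        run_duration = time.perf_counter() - start
        test_run_manager.get_test_run().add_llm_test_case(
            test_case=test_case,
            metric=metric,  # was metrics=[metric] before this commit
            run_duration=run_duration,
            index=count,
        )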
1 change: 1 addition & 0 deletions deepeval/metrics/__init__.py
@@ -5,6 +5,7 @@
from .llm_eval_metric import LLMEvalMetric
from .ragas_metric import (
    RagasMetric,
+    ContextualPrecisionMetric,
    ContextualRelevancyMetric,
    FaithfulnessMetric,
    ContextRecallMetric,
65 changes: 55 additions & 10 deletions deepeval/metrics/ragas_metric.py
@@ -5,6 +5,56 @@
from typing import List


+class ContextualPrecisionMetric(BaseMetric):
+    """This metric checks the contextual precision using Ragas"""
+
+    def __init__(
+        self,
+        minimum_score: float = 0.3,
+    ):
+        self.minimum_score = minimum_score
+
+    def measure(self, test_case: LLMTestCase):
+        # sends to server
+        try:
+            from ragas import evaluate
+            from ragas.metrics import context_precision
+
+        except ModuleNotFoundError:
+            raise ModuleNotFoundError(
+                "Please install ragas to use this metric. `pip install ragas`."
+            )
+
+        try:
+            from datasets import Dataset
+        except ModuleNotFoundError:
+            raise ModuleNotFoundError("Please install dataset")
+
+        # Create a dataset from the test case
+        data = {
+            "contexts": [test_case.retrieval_context],
+            "question": [test_case.input],
+            "id": [[test_case.id]],
+        }
+        dataset = Dataset.from_dict(data)
+
+        # Evaluate the dataset using Ragas
+        scores = evaluate(dataset, metrics=[context_precision])
+
+        # Ragas only does dataset-level comparisons
+        context_precision_score = scores["context_precision"]
+        self.success = context_precision_score >= self.minimum_score
+        self.score = context_precision_score
+        return self.score
+
+    def is_successful(self):
+        return self.success
+
+    @property
+    def __name__(self):
+        return "Contextual Precision"

class ContextualRelevancyMetric(BaseMetric):
"""This metric checks the contextual relevancy using Ragas"""

@@ -32,10 +82,8 @@ def measure(self, test_case: LLMTestCase):

        # Create a dataset from the test case
        data = {
-            "ground_truths": [[test_case.expected_output]],
-            "contexts": [test_case.context],
+            "contexts": [test_case.retrieval_context],
            "question": [test_case.input],
-            "answer": [test_case.actual_output],
            "id": [[test_case.id]],
        }
        dataset = Dataset.from_dict(data)
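Each of the Ragas-backed metrics in this file follows the same pattern: wrap one test case into a single-row datasets.Dataset, hand it to ragas.evaluate, and read the dataset-level score back out. A minimal standalone sketch of that round trip, with illustrative values (assumes ragas and datasets are installed, plus whatever LLM credentials Ragas needs in order to score):

    from datasets import Dataset
    from ragas import evaluate
    from ragas.metrics import context_precision

    # One row per test case, mirroring the data dicts built in this file.
    data = {
        "contexts": [["Paris is the capital of France."]],
        "question": ["What is the capital of France?"],
        "id": [["example-1"]],
    }
    dataset = Dataset.from_dict(data)

    scores = evaluate(dataset, metrics=[context_precision])
    print(scores["context_precision"])  # dataset-level score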
@@ -83,8 +131,6 @@ def measure(self, test_case: LLMTestCase):
            raise ModuleNotFoundError("Please install dataset")

        data = {
-            "ground_truths": [[test_case.expected_output]],
-            "contexts": [test_case.context],
            "question": [test_case.input],
            "answer": [test_case.actual_output],
            "id": [[test_case.id]],
@@ -128,8 +174,7 @@ def measure(self, test_case: LLMTestCase):
            raise ModuleNotFoundError("Please install dataset")

        data = {
-            "ground_truths": [[test_case.expected_output]],
-            "contexts": [test_case.context],
+            "contexts": [test_case.retrieval_context],
            "question": [test_case.input],
            "answer": [test_case.actual_output],
            "id": [[test_case.id]],
@@ -175,10 +220,9 @@ def measure(self, test_case: LLMTestCase):
            raise ModuleNotFoundError("Please install dataset")

        data = {
-            "ground_truths": [[test_case.expected_output]],
-            "contexts": [test_case.context],
            "question": [test_case.input],
-            "answer": [test_case.actual_output],
+            "ground_truths": [[test_case.expected_output]],
+            "contexts": [test_case.retrieval_context],
            "id": [[test_case.id]],
        }
        dataset = Dataset.from_dict(data)
@@ -453,6 +497,7 @@ def measure(self, test_case: LLMTestCase):
        # Convert the LLMTestCase to a format compatible with Dataset
        scores = []
        metrics = [
+            ContextualPrecisionMetric(),
            ContextualRelevancyMetric(),
            ContextRecallMetric(),
            FaithfulnessMetric(),
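Together with the new export in deepeval/metrics/__init__.py, the metric is usable on its own. A hedged usage sketch (the constructor argument and the test-case fields are the ones visible in this diff; the exact import paths are assumptions about this version of deepeval, and the values are illustrative):

    from deepeval.metrics import ContextualPrecisionMetric
    from deepeval.test_case import LLMTestCase

    test_case = LLMTestCase(
        input="What is the capital of France?",
        actual_output="Paris is the capital of France.",
        retrieval_context=["Paris is the capital and largest city of France."],
    )

    metric = ContextualPrecisionMetric(minimum_score=0.5)
    score = metric.measure(test_case)  # builds a Dataset and calls Ragas
    print(score, metric.is_successful())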
(Diffs for the remaining 9 changed files were not loaded.)
