Merge pull request #15 from confident-ai/main
Merge from main.
Anindyadeep authored Nov 23, 2023
2 parents 579d5d9 + 1ca8043 commit f6c4bee
Showing 52 changed files with 1,390 additions and 1,617 deletions.
12 changes: 6 additions & 6 deletions README.md
@@ -9,7 +9,7 @@
</p>

<p align="center">
<a href="https://docs.confident-ai.com" target="_blank">
<a href="https://docs.confident-ai.com/docs/getting-started" target="_blank">
Read The Docs
</a>
&nbsp;&nbsp;&nbsp;·&nbsp;&nbsp;&nbsp;
@@ -67,7 +67,7 @@ Open `test_chatbot.py` and write your first test case using DeepEval:

```python
import pytest
-from deepeval.metrics.factual_consistency import FactualConsistencyMetric
+from deepeval.metrics import HallucinationMetric
from deepeval.test_case import LLMTestCase
from deepeval.evaluator import assert_test

@@ -77,9 +77,9 @@ def test_case():

# Replace this with the actual output from your LLM application
actual_output = "We offer a 30-day full refund at no extra costs."
-    factual_consistency_metric = FactualConsistencyMetric(minimum_score=0.7)
+    hallucination_metric = HallucinationMetric(minimum_score=0.7)
test_case = LLMTestCase(input=input, actual_output=actual_output, context=context)
-    assert_test(test_case, [factual_consistency_metric])
+    assert_test(test_case, [hallucination_metric])
```

Run `test_chatbot.py` in the CLI:
@@ -91,10 +91,10 @@ deepeval test run test_chatbot.py
**Your test should have passed ✅** Let's break down what happened.

- The variable `input` mimics user input, and `actual_output` is a placeholder for your chatbot's intended output based on this query.
-- The variable `context` contains the relevant information from your knowledge base, and `FactualConsistencyMetric(minimum_score=0.7)` is an out-of-the-box metric provided by DeepEval. It helps you evaluate the factual accuracy of your chatbot's output based on the provided context.
+- The variable `context` contains the relevant information from your knowledge base, and `HallucinationMetric(minimum_score=0.7)` is an out-of-the-box metric provided by DeepEval. It helps you evaluate the factual accuracy of your chatbot's output based on the provided context.
- The metric score ranges from 0 to 1. The `minimum_score=0.7` threshold ultimately determines whether your test has passed or not; a minimal sketch of this check follows below.
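The pass/fail rule in the last bullet reduces to a simple threshold comparison. A minimal sketch with a hypothetical score, illustrative only and not DeepEval internals:

```python
score = 0.82          # hypothetical score a metric might return, always in [0, 1]
minimum_score = 0.7   # the threshold passed to the metric above

# The test passes only when the metric score clears the threshold.
is_successful = score >= minimum_score
print(is_successful)  # True
```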

-[Read our documentation](https://docs.confident-ai.com) for more information on how to use additional metrics, create your own custom metrics, and tutorials on how to integrate with other tools like LangChain and LlamaIndex.
+[Read our documentation](https://docs.confident-ai.com/docs/getting-started) for more information on how to use additional metrics, create your own custom metrics, and tutorials on how to integrate with other tools like LangChain and LlamaIndex.

<br />

2 changes: 1 addition & 1 deletion deepeval/_version.py
@@ -1 +1 @@
__version__: str = "0.20.19"
__version__: str = "0.20.22"
7 changes: 6 additions & 1 deletion deepeval/api.py
@@ -6,6 +6,7 @@
from requests.adapters import HTTPAdapter, Response, Retry
from deepeval.constants import API_KEY_ENV
from deepeval.key_handler import KEY_FILE_HANDLER
+from enum import Enum

API_BASE_URL = "https://app.confident-ai.com/api"

@@ -16,6 +17,11 @@
HTTP_RETRY_ALLOWED_METHODS = frozenset({"GET", "POST", "DELETE"})


+class Endpoints(Enum):
+    CREATE_DATASET_ENDPOINT = "/v1/dataset"
+    CREATE_TEST_RUN_ENDPOINT = "/v1/test-run"


class Api:
"""Internal Api reference for handling http operations"""

@@ -141,7 +147,6 @@ def _api_request(
            proxies=self.proxies,
            cert=self.cert,
        )
-
        json = None
        if res.status_code == 200:
            try:
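The new `Endpoints` enum centralizes the API's route strings. A minimal sketch of how such an enum might be combined with `API_BASE_URL` when building a request URL; the call site here is hypothetical, not taken from this diff:

```python
from enum import Enum

API_BASE_URL = "https://app.confident-ai.com/api"


class Endpoints(Enum):
    CREATE_DATASET_ENDPOINT = "/v1/dataset"
    CREATE_TEST_RUN_ENDPOINT = "/v1/test-run"


# Hypothetical call site: enum members keep the "/v1/..." strings in one place,
# so request code never hard-codes endpoint paths.
url = API_BASE_URL + Endpoints.CREATE_DATASET_ENDPOINT.value
print(url)  # https://app.confident-ai.com/api/v1/dataset
```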
10 changes: 1 addition & 9 deletions deepeval/chat_completion/retry.py
@@ -1,23 +1,15 @@
from typing import Callable, Any
-import openai
import time
-import os
-import sys


def call_openai_with_retry(
    callable: Callable[[], Any], max_retries: int = 2
) -> Any:
-    if not openai.api_key:
-        raise ValueError(
-            "OpenAI API key is not set. Please ensure it's set in your environment variables or passed explicitly."
-        )
-
    for _ in range(max_retries):
        try:
            response = callable()
            return response
-        except openai.error.OpenAIError as e:
+        except Exception as e:
            print(f"An error occurred: {e}. Retrying...")
            time.sleep(2)
            continue
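After this change the helper retries on any exception rather than only `openai.error.OpenAIError`. A minimal usage sketch, assuming the pre-1.0 `openai` SDK that this module targets; the prompt and model name are illustrative:

```python
import openai  # pre-1.0 SDK, matching the openai.error usage removed above

from deepeval.chat_completion.retry import call_openai_with_retry

# Wrap the API call in a zero-argument callable; the helper retries it
# (up to max_retries times, sleeping 2 seconds between attempts) on any exception.
response = call_openai_with_retry(
    lambda: openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "What if these shoes don't fit?"}],
    ),
    max_retries=2,
)
```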
200 changes: 0 additions & 200 deletions deepeval/cli/examples.py

This file was deleted.

5 changes: 2 additions & 3 deletions deepeval/cli/main.py
@@ -40,11 +40,10 @@ def login(
print("API Key cannot be empty. Please try again.\n")
KEY_FILE_HANDLER.write_api_key(api_key)
client = Api(api_key=api_key)
print("Success! :raising_hands:")
print("Congratulations! Login successful :raising_hands: ")
print(
"If you are new to DeepEval, try generate a sample test: [bold]deepeval test generate --output-file test_sample.py[/bold]"
"If you are new to DeepEval, follow our quickstart tutorial here: [bold][link=https://docs.confident-ai.com/docs/getting-started]https://docs.confident-ai.com/docs/getting-started[/link][/bold]"
)
print("Run a sample test: [bold]deepeval test run test_sample.py[/bold]")


if __name__ == "__main__":
14 changes: 0 additions & 14 deletions deepeval/cli/test.py
@@ -81,17 +81,3 @@ def run(

    test_run_manager.wrap_up_test_run()
    return retcode
-
-
-@app.command()
-def generate(output_file: str = "test_sample.py"):
-    with open(
-        os.path.join(os.path.dirname(__file__), "../test_quickstart.py"),
-        "r",
-    ) as f_in:
-        with open(output_file, "w") as f_out:
-            f_out.write(f_in.read())
-    print(f"✨ Done! Now run: [bold]deepeval test run {output_file}[/bold]")
-    print(
-        "You can generate more tests in the future in our documentation at https://docs.confident-ai.com/docs"
-    )
1 change: 1 addition & 0 deletions deepeval/dataset/__init__.py
@@ -0,0 +1 @@
+from .dataset import EvaluationDataset
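This one-line re-export lifts the class to the package root. A quick illustration of the import paths it enables:

```python
# Enabled by the re-export above:
from deepeval.dataset import EvaluationDataset

# Still works, importing from the defining submodule directly:
from deepeval.dataset.dataset import EvaluationDataset
```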
18 changes: 18 additions & 0 deletions deepeval/dataset/api.py
@@ -0,0 +1,18 @@
+from pydantic import BaseModel, Field
+from typing import Optional, List
+
+
+class Golden(BaseModel):
+    input: str
+    actual_output: Optional[str] = Field(None, alias="actualOutput")
+    expected_output: Optional[str] = Field(None, alias="expectedOutput")
+    context: Optional[list] = Field(None)
+
+
+class APIDataset(BaseModel):
+    alias: str
+    goldens: Optional[List[Golden]] = Field(default=None)
+
+
+class CreateDatasetHttpResponse(BaseModel):
+    link: str
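The `Golden` fields carry camelCase aliases so payloads can be serialized in the shape the API expects. A minimal sketch of building a dataset payload, assuming pydantic v1 (which matches the `Field(alias=...)` style above); the sample strings are illustrative:

```python
# With pydantic v1 aliases (and no allow_population_by_field_name config),
# aliased fields are populated by their alias, e.g. expectedOutput.
golden = Golden(
    input="What if these shoes don't fit?",
    expectedOutput="You're eligible for a 30-day full refund at no extra costs.",
    context=["All customers are eligible for a 30 day full refund at no extra costs."],
)
dataset = APIDataset(alias="refund-faq", goldens=[golden])

# by_alias=True keeps the camelCase keys in the serialized payload.
print(dataset.json(by_alias=True, exclude_none=True))
```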