Merge pull request #15 from confident-ai/main
Merge from main.
Anindyadeep authored Nov 23, 2023
2 parents 579d5d9 + 1ca8043 commit f6c4bee
Showing 52 changed files with 1,390 additions and 1,617 deletions.
12 changes: 6 additions & 6 deletions README.md
@@ -9,7 +9,7 @@
</p>

<p align="center">
<a href="https://docs.confident-ai.com" target="_blank">
<a href="https://docs.confident-ai.com/docs/getting-started" target="_blank">
Read The Docs
</a>
&nbsp;&nbsp;&nbsp;·&nbsp;&nbsp;&nbsp;
@@ -67,7 +67,7 @@ Open `test_chatbot.py` and write your first test case using DeepEval:

```python
import pytest
-from deepeval.metrics.factual_consistency import FactualConsistencyMetric
+from deepeval.metrics import HallucinationMetric
from deepeval.test_case import LLMTestCase
from deepeval.evaluator import assert_test

@@ -77,9 +77,9 @@ def test_case():

# Replace this with the actual output from your LLM application
actual_output = "We offer a 30-day full refund at no extra costs."
-    factual_consistency_metric = FactualConsistencyMetric(minimum_score=0.7)
+    hallucination_metric = HallucinationMetric(minimum_score=0.7)
test_case = LLMTestCase(input=input, actual_output=actual_output, context=context)
-    assert_test(test_case, [factual_consistency_metric])
+    assert_test(test_case, [hallucination_metric])
```

Run `test_chatbot.py` in the CLI:
@@ -91,10 +91,10 @@ deepeval test run test_chatbot.py
**Your test should have passed ✅** Let's break down what happened.

- The variable `input` mimics user input, and `actual_output` is a placeholder for your chatbot's intended output based on this query.
-- The variable `context` contains the relevant information from your knowledge base, and `FactualConsistencyMetric(minimum_score=0.7)` is an out-of-the-box metric provided by DeepEval. It helps you evaluate the factual accuracy of your chatbot's output based on the provided context.
+- The variable `context` contains the relevant information from your knowledge base, and `HallucinationMetric(minimum_score=0.7)` is an out-of-the-box metric provided by DeepEval. It helps you evaluate the factual accuracy of your chatbot's output based on the provided context.
- The metric score ranges from 0 to 1. The `minimum_score=0.7` threshold ultimately determines whether your test has passed or not; a minimal sketch of this check follows below.
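The pass/fail rule in the last bullet reduces to a simple threshold comparison. A minimal sketch with a hypothetical score, illustrative only and not DeepEval internals:

```python
score = 0.82          # hypothetical score a metric might return, always in [0, 1]
minimum_score = 0.7   # the threshold passed to the metric above

# The test passes only when the metric score clears the threshold.
is_successful = score >= minimum_score
print(is_successful)  # True
```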

-[Read our documentation](https://docs.confident-ai.com) for more information on how to use additional metrics, create your own custom metrics, and tutorials on how to integrate with other tools like LangChain and LlamaIndex.
+[Read our documentation](https://docs.confident-ai.com/docs/getting-started) for more information on how to use additional metrics, create your own custom metrics, and tutorials on how to integrate with other tools like LangChain and LlamaIndex.

<br />

2 changes: 1 addition & 1 deletion deepeval/_version.py
@@ -1 +1 @@
__version__: str = "0.20.19"
__version__: str = "0.20.22"
7 changes: 6 additions & 1 deletion deepeval/api.py
@@ -6,6 +6,7 @@
from requests.adapters import HTTPAdapter, Response, Retry
from deepeval.constants import API_KEY_ENV
from deepeval.key_handler import KEY_FILE_HANDLER
+from enum import Enum

API_BASE_URL = "https://app.confident-ai.com/api"

@@ -16,6 +17,11 @@
HTTP_RETRY_ALLOWED_METHODS = frozenset({"GET", "POST", "DELETE"})


+class Endpoints(Enum):
+    CREATE_DATASET_ENDPOINT = "/v1/dataset"
+    CREATE_TEST_RUN_ENDPOINT = "/v1/test-run"


class Api:
"""Internal Api reference for handling http operations"""

@@ -141,7 +147,6 @@ def _api_request(
            proxies=self.proxies,
            cert=self.cert,
        )
-
        json = None
        if res.status_code == 200:
            try:
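The new `Endpoints` enum centralizes the API's route strings. A minimal sketch of how such an enum might be combined with `API_BASE_URL` when building a request URL; the call site here is hypothetical, not taken from this diff:

```python
from enum import Enum

API_BASE_URL = "https://app.confident-ai.com/api"


class Endpoints(Enum):
    CREATE_DATASET_ENDPOINT = "/v1/dataset"
    CREATE_TEST_RUN_ENDPOINT = "/v1/test-run"


# Hypothetical call site: enum members keep the "/v1/..." strings in one place,
# so request code never hard-codes endpoint paths.
url = API_BASE_URL + Endpoints.CREATE_DATASET_ENDPOINT.value
print(url)  # https://app.confident-ai.com/api/v1/dataset
```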
10 changes: 1 addition & 9 deletions deepeval/chat_completion/retry.py
@@ -1,23 +1,15 @@
from typing import Callable, Any
-import openai
import time
-import os
-import sys


def call_openai_with_retry(
    callable: Callable[[], Any], max_retries: int = 2
) -> Any:
-    if not openai.api_key:
-        raise ValueError(
-            "OpenAI API key is not set. Please ensure it's set in your environment variables or passed explicitly."
-        )
-
    for _ in range(max_retries):
        try:
            response = callable()
            return response
-        except openai.error.OpenAIError as e:
+        except Exception as e:
            print(f"An error occurred: {e}. Retrying...")
            time.sleep(2)
            continue
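After this change the helper retries on any exception rather than only `openai.error.OpenAIError`. A minimal usage sketch, assuming the pre-1.0 `openai` SDK that this module targets; the prompt and model name are illustrative:

```python
import openai  # pre-1.0 SDK, matching the openai.error usage removed above

from deepeval.chat_completion.retry import call_openai_with_retry

# Wrap the API call in a zero-argument callable; the helper retries it
# (up to max_retries times, sleeping 2 seconds between attempts) on any exception.
response = call_openai_with_retry(
    lambda: openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "What if these shoes don't fit?"}],
    ),
    max_retries=2,
)
```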
200 changes: 0 additions & 200 deletions deepeval/cli/examples.py

This file was deleted.

5 changes: 2 additions & 3 deletions deepeval/cli/main.py
@@ -40,11 +40,10 @@ def login(
print("API Key cannot be empty. Please try again.\n")
KEY_FILE_HANDLER.write_api_key(api_key)
client = Api(api_key=api_key)
print("Success! :raising_hands:")
print("Congratulations! Login successful :raising_hands: ")
print(
"If you are new to DeepEval, try generate a sample test: [bold]deepeval test generate --output-file test_sample.py[/bold]"
"If you are new to DeepEval, follow our quickstart tutorial here: [bold][link=https://docs.confident-ai.com/docs/getting-started]https://docs.confident-ai.com/docs/getting-started[/link][/bold]"
)
print("Run a sample test: [bold]deepeval test run test_sample.py[/bold]")


if __name__ == "__main__":
14 changes: 0 additions & 14 deletions deepeval/cli/test.py
@@ -81,17 +81,3 @@ def run(

    test_run_manager.wrap_up_test_run()
    return retcode
-
-
-@app.command()
-def generate(output_file: str = "test_sample.py"):
-    with open(
-        os.path.join(os.path.dirname(__file__), "../test_quickstart.py"),
-        "r",
-    ) as f_in:
-        with open(output_file, "w") as f_out:
-            f_out.write(f_in.read())
-    print(f"✨ Done! Now run: [bold]deepeval test run {output_file}[/bold]")
-    print(
-        "You can generate more tests in the future in our documentation at https://docs.confident-ai.com/docs"
-    )
1 change: 1 addition & 0 deletions deepeval/dataset/__init__.py
@@ -0,0 +1 @@
+from .dataset import EvaluationDataset
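This one-line re-export lifts the class to the package root. A quick illustration of the import paths it enables:

```python
# Enabled by the re-export above:
from deepeval.dataset import EvaluationDataset

# Still works, importing from the defining submodule directly:
from deepeval.dataset.dataset import EvaluationDataset
```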
18 changes: 18 additions & 0 deletions deepeval/dataset/api.py
@@ -0,0 +1,18 @@
+from pydantic import BaseModel, Field
+from typing import Optional, List
+
+
+class Golden(BaseModel):
+    input: str
+    actual_output: Optional[str] = Field(None, alias="actualOutput")
+    expected_output: Optional[str] = Field(None, alias="expectedOutput")
+    context: Optional[list] = Field(None)
+
+
+class APIDataset(BaseModel):
+    alias: str
+    goldens: Optional[List[Golden]] = Field(default=None)
+
+
+class CreateDatasetHttpResponse(BaseModel):
+    link: str
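The `Golden` fields carry camelCase aliases so payloads can be serialized in the shape the API expects. A minimal sketch of building a dataset payload, assuming pydantic v1 (which matches the `Field(alias=...)` style above); the sample strings are illustrative:

```python
# With pydantic v1 aliases (and no allow_population_by_field_name config),
# aliased fields are populated by their alias, e.g. expectedOutput.
golden = Golden(
    input="What if these shoes don't fit?",
    expectedOutput="You're eligible for a 30-day full refund at no extra costs.",
    context=["All customers are eligible for a 30 day full refund at no extra costs."],
)
dataset = APIDataset(alias="refund-faq", goldens=[golden])

# by_alias=True keeps the camelCase keys in the serialized payload.
print(dataset.json(by_alias=True, exclude_none=True))
```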