From f7860fff94f6008b49993f478f055998241e63cd Mon Sep 17 00:00:00 2001
From: Jeffrey Ip
Date: Tue, 26 Dec 2023 00:46:45 +0800
Subject: [PATCH 01/46] Wrap up explainable metrics
---
deepeval/chat_completion/retry.py | 60 ++++++++++++++------
deepeval/metrics/answer_relevancy.py | 7 ++-
deepeval/metrics/contextual_precision.py | 7 ++-
deepeval/metrics/contextual_recall.py | 7 ++-
deepeval/metrics/contextual_relevancy.py | 7 ++-
deepeval/metrics/faithfulness.py | 7 ++-
deepeval/models/gpt_model.py | 5 +-
deepeval/templates.py | 1 +
deepeval/test_run/test_run.py | 12 ++--
tests/test_answer_relevancy.py | 2 +-
tests/test_contextual_precision.py | 71 ++++++++++++++++++++++++
11 files changed, 156 insertions(+), 30 deletions(-)
create mode 100644 tests/test_contextual_precision.py
diff --git a/deepeval/chat_completion/retry.py b/deepeval/chat_completion/retry.py
index 501bd5aa0..1df7b2be0 100644
--- a/deepeval/chat_completion/retry.py
+++ b/deepeval/chat_completion/retry.py
@@ -1,19 +1,47 @@
-from typing import Callable, Any
+import random
import time
+import openai
-def call_openai_with_retry(
- callable: Callable[[], Any], max_retries: int = 2
-) -> Any:
- for _ in range(max_retries):
- try:
- response = callable()
- return response
- except Exception as e:
- print(f"An error occurred: {e}. Retrying...")
- time.sleep(2)
- continue
-
- raise Exception(
- "Max retries reached. Unable to make a successful API call to OpenAI."
- )
+def retry_with_exponential_backoff(
+ func,
+ initial_delay: float = 1,
+ exponential_base: float = 2,
+ jitter: bool = True,
+ max_retries: int = 10,
+ errors: tuple = (openai.RateLimitError,),
+):
+ """Retry a function with exponential backoff."""
+
+ def wrapper(*args, **kwargs):
+ # Initialize variables
+ num_retries = 0
+ delay = initial_delay
+
+ # Loop until a successful response or max_retries is hit or an exception is raised
+ while True:
+ try:
+ return func(*args, **kwargs)
+
+ # Retry on specified errors
+ except errors as e:
+ # Increment retries
+ num_retries += 1
+
+ # Check if max retries has been reached
+ if num_retries > max_retries:
+ raise Exception(
+ f"Maximum number of retries ({max_retries}) exceeded."
+ )
+
+            # Multiply the delay by the exponential base, with optional jitter
+ delay *= exponential_base * (1 + jitter * random.random())
+
+ # Sleep for the delay
+ time.sleep(delay)
+
+ # Raise exceptions for any errors not specified
+ except Exception as e:
+ raise e
+
+ return wrapper
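A minimal sketch of how this decorator is meant to be applied (the wrapped function and its arguments are hypothetical; only `openai.RateLimitError` is retried, with the delay multiplied by `exponential_base * (1 + jitter * random.random())` on each attempt):

```python
import openai

client = openai.OpenAI()

@retry_with_exponential_backoff
def chat_completion_with_backoff(**kwargs):
    # Rate-limit errors raised here are caught by the wrapper, which sleeps
    # with exponentially growing, jittered delays before retrying.
    return client.chat.completions.create(**kwargs)

response = chat_completion_with_backoff(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hello"}],
)
```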
diff --git a/deepeval/metrics/answer_relevancy.py b/deepeval/metrics/answer_relevancy.py
index 23154ca8d..8380976ff 100644
--- a/deepeval/metrics/answer_relevancy.py
+++ b/deepeval/metrics/answer_relevancy.py
@@ -19,9 +19,11 @@ def __init__(
self,
minimum_score: float = 0.5,
model: Optional[str] = None,
+ include_reason: bool = True,
):
self.minimum_score = minimum_score
self.model = model
+ self.include_reason = include_reason
self.n = 5
def measure(self, test_case: LLMTestCase) -> float:
@@ -34,7 +36,7 @@ def measure(self, test_case: LLMTestCase) -> float:
"Input, actual output, or retrieval context cannot be None"
)
print(
- "✨ 🍰 ✨ You're using DeepEval's newest Answer Relevancy Metric! This may take a minute."
+ "✨ 🍰 ✨ You're using DeepEval's latest Answer Relevancy Metric! This may take a minute..."
)
self.key_points: List[str] = self._generate_key_points(
test_case.actual_output, "\n".join(test_case.retrieval_context)
@@ -63,6 +65,9 @@ def _generate_score(self):
def _generate_reason(
self, original_question: str, answer: str, score: float
) -> str:
+ if self.include_reason is False:
+ return None
+
irrelevant_points = []
for verdict in self.verdicts:
if verdict.verdict.strip().lower() == "no":
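The same optional `include_reason` flag is threaded through all five metric constructors below; a quick sketch of its effect (the test case values are illustrative):

```python
from deepeval.metrics import AnswerRelevancyMetric
from deepeval.test_case import LLMTestCase

metric = AnswerRelevancyMetric(minimum_score=0.5, include_reason=False)
test_case = LLMTestCase(
    input="What if these shoes don't fit?",
    actual_output="We offer a 30-day full refund at no extra cost.",
    retrieval_context=["All customers are eligible for a 30 day full refund."],
)
metric.measure(test_case)
print(metric.reason)  # None -- _generate_reason short-circuits when include_reason=False
```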
diff --git a/deepeval/metrics/contextual_precision.py b/deepeval/metrics/contextual_precision.py
index 8a658b38e..622c6c1b1 100644
--- a/deepeval/metrics/contextual_precision.py
+++ b/deepeval/metrics/contextual_precision.py
@@ -20,8 +20,10 @@ def __init__(
self,
minimum_score: float = 0.5,
model: Optional[str] = None,
+ include_reason: bool = True,
):
self.minimum_score = minimum_score
+ self.include_reason = include_reason
self.model = model
def measure(self, test_case: LLMTestCase) -> float:
@@ -35,7 +37,7 @@ def measure(self, test_case: LLMTestCase) -> float:
"Input, actual output, expected output, or retrieval context cannot be None"
)
print(
- "✨ 🍰 ✨ You're using DeepEval's newest Contextual Precision Metric! This may take a minute."
+ "✨ 🍰 ✨ You're using DeepEval's latest Contextual Precision Metric! This may take a minute..."
)
self.verdicts: List[
ContextualPrecisionVerdict
@@ -55,6 +57,9 @@ def measure(self, test_case: LLMTestCase) -> float:
return self.score
def _generate_reason(self, input: str, score: float):
+ if self.include_reason is False:
+ return None
+
retrieval_contexts_verdicts = [
{
"verdict": verdict.verdict,
diff --git a/deepeval/metrics/contextual_recall.py b/deepeval/metrics/contextual_recall.py
index 3067a9513..bdf65878a 100644
--- a/deepeval/metrics/contextual_recall.py
+++ b/deepeval/metrics/contextual_recall.py
@@ -19,9 +19,11 @@ def __init__(
self,
minimum_score: float = 0.5,
model: Optional[str] = None,
+ include_reason: bool = True,
):
self.minimum_score = minimum_score
self.model = model
+ self.include_reason = include_reason
self.n = 5
def measure(self, test_case: LLMTestCase) -> float:
@@ -35,7 +37,7 @@ def measure(self, test_case: LLMTestCase) -> float:
"Input, actual output, expected output, or retrieval context cannot be None"
)
print(
- "✨ 🍰 ✨ You're using DeepEval's newest Contextual Recall Metric! This may take a minute."
+ "✨ 🍰 ✨ You're using DeepEval's latest Contextual Recall Metric! This may take a minute..."
)
self.verdicts: List[ContextualRecallVerdict] = self._generate_verdicts(
test_case.expected_output, test_case.retrieval_context
@@ -52,6 +54,9 @@ def measure(self, test_case: LLMTestCase) -> float:
return self.score
def _generate_reason(self, expected_output: str, score: float):
+ if self.include_reason is False:
+ return None
+
supportive_reasons = []
unsupportive_reasons = []
for verdict in self.verdicts:
diff --git a/deepeval/metrics/contextual_relevancy.py b/deepeval/metrics/contextual_relevancy.py
index f6806cfe8..73ccbfc43 100644
--- a/deepeval/metrics/contextual_relevancy.py
+++ b/deepeval/metrics/contextual_relevancy.py
@@ -20,9 +20,11 @@ def __init__(
self,
minimum_score: float = 0.5,
model: Optional[str] = "gpt-4",
+ include_reason: bool = True,
):
self.minimum_score = minimum_score
self.model = model
+ self.include_reason = include_reason
def measure(self, test_case: LLMTestCase) -> float:
if (
@@ -34,7 +36,7 @@ def measure(self, test_case: LLMTestCase) -> float:
"Input, actual output, or retrieval context cannot be None"
)
print(
- "✨ 🍰 ✨ You're using DeepEval's newest Contextual Relevancy Metric! This may take a minute."
+ "✨ 🍰 ✨ You're using DeepEval's latest Contextual Relevancy Metric! This may take a minute..."
)
self.verdicts_list: List[
List[ContextualRelevancyVerdict]
@@ -53,6 +55,9 @@ def measure(self, test_case: LLMTestCase) -> float:
return self.score
def _generate_reason(self, input: str, score: float):
+ if self.include_reason is False:
+ return None
+
irrelevant_sentences = []
for index, verdicts in enumerate(self.verdicts_list):
for verdict in verdicts:
diff --git a/deepeval/metrics/faithfulness.py b/deepeval/metrics/faithfulness.py
index 7f54a7458..589b95f7b 100644
--- a/deepeval/metrics/faithfulness.py
+++ b/deepeval/metrics/faithfulness.py
@@ -21,12 +21,14 @@ def __init__(
self,
minimum_score: float = 0.5,
model: Optional[str] = None,
+ include_reason: bool = True,
):
self.minimum_score = minimum_score
# Don't set self.chat_model when using threading
self.model = model
self.truths_list = None
self.verdicts_list = None
+ self.include_reason = include_reason
def measure(self, test_case: LLMTestCase):
if (
@@ -38,7 +40,7 @@ def measure(self, test_case: LLMTestCase):
"Input, actual output, or retrieval context cannot be None"
)
print(
- "✨ 🍰 ✨ You're using DeepEval's newest Faithfulness Metric! This may take a minute."
+ "✨ 🍰 ✨ You're using DeepEval's latest Faithfulness Metric! This may take a minute..."
)
self.truths_list: List[List[str]] = self._generate_truths_list(
test_case.retrieval_context
@@ -66,6 +68,9 @@ def _generate_score(self):
return faithful_count / total_verdicts
def _generate_reason(self, score: float):
+ if self.include_reason is False:
+ return None
+
contradiction_reasons = []
for verdicts in self.verdicts_list:
for verdict in verdicts:
diff --git a/deepeval/models/gpt_model.py b/deepeval/models/gpt_model.py
index 1a8bdbda0..6ca7cda8a 100644
--- a/deepeval/models/gpt_model.py
+++ b/deepeval/models/gpt_model.py
@@ -4,7 +4,7 @@
from langchain.chat_models import ChatOpenAI, AzureChatOpenAI
from deepeval.key_handler import KeyValues, KEY_FILE_HANDLER
from deepeval.models.base import DeepEvalBaseModel
-from deepeval.chat_completion.retry import call_openai_with_retry
+from deepeval.chat_completion.retry import retry_with_exponential_backoff
valid_gpt_models = [
"gpt-4-1106-preview",
@@ -74,9 +74,10 @@ def load_model(self):
model_name=self.model_name, model_kwargs=self.model_kwargs
)
+ @retry_with_exponential_backoff
def _call(self, prompt: str):
chat_model = self.load_model()
- return call_openai_with_retry(lambda: chat_model.invoke(prompt))
+ return chat_model.invoke(prompt)
def should_use_azure_openai(self):
value = KEY_FILE_HANDLER.fetch_data(KeyValues.USE_AZURE_OPENAI)
diff --git a/deepeval/templates.py b/deepeval/templates.py
index d7e53ff78..0437a84c0 100644
--- a/deepeval/templates.py
+++ b/deepeval/templates.py
@@ -408,6 +408,7 @@ def generate_reason(input, verdicts, score):
**
IMPORTANT: DO NOT mention 'verdict' in your reason, but instead phrase it as irrelevant nodes. The term 'verdict' is just here for you to understand the broader scope of things.
+Also, DO NOT mention that there are `reason` fields in the retrieval contexts you are presented with; instead, just use the information in the `reason` field.
In your reason, you MUST USE the `reason`, QUOTES in the 'reason', and the node RANK (starting from 1, e.g., first node) to explain why the 'no' verdicts should be ranked lower than the 'yes' verdicts.
When addressing nodes, make it explicit that it is nodes in retrieval context.
If the score is 1, keep it short and say something positive with an upbeat tone (but don't overdo it otherwise it gets annoying).
diff --git a/deepeval/test_run/test_run.py b/deepeval/test_run/test_run.py
index f28f61a23..4d1f3d1e0 100644
--- a/deepeval/test_run/test_run.py
+++ b/deepeval/test_run/test_run.py
@@ -201,11 +201,11 @@ def clear_test_run(self):
def display_results_table(self, test_run: TestRun):
table = Table(title="Test Results")
- table.add_column("Test case", justify="right")
- table.add_column("Metric", justify="right")
- table.add_column("Score", justify="right")
- table.add_column("Status", justify="right")
- table.add_column("Overall Success Rate", justify="right")
+ table.add_column("Test case", justify="left")
+ table.add_column("Metric", justify="left")
+ table.add_column("Score", justify="left")
+ table.add_column("Status", justify="left")
+ table.add_column("Overall Success Rate", justify="left")
for index, test_case in enumerate(test_run.test_cases):
pass_count = 0
@@ -237,7 +237,7 @@ def display_results_table(self, test_run: TestRun):
table.add_row(
"",
str(metric_metadata.metric),
- f"{round(metric_metadata.score,2)} (threshold={metric_metadata.minimum_score})",
+ f"{round(metric_metadata.score,2)} (threshold={metric_metadata.minimum_score}, reason={metric_metadata.reason})",
status,
"",
)
diff --git a/tests/test_answer_relevancy.py b/tests/test_answer_relevancy.py
index 8d828cc82..354d82942 100644
--- a/tests/test_answer_relevancy.py
+++ b/tests/test_answer_relevancy.py
@@ -45,7 +45,7 @@
"""
-@pytest.mark.skip(reason="openai is expensive")
+# @pytest.mark.skip(reason="openai is expensive")
def test_answer_relevancy():
metric = AnswerRelevancyMetric(minimum_score=0.5)
test_case = LLMTestCase(
diff --git a/tests/test_contextual_precision.py b/tests/test_contextual_precision.py
new file mode 100644
index 000000000..bff8012dc
--- /dev/null
+++ b/tests/test_contextual_precision.py
@@ -0,0 +1,71 @@
+import pytest
+from deepeval.test_case import LLMTestCase
+from deepeval.metrics import ContextualPrecisionMetric
+from deepeval import assert_test
+
+question = "What are the primary benefits of meditation?"
+answer = """
+Meditation offers a rich tapestry of benefits that touch upon various aspects of well-being. On a mental level,
+it greatly reduces stress and anxiety, fostering enhanced emotional health. This translates to better emotional
+regulation and a heightened sense of overall well-being. Interestingly, the practice of meditation has been around
+for centuries, evolving through various cultures and traditions, which underscores its timeless relevance.
+
+Physically, it contributes to lowering blood pressure and alleviating chronic pain, which is pivotal for long-term health.
+Improved sleep quality is another significant benefit, aiding in overall physical restoration. Cognitively, meditation is a
+boon for enhancing attention span, improving memory, and slowing down age-related cognitive decline. Amidst these benefits,
+meditation's role in cultural and historical contexts is a fascinating side note, though not directly related to its health benefits.
+
+Such a comprehensive set of advantages makes meditation a valuable practice for individuals seeking holistic improvement
+in both mental and physical health, transcending its historical and cultural origins.
+"""
+
+one = """
+Meditation is an ancient practice, rooted in various cultural traditions, where individuals
+engage in mental exercises like mindfulness or concentration to promote mental clarity, emotional
+calmness, and physical relaxation. This practice can range from techniques focusing on breath, visual
+imagery, to movement-based forms like yoga. The goal is to bring about a sense of peace and self-awareness,
+enabling individuals to deal with everyday stress more effectively.
+"""
+
+two = """
+One of the key benefits of meditation is its impact on mental health. It's widely used as a tool to
+reduce stress and anxiety. Meditation helps in managing emotions, leading to enhanced emotional health.
+It can improve symptoms of anxiety and depression, fostering a general sense of well-being. Regular practice
+is known to increase self-awareness, helping individuals understand their thoughts and emotions more clearly
+and reduce negative reactions to challenging situations.
+"""
+
+three = """
+Meditation has shown positive effects on various aspects of physical health. It can lower blood pressure,
+reduce chronic pain, and improve sleep. From a cognitive perspective, meditation can sharpen the mind, increase
+attention span, and improve memory. It's particularly beneficial in slowing down age-related cognitive decline and
+enhancing brain functions related to concentration and attention.
+"""
+
+four = """
+Understanding comets and asteroids is crucial in studying the solar system's formation
+and evolution. Comets, which are remnants from the outer solar system, can provide
+insights into its icy and volatile components. Asteroids, primarily remnants of the
+early solar system's formation, offer clues about the materials that didn't form into
+planets, mostly located in the asteroid belt.
+"""
+
+five = """
+The physical characteristics and orbital paths of comets and asteroids vary significantly.
+Comets often have highly elliptical orbits, taking them close to the Sun and then far into
+the outer solar system. Their icy composition leads to distinctive features like tails and
+comas. Asteroids, conversely, have more circular orbits and lack these visible features,
+being composed mostly of rock and metal.
+"""
+
+
+# @pytest.mark.skip(reason="openai is expensive")
+def test_contextual_precision():
+ metric = ContextualPrecisionMetric(minimum_score=0.5)
+ test_case = LLMTestCase(
+ input=question,
+ actual_output=answer,
+ expected_output=answer,
+ retrieval_context=[one, four, two, five, three],
+ )
+ assert_test(test_case, [metric])
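With the skip marker commented out as above, the new test can be exercised through `deepeval`'s Pytest wrapper (a usage sketch; the path assumes the repository root):

```console
deepeval test run tests/test_contextual_precision.py
```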
From 860564d8d7acab3dc6462222cd2a0e3a4e537a73 Mon Sep 17 00:00:00 2001
From: Jeffrey Ip
Date: Tue, 26 Dec 2023 18:48:00 +0800
Subject: [PATCH 02/46] fix tests
---
tests/test_answer_relevancy.py | 2 +-
tests/test_contextual_precision.py | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/tests/test_answer_relevancy.py b/tests/test_answer_relevancy.py
index 354d82942..8d828cc82 100644
--- a/tests/test_answer_relevancy.py
+++ b/tests/test_answer_relevancy.py
@@ -45,7 +45,7 @@
"""
-# @pytest.mark.skip(reason="openai is expensive")
+@pytest.mark.skip(reason="openai is expensive")
def test_answer_relevancy():
metric = AnswerRelevancyMetric(minimum_score=0.5)
test_case = LLMTestCase(
diff --git a/tests/test_contextual_precision.py b/tests/test_contextual_precision.py
index bff8012dc..6bd36a189 100644
--- a/tests/test_contextual_precision.py
+++ b/tests/test_contextual_precision.py
@@ -59,7 +59,7 @@
"""
-# @pytest.mark.skip(reason="openai is expensive")
+@pytest.mark.skip(reason="openai is expensive")
def test_contextual_precision():
metric = ContextualPrecisionMetric(minimum_score=0.5)
test_case = LLMTestCase(
From 4ea05b17c3f859bfc29f393617b37d2dac73248c Mon Sep 17 00:00:00 2001
From: Jeffrey Ip
Date: Tue, 26 Dec 2023 22:04:30 +0800
Subject: [PATCH 03/46] Optimized faithfulness reasoning
---
deepeval/metrics/faithfulness.py | 13 ++++++-------
deepeval/templates.py | 26 ++++++++++++++++++--------
tests/test_faithfulness.py | 2 +-
3 files changed, 25 insertions(+), 16 deletions(-)
diff --git a/deepeval/metrics/faithfulness.py b/deepeval/metrics/faithfulness.py
index 589b95f7b..643ef2c40 100644
--- a/deepeval/metrics/faithfulness.py
+++ b/deepeval/metrics/faithfulness.py
@@ -12,7 +12,7 @@
class FaithfulnessVerdict(BaseModel):
verdict: str
- reason: str
+ reason: str = Field(default=None)
truth: str = Field(default=None)
@@ -26,8 +26,6 @@ def __init__(
self.minimum_score = minimum_score
# Don't set self.chat_model when using threading
self.model = model
- self.truths_list = None
- self.verdicts_list = None
self.include_reason = include_reason
def measure(self, test_case: LLMTestCase):
@@ -71,14 +69,15 @@ def _generate_reason(self, score: float):
if self.include_reason is False:
return None
- contradiction_reasons = []
- for verdicts in self.verdicts_list:
+ contradictions = []
+ for index, verdicts in enumerate(self.verdicts_list):
for verdict in verdicts:
if verdict.verdict.strip().lower() == "no":
- contradiction_reasons.append(verdict.reason)
+ data = {"contradiction": verdict.reason, "rank": index + 1}
+ contradictions.append(data)
prompt: dict = FaithfulnessTemplate.generate_reason(
- contradiction_reasons=contradiction_reasons,
+ contradictions=contradictions,
score=format(score, ".2f"),
)
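For illustration, each entry in the `contradictions` list handed to the template now pairs the LLM's reason with the 1-indexed rank of the offending retrieval-context node (the values below are invented):

```python
contradictions = [
    {
        # Why the actual output disagrees with this node
        "contradiction": "The actual output says the prize was won in 1969, but this node states 1968.",
        # Position of the node in the retrieval context, starting from 1
        "rank": 2,
    },
]
```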
diff --git a/deepeval/templates.py b/deepeval/templates.py
index 0437a84c0..b4ae5d584 100644
--- a/deepeval/templates.py
+++ b/deepeval/templates.py
@@ -73,7 +73,8 @@ def generate_truths(text):
def generate_verdicts(truths, text):
return f"""Based on a list of strings, called contexts, please generate a list of JSON objects to indicate whether the given 'actual output' agrees with EACH context. The JSON will have 2 fields: 'verdict' and 'reason'.
The 'verdict' key should STRICTLY be either 'yes', 'no', or 'idk', and states whether the given text agrees with the context.
-The 'reason' is the reason for the verdict. When the answer is 'no' or 'idk', try to provide a correction in the reason.
+The 'reason' is the reason for the verdict. When the answer is 'no' or 'idk', try to provide a correction in the reason.
+You DON'T have to provide a reason if the answer is 'yes'.
**
IMPORTANT: Please make sure to only return in JSON format, with the 'verdicts' key as a list of JSON objects.
@@ -85,19 +86,20 @@ def generate_verdicts(truths, text):
"verdicts": [
{{
"verdict": "yes",
- "reason": "The context states that Einstein won the Nobel Prize for his discovery of the photoelectric effect."
+ "reason": "The node in the retrieval context states that Einstein won the Nobel Prize for his discovery of the photoelectric effect."
}},
{{
"verdict": "no",
- "reason": "The context states that Einstein won the Nobel Prize in 1968, not 1969."
+ "reason": "The node in the retrieval context states that Einstein won the Nobel Prize in 1968, not 1969."
}}
]
}}
You should NOT incorporate any prior knowledge you have and take each context at face value. Since you are going to generate a verdict for each context, the number of 'verdicts' SHOULD BE STRICTLY EQUAL to that of contexts.
+You DON'T have to provide a reason if the answer is 'yes'.
**
-Contexts:
+Retrieval Contexts:
{truths}
Actual Output:
@@ -107,19 +109,27 @@ def generate_verdicts(truths, text):
"""
@staticmethod
- def generate_reason(score, contradiction_reasons):
- return f"""Below is a list of Contradictions. It explains why the 'actual output' does not align with the 'retrieval context'.
-Given the faithfulness score, which is a 0-1 score indicating how faithful the `actual output` is the context (higher the better), concisely summarize the contradictions to justify the score. If there are no contradictions, just say something positive with an upbeat encouraging tone (but don't overdo it otherwise it gets annoying).
+ def generate_reason(score, contradictions):
+        return f"""Below is a list of Contradictions. It is a list of JSON objects with `contradiction` and `rank` keys.
+The `contradiction` explains why the 'actual output' does not align with a certain node in the 'retrieval context'. Contradictions happen in the 'actual output', NOT the 'retrieval context'.
+The `rank` tells you which node in the 'retrieval context' the actual output contradicted.
+Given the faithfulness score, which is a 0-1 score indicating how faithful the `actual output` is to the retrieval context (higher the better), concisely summarize the contradictions to justify the score.
Faithfulness Score:
{score}
Contradictions:
-{contradiction_reasons}
+{contradictions}
Example:
The score is <faithfulness_score> because <your_reason>.
+**
+IMPORTANT:
+If there are no contradictions, just say something positive with an upbeat encouraging tone (but don't overdo it otherwise it gets annoying).
+Your reason MUST use the information in `contradiction` and the node RANK (e.g., the first node of the retrieval context).
+**
+
Reason:
"""
diff --git a/tests/test_faithfulness.py b/tests/test_faithfulness.py
index 7b3a5c27c..d3595be69 100644
--- a/tests/test_faithfulness.py
+++ b/tests/test_faithfulness.py
@@ -38,7 +38,7 @@
"""
-@pytest.mark.skip(reason="openai is expensive")
+# @pytest.mark.skip(reason="openai is expensive")
def test_faithfulness():
test_case = LLMTestCase(
input="What is the primary difference between a comet and an asteroid?",
From e79a90fccffebe6217707c1fe15dbe60417c4ca3 Mon Sep 17 00:00:00 2001
From: Jeffrey Ip
Date: Tue, 26 Dec 2023 22:04:50 +0800
Subject: [PATCH 04/46] .
---
tests/test_faithfulness.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tests/test_faithfulness.py b/tests/test_faithfulness.py
index d3595be69..7b3a5c27c 100644
--- a/tests/test_faithfulness.py
+++ b/tests/test_faithfulness.py
@@ -38,7 +38,7 @@
"""
-# @pytest.mark.skip(reason="openai is expensive")
+@pytest.mark.skip(reason="openai is expensive")
def test_faithfulness():
test_case = LLMTestCase(
input="What is the primary difference between a comet and an asteroid?",
From 9835fbe4deedbb518f62375b3ed27df7fa053bf0 Mon Sep 17 00:00:00 2001
From: Jeffrey Ip
Date: Wed, 27 Dec 2023 00:26:46 +0800
Subject: [PATCH 05/46] Delay transformers import
---
deepeval/models/_summac_model.py | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/deepeval/models/_summac_model.py b/deepeval/models/_summac_model.py
index 6541f103f..b7321e250 100644
--- a/deepeval/models/_summac_model.py
+++ b/deepeval/models/_summac_model.py
@@ -3,7 +3,6 @@
# Source: https://github.com/tingofurro/summac
###############################################
-from transformers import AutoTokenizer, AutoModelForSequenceClassification
import nltk
import numpy as np
import torch
@@ -131,6 +130,15 @@ def load_nli(self):
)
else:
+ try:
+ from transformers import (
+ AutoTokenizer,
+ AutoModelForSequenceClassification,
+ )
+ except ModuleNotFoundError:
+                print(
+                    "transformers library is not installed. Run 'pip install transformers'"
+                )
+                raise
self.tokenizer = AutoTokenizer.from_pretrained(self.model_card)
self.model = AutoModelForSequenceClassification.from_pretrained(
self.model_card
From 448431fd625a3a75195fefb9458776545c665b05 Mon Sep 17 00:00:00 2001
From: Jeffrey Ip <143328635+penguine-ip@users.noreply.github.com>
Date: Wed, 27 Dec 2023 01:31:48 +0800
Subject: [PATCH 06/46] Update README.md
---
README.md | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/README.md b/README.md
index caaab076b..cf3851ef2 100644
--- a/README.md
+++ b/README.md
@@ -26,16 +26,20 @@ Whether your application is implemented via RAG or fine-tuning, LangChain or Lla
# Features
-- Large variety of ready-to-use evaluation metrics powered by LLMs, statistical methods, or NLP models that runs **locally on your machine**:
+- Large variety of ready-to-use evaluation metrics powered by LLMs (all with explanations), statistical methods, or NLP models that run **locally on your machine**:
- Hallucination
+ - Summarization
- Answer Relevancy
+ - Faithfulness
+ - Contextual Recall
+ - Contextual Precision
- RAGAS
- G-Eval
- Toxicity
- Bias
- etc.
- Easily create your own custom metrics that are automatically integrated with DeepEval's ecosystem by inheriting DeepEval's base metric class.
-- Evaluate your entire dataset in bulk using fewer than 20 lines of Python code **in parallel**.
+- Evaluate your entire dataset in bulk in under 20 lines of Python code **in parallel**.
- [Automatically integrated with Confident AI](https://app.confident-ai.com) for continuous evaluation throughout the lifetime of your LLM (app):
- log evaluation results and analyze metrics pass / fails
- compare and pick the optimal hyperparameters (eg. prompt templates, chunk size, models used, etc.) based on evaluation results
From 0c75412ff981fa99dff4be908c9b9d39ae2fcf9d Mon Sep 17 00:00:00 2001
From: Jeffrey Ip
Date: Wed, 27 Dec 2023 01:39:06 +0800
Subject: [PATCH 07/46] Fix azure commands
---
deepeval/cli/azure_openai.py | 66 ------------------------------------
deepeval/cli/main.py | 57 +++++++++++++++++++++++++++++--
2 files changed, 55 insertions(+), 68 deletions(-)
delete mode 100644 deepeval/cli/azure_openai.py
diff --git a/deepeval/cli/azure_openai.py b/deepeval/cli/azure_openai.py
deleted file mode 100644
index 70b20c8e8..000000000
--- a/deepeval/cli/azure_openai.py
+++ /dev/null
@@ -1,66 +0,0 @@
-import os
-import typer
-from typing import Optional
-
-try:
- from rich import print
-except Exception as e:
- pass
-from deepeval.key_handler import KEY_FILE_HANDLER, KeyValues
-
-app = typer.Typer(name="azure-openai")
-
-
-@app.command(name="set")
-def set_azure_openai_env(
- azure_openai_api_key: str = typer.Option(
- ..., "--openai-api-key", help="Azure OpenAI API key"
- ),
- azure_openai_endpoint: str = typer.Option(
- ..., "--openai-endpoint", help="Azure OpenAI endpoint"
- ),
- openai_api_version: str = typer.Option(
- ..., "--openai-api-version", help="OpenAI API version"
- ),
- azure_deployment_name: str = typer.Option(
- ..., "--deployment-name", help="Azure deployment name"
- ),
- azure_model_version: Optional[str] = typer.Option(
- None, "--model-version", help="Azure model version (optional)"
- ),
-):
- KEY_FILE_HANDLER.write_key(
- KeyValues.AZURE_OPENAI_API_KEY, azure_openai_api_key
- )
- KEY_FILE_HANDLER.write_key(
- KeyValues.AZURE_OPENAI_ENDPOINT, azure_openai_endpoint
- )
- KEY_FILE_HANDLER.write_key(KeyValues.OPENAI_API_VERSION, openai_api_version)
- KEY_FILE_HANDLER.write_key(
- KeyValues.AZURE_DEPLOYMENT_NAME, azure_deployment_name
- )
-
- if azure_model_version is not None:
- KEY_FILE_HANDLER.write_key(
- KeyValues.AZURE_MODEL_VERSION, azure_model_version
- )
-
- KEY_FILE_HANDLER.write_key(KeyValues.USE_AZURE_OPENAI, "YES")
-
- print(
- ":raising_hands: Congratulations! You're now using Azure OpenAI for all evals that require an LLM."
- )
-
-
-@app.command(name="unset")
-def unset_azure_openai_env():
- KEY_FILE_HANDLER.remove_key(KeyValues.AZURE_OPENAI_API_KEY)
- KEY_FILE_HANDLER.remove_key(KeyValues.AZURE_OPENAI_ENDPOINT)
- KEY_FILE_HANDLER.remove_key(KeyValues.OPENAI_API_VERSION)
- KEY_FILE_HANDLER.remove_key(KeyValues.AZURE_DEPLOYMENT_NAME)
- KEY_FILE_HANDLER.remove_key(KeyValues.AZURE_MODEL_VERSION)
- KEY_FILE_HANDLER.remove_key(KeyValues.USE_AZURE_OPENAI)
-
- print(
- ":raising_hands: Congratulations! You're now using regular OpenAI for all evals that require an LLM."
- )
diff --git a/deepeval/cli/main.py b/deepeval/cli/main.py
index b68f8af07..78596596b 100644
--- a/deepeval/cli/main.py
+++ b/deepeval/cli/main.py
@@ -8,14 +8,12 @@
pass
from deepeval.key_handler import KEY_FILE_HANDLER, KeyValues
from deepeval.cli.test import app as test_app
-from deepeval.cli.azure_openai import app as azure_openai_app
from typing import Optional
import webbrowser
app = typer.Typer(name="deepeval")
app.add_typer(test_app, name="test")
-app.add_typer(azure_openai_app, name="azure-openai")
@app.command()
@@ -58,5 +56,60 @@ def login(
)
+@app.command(name="set-azure-openai")
+def set_azure_openai_env(
+ azure_openai_api_key: str = typer.Option(
+ ..., "--openai-api-key", help="Azure OpenAI API key"
+ ),
+ azure_openai_endpoint: str = typer.Option(
+ ..., "--openai-endpoint", help="Azure OpenAI endpoint"
+ ),
+ openai_api_version: str = typer.Option(
+ ..., "--openai-api-version", help="OpenAI API version"
+ ),
+ azure_deployment_name: str = typer.Option(
+ ..., "--deployment-name", help="Azure deployment name"
+ ),
+ azure_model_version: Optional[str] = typer.Option(
+ None, "--model-version", help="Azure model version (optional)"
+ ),
+):
+ KEY_FILE_HANDLER.write_key(
+ KeyValues.AZURE_OPENAI_API_KEY, azure_openai_api_key
+ )
+ KEY_FILE_HANDLER.write_key(
+ KeyValues.AZURE_OPENAI_ENDPOINT, azure_openai_endpoint
+ )
+ KEY_FILE_HANDLER.write_key(KeyValues.OPENAI_API_VERSION, openai_api_version)
+ KEY_FILE_HANDLER.write_key(
+ KeyValues.AZURE_DEPLOYMENT_NAME, azure_deployment_name
+ )
+
+ if azure_model_version is not None:
+ KEY_FILE_HANDLER.write_key(
+ KeyValues.AZURE_MODEL_VERSION, azure_model_version
+ )
+
+ KEY_FILE_HANDLER.write_key(KeyValues.USE_AZURE_OPENAI, "YES")
+
+ print(
+ ":raising_hands: Congratulations! You're now using Azure OpenAI for all evals that require an LLM."
+ )
+
+
+@app.command(name="unset-azure-openai")
+def unset_azure_openai_env():
+ KEY_FILE_HANDLER.remove_key(KeyValues.AZURE_OPENAI_API_KEY)
+ KEY_FILE_HANDLER.remove_key(KeyValues.AZURE_OPENAI_ENDPOINT)
+ KEY_FILE_HANDLER.remove_key(KeyValues.OPENAI_API_VERSION)
+ KEY_FILE_HANDLER.remove_key(KeyValues.AZURE_DEPLOYMENT_NAME)
+ KEY_FILE_HANDLER.remove_key(KeyValues.AZURE_MODEL_VERSION)
+ KEY_FILE_HANDLER.remove_key(KeyValues.USE_AZURE_OPENAI)
+
+ print(
+ ":raising_hands: Congratulations! You're now using regular OpenAI for all evals that require an LLM."
+ )
+
+
if __name__ == "__main__":
app()
From 36bd7a96afaf57dd069b1d3cc7bb2ba2801ac78f Mon Sep 17 00:00:00 2001
From: Jeffrey Ip
Date: Wed, 27 Dec 2023 01:39:55 +0800
Subject: [PATCH 08/46] Updated azure docs
---
docs/docs/metrics-introduction.mdx | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/docs/docs/metrics-introduction.mdx b/docs/docs/metrics-introduction.mdx
index 562a6c27e..34feda189 100644
--- a/docs/docs/metrics-introduction.mdx
+++ b/docs/docs/metrics-introduction.mdx
@@ -93,7 +93,7 @@ For more details on how a metric evaluates a test case, refer to the [test cases
`deepeval` also allows you to use Azure OpenAI for metrics that are evaluated using an LLM. Run the following command in the CLI to configure your `deepeval` environment to use Azure OpenAI for **all** LLM-based metrics.
```console
-deepeval azure-openai set --openai-endpoint= \
+deepeval set-azure-openai --openai-endpoint= \
--openai-api-key= \
--deployment-name= \
--openai-api-version= \
@@ -103,5 +103,5 @@ deepeval azure-openai set --openai-endpoint= \
Note that the `model-version` is **optional**. If you ever wish to stop using Azure OpenAI and move back to regular OpenAI, simply run:
```console
-deepeval azure-openai unset
+deepeval unset-azure-openai
```
From ab16dc36f0f2679f11bf775fc9e18926b6b33052 Mon Sep 17 00:00:00 2001
From: Jeffrey Ip
Date: Wed, 27 Dec 2023 01:44:06 +0800
Subject: [PATCH 09/46] new release
---
deepeval/_version.py | 2 +-
pyproject.toml | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/deepeval/_version.py b/deepeval/_version.py
index 448d0ed8b..f81ac3edf 100644
--- a/deepeval/_version.py
+++ b/deepeval/_version.py
@@ -1 +1 @@
-__version__: str = "0.20.42"
+__version__: str = "0.20.43"
diff --git a/pyproject.toml b/pyproject.toml
index e521dc652..3888e2aab 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "deepeval"
-version = "0.20.42"
+version = "0.20.43"
description = "The Evaluation Framework for LLMs"
authors = ["Jeffrey Ip "]
license = "Apache-2.0"
From 86853ff43092fb0de13d49f4cd4eeb18d0b2c5a7 Mon Sep 17 00:00:00 2001
From: Jeffrey Ip
Date: Wed, 27 Dec 2023 02:55:01 +0800
Subject: [PATCH 10/46] Updated docs
---
docs/docs/metrics-answer-relevancy.mdx | 9 ++-
docs/docs/metrics-contextual-precision.mdx | 9 ++-
docs/docs/metrics-contextual-recall.mdx | 9 ++-
docs/docs/metrics-contextual-relevancy.mdx | 9 ++-
docs/docs/metrics-faithfulness.mdx | 9 ++-
docs/docs/metrics-introduction.mdx | 90 +++++++++++++++-------
6 files changed, 96 insertions(+), 39 deletions(-)
diff --git a/docs/docs/metrics-answer-relevancy.mdx b/docs/docs/metrics-answer-relevancy.mdx
index 7be738321..193f72822 100644
--- a/docs/docs/metrics-answer-relevancy.mdx
+++ b/docs/docs/metrics-answer-relevancy.mdx
@@ -4,7 +4,7 @@ title: Answer Relevancy
sidebar_label: Answer Relevancy
---
-Answer Relevancy measures how relevant the `actual_output` of your LLM application is compared to the provided `input`. You don't have to supply `context` or `expected_output` when creating an `LLMTestCase` if you're just evaluating answer relevancy.
+The answer relevancy metric measures the quality of your RAG pipeline's generator by evaluating how relevant the `actual_output` of your LLM application is with respect to the provided `input`. `deepeval`'s answer relevancy metric is a self-explaining LLM-Eval, meaning it outputs a reason for its metric score.
## Required Parameters
@@ -27,7 +27,11 @@ actual_output = "We offer a 30-day full refund at no extra cost."
# Replace this with the actual retrieved context from your RAG pipeline
retrieval_context = ["All customers are eligible for a 30 day full refund at no extra cost."]
-metric = AnswerRelevancyMetric(minimum_score=0.7)
+metric = AnswerRelevancyMetric(
+ minimum_score=0.7,
+ model="gpt-4",
+ include_reason=True
+)
test_case = LLMTestCase(
input="What if these shoes don't fit?",
actual_output=actual_output,
@@ -36,6 +40,7 @@ test_case = LLMTestCase(
metric.measure(test_case)
print(metric.score)
+print(metric.reason)
# or evaluate test cases in bulk
evaluate([test_case], [metric])
diff --git a/docs/docs/metrics-contextual-precision.mdx b/docs/docs/metrics-contextual-precision.mdx
index 78e1716b4..2b8bb4b2d 100644
--- a/docs/docs/metrics-contextual-precision.mdx
+++ b/docs/docs/metrics-contextual-precision.mdx
@@ -4,7 +4,7 @@ title: Contextual Precision
sidebar_label: Contextual Precision
---
+The contextual precision metric measures the quality of your RAG pipeline's retriever by evaluating whether nodes in your `retrieval_context` that are relevant to the given `input` are ranked higher than irrelevant ones. `deepeval`'s contextual precision metric is a self-explaining LLM-Eval, meaning it outputs a reason for its metric score.
+The contextual precision metric measures your RAG pipeline's retriever by evaluating whether nodes in your `retrieval_context` that are relevant to the given `input` are ranked higher than irrelevant ones. `deepeval`'s contextual precision metric is a self-explaining LLM-Eval, meaning it outputs a reason for its metric score.
## Required Parameters
@@ -27,7 +27,11 @@ actual_output = "We offer a 30-day full refund at no extra cost."
# Replace this with the actual retrieved context from your RAG pipeline
retrieval_context = ["All customers are eligible for a 30 day full refund at no extra cost."]
-metric = ContextualPrecisionMetric(minimum_score=0.7, model="gpt-3.5-turbo")
+metric = ContextualPrecisionMetric(
+ minimum_score=0.7,
+ model="gpt-4",
+ include_reason=True
+)
test_case = LLMTestCase(
input="What if these shoes don't fit?",
actual_output=actual_output,
@@ -36,6 +40,7 @@ test_case = LLMTestCase(
metric.measure(test_case)
print(metric.score)
+print(metric.reason)
# or evaluate test cases in bulk
evaluate([test_case], [metric])
diff --git a/docs/docs/metrics-contextual-recall.mdx b/docs/docs/metrics-contextual-recall.mdx
index e94ba792a..c88f167e7 100644
--- a/docs/docs/metrics-contextual-recall.mdx
+++ b/docs/docs/metrics-contextual-recall.mdx
@@ -4,7 +4,7 @@ title: Contextual Recall
sidebar_label: Contextual Recall
---
-The contextual recall metric determines the recall of the retrieved context using the annotated answer as a reference to evaluate the performance of your RAG pipeline's retriever. Calculated using `expected_output` and `retrieval_context`.
+The contextual recall metric measures the quality of your RAG pipeline's retriever by evaluating the extent to which the `retrieval_context` aligns with the `expected_output`. `deepeval`'s contextual recall metric is a self-explaining LLM-Eval, meaning it outputs a reason for its metric score.
## Required Parameters
@@ -31,7 +31,11 @@ expected_output = "You are eligible for a 30 day full refund at no extra cost."
# Replace this with the actual retrieved context from your RAG pipeline
retrieval_context = ["All customers are eligible for a 30 day full refund at no extra cost."]
-metric = ContextualRecallMetric(minimum_score=0.7, model="gpt-3.5-turbo")
+metric = ContextualRecallMetric(
+ minimum_score=0.7,
+ model="gpt-4",
+ include_reason=True
+)
test_case = LLMTestCase(
input="What if these shoes don't fit?",
actual_output=actual_output,
@@ -41,6 +45,7 @@ test_case = LLMTestCase(
metric.measure(test_case)
print(metric.score)
+print(metric.reason)
# or evaluate test cases in bulk
evaluate([test_case], [metric])
diff --git a/docs/docs/metrics-contextual-relevancy.mdx b/docs/docs/metrics-contextual-relevancy.mdx
index 0582a4cb1..d4c8801f2 100644
--- a/docs/docs/metrics-contextual-relevancy.mdx
+++ b/docs/docs/metrics-contextual-relevancy.mdx
@@ -4,7 +4,7 @@ title: Contextual Relevancy
sidebar_label: Contextual Relevancy
---
-The contextual relevancy metric assesses the relevance of the retrieved contexts to input, and penalizes redundant information. It evaluates the performance of your RAG pipeline's retriever and is calculated using `input` and `retrieval_context`.
+The contextual relevancy metric measures the quality of your RAG pipeline's retriever by evaluating the overall relevance of the information presented in your `retrieval_context` for a given `input`. `deepeval`'s contextual relevancy metric is a self-explaining LLM-Eval, meaning it outputs a reason for its metric score.
## Required Parameters
@@ -31,7 +31,11 @@ actual_output = "We offer a 30-day full refund at no extra cost."
# Replace this with the actual retrieved context from your RAG pipeline
retrieval_context = ["All customers are eligible for a 30 day full refund at no extra cost."]
-metric = ContextualRelevancyMetric(minimum_score=0.7, model="gpt-3.5-turbo")
+metric = ContextualRelevancyMetric(
+ minimum_score=0.7,
+ model="gpt-4",
+ include_reason=True
+)
test_case = LLMTestCase(
input="What if these shoes don't fit?",
actual_output=actual_output,
@@ -40,6 +44,7 @@ test_case = LLMTestCase(
metric.measure(test_case)
print(metric.score)
+print(metric.reason)
# or evaluate test cases in bulk
evaluate([test_case], [metric])
diff --git a/docs/docs/metrics-faithfulness.mdx b/docs/docs/metrics-faithfulness.mdx
index 45cbb4ca0..af26dca52 100644
--- a/docs/docs/metrics-faithfulness.mdx
+++ b/docs/docs/metrics-faithfulness.mdx
@@ -4,7 +4,7 @@ title: Faithfulness
sidebar_label: Faithfulness
---
-The faithfulness metric measures hallucination in a RAG pipeline to ensure output aligns with the retrieved context. It evaluates the quality of your RAG pipeline's generator and is calculated using `actual_output` and `retrieval_context`.
+The faithfulness metric measures the quality of your RAG pipeline's generator by evaluating whether the `actual_output` factually aligns with the contents of your `retrieval_context`. `deepeval`'s faithfulness metric is a self-explaining LLM-Eval, meaning it outputs a reason for its metric score.
:::info
Although similar to the `HallucinationMetric`, the faithfulness metric in `deepeval` is more concerned with hallucination in RAG pipelines, rather than the actual LLM itself.
@@ -31,7 +31,11 @@ actual_output = "We offer a 30-day full refund at no extra cost."
# Replace this with the actual retrieved context from your RAG pipeline
retrieval_context = ["All customers are eligible for a 30 day full refund at no extra cost."]
-metric = FaithfulnessMetric(minimum_score=0.5, model="gpt-3.5-turbo")
+metric = FaithfulnessMetric(
+ minimum_score=0.7,
+ model="gpt-4",
+ include_reason=True
+)
test_case = LLMTestCase(
input="What if these shoes don't fit?",
actual_output=actual_output,
@@ -40,6 +44,7 @@ test_case = LLMTestCase(
metric.measure(test_case)
print(metric.score)
+print(metric.reason)
# or evaluate test cases in bulk
evaluate([test_case], [metric])
diff --git a/docs/docs/metrics-introduction.mdx b/docs/docs/metrics-introduction.mdx
index 34feda189..f2cb51ecb 100644
--- a/docs/docs/metrics-introduction.mdx
+++ b/docs/docs/metrics-introduction.mdx
@@ -6,31 +6,74 @@ sidebar_label: Introduction
## Quick Summary
-In `deepeval`, a metric serves as a standard of measurement for evaluating the performance of an LLM output based on a specific criteria of interest. Essentially, while the metric acts as the ruler, a test case represents the thing you're trying to measure. `deepeval` offers a range of default metrics for you to quickly get started with, which includes:
+In `deepeval`, a metric serves as a standard of measurement for evaluating the performance of an LLM output based on a specific criteria of interest. Essentially, while the metric acts as the ruler, a test case represents the thing you're trying to measure. `deepeval` offers a range of default metrics for you to quickly get started with, such as:
- Hallucination
+- Summarization
+- Faithfulness
- Answer Relevancy
+- Contextual Relevancy
+- Contextual Precision
+- Contextual Recall
- Ragas
- Toxicity
- Bias
-`deepeval` also offers you a straightforward way to develop your own custom LLM-based evaluation metrics. This is noteworthy because all default metrics in `deepeval` are derived from traditional NLP models, not LLMs. All metrics are measured on a test case. Visit the [test cases section](evaluation-test-cases) to learn how to apply any metric on test cases for evaluation.
+`deepeval` also offers you a straightforward way to develop your own custom evaluation metrics. All metrics are measured on a test case. Visit the [test cases section](evaluation-test-cases) to learn how to apply any metric on test cases for evaluation.
## Types of Metrics
-A **_custom_** metric is a type of metric you can easily create by implementing abstract methods and properties of base classes provided by `deepeval`. They are extremely versitle and seamlessly integrate with Confident AI without requiring any additional setup. As you'll see later, a custom metric can either be an **_LLM evaluated_** or **_classic_** metric. A classic metric is a type of metric whose criteria isn't evaluated using an LLM.
+A **_custom_** metric is a type of metric you can easily create by implementing abstract methods and properties of base classes provided by `deepeval`. They are extremely versatile and seamlessly integrate with Confident AI without requiring any additional setup. As you'll see later, a custom metric can either be an **_LLM-Eval_** (LLM evaluated) or **_classic_** metric. A classic metric is a type of metric whose criteria aren't evaluated using an LLM.
-`deepeval` also offer **_default_** metrics. All default metrics offered by `deepeval` are classic metrics. This means all default metrics in `deepeval` does not use LLMs for evaluation. This is delibrate for two main reasons:
+`deepeval` also offers **_default_** metrics. Most default metrics offered by `deepeval` are LLM-Evals, which means they are evaluated using LLMs. This is deliberate because LLM-Evals are versatile in nature and align better with human expectations compared to traditional model-based approaches.
-- LLM evaluated metrics are versitle in nature and it's better if you create one using `deepeval`'s build-ins
-- Classic metrics are much harder to compute and requires extensive research
+`deepeval`'s LLM-Evals are a step up from other implementations because they:
+
+- are more reliable, as LLMs are only used for extremely specific tasks during evaluation, greatly reducing stochasticity and flakiness in scores.
+- provide a comprehensive reason for the scores computed.
All of `deepeval`'s default metrics output a score between 0-1, and require a `minimum_score` argument to instantiate. A default metric is only successful if the evaluation score is equal to or greater than `minimum_score`.
:::info
-All GPT models from OpenAI are available for metrics that use LLMs for evaluation. You can switch between models by providing a string corresponding to OpenAI's model names via the `model` argument.
+All GPT models from OpenAI are available for LLM-Evals (metrics that use LLMs for evaluation). You can switch between models by providing a string corresponding to OpenAI's model names via the optional `model` argument when instantiating an LLM-Eval.
:::
+## Using OpenAI
+
+To use OpenAI for `deepeval`'s LLM-Evals (metrics evaluated using an LLM), supply your `OPENAI_API_KEY` in the CLI:
+
+```console
+export OPENAI_API_KEY=
+```
+
+Alternatively, if you're working in a notebook environment (Jupyter or Colab), set your `OPENAI_API_KEY` in a cell:
+
+```console
+ %env OPENAI_API_KEY=
+```
+
+:::note
+Please **do not include** quotation marks when setting your `OPENAI_API_KEY` if you're working in a notebook environment.
+:::
+
+## Using Azure OpenAI
+
+`deepeval` also allows you to use Azure OpenAI for metrics that are evaluated using an LLM. Run the following command in the CLI to configure your `deepeval` environment to use Azure OpenAI for **all** LLM-based metrics.
+
+```console
+deepeval set-azure-openai --openai-endpoint= \
+ --openai-api-key= \
+ --deployment-name= \
+ --openai-api-version= \
+ --model-version=
+```
+
+Note that the `model-version` is **optional**. If you ever wish to stop using Azure OpenAI and move back to regular OpenAI, simply run:
+
+```console
+deepeval unset-azure-openai
+```
+
## Measuring a Metric
All metrics in `deepeval`, including [custom metrics that you create](metrics-custom):
@@ -41,17 +84,23 @@ All metrics in `deepeval`, including [custom metrics that you create](metrics-cu
- can be used to evaluate test cases or entire datasets, with or without Pytest.
- has a `minimum_score` that acts as the threshold for success. `metric.is_successful()` is only true if `metric.score` >= `minimum_score`.
+In addition, most LLM-Evals in `deepeval` offer a reason for their score, which can be accessed via `metric.reason`.
+
Here's a quick example.
+```console
+export OPENAI_API_KEY=
+```
+
```python
-from deepeval.metrics import HallucinationMetric
+from deepeval.metrics import AnswerRelevancyMetric
from deepeval.test_case import LLMTestCase
# Initialize a test case
test_case = LLMTestCase(input="...", actual_output="...")
# Initialize metric with minimum_score
-metric = HallucinationMetric(minimum_score=0.5)
+metric = AnswerRelevancyMetric(minimum_score=0.5)
```
Using this metric, you can either evaluate a test case using `deepeval test run`:
@@ -60,7 +109,7 @@ Using this metric, you can either evaluate a test case using `deepeval test run`
from deepeval import evaluate
...
-def test_hallucination():
+def test_answer_relevancy():
assert_test(test_case, metric)
```
@@ -68,7 +117,7 @@ def test_hallucination():
deepeval test run test_file.py
```
-The `evaluate` function:
+Or using the `evaluate` function:
```python
from deepeval import assert_test
@@ -84,24 +133,7 @@ Or execute the metric directly and get its score:
metric.measure(test_case)
print(metric.score)
+print(metric.reason)
```
For more details on how a metric evaluates a test case, refer to the [test cases section.](evaluation-test-cases#assert-test-cases)
-
-## Using Azure OpenAI
-
-`deepeval` also allows you to use Azure OpenAI for metrics that are evaluated using an LLM. Run the following command in the CLI to configure your `deepeval` enviornment to use Azure OpenAI for **all** LLM-based metrics.
-
-```console
-deepeval set-azure-openai --openai-endpoint= \
- --openai-api-key= \
- --deployment-name= \
- --openai-api-version= \
- --model-version=
-```
-
-Note that the `model-version` is **optional**. If you ever wish to stop using Azure OpenAI and move back to regular OpenAI, simply run:
-
-```console
-deepeval unset-azure-openai
-```
From 8429e275ddaf4d6cecf39848c65ccd12e5cee7db Mon Sep 17 00:00:00 2001
From: Jeffrey Ip
Date: Wed, 27 Dec 2023 03:05:02 +0800
Subject: [PATCH 11/46] Updated docs
---
docs/docs/metrics-answer-relevancy.mdx | 6 ++++++
docs/docs/metrics-contextual-precision.mdx | 6 ++++++
docs/docs/metrics-contextual-recall.mdx | 6 ++++++
docs/docs/metrics-contextual-relevancy.mdx | 6 ++++++
docs/docs/metrics-faithfulness.mdx | 6 ++++++
docs/docs/metrics-judgemental.mdx | 4 ++--
docs/docs/metrics-ragas.mdx | 8 ++++----
7 files changed, 36 insertions(+), 6 deletions(-)
diff --git a/docs/docs/metrics-answer-relevancy.mdx b/docs/docs/metrics-answer-relevancy.mdx
index 193f72822..9239f517a 100644
--- a/docs/docs/metrics-answer-relevancy.mdx
+++ b/docs/docs/metrics-answer-relevancy.mdx
@@ -45,3 +45,9 @@ print(metric.reason)
# or evaluate test cases in bulk
evaluate([test_case], [metric])
```
+
+You can also choose to fall back to Ragas' answer relevancy metric (which has a similar implementation). This, however, is not capable of generating a reason.
+
+```python
+from deepeval.metrics import RAGASAnswerRelevancyMetric
+```
diff --git a/docs/docs/metrics-contextual-precision.mdx b/docs/docs/metrics-contextual-precision.mdx
index 2b8bb4b2d..695ea5d93 100644
--- a/docs/docs/metrics-contextual-precision.mdx
+++ b/docs/docs/metrics-contextual-precision.mdx
@@ -45,3 +45,9 @@ print(metric.reason)
# or evaluate test cases in bulk
evaluate([test_case], [metric])
```
+
+You can also choose to fall back to Ragas' contextual precision metric (which has a similar implementation). This, however, is not capable of generating a reason.
+
+```python
+from deepeval.metrics import RAGASContextualPrecisionMetric
+```
diff --git a/docs/docs/metrics-contextual-recall.mdx b/docs/docs/metrics-contextual-recall.mdx
index c88f167e7..b6afe519d 100644
--- a/docs/docs/metrics-contextual-recall.mdx
+++ b/docs/docs/metrics-contextual-recall.mdx
@@ -50,3 +50,9 @@ print(metric.reason)
# or evaluate test cases in bulk
evaluate([test_case], [metric])
```
+
+You can also choose to fall back to Ragas' contextual recall metric (which has a similar implementation). This, however, is not capable of generating a reason.
+
+```python
+from deepeval.metrics import RAGASContextualRecallMetric
+```
diff --git a/docs/docs/metrics-contextual-relevancy.mdx b/docs/docs/metrics-contextual-relevancy.mdx
index d4c8801f2..2e52fe43d 100644
--- a/docs/docs/metrics-contextual-relevancy.mdx
+++ b/docs/docs/metrics-contextual-relevancy.mdx
@@ -49,3 +49,9 @@ print(metric.reason)
# or evaluate test cases in bulk
evaluate([test_case], [metric])
```
+
+You can also choose to fall back to Ragas' contextual relevancy metric (which has a similar implementation). This, however, is not capable of generating a reason.
+
+```python
+from deepeval.metrics import RAGASContextualRelevancyMetric
+```
diff --git a/docs/docs/metrics-faithfulness.mdx b/docs/docs/metrics-faithfulness.mdx
index af26dca52..6a2f8fefb 100644
--- a/docs/docs/metrics-faithfulness.mdx
+++ b/docs/docs/metrics-faithfulness.mdx
@@ -49,3 +49,9 @@ print(metric.reason)
# or evaluate test cases in bulk
evaluate([test_case], [metric])
```
+
+You can also choose to fall back to Ragas' faithfulness metric (which has a similar implementation). This, however, is not capable of generating a reason.
+
+```python
+from deepeval.metrics import RAGASFaithfulnessMetric
+```
diff --git a/docs/docs/metrics-judgemental.mdx b/docs/docs/metrics-judgemental.mdx
index 2a1ea074c..933b1e3d9 100644
--- a/docs/docs/metrics-judgemental.mdx
+++ b/docs/docs/metrics-judgemental.mdx
@@ -1,7 +1,7 @@
---
id: metrics-judgemental
-title: Judgemental GPT
-sidebar_label: Judgemental GPT
+title: JudgementalGPT
+sidebar_label: JudgementalGPT
---
`JudgementalGPT` is an LLM agent developed in-house by [Confident AI](https://confident-ai.com) that's dedicated to evaluation and is superior to `LLMEvalMetric`. While it operates similarly to `LLMEvalMetric` by utilizing LLMs for scoring, it:
diff --git a/docs/docs/metrics-ragas.mdx b/docs/docs/metrics-ragas.mdx
index 70cd0a11a..0ec7c2441 100644
--- a/docs/docs/metrics-ragas.mdx
+++ b/docs/docs/metrics-ragas.mdx
@@ -6,10 +6,10 @@ sidebar_label: RAGAS
The RAGAS metric is the harmonic mean of four distinct metrics:
-- `AnswerRelevancyMetric`
-- `FaithfulnessMetric`
-- `ContextualPrecisionMetric`
-- `ContextualRecallMetric`
+- `RAGASAnswerRelevancyMetric`
+- `RAGASFaithfulnessMetric`
+- `RAGASContextualPrecisionMetric`
+- `RAGASContextualRecallMetric`
It provides a score to holistically evaluate your RAG pipeline's generator and retriever.
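Since the RAGAS score is the harmonic mean of the four component scores, it can be sketched as follows (a hypothetical calculation with invented scores, not `deepeval`'s internal code):

```python
from statistics import harmonic_mean

# Hypothetical component scores from the four RAGAS sub-metrics
scores = {
    "answer_relevancy": 0.9,
    "faithfulness": 1.0,
    "contextual_precision": 0.8,
    "contextual_recall": 0.7,
}
ragas_score = harmonic_mean(scores.values())  # ~0.84; low outliers drag the mean down
```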
From 9c6040e8f0871199994a80e62f1dfe39ae9615c7 Mon Sep 17 00:00:00 2001
From: Jeffrey Ip
Date: Wed, 27 Dec 2023 22:27:59 +0800
Subject: [PATCH 12/46] Updated docs
---
docs/docs/metrics-summarization.mdx | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/docs/metrics-summarization.mdx b/docs/docs/metrics-summarization.mdx
index 98b0de7e6..322b488c8 100644
--- a/docs/docs/metrics-summarization.mdx
+++ b/docs/docs/metrics-summarization.mdx
@@ -76,7 +76,7 @@ In `deepeval`, we judge summarization by taking the minimum of the two distinct
- `alignment_score`: determines whether the summary contains hallucinated or contradictory information to the original text.
- `inclusion_score`: determines whether the summary contains the necessary information from the original text.
-These scores are calculated by generating `n` closed-ended questions that can only be answered with either a 'yes or a 'no', and calculating the ratio of which the original text and summary yields the same answer.
+These scores are calculated by generating `n` closed-ended questions that can only be answered with either a 'yes' or a 'no', and calculating the ratio for which the original text and summary yield the same answer. [Here is a great article](https://www.confident-ai.com/blog/a-step-by-step-guide-to-evaluating-an-llm-text-summarization-task) on how `deepeval`'s summarization metric was built.
You can access the `alignment_score` and `inclusion_score` from a `SummarizationMetric` as follows:
From a37b54164868c6f3f9ae1bdea367b0d613f66c17 Mon Sep 17 00:00:00 2001
From: Jeffrey Ip
Date: Wed, 27 Dec 2023 22:38:54 +0800
Subject: [PATCH 13/46] .
---
docs/docs/metrics-hallucination.mdx | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/docs/docs/metrics-hallucination.mdx b/docs/docs/metrics-hallucination.mdx
index 531ec6126..75b294266 100644
--- a/docs/docs/metrics-hallucination.mdx
+++ b/docs/docs/metrics-hallucination.mdx
@@ -6,6 +6,10 @@ sidebar_label: Hallucination
The hallucination metric determines whether your LLM generates factually correct information by comparing the `actual_output` to the provided `context`.
+:::info
+If you're looking to evaluate hallucination for a RAG system, please refer to the [faithfulness metric](metrics-faithfulness) instead.
+:::
+
## Required Parameters
To use the `HallucinationMetric`, you'll have to provide the following parameters when creating an `LLMTestCase`:
From 87aa422a844198db10ba9967501f088608b4538b Mon Sep 17 00:00:00 2001
From: Jeffrey Ip
Date: Wed, 27 Dec 2023 22:46:45 +0800
Subject: [PATCH 14/46] Updated docs
---
docs/docs/evaluation-test-cases.mdx | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/docs/docs/evaluation-test-cases.mdx b/docs/docs/evaluation-test-cases.mdx
index 0c544bd34..be1ff5c98 100644
--- a/docs/docs/evaluation-test-cases.mdx
+++ b/docs/docs/evaluation-test-cases.mdx
@@ -28,7 +28,7 @@ test_case = LLMTestCase(
**Note that only `input` and `actual_output` are mandatory.**
-However, depending on the specific metric you're evaluating your test cases on, you may or may not require a `retrieval_context`, `expected_output` and/or `context` as additional parameters. For example, you won't need `expected_output` and `context` if you're just measuring answer relevancy, but if you're evaluating factual consistency you'll have to provide `context` in order for `deepeval` to know what the **ground truth** is.
+However, depending on the specific metric you're evaluating your test cases on, you may or may not require a `retrieval_context`, `expected_output` and/or `context` as additional parameters. For example, you won't need `expected_output` and `context` if you're just measuring answer relevancy, but if you're evaluating hallucination you'll have to provide `context` in order for `deepeval` to know what the **ground truth** is.
Let's go through the purpose of each parameter.
@@ -219,7 +219,7 @@ Similar to Pytest, `deepeval` allows you to assert any test case you create by c
- `test_case`: an `LLMTestCase`
- `metrics`: a list of metrics
-A test case passes only if all metrics meet their respective evaluation criterion. Depending on the metric, a combination of `input`, `actual_output`, `expected_output`, and `context` is used to ascertain whether their criterion have been met.
+A test case passes only if all metrics meet their respective evaluation criteria. Depending on the metric, a combination of `input`, `actual_output`, `expected_output`, `context`, and `retrieval_context` is used to ascertain whether those criteria have been met.
```python title="test_assert_example.py"
# A hypothetical LLM application example
@@ -271,7 +271,7 @@ deepeval test run test_assert_example.py -n 4
## Evaluate Test Cases in Bulk
-Lastly, `deepeval` offers an `evaluate` function to evaluate multiple test cases at once, which similar to assert_test but without needing pytest or the CLI.
+Lastly, `deepeval` offers an `evaluate` function to evaluate multiple test cases at once, which is similar to `assert_test` but without the need for Pytest or the CLI.
```python
# A hypothetical LLM application example
From 97076e1fb504166d1293bcf17aac9c04a43e493d Mon Sep 17 00:00:00 2001
From: Jeffrey Ip
Date: Thu, 28 Dec 2023 00:53:37 +0800
Subject: [PATCH 15/46] Fix docs
---
docs/docs/confident-ai-introduction.mdx | 2 +-
docs/docs/evaluation-test-cases.mdx | 2 +-
docs/docs/getting-started.mdx | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/docs/docs/confident-ai-introduction.mdx b/docs/docs/confident-ai-introduction.mdx
index 7cc3a0bcb..bd6f69221 100644
--- a/docs/docs/confident-ai-introduction.mdx
+++ b/docs/docs/confident-ai-introduction.mdx
@@ -32,7 +32,7 @@ Continuous evaluation refers to the process of evaluating LLM applications in no
/>
-Everything in `deepeval` is already automatically integrated with Confident AI, including `deepeval`'s [custom metrics](evaluation-metrics#custom-metrics). To start using Confident AI with `deepeval`, simply login in the CLI:
+Everything in `deepeval` is already automatically integrated with Confident AI, including `deepeval`'s [custom metrics](metrics-custom). To start using Confident AI with `deepeval`, simply log in via the CLI:
```
deepeval login
diff --git a/docs/docs/evaluation-test-cases.mdx b/docs/docs/evaluation-test-cases.mdx
index be1ff5c98..e21f8e3c8 100644
--- a/docs/docs/evaluation-test-cases.mdx
+++ b/docs/docs/evaluation-test-cases.mdx
@@ -82,7 +82,7 @@ test_case = LLMTestCase(
An expected output is literally what you would want the ideal output to be. Note that this parameter is **optional** depending on the metric you want to evaluate.
-The expected output doesn't have to exactly match the actual output in order for your test case to pass since `deepeval` uses a variety of methods to evaluate non-deterministic LLM outputs. We'll go into more details [in the metrics section.](evaluation-metrics)
+The expected output doesn't have to exactly match the actual output in order for your test case to pass since `deepeval` uses a variety of methods to evaluate non-deterministic LLM outputs. We'll go into more details [in the metrics section.](metrics-introduction)
```python
# A hypothetical LLM application example
diff --git a/docs/docs/getting-started.mdx b/docs/docs/getting-started.mdx
index 811802a6a..a34f15793 100644
--- a/docs/docs/getting-started.mdx
+++ b/docs/docs/getting-started.mdx
@@ -34,7 +34,7 @@ In your newly created virtual environment, run:
pip install -U deepeval
```
-You can also keep track of all evaluation results by logging into our [in all one evaluation platform](https://confident-ai.com), and use Confident AI's [proprietary LLM evaluation agent](evaluation-metrics#judgementalgpt) for evaluation:
+You can also keep track of all evaluation results by logging into our [all-in-one evaluation platform](https://confident-ai.com), and use Confident AI's [proprietary LLM evaluation agent](metrics-judgemental) for evaluation:
```console
deepeval login
From a883f0289b7999bb028ef215a1544bed14af8a9f Mon Sep 17 00:00:00 2001
From: Jeffrey Ip
Date: Thu, 28 Dec 2023 21:26:49 +0800
Subject: [PATCH 16/46] Added threading to track
---
deepeval/api.py | 80 ++++++++++++++++++++++++-
deepeval/event.py | 26 ++++++---
poetry.lock | 145 +++++++++++++++++++++++++---------------------
pyproject.toml | 2 +
4 files changed, 179 insertions(+), 74 deletions(-)
diff --git a/deepeval/api.py b/deepeval/api.py
index 719516786..734a5ee5f 100644
--- a/deepeval/api.py
+++ b/deepeval/api.py
@@ -4,9 +4,13 @@
import requests
import warnings
from requests.adapters import HTTPAdapter, Response, Retry
+import aiohttp
+from aiohttp import ClientSession
+from requests.adapters import HTTPAdapter
+from enum import Enum
+
from deepeval.constants import API_KEY_ENV
from deepeval.key_handler import KEY_FILE_HANDLER, KeyValues
-from enum import Enum
API_BASE_URL = "https://app.confident-ai.com/api"
@@ -259,3 +263,77 @@ def quote_string(text: str) -> str:
str: Quoted text in return
"""
return urllib.parse.quote(text, safe="")
+
+ async def _api_request_async(
+ self,
+ method,
+ endpoint,
+ headers=None,
+ auth=None,
+ params=None,
+ body=None,
+ files=None,
+ data=None,
+ ):
+ """Generic asynchronous HTTP request method with error handling."""
+ url = f"{self.base_api_url}/{endpoint}"
+ async with ClientSession() as session:
+ try:
+ # Preparing the request body for file uploads if files are present
+ if files:
+ data = aiohttp.FormData()
+ for file_name, file_content in files.items():
+ data.add_field(
+ file_name, file_content, filename=file_name
+ )
+
+ # Sending the request
+ res = await session.request(
+ method=method,
+ url=url,
+ headers=headers,
+ params=params,
+ json=body,
+ )
+
+ # Check response status
+ if res.status == 200:
+ try:
+ json = await res.json()
+ return json
+ except ValueError:
+ return await res.text()
+ else:
+ # Determine how to process the response based on Content-Type
+ content_type = res.headers.get("Content-Type", "")
+ if "application/json" in content_type:
+ error_response = await res.json()
+ else:
+ error_response = await res.text()
+
+ # Specifically handle status code 400
+ if res.status == 400:
+ print(f"Error 400: Bad Request - {error_response}")
+
+ print(f"Error {res.status}: {error_response}")
+ return None
+
+ except Exception as err:
+ raise Exception(f"HTTP request failed: {err}") from err
+
+ async def post_request_async(
+ self, endpoint, body=None, files=None, data=None
+ ):
+ """Generic asynchronous POST Request Wrapper"""
+ print("hi")
+ return await self._api_request_async(
+ "POST",
+ endpoint,
+ headers=self._headers
+ if files is None
+ else self._headers_multipart_form_data,
+ auth=self._auth,
+ body=body,
+ files=files,
+ data=data,
+ )
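To exercise the new coroutine it has to be driven from an event loop. A minimal sketch, assuming a valid API key is already configured; the body shape is an illustrative assumption, not the full APIEvent schema:

```python
import asyncio

from deepeval.api import Api, Endpoints

async def main():
    api = Api()
    # Non-blocking POST to the events endpoint via aiohttp.
    result = await api.post_request_async(
        endpoint=Endpoints.EVENT_ENDPOINT.value,
        body={"name": "my-event"},  # illustrative payload
    )
    print(result)

asyncio.run(main())
```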
diff --git a/deepeval/event.py b/deepeval/event.py
index 709f03c09..4a20344db 100644
--- a/deepeval/event.py
+++ b/deepeval/event.py
@@ -1,5 +1,7 @@
from typing import Optional, List, Dict
from deepeval.api import Api, Endpoints
+import threading
+import asyncio
from pydantic import BaseModel, Field
@@ -32,7 +34,18 @@ def track(
conversation_id: Optional[str] = None,
additional_data: Optional[Dict] = None,
fail_silently: Optional[bool] = True,
+ run_background_thread: Optional[bool] = True,
):
+ def track_event(event: APIEvent, api: Api, fail_silently: bool):
+ try:
+ _ = api.post_request(
+ endpoint=Endpoints.EVENT_ENDPOINT.value,
+ body=event.dict(by_alias=True, exclude_none=True),
+ )
+ except Exception as e:
+ if not fail_silently:
+ raise e
+
event = APIEvent(
name=event_name,
model=model,
@@ -47,11 +60,10 @@ def track(
additionalData=additional_data,
)
api = Api()
- try:
- _ = api.post_request(
- endpoint=Endpoints.EVENT_ENDPOINT.value,
- body=event.dict(by_alias=True, exclude_none=True),
+ if run_background_thread:
+ thread = threading.Thread(
+ target=track_event, args=(event, api, fail_silently), daemon=True
)
- except Exception as e:
- if not fail_silently:
- raise (e)
+ thread.start()
+ else:
+ track_event(event, api, fail_silently)
diff --git a/poetry.lock b/poetry.lock
index badd5ac49..161ba87c4 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -450,63 +450,63 @@ test-no-images = ["pytest", "pytest-cov", "pytest-xdist", "wurlitzer"]
[[package]]
name = "coverage"
-version = "7.3.4"
+version = "7.4.0"
description = "Code coverage measurement for Python"
optional = false
python-versions = ">=3.8"
files = [
- {file = "coverage-7.3.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:aff2bd3d585969cc4486bfc69655e862028b689404563e6b549e6a8244f226df"},
- {file = "coverage-7.3.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e4353923f38d752ecfbd3f1f20bf7a3546993ae5ecd7c07fd2f25d40b4e54571"},
- {file = "coverage-7.3.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ea473c37872f0159294f7073f3fa72f68b03a129799f3533b2bb44d5e9fa4f82"},
- {file = "coverage-7.3.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5214362abf26e254d749fc0c18af4c57b532a4bfde1a057565616dd3b8d7cc94"},
- {file = "coverage-7.3.4-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f99b7d3f7a7adfa3d11e3a48d1a91bb65739555dd6a0d3fa68aa5852d962e5b1"},
- {file = "coverage-7.3.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:74397a1263275bea9d736572d4cf338efaade2de9ff759f9c26bcdceb383bb49"},
- {file = "coverage-7.3.4-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:f154bd866318185ef5865ace5be3ac047b6d1cc0aeecf53bf83fe846f4384d5d"},
- {file = "coverage-7.3.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:e0d84099ea7cba9ff467f9c6f747e3fc3906e2aadac1ce7b41add72e8d0a3712"},
- {file = "coverage-7.3.4-cp310-cp310-win32.whl", hash = "sha256:3f477fb8a56e0c603587b8278d9dbd32e54bcc2922d62405f65574bd76eba78a"},
- {file = "coverage-7.3.4-cp310-cp310-win_amd64.whl", hash = "sha256:c75738ce13d257efbb6633a049fb2ed8e87e2e6c2e906c52d1093a4d08d67c6b"},
- {file = "coverage-7.3.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:997aa14b3e014339d8101b9886063c5d06238848905d9ad6c6eabe533440a9a7"},
- {file = "coverage-7.3.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8a9c5bc5db3eb4cd55ecb8397d8e9b70247904f8eca718cc53c12dcc98e59fc8"},
- {file = "coverage-7.3.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:27ee94f088397d1feea3cb524e4313ff0410ead7d968029ecc4bc5a7e1d34fbf"},
- {file = "coverage-7.3.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8ce03e25e18dd9bf44723e83bc202114817f3367789052dc9e5b5c79f40cf59d"},
- {file = "coverage-7.3.4-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:85072e99474d894e5df582faec04abe137b28972d5e466999bc64fc37f564a03"},
- {file = "coverage-7.3.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:a877810ef918d0d345b783fc569608804f3ed2507bf32f14f652e4eaf5d8f8d0"},
- {file = "coverage-7.3.4-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:9ac17b94ab4ca66cf803f2b22d47e392f0977f9da838bf71d1f0db6c32893cb9"},
- {file = "coverage-7.3.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:36d75ef2acab74dc948d0b537ef021306796da551e8ac8b467810911000af66a"},
- {file = "coverage-7.3.4-cp311-cp311-win32.whl", hash = "sha256:47ee56c2cd445ea35a8cc3ad5c8134cb9bece3a5cb50bb8265514208d0a65928"},
- {file = "coverage-7.3.4-cp311-cp311-win_amd64.whl", hash = "sha256:11ab62d0ce5d9324915726f611f511a761efcca970bd49d876cf831b4de65be5"},
- {file = "coverage-7.3.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:33e63c578f4acce1b6cd292a66bc30164495010f1091d4b7529d014845cd9bee"},
- {file = "coverage-7.3.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:782693b817218169bfeb9b9ba7f4a9f242764e180ac9589b45112571f32a0ba6"},
- {file = "coverage-7.3.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7c4277ddaad9293454da19121c59f2d850f16bcb27f71f89a5c4836906eb35ef"},
- {file = "coverage-7.3.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3d892a19ae24b9801771a5a989fb3e850bd1ad2e2b6e83e949c65e8f37bc67a1"},
- {file = "coverage-7.3.4-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3024ec1b3a221bd10b5d87337d0373c2bcaf7afd86d42081afe39b3e1820323b"},
- {file = "coverage-7.3.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:a1c3e9d2bbd6f3f79cfecd6f20854f4dc0c6e0ec317df2b265266d0dc06535f1"},
- {file = "coverage-7.3.4-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:e91029d7f151d8bf5ab7d8bfe2c3dbefd239759d642b211a677bc0709c9fdb96"},
- {file = "coverage-7.3.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:6879fe41c60080aa4bb59703a526c54e0412b77e649a0d06a61782ecf0853ee1"},
- {file = "coverage-7.3.4-cp312-cp312-win32.whl", hash = "sha256:fd2f8a641f8f193968afdc8fd1697e602e199931012b574194052d132a79be13"},
- {file = "coverage-7.3.4-cp312-cp312-win_amd64.whl", hash = "sha256:d1d0ce6c6947a3a4aa5479bebceff2c807b9f3b529b637e2b33dea4468d75fc7"},
- {file = "coverage-7.3.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:36797b3625d1da885b369bdaaa3b0d9fb8865caed3c2b8230afaa6005434aa2f"},
- {file = "coverage-7.3.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:bfed0ec4b419fbc807dec417c401499ea869436910e1ca524cfb4f81cf3f60e7"},
- {file = "coverage-7.3.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f97ff5a9fc2ca47f3383482858dd2cb8ddbf7514427eecf5aa5f7992d0571429"},
- {file = "coverage-7.3.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:607b6c6b35aa49defaebf4526729bd5238bc36fe3ef1a417d9839e1d96ee1e4c"},
- {file = "coverage-7.3.4-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a8e258dcc335055ab59fe79f1dec217d9fb0cdace103d6b5c6df6b75915e7959"},
- {file = "coverage-7.3.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:a02ac7c51819702b384fea5ee033a7c202f732a2a2f1fe6c41e3d4019828c8d3"},
- {file = "coverage-7.3.4-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:b710869a15b8caf02e31d16487a931dbe78335462a122c8603bb9bd401ff6fb2"},
- {file = "coverage-7.3.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:c6a23ae9348a7a92e7f750f9b7e828448e428e99c24616dec93a0720342f241d"},
- {file = "coverage-7.3.4-cp38-cp38-win32.whl", hash = "sha256:758ebaf74578b73f727acc4e8ab4b16ab6f22a5ffd7dd254e5946aba42a4ce76"},
- {file = "coverage-7.3.4-cp38-cp38-win_amd64.whl", hash = "sha256:309ed6a559bc942b7cc721f2976326efbfe81fc2b8f601c722bff927328507dc"},
- {file = "coverage-7.3.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:aefbb29dc56317a4fcb2f3857d5bce9b881038ed7e5aa5d3bcab25bd23f57328"},
- {file = "coverage-7.3.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:183c16173a70caf92e2dfcfe7c7a576de6fa9edc4119b8e13f91db7ca33a7923"},
- {file = "coverage-7.3.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4a4184dcbe4f98d86470273e758f1d24191ca095412e4335ff27b417291f5964"},
- {file = "coverage-7.3.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:93698ac0995516ccdca55342599a1463ed2e2d8942316da31686d4d614597ef9"},
- {file = "coverage-7.3.4-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fb220b3596358a86361139edce40d97da7458412d412e1e10c8e1970ee8c09ab"},
- {file = "coverage-7.3.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d5b14abde6f8d969e6b9dd8c7a013d9a2b52af1235fe7bebef25ad5c8f47fa18"},
- {file = "coverage-7.3.4-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:610afaf929dc0e09a5eef6981edb6a57a46b7eceff151947b836d869d6d567c1"},
- {file = "coverage-7.3.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:d6ed790728fb71e6b8247bd28e77e99d0c276dff952389b5388169b8ca7b1c28"},
- {file = "coverage-7.3.4-cp39-cp39-win32.whl", hash = "sha256:c15fdfb141fcf6a900e68bfa35689e1256a670db32b96e7a931cab4a0e1600e5"},
- {file = "coverage-7.3.4-cp39-cp39-win_amd64.whl", hash = "sha256:38d0b307c4d99a7aca4e00cad4311b7c51b7ac38fb7dea2abe0d182dd4008e05"},
- {file = "coverage-7.3.4-pp38.pp39.pp310-none-any.whl", hash = "sha256:b1e0f25ae99cf247abfb3f0fac7ae25739e4cd96bf1afa3537827c576b4847e5"},
- {file = "coverage-7.3.4.tar.gz", hash = "sha256:020d56d2da5bc22a0e00a5b0d54597ee91ad72446fa4cf1b97c35022f6b6dbf0"},
+ {file = "coverage-7.4.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:36b0ea8ab20d6a7564e89cb6135920bc9188fb5f1f7152e94e8300b7b189441a"},
+ {file = "coverage-7.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0676cd0ba581e514b7f726495ea75aba3eb20899d824636c6f59b0ed2f88c471"},
+ {file = "coverage-7.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d0ca5c71a5a1765a0f8f88022c52b6b8be740e512980362f7fdbb03725a0d6b9"},
+ {file = "coverage-7.4.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a7c97726520f784239f6c62506bc70e48d01ae71e9da128259d61ca5e9788516"},
+ {file = "coverage-7.4.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:815ac2d0f3398a14286dc2cea223a6f338109f9ecf39a71160cd1628786bc6f5"},
+ {file = "coverage-7.4.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:80b5ee39b7f0131ebec7968baa9b2309eddb35b8403d1869e08f024efd883566"},
+ {file = "coverage-7.4.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:5b2ccb7548a0b65974860a78c9ffe1173cfb5877460e5a229238d985565574ae"},
+ {file = "coverage-7.4.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:995ea5c48c4ebfd898eacb098164b3cc826ba273b3049e4a889658548e321b43"},
+ {file = "coverage-7.4.0-cp310-cp310-win32.whl", hash = "sha256:79287fd95585ed36e83182794a57a46aeae0b64ca53929d1176db56aacc83451"},
+ {file = "coverage-7.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:5b14b4f8760006bfdb6e08667af7bc2d8d9bfdb648351915315ea17645347137"},
+ {file = "coverage-7.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:04387a4a6ecb330c1878907ce0dc04078ea72a869263e53c72a1ba5bbdf380ca"},
+ {file = "coverage-7.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ea81d8f9691bb53f4fb4db603203029643caffc82bf998ab5b59ca05560f4c06"},
+ {file = "coverage-7.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:74775198b702868ec2d058cb92720a3c5a9177296f75bd97317c787daf711505"},
+ {file = "coverage-7.4.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:76f03940f9973bfaee8cfba70ac991825611b9aac047e5c80d499a44079ec0bc"},
+ {file = "coverage-7.4.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:485e9f897cf4856a65a57c7f6ea3dc0d4e6c076c87311d4bc003f82cfe199d25"},
+ {file = "coverage-7.4.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:6ae8c9d301207e6856865867d762a4b6fd379c714fcc0607a84b92ee63feff70"},
+ {file = "coverage-7.4.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:bf477c355274a72435ceb140dc42de0dc1e1e0bf6e97195be30487d8eaaf1a09"},
+ {file = "coverage-7.4.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:83c2dda2666fe32332f8e87481eed056c8b4d163fe18ecc690b02802d36a4d26"},
+ {file = "coverage-7.4.0-cp311-cp311-win32.whl", hash = "sha256:697d1317e5290a313ef0d369650cfee1a114abb6021fa239ca12b4849ebbd614"},
+ {file = "coverage-7.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:26776ff6c711d9d835557ee453082025d871e30b3fd6c27fcef14733f67f0590"},
+ {file = "coverage-7.4.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:13eaf476ec3e883fe3e5fe3707caeb88268a06284484a3daf8250259ef1ba143"},
+ {file = "coverage-7.4.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:846f52f46e212affb5bcf131c952fb4075b55aae6b61adc9856222df89cbe3e2"},
+ {file = "coverage-7.4.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:26f66da8695719ccf90e794ed567a1549bb2644a706b41e9f6eae6816b398c4a"},
+ {file = "coverage-7.4.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:164fdcc3246c69a6526a59b744b62e303039a81e42cfbbdc171c91a8cc2f9446"},
+ {file = "coverage-7.4.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:316543f71025a6565677d84bc4df2114e9b6a615aa39fb165d697dba06a54af9"},
+ {file = "coverage-7.4.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:bb1de682da0b824411e00a0d4da5a784ec6496b6850fdf8c865c1d68c0e318dd"},
+ {file = "coverage-7.4.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:0e8d06778e8fbffccfe96331a3946237f87b1e1d359d7fbe8b06b96c95a5407a"},
+ {file = "coverage-7.4.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a56de34db7b7ff77056a37aedded01b2b98b508227d2d0979d373a9b5d353daa"},
+ {file = "coverage-7.4.0-cp312-cp312-win32.whl", hash = "sha256:51456e6fa099a8d9d91497202d9563a320513fcf59f33991b0661a4a6f2ad450"},
+ {file = "coverage-7.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:cd3c1e4cb2ff0083758f09be0f77402e1bdf704adb7f89108007300a6da587d0"},
+ {file = "coverage-7.4.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:e9d1bf53c4c8de58d22e0e956a79a5b37f754ed1ffdbf1a260d9dcfa2d8a325e"},
+ {file = "coverage-7.4.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:109f5985182b6b81fe33323ab4707011875198c41964f014579cf82cebf2bb85"},
+ {file = "coverage-7.4.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3cc9d4bc55de8003663ec94c2f215d12d42ceea128da8f0f4036235a119c88ac"},
+ {file = "coverage-7.4.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cc6d65b21c219ec2072c1293c505cf36e4e913a3f936d80028993dd73c7906b1"},
+ {file = "coverage-7.4.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5a10a4920def78bbfff4eff8a05c51be03e42f1c3735be42d851f199144897ba"},
+ {file = "coverage-7.4.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:b8e99f06160602bc64da35158bb76c73522a4010f0649be44a4e167ff8555952"},
+ {file = "coverage-7.4.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:7d360587e64d006402b7116623cebf9d48893329ef035278969fa3bbf75b697e"},
+ {file = "coverage-7.4.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:29f3abe810930311c0b5d1a7140f6395369c3db1be68345638c33eec07535105"},
+ {file = "coverage-7.4.0-cp38-cp38-win32.whl", hash = "sha256:5040148f4ec43644702e7b16ca864c5314ccb8ee0751ef617d49aa0e2d6bf4f2"},
+ {file = "coverage-7.4.0-cp38-cp38-win_amd64.whl", hash = "sha256:9864463c1c2f9cb3b5db2cf1ff475eed2f0b4285c2aaf4d357b69959941aa555"},
+ {file = "coverage-7.4.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:936d38794044b26c99d3dd004d8af0035ac535b92090f7f2bb5aa9c8e2f5cd42"},
+ {file = "coverage-7.4.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:799c8f873794a08cdf216aa5d0531c6a3747793b70c53f70e98259720a6fe2d7"},
+ {file = "coverage-7.4.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e7defbb9737274023e2d7af02cac77043c86ce88a907c58f42b580a97d5bcca9"},
+ {file = "coverage-7.4.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a1526d265743fb49363974b7aa8d5899ff64ee07df47dd8d3e37dcc0818f09ed"},
+ {file = "coverage-7.4.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bf635a52fc1ea401baf88843ae8708591aa4adff875e5c23220de43b1ccf575c"},
+ {file = "coverage-7.4.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:756ded44f47f330666843b5781be126ab57bb57c22adbb07d83f6b519783b870"},
+ {file = "coverage-7.4.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:0eb3c2f32dabe3a4aaf6441dde94f35687224dfd7eb2a7f47f3fd9428e421058"},
+ {file = "coverage-7.4.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:bfd5db349d15c08311702611f3dccbef4b4e2ec148fcc636cf8739519b4a5c0f"},
+ {file = "coverage-7.4.0-cp39-cp39-win32.whl", hash = "sha256:53d7d9158ee03956e0eadac38dfa1ec8068431ef8058fe6447043db1fb40d932"},
+ {file = "coverage-7.4.0-cp39-cp39-win_amd64.whl", hash = "sha256:cfd2a8b6b0d8e66e944d47cdec2f47c48fef2ba2f2dff5a9a75757f64172857e"},
+ {file = "coverage-7.4.0-pp38.pp39.pp310-none-any.whl", hash = "sha256:c530833afc4707fe48524a44844493f36d8727f04dcce91fb978c414a8556cc6"},
+ {file = "coverage-7.4.0.tar.gz", hash = "sha256:707c0f58cb1712b8809ece32b68996ee1e609f71bd14615bd8f87a1293cb610e"},
]
[package.extras]
@@ -544,25 +544,27 @@ typing-inspect = ">=0.4.0,<1"
[[package]]
name = "datasets"
-version = "2.14.4"
+version = "2.16.0"
description = "HuggingFace community-driven open-source library of datasets"
optional = false
python-versions = ">=3.8.0"
files = [
- {file = "datasets-2.14.4-py3-none-any.whl", hash = "sha256:29336bd316a7d827ccd4da2236596279b20ca2ac78f64c04c9483da7cbc2459b"},
- {file = "datasets-2.14.4.tar.gz", hash = "sha256:ef29c2b5841de488cd343cfc26ab979bff77efa4d2285af51f1ad7db5c46a83b"},
+ {file = "datasets-2.16.0-py3-none-any.whl", hash = "sha256:301cc39b3d81cd751100b79c85f8ae8626c17b0b113819ba2831c204d90b43f2"},
+ {file = "datasets-2.16.0.tar.gz", hash = "sha256:91b06f7a8f0329179e7d603004102a6cc7a424a2f599315297a061caa1f8fa64"},
]
[package.dependencies]
aiohttp = "*"
dill = ">=0.3.0,<0.3.8"
-fsspec = {version = ">=2021.11.1", extras = ["http"]}
-huggingface-hub = ">=0.14.0,<1.0.0"
+filelock = "*"
+fsspec = {version = ">=2023.1.0,<=2023.10.0", extras = ["http"]}
+huggingface-hub = ">=0.19.4"
multiprocess = "*"
numpy = ">=1.17"
packaging = "*"
pandas = "*"
pyarrow = ">=8.0.0"
+pyarrow-hotfix = "*"
pyyaml = ">=5.1"
requests = ">=2.19.0"
tqdm = ">=4.62.1"
@@ -572,15 +574,15 @@ xxhash = "*"
apache-beam = ["apache-beam (>=2.26.0,<2.44.0)"]
audio = ["librosa", "soundfile (>=0.12.1)"]
benchmarks = ["tensorflow (==2.12.0)", "torch (==2.0.1)", "transformers (==4.30.1)"]
-dev = ["Pillow (>=6.2.1)", "absl-py", "apache-beam (>=2.26.0,<2.44.0)", "black (>=23.1,<24.0)", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.6.4)", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "pyyaml (>=5.3.1)", "rarfile (>=4.0)", "ruff (>=0.0.241)", "s3fs", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "sqlalchemy (<2.0.0)", "tensorflow (>=2.2.0,!=2.6.0,!=2.6.1)", "tensorflow (>=2.3,!=2.6.0,!=2.6.1)", "tensorflow-macos", "tiktoken", "torch", "transformers", "zstandard"]
+dev = ["Pillow (>=6.2.1)", "absl-py", "apache-beam (>=2.26.0,<2.44.0)", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.6.4)", "jax (>=0.3.14)", "jaxlib (>=0.3.14)", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "ruff (>=0.1.5)", "s3fs", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "sqlalchemy (<2.0.0)", "tensorflow (>=2.2.0,!=2.6.0,!=2.6.1)", "tensorflow (>=2.3,!=2.6.0,!=2.6.1)", "tensorflow-macos", "tiktoken", "torch", "torch (>=2.0.0)", "transformers", "typing-extensions (>=4.6.1)", "zstandard"]
docs = ["s3fs", "tensorflow (>=2.2.0,!=2.6.0,!=2.6.1)", "tensorflow-macos", "torch", "transformers"]
-jax = ["jax (>=0.2.8,!=0.3.2,<=0.3.25)", "jaxlib (>=0.1.65,<=0.3.25)"]
+jax = ["jax (>=0.3.14)", "jaxlib (>=0.3.14)"]
metrics-tests = ["Werkzeug (>=1.0.1)", "accelerate", "bert-score (>=0.3.6)", "jiwer", "langdetect", "mauve-text", "nltk", "requests-file (>=1.5.1)", "rouge-score", "sacrebleu", "sacremoses", "scikit-learn", "scipy", "sentencepiece", "seqeval", "six (>=1.15.0,<1.16.0)", "spacy (>=3.0.0)", "texttable (>=1.6.3)", "tldextract", "tldextract (>=3.1.0)", "toml (>=0.10.1)", "typer (<0.5.0)"]
-quality = ["black (>=23.1,<24.0)", "pyyaml (>=5.3.1)", "ruff (>=0.0.241)"]
+quality = ["ruff (>=0.1.5)"]
s3 = ["s3fs"]
tensorflow = ["tensorflow (>=2.2.0,!=2.6.0,!=2.6.1)", "tensorflow-macos"]
tensorflow-gpu = ["tensorflow-gpu (>=2.2.0,!=2.6.0,!=2.6.1)"]
-tests = ["Pillow (>=6.2.1)", "absl-py", "apache-beam (>=2.26.0,<2.44.0)", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.6.4)", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "sqlalchemy (<2.0.0)", "tensorflow (>=2.3,!=2.6.0,!=2.6.1)", "tensorflow-macos", "tiktoken", "torch", "transformers", "zstandard"]
+tests = ["Pillow (>=6.2.1)", "absl-py", "apache-beam (>=2.26.0,<2.44.0)", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.6.4)", "jax (>=0.3.14)", "jaxlib (>=0.3.14)", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "sqlalchemy (<2.0.0)", "tensorflow (>=2.3,!=2.6.0,!=2.6.1)", "tensorflow-macos", "tiktoken", "torch (>=2.0.0)", "transformers", "typing-extensions (>=4.6.1)", "zstandard"]
torch = ["torch"]
vision = ["Pillow (>=6.2.1)"]
@@ -806,13 +808,13 @@ files = [
[[package]]
name = "fsspec"
-version = "2023.12.2"
+version = "2023.10.0"
description = "File-system specification"
optional = false
python-versions = ">=3.8"
files = [
- {file = "fsspec-2023.12.2-py3-none-any.whl", hash = "sha256:d800d87f72189a745fa3d6b033b9dc4a34ad069f60ca60b943a63599f5501960"},
- {file = "fsspec-2023.12.2.tar.gz", hash = "sha256:8548d39e8810b59c38014934f6b31e57f40c1b20f911f4cc2b85389c7e9bf0cb"},
+ {file = "fsspec-2023.10.0-py3-none-any.whl", hash = "sha256:346a8f024efeb749d2a5fca7ba8854474b1ff9af7c3faaf636a4548781136529"},
+ {file = "fsspec-2023.10.0.tar.gz", hash = "sha256:330c66757591df346ad3091a53bd907e15348c2ba17d63fd54f5c39c4457d2a5"},
]
[package.dependencies]
@@ -2142,6 +2144,17 @@ files = [
[package.dependencies]
numpy = ">=1.16.6"
+[[package]]
+name = "pyarrow-hotfix"
+version = "0.6"
+description = ""
+optional = false
+python-versions = ">=3.5"
+files = [
+ {file = "pyarrow_hotfix-0.6-py3-none-any.whl", hash = "sha256:dcc9ae2d220dff0083be6a9aa8e0cdee5182ad358d4931fce825c545e5c89178"},
+ {file = "pyarrow_hotfix-0.6.tar.gz", hash = "sha256:79d3e030f7ff890d408a100ac16d6f00b14d44a502d7897cd9fc3e3a534e9945"},
+]
+
[[package]]
name = "pydantic"
version = "2.5.3"
@@ -3879,4 +3892,4 @@ multidict = ">=4.0"
[metadata]
lock-version = "2.0"
python-versions = ">=3.10,<3.12"
-content-hash = "999515af5757ca9c0f111d90fe0bcb416dd62e1f9fc04bfa6183a5a87c49b10e"
+content-hash = "8b882533d1c305fc650e8609cb93c420c3becbff3abb398cb247f5bbd42a73ec"
diff --git a/pyproject.toml b/pyproject.toml
index 3888e2aab..c12a227af 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -41,6 +41,8 @@ protobuf = "*"
typer = "*"
setuptools = "*"
wheel = "*"
+aiohttp = "*"
+
[tool.black]
line-length = 80
From 1cf39f4f9386ecabb4594deb50cff366186db439 Mon Sep 17 00:00:00 2001
From: Jeffrey Ip
Date: Thu, 28 Dec 2023 21:28:56 +0800
Subject: [PATCH 17/46] rename
---
deepeval/event.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/deepeval/event.py b/deepeval/event.py
index 4a20344db..73b6d6b1e 100644
--- a/deepeval/event.py
+++ b/deepeval/event.py
@@ -34,7 +34,7 @@ def track(
conversation_id: Optional[str] = None,
additional_data: Optional[Dict] = None,
fail_silently: Optional[bool] = True,
- run_background_thread: Optional[bool] = True,
+ run_on_background_thread: Optional[bool] = True,
):
def track_event(event: APIEvent, api: Api, fail_silently: bool):
try:
@@ -60,7 +60,7 @@ def track_event(event: APIEvent, api: Api, fail_silently: bool):
additionalData=additional_data,
)
api = Api()
- if run_background_thread:
+ if run_on_background_thread:
thread = threading.Thread(
target=track_event, args=(event, api, fail_silently), daemon=True
)
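With the rename in place, callers can choose between fire-and-forget and synchronous delivery. A minimal usage sketch; only fields visible in these patches are shown, and `track` takes additional fields omitted here for brevity:

```python
from deepeval.event import track

# Fire-and-forget: the POST happens on a daemon thread and won't block the app.
track(
    event_name="chatbot-response",
    model="gpt-4",
    run_on_background_thread=True,
)

# Synchronous variant for tests, with errors surfaced instead of swallowed.
track(
    event_name="chatbot-response",
    model="gpt-4",
    fail_silently=False,
    run_on_background_thread=False,
)
```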
From 2a0514636ae5d5c1db937c8def802d94e6e01c0c Mon Sep 17 00:00:00 2001
From: Jeffrey Ip
Date: Thu, 28 Dec 2023 22:01:59 +0800
Subject: [PATCH 18/46] LLamaindex tracing
---
deepeval/tracing/integrations/llama_index.py | 142 +++++++++++++++++++
1 file changed, 142 insertions(+)
create mode 100644 deepeval/tracing/integrations/llama_index.py
diff --git a/deepeval/tracing/integrations/llama_index.py b/deepeval/tracing/integrations/llama_index.py
new file mode 100644
index 000000000..748a5975d
--- /dev/null
+++ b/deepeval/tracing/integrations/llama_index.py
@@ -0,0 +1,142 @@
+from typing import Any, Dict, List, Optional, Union
+from time import perf_counter
+
+from llama_index.bridge.pydantic import BaseModel
+from llama_index.callbacks.base_handler import BaseCallbackHandler
+from llama_index.callbacks.schema import CBEventType, EventPayload
+from llama_index.llms import ChatMessage
+
+from deepeval.tracing import (
+ trace_manager,
+ get_trace_stack,
+ LlmTrace,
+ GenericTrace,
+ EmbeddingTrace,
+ TraceStatus,
+ LlmMetadata,
+ EmbeddingMetadata,
+)
+from deepeval.utils import dataclass_to_dict
+
+events_to_ignore = [
+ CBEventType.CHUNKING,
+ CBEventType.NODE_PARSING,
+ CBEventType.EMBEDDING,
+ CBEventType.TREE,
+ CBEventType.SUB_QUESTION,
+ CBEventType.FUNCTION_CALL,
+ CBEventType.EXCEPTION,
+ CBEventType.AGENT_STEP,
+]
+
+
+class LlamaIndexCallbackHandler(BaseCallbackHandler):
+ def __init__(self) -> None:
+ self.event_map = {}
+ super().__init__(
+ event_starts_to_ignore=events_to_ignore,
+ event_ends_to_ignore=events_to_ignore,
+ )
+
+ def start_trace(self, trace_id: Optional[str] = None) -> None:
+ self.event_map = {}
+ trace_manager.clear_trace_stack()
+ return
+
+ def end_trace(
+ self,
+ trace_id: Optional[str] = None,
+ trace_map: Optional[Dict[str, List[str]]] = None,
+ ) -> None:
+ # TODO:
+ print(get_trace_stack())
+ return
+
+ def on_event_start(
+ self,
+ event_type: CBEventType,
+ payload: Optional[Dict[str, Any]] = None,
+ event_id: str = "",
+ parent_id: str = "",
+ **kwargs: Any,
+ ) -> str:
+ trace_instance = self.create_trace_instance(event_type)
+ self.event_map[event_id] = trace_instance
+ trace_manager.append_to_trace_stack(trace_instance)
+ return
+
+ def on_event_end(
+ self,
+ event_type: CBEventType,
+ payload: Optional[Dict[str, Any]] = None,
+ event_id: str = "",
+ **kwargs: Any,
+ ) -> None:
+ trace_instance = self.event_map[event_id]
+ trace_instance.executionTime = (
+ perf_counter() - trace_instance.executionTime
+ )
+ print(trace_instance.executionTime)
+ # TODO: get inputs and outputs from payload into kwargs based on CBEventType
+
+ current_trace_stack = trace_manager.get_trace_stack()
+ if len(current_trace_stack) > 1:
+ parent_trace = current_trace_stack[-2]
+ parent_trace.traces.append(trace_instance)
+
+ if len(current_trace_stack) == 1:
+ dict_representation = dataclass_to_dict(current_trace_stack[0])
+ trace_manager.set_dict_trace_stack(dict_representation)
+ trace_manager.clear_trace_stack()
+ else:
+ trace_manager.pop_trace_stack()
+
+ return
+
+ def create_trace_instance(
+ self, event_type: CBEventType
+ ) -> Union[EmbeddingTrace, LlmMetadata, GenericTrace]:
+ current_time = perf_counter()
+ type = self.convert_event_type_to_deepeval_trace_type(event_type)
+ name = event_type.capitalize()
+ trace_instance_input = {"args": None, "kwargs": None}
+ if event_type == CBEventType.LLM:
+ trace_instance = LlmTrace(
+ type=type,
+ executionTime=current_time,
+ name=name,
+ input=trace_instance_input,
+ output=None,
+ status=TraceStatus.SUCCESS,
+ traces=[],
+ llmMetadata=LlmMetadata(model="None"),
+ )
+ elif event_type == CBEventType.EMBEDDING:
+ trace_instance = EmbeddingTrace(
+ type=type,
+ executionTime=current_time,
+ name=name,
+ input=trace_instance_input,
+ output=None,
+ status=TraceStatus.SUCCESS,
+ traces=[],
+ embeddingMetadata=EmbeddingMetadata(model="None"),
+ )
+ else:
+ trace_instance = GenericTrace(
+ type=type,
+ executionTime=current_time,
+ name=name,
+ input=trace_instance_input,
+ output=None,
+ status=TraceStatus.SUCCESS,
+ traces=[],
+ )
+
+ return trace_instance
+
+ # TODO
+ def convert_event_type_to_deepeval_trace_type(
+ self, event_type: CBEventType
+ ):
+ pass
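Until the global handler registration lands in a later patch, the handler could be wired into LlamaIndex manually. A hedged sketch against the LlamaIndex API of this era:

```python
from llama_index import ServiceContext, VectorStoreIndex, SimpleDirectoryReader
from llama_index.callbacks import CallbackManager

from deepeval.tracing.integrations.llama_index import LlamaIndexCallbackHandler

# Attach the deepeval handler so LLM, embedding, and retrieval events are traced.
callback_manager = CallbackManager([LlamaIndexCallbackHandler()])
service_context = ServiceContext.from_defaults(callback_manager=callback_manager)

documents = SimpleDirectoryReader("data").load_data()
index = VectorStoreIndex.from_documents(documents, service_context=service_context)
```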
From 25bd4c9a88e8bccbbd39a3059492d06305f911b6 Mon Sep 17 00:00:00 2001
From: Jeffrey Ip
Date: Thu, 28 Dec 2023 22:03:53 +0800
Subject: [PATCH 19/46] Added llama
---
pyproject.toml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/pyproject.toml b/pyproject.toml
index c12a227af..86830629a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -42,7 +42,7 @@ typer = "*"
setuptools = "*"
wheel = "*"
aiohttp = "*"
-
+llama-index = {path = "/Users/jeffreyip/mrgpt/repos/llama_index"}
[tool.black]
line-length = 80
From 8a5e382d5ae9fd6e237fda4be0dfe314bee42909 Mon Sep 17 00:00:00 2001
From: Jeffrey Ip
Date: Tue, 2 Jan 2024 03:52:37 +0800
Subject: [PATCH 20/46] llamaindex tracing
---
deepeval/tracing/__init__.py | 13 +-
deepeval/tracing/integrations/llama_index.py | 22 ++-
llama_test/chatbot.py | 14 ++
llama_test/data/data.txt | 25 +++
llama_test/main.py | 6 +
poetry.lock | 157 ++++++++++---------
pyproject.toml | 1 -
7 files changed, 152 insertions(+), 86 deletions(-)
create mode 100644 llama_test/chatbot.py
create mode 100644 llama_test/data/data.txt
create mode 100644 llama_test/main.py
diff --git a/deepeval/tracing/__init__.py b/deepeval/tracing/__init__.py
index 76ea3893c..ce4c4642a 100644
--- a/deepeval/tracing/__init__.py
+++ b/deepeval/tracing/__init__.py
@@ -1 +1,12 @@
-from .tracing import trace, TraceType, get_trace_stack
+from .tracing import (
+ trace,
+ trace_manager,
+ get_trace_stack,
+ TraceType,
+ TraceStatus,
+ LlmTrace,
+ EmbeddingTrace,
+ GenericTrace,
+ LlmMetadata,
+ EmbeddingMetadata,
+)
diff --git a/deepeval/tracing/integrations/llama_index.py b/deepeval/tracing/integrations/llama_index.py
index 748a5975d..0cd2d1736 100644
--- a/deepeval/tracing/integrations/llama_index.py
+++ b/deepeval/tracing/integrations/llama_index.py
@@ -15,6 +15,7 @@
TraceStatus,
LlmMetadata,
EmbeddingMetadata,
+ TraceType,
)
from deepeval.utils import dataclass_to_dict
@@ -48,8 +49,6 @@ def end_trace(
trace_id: Optional[str] = None,
trace_map: Optional[Dict[str, List[str]]] = None,
) -> None:
- # TODO:
- print(get_trace_stack())
return
def on_event_start(
@@ -76,8 +75,12 @@ def on_event_end(
trace_instance.executionTime = (
perf_counter() - trace_instance.executionTime
)
- print(trace_instance.executionTime)
- # TODO: get inputs and outputs from payload into kwargs based on CBEventType
+ input_kwargs = {}
+ if payload is not None:
+ for event in EventPayload:
+ value = payload.get(event.value)
+ if value is not None:
+ input_kwargs[event.value] = value
current_trace_stack = trace_manager.get_trace_stack()
if len(current_trace_stack) > 1:
@@ -135,8 +138,15 @@ def create_trace_instance(
return trace_instance
- # TODO
def convert_event_type_to_deepeval_trace_type(
self, event_type: CBEventType
):
- pass
+ # TODO: add more types
+ if event_type == CBEventType.LLM:
+ return TraceType.LLM
+ elif event_type == CBEventType.RETRIEVE:
+ return TraceType.RETRIEVER
+ elif event_type == CBEventType.EMBEDDING:
+ return TraceType.EMBEDDING
+
+ return event_type.value.capitalize()
diff --git a/llama_test/chatbot.py b/llama_test/chatbot.py
new file mode 100644
index 000000000..a07724b5e
--- /dev/null
+++ b/llama_test/chatbot.py
@@ -0,0 +1,14 @@
+from llama_index import VectorStoreIndex, SimpleDirectoryReader
+from llama_index import ServiceContext
+import llama_index
+
+llama_index.set_global_handler("deepeval")
+
+service_context = ServiceContext.from_defaults(chunk_size=1000)
+documents = SimpleDirectoryReader("data").load_data()
+index = VectorStoreIndex.from_documents(documents)
+query_engine = index.as_query_engine(similarity_top_k=5)
+
+
+def query(user_input):
+ return query_engine.query(user_input).response
diff --git a/llama_test/data/data.txt b/llama_test/data/data.txt
new file mode 100644
index 000000000..c2bd37022
--- /dev/null
+++ b/llama_test/data/data.txt
@@ -0,0 +1,25 @@
+About MadeUpCompany
+MadeUpCompany is a pioneering technology firm founded in 2010, specializing in cloud computing, data analytics, and machine learning. Our headquarters is based in San Francisco, California, with satellite offices spread across New York, London, and Tokyo. We are committed to offering state-of-the-art solutions that help businesses and individuals achieve their full potential. With a diverse team of experts from various industries, we strive to redefine the boundaries of innovation and efficiency.
+
+Products and Services
+We offer a suite of services ranging from cloud storage solutions, data analytics platforms, to custom machine learning models tailored for specific business needs. Our most popular product is CloudMate, a cloud storage solution designed for businesses of all sizes. It offers seamless data migration, top-tier security protocols, and an easy-to-use interface. Our data analytics service, DataWiz, helps companies turn raw data into actionable insights using advanced algorithms.
+
+Pricing
+We have a variety of pricing options tailored to different needs. Our basic cloud storage package starts at $9.99 per month, with premium plans offering more storage and functionalities. We also provide enterprise solutions on a case-by-case basis, so it’s best to consult with our sales team for customized pricing.
+
+Technical Support
+Our customer support team is available 24/7 to assist with any technical issues. We offer multiple channels for support including live chat, email, and a toll-free number. Most issues are typically resolved within 24 hours. We also have an extensive FAQ section on our website and a community forum for peer support.
+
+Security and Compliance
+MadeUpCompany places the utmost importance on security and compliance. All our products are GDPR compliant and adhere to the highest security standards, including end-to-end encryption and multi-factor authentication.
+
+Account Management
+Customers can easily manage their accounts through our online portal, which allows you to upgrade your service, view billing history, and manage users in your organization. If you encounter any issues or have questions about your account, our account management team is available weekdays from 9 AM to 6 PM.
+
+Refund and Cancellation Policy
+We offer a 30-day money-back guarantee on all our products. If you're not satisfied for any reason, you can request a full refund within the first 30 days of your purchase. After that, you can still cancel your service at any time, but a prorated refund will be issued based on the remaining term of your subscription.
+
+Upcoming Features
+We’re constantly working to improve our services and offer new features. Keep an eye out for updates on machine learning functionalities in DataWiz and more collaborative tools in CloudMate in the upcoming quarters.
+
+Your customer support staff can use these paragraphs to build their responses to customer inquiries, providing both detailed and precise information to address various questions.
\ No newline at end of file
diff --git a/llama_test/main.py b/llama_test/main.py
new file mode 100644
index 000000000..9ddcfa104
--- /dev/null
+++ b/llama_test/main.py
@@ -0,0 +1,6 @@
+from chatbot import query
+
+while True:
+ user_input = input("Enter your question: ")
+ response = query(user_input)
+ print("Bot response:", response)
diff --git a/poetry.lock b/poetry.lock
index 161ba87c4..e12b30b58 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -167,21 +167,22 @@ files = [
[[package]]
name = "attrs"
-version = "23.1.0"
+version = "23.2.0"
description = "Classes Without Boilerplate"
optional = false
python-versions = ">=3.7"
files = [
- {file = "attrs-23.1.0-py3-none-any.whl", hash = "sha256:1f28b4522cdc2fb4256ac1a020c78acf9cba2c6b461ccd2c126f3aa8e8335d04"},
- {file = "attrs-23.1.0.tar.gz", hash = "sha256:6279836d581513a26f1bf235f9acd333bc9115683f14f7e8fae46c98fc50e015"},
+ {file = "attrs-23.2.0-py3-none-any.whl", hash = "sha256:99b87a485a5820b23b879f04c2305b44b951b502fd64be915879d77a7e8fc6f1"},
+ {file = "attrs-23.2.0.tar.gz", hash = "sha256:935dc3b529c262f6cf76e50877d35a4bd3c1de194fd41f47a2b7ae8f19971f30"},
]
[package.extras]
cov = ["attrs[tests]", "coverage[toml] (>=5.3)"]
-dev = ["attrs[docs,tests]", "pre-commit"]
+dev = ["attrs[tests]", "pre-commit"]
docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier", "zope-interface"]
tests = ["attrs[tests-no-zope]", "zope-interface"]
-tests-no-zope = ["cloudpickle", "hypothesis", "mypy (>=1.1.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"]
+tests-mypy = ["mypy (>=1.6)", "pytest-mypy-plugins"]
+tests-no-zope = ["attrs[tests-mypy]", "cloudpickle", "hypothesis", "pympler", "pytest (>=4.3.0)", "pytest-xdist[psutil]"]
[[package]]
name = "bert-score"
@@ -544,13 +545,13 @@ typing-inspect = ">=0.4.0,<1"
[[package]]
name = "datasets"
-version = "2.16.0"
+version = "2.16.1"
description = "HuggingFace community-driven open-source library of datasets"
optional = false
python-versions = ">=3.8.0"
files = [
- {file = "datasets-2.16.0-py3-none-any.whl", hash = "sha256:301cc39b3d81cd751100b79c85f8ae8626c17b0b113819ba2831c204d90b43f2"},
- {file = "datasets-2.16.0.tar.gz", hash = "sha256:91b06f7a8f0329179e7d603004102a6cc7a424a2f599315297a061caa1f8fa64"},
+ {file = "datasets-2.16.1-py3-none-any.whl", hash = "sha256:fafa300c78ff92d521473a3d47d60c2d3e0d6046212cc03ceb6caf6550737257"},
+ {file = "datasets-2.16.1.tar.gz", hash = "sha256:ad3215e9b1984d1de4fda2123bc7319ccbdf1e17d0c3d5590d13debff308a080"},
]
[package.dependencies]
@@ -1194,13 +1195,13 @@ files = [
[[package]]
name = "langchain"
-version = "0.0.352"
+version = "0.0.353"
description = "Building applications with LLMs through composability"
optional = false
python-versions = ">=3.8.1,<4.0"
files = [
- {file = "langchain-0.0.352-py3-none-any.whl", hash = "sha256:43ab580e1223e5d7c3495b3c0cb79e2f3a0ecb52caf8126271fb10d42cede2d0"},
- {file = "langchain-0.0.352.tar.gz", hash = "sha256:8928d7b63d73af9681fe1b2a2b99b84238efef61ed537de666160fd001f41efd"},
+ {file = "langchain-0.0.353-py3-none-any.whl", hash = "sha256:54cac8b74fbefacddcdf0c443619a7331d6b59fe94fa2a48a4d7da2b59cf1f63"},
+ {file = "langchain-0.0.353.tar.gz", hash = "sha256:a095ea819f13a3606ced699182a8369eb2d77034ec8c913983675d6dd9a98196"},
]
[package.dependencies]
@@ -1209,7 +1210,7 @@ async-timeout = {version = ">=4.0.0,<5.0.0", markers = "python_version < \"3.11\
dataclasses-json = ">=0.5.7,<0.7"
jsonpatch = ">=1.33,<2.0"
langchain-community = ">=0.0.2,<0.1"
-langchain-core = ">=0.1,<0.2"
+langchain-core = ">=0.1.4,<0.2"
langsmith = ">=0.0.70,<0.1.0"
numpy = ">=1,<2"
pydantic = ">=1,<3"
@@ -1234,13 +1235,13 @@ text-helpers = ["chardet (>=5.1.0,<6.0.0)"]
[[package]]
name = "langchain-community"
-version = "0.0.6"
+version = "0.0.7"
description = "Community contributed LangChain integrations."
optional = false
python-versions = ">=3.8.1,<4.0"
files = [
- {file = "langchain_community-0.0.6-py3-none-any.whl", hash = "sha256:13b16da0f89c328df456911ff03069e4d919f647c7dd3bfc5062525cf956ed82"},
- {file = "langchain_community-0.0.6.tar.gz", hash = "sha256:b7deb63fd8205d54b51cf8b1702de15d1da77987f8465c356b158a65adff378c"},
+ {file = "langchain_community-0.0.7-py3-none-any.whl", hash = "sha256:468af187bfffe753426cc4548132824be7df9404d38ceef2f873087290d8ff0e"},
+ {file = "langchain_community-0.0.7.tar.gz", hash = "sha256:cfbeb25cac7dff3c021f3c82aa243fc80f80082d6f6fdcc79daf36b1408828cc"},
]
[package.dependencies]
@@ -1256,17 +1257,17 @@ tenacity = ">=8.1.0,<9.0.0"
[package.extras]
cli = ["typer (>=0.9.0,<0.10.0)"]
-extended-testing = ["aiosqlite (>=0.19.0,<0.20.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "anthropic (>=0.3.11,<0.4.0)", "arxiv (>=1.4,<2.0)", "assemblyai (>=0.17.0,<0.18.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "beautifulsoup4 (>=4,<5)", "bibtexparser (>=1.4.0,<2.0.0)", "cassio (>=0.1.0,<0.2.0)", "chardet (>=5.1.0,<6.0.0)", "cohere (>=4,<5)", "dashvector (>=1.0.1,<2.0.0)", "databricks-vectorsearch (>=0.21,<0.22)", "datasets (>=2.15.0,<3.0.0)", "dgml-utils (>=0.3.0,<0.4.0)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "feedparser (>=6.0.10,<7.0.0)", "fireworks-ai (>=0.9.0,<0.10.0)", "geopandas (>=0.13.1,<0.14.0)", "gitpython (>=3.1.32,<4.0.0)", "google-cloud-documentai (>=2.20.1,<3.0.0)", "gql (>=3.4.1,<4.0.0)", "gradientai (>=1.4.0,<2.0.0)", "hologres-vector (>=0.0.6,<0.0.7)", "html2text (>=2020.1.16,<2021.0.0)", "javelin-sdk (>=0.1.8,<0.2.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "jsonschema (>1)", "lxml (>=4.9.2,<5.0.0)", "markdownify (>=0.11.6,<0.12.0)", "motor (>=3.3.1,<4.0.0)", "msal (>=1.25.0,<2.0.0)", "mwparserfromhell (>=0.6.4,<0.7.0)", "mwxml (>=0.3.3,<0.4.0)", "newspaper3k (>=0.2.8,<0.3.0)", "numexpr (>=2.8.6,<3.0.0)", "openai (<2)", "openapi-pydantic (>=0.3.2,<0.4.0)", "oracle-ads (>=2.9.1,<3.0.0)", "pandas (>=2.0.1,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pgvector (>=0.1.6,<0.2.0)", "praw (>=7.7.1,<8.0.0)", "psychicapi (>=0.8.0,<0.9.0)", "py-trello (>=0.19.0,<0.20.0)", "pymupdf (>=1.22.3,<2.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pypdfium2 (>=4.10.0,<5.0.0)", "pyspark (>=3.4.0,<4.0.0)", "rank-bm25 (>=0.2.2,<0.3.0)", "rapidfuzz (>=3.1.1,<4.0.0)", "rapidocr-onnxruntime (>=1.3.2,<2.0.0)", "requests-toolbelt (>=1.0.0,<2.0.0)", "rspace_client (>=2.5.0,<3.0.0)", "scikit-learn (>=1.2.2,<2.0.0)", "sqlite-vss (>=0.1.2,<0.2.0)", "streamlit (>=1.18.0,<2.0.0)", "sympy (>=1.12,<2.0)", "telethon (>=1.28.5,<2.0.0)", "timescale-vector (>=0.0.1,<0.0.2)", "tqdm (>=4.48.0)", "upstash-redis (>=0.15.0,<0.16.0)", "xata (>=1.0.0a7,<2.0.0)", "xmltodict (>=0.13.0,<0.14.0)"]
+extended-testing = ["aiosqlite (>=0.19.0,<0.20.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "anthropic (>=0.3.11,<0.4.0)", "arxiv (>=1.4,<2.0)", "assemblyai (>=0.17.0,<0.18.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "azure-ai-documentintelligence (>=1.0.0b1,<2.0.0)", "beautifulsoup4 (>=4,<5)", "bibtexparser (>=1.4.0,<2.0.0)", "cassio (>=0.1.0,<0.2.0)", "chardet (>=5.1.0,<6.0.0)", "cohere (>=4,<5)", "dashvector (>=1.0.1,<2.0.0)", "databricks-vectorsearch (>=0.21,<0.22)", "datasets (>=2.15.0,<3.0.0)", "dgml-utils (>=0.3.0,<0.4.0)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "feedparser (>=6.0.10,<7.0.0)", "fireworks-ai (>=0.9.0,<0.10.0)", "geopandas (>=0.13.1,<0.14.0)", "gitpython (>=3.1.32,<4.0.0)", "google-cloud-documentai (>=2.20.1,<3.0.0)", "gql (>=3.4.1,<4.0.0)", "gradientai (>=1.4.0,<2.0.0)", "hologres-vector (>=0.0.6,<0.0.7)", "html2text (>=2020.1.16,<2021.0.0)", "javelin-sdk (>=0.1.8,<0.2.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "jsonschema (>1)", "lxml (>=4.9.2,<5.0.0)", "markdownify (>=0.11.6,<0.12.0)", "motor (>=3.3.1,<4.0.0)", "msal (>=1.25.0,<2.0.0)", "mwparserfromhell (>=0.6.4,<0.7.0)", "mwxml (>=0.3.3,<0.4.0)", "newspaper3k (>=0.2.8,<0.3.0)", "numexpr (>=2.8.6,<3.0.0)", "openai (<2)", "openapi-pydantic (>=0.3.2,<0.4.0)", "oracle-ads (>=2.9.1,<3.0.0)", "pandas (>=2.0.1,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pgvector (>=0.1.6,<0.2.0)", "praw (>=7.7.1,<8.0.0)", "psychicapi (>=0.8.0,<0.9.0)", "py-trello (>=0.19.0,<0.20.0)", "pymupdf (>=1.22.3,<2.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pypdfium2 (>=4.10.0,<5.0.0)", "pyspark (>=3.4.0,<4.0.0)", "rank-bm25 (>=0.2.2,<0.3.0)", "rapidfuzz (>=3.1.1,<4.0.0)", "rapidocr-onnxruntime (>=1.3.2,<2.0.0)", "requests-toolbelt (>=1.0.0,<2.0.0)", "rspace_client (>=2.5.0,<3.0.0)", "scikit-learn (>=1.2.2,<2.0.0)", "sqlite-vss (>=0.1.2,<0.2.0)", "streamlit (>=1.18.0,<2.0.0)", "sympy (>=1.12,<2.0)", "telethon (>=1.28.5,<2.0.0)", "timescale-vector (>=0.0.1,<0.0.2)", "tqdm (>=4.48.0)", "upstash-redis (>=0.15.0,<0.16.0)", "xata (>=1.0.0a7,<2.0.0)", "xmltodict (>=0.13.0,<0.14.0)"]
[[package]]
name = "langchain-core"
-version = "0.1.3"
+version = "0.1.4"
description = "Building applications with LLMs through composability"
optional = false
python-versions = ">=3.8.1,<4.0"
files = [
- {file = "langchain_core-0.1.3-py3-none-any.whl", hash = "sha256:bfbbc5dfeb06cfe3fd078e7a12db3a4cfb9d28b715b200a64f7abb7ae1976b17"},
- {file = "langchain_core-0.1.3.tar.gz", hash = "sha256:d8898254dfea1c4ab614f470db40909969604775f7524175f6d9167ea58050c9"},
+ {file = "langchain_core-0.1.4-py3-none-any.whl", hash = "sha256:c62bd362d5abf5359436a99b29629e12a4d1ede9f1704dc958cdb8530a791efd"},
+ {file = "langchain_core-0.1.4.tar.gz", hash = "sha256:f700138689c9014e23d3c29796a892dccf7f2a42901cb8817671823e1a24724c"},
]
[package.dependencies]
@@ -2332,13 +2333,13 @@ files = [
[[package]]
name = "pytest"
-version = "7.4.3"
+version = "7.4.4"
description = "pytest: simple powerful testing with Python"
optional = false
python-versions = ">=3.7"
files = [
- {file = "pytest-7.4.3-py3-none-any.whl", hash = "sha256:0d009c083ea859a71b76adf7c1d502e4bc170b80a8ef002da5806527b9591fac"},
- {file = "pytest-7.4.3.tar.gz", hash = "sha256:d989d136982de4e3b29dabcc838ad581c64e8ed52c11fbe86ddebd9da0818cd5"},
+ {file = "pytest-7.4.4-py3-none-any.whl", hash = "sha256:b090cdf5ed60bf4c45261be03239c2c1c22df034fbffe691abe93cd80cea01d8"},
+ {file = "pytest-7.4.4.tar.gz", hash = "sha256:2cf0005922c6ace4a3e2ec8b4080eb0d9753fdc93107415332f50ce9e7994280"},
]
[package.dependencies]
@@ -3061,60 +3062,60 @@ files = [
[[package]]
name = "sqlalchemy"
-version = "2.0.23"
+version = "2.0.24"
description = "Database Abstraction Library"
optional = false
python-versions = ">=3.7"
files = [
- {file = "SQLAlchemy-2.0.23-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:638c2c0b6b4661a4fd264f6fb804eccd392745c5887f9317feb64bb7cb03b3ea"},
- {file = "SQLAlchemy-2.0.23-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e3b5036aa326dc2df50cba3c958e29b291a80f604b1afa4c8ce73e78e1c9f01d"},
- {file = "SQLAlchemy-2.0.23-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:787af80107fb691934a01889ca8f82a44adedbf5ef3d6ad7d0f0b9ac557e0c34"},
- {file = "SQLAlchemy-2.0.23-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c14eba45983d2f48f7546bb32b47937ee2cafae353646295f0e99f35b14286ab"},
- {file = "SQLAlchemy-2.0.23-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:0666031df46b9badba9bed00092a1ffa3aa063a5e68fa244acd9f08070e936d3"},
- {file = "SQLAlchemy-2.0.23-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:89a01238fcb9a8af118eaad3ffcc5dedaacbd429dc6fdc43fe430d3a941ff965"},
- {file = "SQLAlchemy-2.0.23-cp310-cp310-win32.whl", hash = "sha256:cabafc7837b6cec61c0e1e5c6d14ef250b675fa9c3060ed8a7e38653bd732ff8"},
- {file = "SQLAlchemy-2.0.23-cp310-cp310-win_amd64.whl", hash = "sha256:87a3d6b53c39cd173990de2f5f4b83431d534a74f0e2f88bd16eabb5667e65c6"},
- {file = "SQLAlchemy-2.0.23-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d5578e6863eeb998980c212a39106ea139bdc0b3f73291b96e27c929c90cd8e1"},
- {file = "SQLAlchemy-2.0.23-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:62d9e964870ea5ade4bc870ac4004c456efe75fb50404c03c5fd61f8bc669a72"},
- {file = "SQLAlchemy-2.0.23-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c80c38bd2ea35b97cbf7c21aeb129dcbebbf344ee01a7141016ab7b851464f8e"},
- {file = "SQLAlchemy-2.0.23-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75eefe09e98043cff2fb8af9796e20747ae870c903dc61d41b0c2e55128f958d"},
- {file = "SQLAlchemy-2.0.23-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:bd45a5b6c68357578263d74daab6ff9439517f87da63442d244f9f23df56138d"},
- {file = "SQLAlchemy-2.0.23-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:a86cb7063e2c9fb8e774f77fbf8475516d270a3e989da55fa05d08089d77f8c4"},
- {file = "SQLAlchemy-2.0.23-cp311-cp311-win32.whl", hash = "sha256:b41f5d65b54cdf4934ecede2f41b9c60c9f785620416e8e6c48349ab18643855"},
- {file = "SQLAlchemy-2.0.23-cp311-cp311-win_amd64.whl", hash = "sha256:9ca922f305d67605668e93991aaf2c12239c78207bca3b891cd51a4515c72e22"},
- {file = "SQLAlchemy-2.0.23-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:d0f7fb0c7527c41fa6fcae2be537ac137f636a41b4c5a4c58914541e2f436b45"},
- {file = "SQLAlchemy-2.0.23-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7c424983ab447dab126c39d3ce3be5bee95700783204a72549c3dceffe0fc8f4"},
- {file = "SQLAlchemy-2.0.23-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f508ba8f89e0a5ecdfd3761f82dda2a3d7b678a626967608f4273e0dba8f07ac"},
- {file = "SQLAlchemy-2.0.23-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6463aa765cf02b9247e38b35853923edbf2f6fd1963df88706bc1d02410a5577"},
- {file = "SQLAlchemy-2.0.23-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:e599a51acf3cc4d31d1a0cf248d8f8d863b6386d2b6782c5074427ebb7803bda"},
- {file = "SQLAlchemy-2.0.23-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:fd54601ef9cc455a0c61e5245f690c8a3ad67ddb03d3b91c361d076def0b4c60"},
- {file = "SQLAlchemy-2.0.23-cp312-cp312-win32.whl", hash = "sha256:42d0b0290a8fb0165ea2c2781ae66e95cca6e27a2fbe1016ff8db3112ac1e846"},
- {file = "SQLAlchemy-2.0.23-cp312-cp312-win_amd64.whl", hash = "sha256:227135ef1e48165f37590b8bfc44ed7ff4c074bf04dc8d6f8e7f1c14a94aa6ca"},
- {file = "SQLAlchemy-2.0.23-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:14aebfe28b99f24f8a4c1346c48bc3d63705b1f919a24c27471136d2f219f02d"},
- {file = "SQLAlchemy-2.0.23-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e983fa42164577d073778d06d2cc5d020322425a509a08119bdcee70ad856bf"},
- {file = "SQLAlchemy-2.0.23-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e0dc9031baa46ad0dd5a269cb7a92a73284d1309228be1d5935dac8fb3cae24"},
- {file = "SQLAlchemy-2.0.23-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:5f94aeb99f43729960638e7468d4688f6efccb837a858b34574e01143cf11f89"},
- {file = "SQLAlchemy-2.0.23-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:63bfc3acc970776036f6d1d0e65faa7473be9f3135d37a463c5eba5efcdb24c8"},
- {file = "SQLAlchemy-2.0.23-cp37-cp37m-win32.whl", hash = "sha256:f48ed89dd11c3c586f45e9eec1e437b355b3b6f6884ea4a4c3111a3358fd0c18"},
- {file = "SQLAlchemy-2.0.23-cp37-cp37m-win_amd64.whl", hash = "sha256:1e018aba8363adb0599e745af245306cb8c46b9ad0a6fc0a86745b6ff7d940fc"},
- {file = "SQLAlchemy-2.0.23-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:64ac935a90bc479fee77f9463f298943b0e60005fe5de2aa654d9cdef46c54df"},
- {file = "SQLAlchemy-2.0.23-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:c4722f3bc3c1c2fcc3702dbe0016ba31148dd6efcd2a2fd33c1b4897c6a19693"},
- {file = "SQLAlchemy-2.0.23-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4af79c06825e2836de21439cb2a6ce22b2ca129bad74f359bddd173f39582bf5"},
- {file = "SQLAlchemy-2.0.23-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:683ef58ca8eea4747737a1c35c11372ffeb84578d3aab8f3e10b1d13d66f2bc4"},
- {file = "SQLAlchemy-2.0.23-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:d4041ad05b35f1f4da481f6b811b4af2f29e83af253bf37c3c4582b2c68934ab"},
- {file = "SQLAlchemy-2.0.23-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:aeb397de65a0a62f14c257f36a726945a7f7bb60253462e8602d9b97b5cbe204"},
- {file = "SQLAlchemy-2.0.23-cp38-cp38-win32.whl", hash = "sha256:42ede90148b73fe4ab4a089f3126b2cfae8cfefc955c8174d697bb46210c8306"},
- {file = "SQLAlchemy-2.0.23-cp38-cp38-win_amd64.whl", hash = "sha256:964971b52daab357d2c0875825e36584d58f536e920f2968df8d581054eada4b"},
- {file = "SQLAlchemy-2.0.23-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:616fe7bcff0a05098f64b4478b78ec2dfa03225c23734d83d6c169eb41a93e55"},
- {file = "SQLAlchemy-2.0.23-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:0e680527245895aba86afbd5bef6c316831c02aa988d1aad83c47ffe92655e74"},
- {file = "SQLAlchemy-2.0.23-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9585b646ffb048c0250acc7dad92536591ffe35dba624bb8fd9b471e25212a35"},
- {file = "SQLAlchemy-2.0.23-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4895a63e2c271ffc7a81ea424b94060f7b3b03b4ea0cd58ab5bb676ed02f4221"},
- {file = "SQLAlchemy-2.0.23-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:cc1d21576f958c42d9aec68eba5c1a7d715e5fc07825a629015fe8e3b0657fb0"},
- {file = "SQLAlchemy-2.0.23-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:967c0b71156f793e6662dd839da54f884631755275ed71f1539c95bbada9aaab"},
- {file = "SQLAlchemy-2.0.23-cp39-cp39-win32.whl", hash = "sha256:0a8c6aa506893e25a04233bc721c6b6cf844bafd7250535abb56cb6cc1368884"},
- {file = "SQLAlchemy-2.0.23-cp39-cp39-win_amd64.whl", hash = "sha256:f3420d00d2cb42432c1d0e44540ae83185ccbbc67a6054dcc8ab5387add6620b"},
- {file = "SQLAlchemy-2.0.23-py3-none-any.whl", hash = "sha256:31952bbc527d633b9479f5f81e8b9dfada00b91d6baba021a869095f1a97006d"},
- {file = "SQLAlchemy-2.0.23.tar.gz", hash = "sha256:c1bda93cbbe4aa2aa0aa8655c5aeda505cd219ff3e8da91d1d329e143e4aff69"},
+ {file = "SQLAlchemy-2.0.24-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5f801d85ba4753d4ed97181d003e5d3fa330ac7c4587d131f61d7f968f416862"},
+ {file = "SQLAlchemy-2.0.24-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b35c35e3923ade1e7ac44e150dec29f5863513246c8bf85e2d7d313e3832bcfb"},
+ {file = "SQLAlchemy-2.0.24-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1d9b3fd5eca3c0b137a5e0e468e24ca544ed8ca4783e0e55341b7ed2807518ee"},
+ {file = "SQLAlchemy-2.0.24-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7a6209e689d0ff206c40032b6418e3cfcfc5af044b3f66e381d7f1ae301544b4"},
+ {file = "SQLAlchemy-2.0.24-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:37e89d965b52e8b20571b5d44f26e2124b26ab63758bf1b7598a0e38fb2c4005"},
+ {file = "SQLAlchemy-2.0.24-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c6910eb4ea90c0889f363965cd3c8c45a620ad27b526a7899f0054f6c1b9219e"},
+ {file = "SQLAlchemy-2.0.24-cp310-cp310-win32.whl", hash = "sha256:d8e7e8a150e7b548e7ecd6ebb9211c37265991bf2504297d9454e01b58530fc6"},
+ {file = "SQLAlchemy-2.0.24-cp310-cp310-win_amd64.whl", hash = "sha256:396f05c552f7fa30a129497c41bef5b4d1423f9af8fe4df0c3dcd38f3e3b9a14"},
+ {file = "SQLAlchemy-2.0.24-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:adbd67dac4ebf54587198b63cd30c29fd7eafa8c0cab58893d9419414f8efe4b"},
+ {file = "SQLAlchemy-2.0.24-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a0f611b431b84f55779cbb7157257d87b4a2876b067c77c4f36b15e44ced65e2"},
+ {file = "SQLAlchemy-2.0.24-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:56a0e90a959e18ac5f18c80d0cad9e90cb09322764f536e8a637426afb1cae2f"},
+ {file = "SQLAlchemy-2.0.24-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6db686a1d9f183c639f7e06a2656af25d4ed438eda581de135d15569f16ace33"},
+ {file = "SQLAlchemy-2.0.24-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:f0cc0b486a56dff72dddae6b6bfa7ff201b0eeac29d4bc6f0e9725dc3c360d71"},
+ {file = "SQLAlchemy-2.0.24-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:4a1d4856861ba9e73bac05030cec5852eabfa9ef4af8e56c19d92de80d46fc34"},
+ {file = "SQLAlchemy-2.0.24-cp311-cp311-win32.whl", hash = "sha256:a3c2753bf4f48b7a6024e5e8a394af49b1b12c817d75d06942cae03d14ff87b3"},
+ {file = "SQLAlchemy-2.0.24-cp311-cp311-win_amd64.whl", hash = "sha256:38732884eabc64982a09a846bacf085596ff2371e4e41d20c0734f7e50525d01"},
+ {file = "SQLAlchemy-2.0.24-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:9f992e0f916201731993eab8502912878f02287d9f765ef843677ff118d0e0b1"},
+ {file = "SQLAlchemy-2.0.24-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2587e108463cc2e5b45a896b2e7cc8659a517038026922a758bde009271aed11"},
+ {file = "SQLAlchemy-2.0.24-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0bb7cedcddffca98c40bb0becd3423e293d1fef442b869da40843d751785beb3"},
+ {file = "SQLAlchemy-2.0.24-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:83fa6df0e035689df89ff77a46bf8738696785d3156c2c61494acdcddc75c69d"},
+ {file = "SQLAlchemy-2.0.24-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:cc889fda484d54d0b31feec409406267616536d048a450fc46943e152700bb79"},
+ {file = "SQLAlchemy-2.0.24-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:57ef6f2cb8b09a042d0dbeaa46a30f2df5dd1e1eb889ba258b0d5d7d6011b81c"},
+ {file = "SQLAlchemy-2.0.24-cp312-cp312-win32.whl", hash = "sha256:ea490564435b5b204d8154f0e18387b499ea3cedc1e6af3b3a2ab18291d85aa7"},
+ {file = "SQLAlchemy-2.0.24-cp312-cp312-win_amd64.whl", hash = "sha256:ccfd336f96d4c9bbab0309f2a565bf15c468c2d8b2d277a32f89c5940f71fcf9"},
+ {file = "SQLAlchemy-2.0.24-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:9aaaaa846b10dfbe1bda71079d0e31a7e2cebedda9409fa7dba3dfed1ae803e8"},
+ {file = "SQLAlchemy-2.0.24-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:95bae3d38f8808d79072da25d5e5a6095f36fe1f9d6c614dd72c59ca8397c7c0"},
+ {file = "SQLAlchemy-2.0.24-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a04191a7c8d77e63f6fc1e8336d6c6e93176c0c010833e74410e647f0284f5a1"},
+ {file = "SQLAlchemy-2.0.24-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:acc58b7c2e40235712d857fdfc8f2bda9608f4a850d8d9ac0dd1fc80939ca6ac"},
+ {file = "SQLAlchemy-2.0.24-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:00d76fe5d7cdb5d84d625ce002ce29fefba0bfd98e212ae66793fed30af73931"},
+ {file = "SQLAlchemy-2.0.24-cp37-cp37m-win32.whl", hash = "sha256:29e51f848f843bbd75d74ae64ab1ab06302cb1dccd4549d1f5afe6b4a946edb2"},
+ {file = "SQLAlchemy-2.0.24-cp37-cp37m-win_amd64.whl", hash = "sha256:e9d036e343a604db3f5a6c33354018a84a1d3f6dcae3673358b404286204798c"},
+ {file = "SQLAlchemy-2.0.24-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9bafaa05b19dc07fa191c1966c5e852af516840b0d7b46b7c3303faf1a349bc9"},
+ {file = "SQLAlchemy-2.0.24-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:e69290b921b7833c04206f233d6814c60bee1d135b09f5ae5d39229de9b46cd4"},
+ {file = "SQLAlchemy-2.0.24-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e8398593ccc4440ce6dffcc4f47d9b2d72b9fe7112ac12ea4a44e7d4de364db1"},
+ {file = "SQLAlchemy-2.0.24-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f073321a79c81e1a009218a21089f61d87ee5fa3c9563f6be94f8b41ff181812"},
+ {file = "SQLAlchemy-2.0.24-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:9036ebfd934813990c5b9f71f297e77ed4963720db7d7ceec5a3fdb7cd2ef6ce"},
+ {file = "SQLAlchemy-2.0.24-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:fcf84fe93397a0f67733aa2a38ed4eab9fc6348189fc950e656e1ea198f45668"},
+ {file = "SQLAlchemy-2.0.24-cp38-cp38-win32.whl", hash = "sha256:6f5e75de91c754365c098ac08c13fdb267577ce954fa239dd49228b573ca88d7"},
+ {file = "SQLAlchemy-2.0.24-cp38-cp38-win_amd64.whl", hash = "sha256:9f29c7f0f4b42337ec5a779e166946a9f86d7d56d827e771b69ecbdf426124ac"},
+ {file = "SQLAlchemy-2.0.24-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:07cc423892f2ceda9ae1daa28c0355757f362ecc7505b1ab1a3d5d8dc1c44ac6"},
+ {file = "SQLAlchemy-2.0.24-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:2a479aa1ab199178ff1956b09ca8a0693e70f9c762875d69292d37049ffd0d8f"},
+ {file = "SQLAlchemy-2.0.24-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9b8d0e8578e7f853f45f4512b5c920f6a546cd4bed44137460b2a56534644205"},
+ {file = "SQLAlchemy-2.0.24-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e17e7e27af178d31b436dda6a596703b02a89ba74a15e2980c35ecd9909eea3a"},
+ {file = "SQLAlchemy-2.0.24-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:1ca7903d5e7db791a355b579c690684fac6304478b68efdc7f2ebdcfe770d8d7"},
+ {file = "SQLAlchemy-2.0.24-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:db09e424d7bb89b6215a184ca93b4f29d7f00ea261b787918a1af74143b98c06"},
+ {file = "SQLAlchemy-2.0.24-cp39-cp39-win32.whl", hash = "sha256:a5cd7d30e47f87b21362beeb3e86f1b5886e7d9b0294b230dde3d3f4a1591375"},
+ {file = "SQLAlchemy-2.0.24-cp39-cp39-win_amd64.whl", hash = "sha256:7ae5d44517fe81079ce75cf10f96978284a6db2642c5932a69c82dbae09f009a"},
+ {file = "SQLAlchemy-2.0.24-py3-none-any.whl", hash = "sha256:8f358f5cfce04417b6ff738748ca4806fe3d3ae8040fb4e6a0c9a6973ccf9b6e"},
+ {file = "SQLAlchemy-2.0.24.tar.gz", hash = "sha256:6db97656fd3fe3f7e5b077f12fa6adb5feb6e0b567a3e99f47ecf5f7ea0a09e3"},
]
[package.dependencies]
@@ -3124,7 +3125,7 @@ typing-extensions = ">=4.2.0"
[package.extras]
aiomysql = ["aiomysql (>=0.2.0)", "greenlet (!=0.4.17)"]
aioodbc = ["aioodbc", "greenlet (!=0.4.17)"]
-aiosqlite = ["aiosqlite", "greenlet (!=0.4.17)", "typing-extensions (!=3.10.0.1)"]
+aiosqlite = ["aiosqlite", "greenlet (!=0.4.17)", "typing_extensions (!=3.10.0.1)"]
asyncio = ["greenlet (!=0.4.17)"]
asyncmy = ["asyncmy (>=0.2.3,!=0.2.4,!=0.2.6)", "greenlet (!=0.4.17)"]
mariadb-connector = ["mariadb (>=1.0.1,!=1.1.2,!=1.1.5)"]
@@ -3134,7 +3135,7 @@ mssql-pyodbc = ["pyodbc"]
mypy = ["mypy (>=0.910)"]
mysql = ["mysqlclient (>=1.4.0)"]
mysql-connector = ["mysql-connector-python"]
-oracle = ["cx-oracle (>=8)"]
+oracle = ["cx_oracle (>=8)"]
oracle-oracledb = ["oracledb (>=1.0.1)"]
postgresql = ["psycopg2 (>=2.7)"]
postgresql-asyncpg = ["asyncpg", "greenlet (!=0.4.17)"]
@@ -3144,7 +3145,7 @@ postgresql-psycopg2binary = ["psycopg2-binary"]
postgresql-psycopg2cffi = ["psycopg2cffi"]
postgresql-psycopgbinary = ["psycopg[binary] (>=3.0.7)"]
pymysql = ["pymysql"]
-sqlcipher = ["sqlcipher3-binary"]
+sqlcipher = ["sqlcipher3_binary"]
[[package]]
name = "sympy"
@@ -3630,13 +3631,13 @@ typing-extensions = ">=3.7.4"
[[package]]
name = "tzdata"
-version = "2023.3"
+version = "2023.4"
description = "Provider of IANA time zone data"
optional = false
python-versions = ">=2"
files = [
- {file = "tzdata-2023.3-py2.py3-none-any.whl", hash = "sha256:7e65763eef3120314099b6939b5546db7adce1e7d6f2e179e3df563c70511eda"},
- {file = "tzdata-2023.3.tar.gz", hash = "sha256:11ef1e08e54acb0d4f95bdb1be05da659673de4acbd21bf9c69e94cc5e907a3a"},
+ {file = "tzdata-2023.4-py2.py3-none-any.whl", hash = "sha256:aa3ace4329eeacda5b7beb7ea08ece826c28d761cda36e747cfbf97996d39bf3"},
+ {file = "tzdata-2023.4.tar.gz", hash = "sha256:dd54c94f294765522c77399649b4fefd95522479a664a0cec87f41bebc6148c9"},
]
[[package]]
diff --git a/pyproject.toml b/pyproject.toml
index 86830629a..ab821bbe3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -42,7 +42,6 @@ typer = "*"
setuptools = "*"
wheel = "*"
aiohttp = "*"
-llama-index = {path = "/Users/jeffreyip/mrgpt/repos/llama_index"}
[tool.black]
line-length = 80
From 903352ada676c551b55c6d4343ca927c594d2f7b Mon Sep 17 00:00:00 2001
From: Jeffrey Ip
Date: Tue, 2 Jan 2024 03:54:46 +0800
Subject: [PATCH 21/46] added dependency
---
poetry.lock | 166 ++++++++++++++++++++++++++++++++++++++++++++++++-
pyproject.toml | 1 +
2 files changed, 165 insertions(+), 2 deletions(-)
diff --git a/poetry.lock b/poetry.lock
index e12b30b58..34c10e78c 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -184,6 +184,24 @@ tests = ["attrs[tests-no-zope]", "zope-interface"]
tests-mypy = ["mypy (>=1.6)", "pytest-mypy-plugins"]
tests-no-zope = ["attrs[tests-mypy]", "cloudpickle", "hypothesis", "pympler", "pytest (>=4.3.0)", "pytest-xdist[psutil]"]
+[[package]]
+name = "beautifulsoup4"
+version = "4.12.2"
+description = "Screen-scraping library"
+optional = false
+python-versions = ">=3.6.0"
+files = [
+ {file = "beautifulsoup4-4.12.2-py3-none-any.whl", hash = "sha256:bd2520ca0d9d7d12694a53d44ac482d181b4ec1888909b035a3dbf40d0f57d4a"},
+ {file = "beautifulsoup4-4.12.2.tar.gz", hash = "sha256:492bbc69dca35d12daac71c4db1bfff0c876c00ef4a2ffacce226d4638eb72da"},
+]
+
+[package.dependencies]
+soupsieve = ">1.2"
+
+[package.extras]
+html5lib = ["html5lib"]
+lxml = ["lxml"]
+
[[package]]
name = "bert-score"
version = "0.3.13"
@@ -587,6 +605,23 @@ tests = ["Pillow (>=6.2.1)", "absl-py", "apache-beam (>=2.26.0,<2.44.0)", "elast
torch = ["torch"]
vision = ["Pillow (>=6.2.1)"]
+[[package]]
+name = "deprecated"
+version = "1.2.14"
+description = "Python @deprecated decorator to deprecate old python classes, functions or methods."
+optional = false
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
+files = [
+ {file = "Deprecated-1.2.14-py2.py3-none-any.whl", hash = "sha256:6fac8b097794a90302bdbb17b9b815e732d3c4720583ff1b198499d78470466c"},
+ {file = "Deprecated-1.2.14.tar.gz", hash = "sha256:e5323eb936458dccc2582dc6f9c322c852a775a27065ff2b0c4970b9d53d01b3"},
+]
+
+[package.dependencies]
+wrapt = ">=1.10,<2"
+
+[package.extras]
+dev = ["PyTest", "PyTest-Cov", "bump2version (<1)", "sphinx (<2)", "tox"]
+
[[package]]
name = "dill"
version = "0.3.7"
@@ -1298,6 +1333,43 @@ files = [
pydantic = ">=1,<3"
requests = ">=2,<3"
+[[package]]
+name = "llama-index"
+version = "0.9.24"
+description = "Interface between LLMs and your data"
+optional = false
+python-versions = ">=3.8.1,<4.0"
+files = [
+ {file = "llama_index-0.9.24-py3-none-any.whl", hash = "sha256:aeef8a4fb478d45474261289046f37c2805e3bf3453c156c84088c0414465e5e"},
+ {file = "llama_index-0.9.24.tar.gz", hash = "sha256:48175a35c30427f361068693d6f384baf76865831569ca4e04a1a8b6f10ba269"},
+]
+
+[package.dependencies]
+aiohttp = ">=3.8.6,<4.0.0"
+beautifulsoup4 = ">=4.12.2,<5.0.0"
+dataclasses-json = "*"
+deprecated = ">=1.2.9.3"
+fsspec = ">=2023.5.0"
+httpx = "*"
+nest-asyncio = ">=1.5.8,<2.0.0"
+nltk = ">=3.8.1,<4.0.0"
+numpy = "*"
+openai = ">=1.1.0"
+pandas = "*"
+requests = ">=2.31.0"
+SQLAlchemy = {version = ">=1.4.49", extras = ["asyncio"]}
+tenacity = ">=8.2.0,<9.0.0"
+tiktoken = ">=0.3.3"
+typing-extensions = ">=4.5.0"
+typing-inspect = ">=0.8.0"
+
+[package.extras]
+gradientai = ["gradientai (>=1.4.0)"]
+langchain = ["langchain (>=0.0.303)"]
+local-models = ["optimum[onnxruntime] (>=1.13.2,<2.0.0)", "sentencepiece (>=0.1.99,<0.2.0)", "transformers[torch] (>=4.34.0,<5.0.0)"]
+postgres = ["asyncpg (>=0.28.0,<0.29.0)", "pgvector (>=0.1.0,<0.2.0)", "psycopg-binary (>=3.1.12,<4.0.0)"]
+query-tools = ["guidance (>=0.0.64,<0.0.65)", "jsonpath-ng (>=1.6.0,<2.0.0)", "lm-format-enforcer (>=0.4.3,<0.5.0)", "rank-bm25 (>=0.2.2,<0.3.0)", "scikit-learn", "spacy (>=3.7.1,<4.0.0)"]
+
[[package]]
name = "markdown-it-py"
version = "3.0.0"
@@ -3060,6 +3132,17 @@ files = [
{file = "sniffio-1.3.0.tar.gz", hash = "sha256:e60305c5e5d314f5389259b7f22aaa33d8f7dee49763119234af3755c55b9101"},
]
+[[package]]
+name = "soupsieve"
+version = "2.5"
+description = "A modern CSS selector implementation for Beautiful Soup."
+optional = false
+python-versions = ">=3.8"
+files = [
+ {file = "soupsieve-2.5-py3-none-any.whl", hash = "sha256:eaa337ff55a1579b6549dc679565eac1e3d000563bcb1c8ab0d0fefbc0c2cdc7"},
+ {file = "soupsieve-2.5.tar.gz", hash = "sha256:5663d5a7b3bfaeee0bc4372e7fc48f9cff4940b3eec54a6451cc5299f1097690"},
+]
+
[[package]]
name = "sqlalchemy"
version = "2.0.24"
@@ -3119,7 +3202,7 @@ files = [
]
[package.dependencies]
-greenlet = {version = "!=0.4.17", markers = "platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\""}
+greenlet = {version = "!=0.4.17", optional = true, markers = "platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\" or extra == \"asyncio\""}
typing-extensions = ">=4.2.0"
[package.extras]
@@ -3670,6 +3753,85 @@ files = [
[package.extras]
test = ["pytest (>=6.0.0)", "setuptools (>=65)"]
+[[package]]
+name = "wrapt"
+version = "1.16.0"
+description = "Module for decorators, wrappers and monkey patching."
+optional = false
+python-versions = ">=3.6"
+files = [
+ {file = "wrapt-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ffa565331890b90056c01db69c0fe634a776f8019c143a5ae265f9c6bc4bd6d4"},
+ {file = "wrapt-1.16.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e4fdb9275308292e880dcbeb12546df7f3e0f96c6b41197e0cf37d2826359020"},
+ {file = "wrapt-1.16.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb2dee3874a500de01c93d5c71415fcaef1d858370d405824783e7a8ef5db440"},
+ {file = "wrapt-1.16.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2a88e6010048489cda82b1326889ec075a8c856c2e6a256072b28eaee3ccf487"},
+ {file = "wrapt-1.16.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ac83a914ebaf589b69f7d0a1277602ff494e21f4c2f743313414378f8f50a4cf"},
+ {file = "wrapt-1.16.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:73aa7d98215d39b8455f103de64391cb79dfcad601701a3aa0dddacf74911d72"},
+ {file = "wrapt-1.16.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:807cc8543a477ab7422f1120a217054f958a66ef7314f76dd9e77d3f02cdccd0"},
+ {file = "wrapt-1.16.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:bf5703fdeb350e36885f2875d853ce13172ae281c56e509f4e6eca049bdfb136"},
+ {file = "wrapt-1.16.0-cp310-cp310-win32.whl", hash = "sha256:f6b2d0c6703c988d334f297aa5df18c45e97b0af3679bb75059e0e0bd8b1069d"},
+ {file = "wrapt-1.16.0-cp310-cp310-win_amd64.whl", hash = "sha256:decbfa2f618fa8ed81c95ee18a387ff973143c656ef800c9f24fb7e9c16054e2"},
+ {file = "wrapt-1.16.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1a5db485fe2de4403f13fafdc231b0dbae5eca4359232d2efc79025527375b09"},
+ {file = "wrapt-1.16.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:75ea7d0ee2a15733684badb16de6794894ed9c55aa5e9903260922f0482e687d"},
+ {file = "wrapt-1.16.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a452f9ca3e3267cd4d0fcf2edd0d035b1934ac2bd7e0e57ac91ad6b95c0c6389"},
+ {file = "wrapt-1.16.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:43aa59eadec7890d9958748db829df269f0368521ba6dc68cc172d5d03ed8060"},
+ {file = "wrapt-1.16.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:72554a23c78a8e7aa02abbd699d129eead8b147a23c56e08d08dfc29cfdddca1"},
+ {file = "wrapt-1.16.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:d2efee35b4b0a347e0d99d28e884dfd82797852d62fcd7ebdeee26f3ceb72cf3"},
+ {file = "wrapt-1.16.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:6dcfcffe73710be01d90cae08c3e548d90932d37b39ef83969ae135d36ef3956"},
+ {file = "wrapt-1.16.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:eb6e651000a19c96f452c85132811d25e9264d836951022d6e81df2fff38337d"},
+ {file = "wrapt-1.16.0-cp311-cp311-win32.whl", hash = "sha256:66027d667efe95cc4fa945af59f92c5a02c6f5bb6012bff9e60542c74c75c362"},
+ {file = "wrapt-1.16.0-cp311-cp311-win_amd64.whl", hash = "sha256:aefbc4cb0a54f91af643660a0a150ce2c090d3652cf4052a5397fb2de549cd89"},
+ {file = "wrapt-1.16.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:5eb404d89131ec9b4f748fa5cfb5346802e5ee8836f57d516576e61f304f3b7b"},
+ {file = "wrapt-1.16.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9090c9e676d5236a6948330e83cb89969f433b1943a558968f659ead07cb3b36"},
+ {file = "wrapt-1.16.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94265b00870aa407bd0cbcfd536f17ecde43b94fb8d228560a1e9d3041462d73"},
+ {file = "wrapt-1.16.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f2058f813d4f2b5e3a9eb2eb3faf8f1d99b81c3e51aeda4b168406443e8ba809"},
+ {file = "wrapt-1.16.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:98b5e1f498a8ca1858a1cdbffb023bfd954da4e3fa2c0cb5853d40014557248b"},
+ {file = "wrapt-1.16.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:14d7dc606219cdd7405133c713f2c218d4252f2a469003f8c46bb92d5d095d81"},
+ {file = "wrapt-1.16.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:49aac49dc4782cb04f58986e81ea0b4768e4ff197b57324dcbd7699c5dfb40b9"},
+ {file = "wrapt-1.16.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:418abb18146475c310d7a6dc71143d6f7adec5b004ac9ce08dc7a34e2babdc5c"},
+ {file = "wrapt-1.16.0-cp312-cp312-win32.whl", hash = "sha256:685f568fa5e627e93f3b52fda002c7ed2fa1800b50ce51f6ed1d572d8ab3e7fc"},
+ {file = "wrapt-1.16.0-cp312-cp312-win_amd64.whl", hash = "sha256:dcdba5c86e368442528f7060039eda390cc4091bfd1dca41e8046af7c910dda8"},
+ {file = "wrapt-1.16.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:d462f28826f4657968ae51d2181a074dfe03c200d6131690b7d65d55b0f360f8"},
+ {file = "wrapt-1.16.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a33a747400b94b6d6b8a165e4480264a64a78c8a4c734b62136062e9a248dd39"},
+ {file = "wrapt-1.16.0-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b3646eefa23daeba62643a58aac816945cadc0afaf21800a1421eeba5f6cfb9c"},
+ {file = "wrapt-1.16.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ebf019be5c09d400cf7b024aa52b1f3aeebeff51550d007e92c3c1c4afc2a40"},
+ {file = "wrapt-1.16.0-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:0d2691979e93d06a95a26257adb7bfd0c93818e89b1406f5a28f36e0d8c1e1fc"},
+ {file = "wrapt-1.16.0-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:1acd723ee2a8826f3d53910255643e33673e1d11db84ce5880675954183ec47e"},
+ {file = "wrapt-1.16.0-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:bc57efac2da352a51cc4658878a68d2b1b67dbe9d33c36cb826ca449d80a8465"},
+ {file = "wrapt-1.16.0-cp36-cp36m-win32.whl", hash = "sha256:da4813f751142436b075ed7aa012a8778aa43a99f7b36afe9b742d3ed8bdc95e"},
+ {file = "wrapt-1.16.0-cp36-cp36m-win_amd64.whl", hash = "sha256:6f6eac2360f2d543cc875a0e5efd413b6cbd483cb3ad7ebf888884a6e0d2e966"},
+ {file = "wrapt-1.16.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:a0ea261ce52b5952bf669684a251a66df239ec6d441ccb59ec7afa882265d593"},
+ {file = "wrapt-1.16.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7bd2d7ff69a2cac767fbf7a2b206add2e9a210e57947dd7ce03e25d03d2de292"},
+ {file = "wrapt-1.16.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9159485323798c8dc530a224bd3ffcf76659319ccc7bbd52e01e73bd0241a0c5"},
+ {file = "wrapt-1.16.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a86373cf37cd7764f2201b76496aba58a52e76dedfaa698ef9e9688bfd9e41cf"},
+ {file = "wrapt-1.16.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:73870c364c11f03ed072dda68ff7aea6d2a3a5c3fe250d917a429c7432e15228"},
+ {file = "wrapt-1.16.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:b935ae30c6e7400022b50f8d359c03ed233d45b725cfdd299462f41ee5ffba6f"},
+ {file = "wrapt-1.16.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:db98ad84a55eb09b3c32a96c576476777e87c520a34e2519d3e59c44710c002c"},
+ {file = "wrapt-1.16.0-cp37-cp37m-win32.whl", hash = "sha256:9153ed35fc5e4fa3b2fe97bddaa7cbec0ed22412b85bcdaf54aeba92ea37428c"},
+ {file = "wrapt-1.16.0-cp37-cp37m-win_amd64.whl", hash = "sha256:66dfbaa7cfa3eb707bbfcd46dab2bc6207b005cbc9caa2199bcbc81d95071a00"},
+ {file = "wrapt-1.16.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1dd50a2696ff89f57bd8847647a1c363b687d3d796dc30d4dd4a9d1689a706f0"},
+ {file = "wrapt-1.16.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:44a2754372e32ab315734c6c73b24351d06e77ffff6ae27d2ecf14cf3d229202"},
+ {file = "wrapt-1.16.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e9723528b9f787dc59168369e42ae1c3b0d3fadb2f1a71de14531d321ee05b0"},
+ {file = "wrapt-1.16.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dbed418ba5c3dce92619656802cc5355cb679e58d0d89b50f116e4a9d5a9603e"},
+ {file = "wrapt-1.16.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:941988b89b4fd6b41c3f0bfb20e92bd23746579736b7343283297c4c8cbae68f"},
+ {file = "wrapt-1.16.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:6a42cd0cfa8ffc1915aef79cb4284f6383d8a3e9dcca70c445dcfdd639d51267"},
+ {file = "wrapt-1.16.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:1ca9b6085e4f866bd584fb135a041bfc32cab916e69f714a7d1d397f8c4891ca"},
+ {file = "wrapt-1.16.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:d5e49454f19ef621089e204f862388d29e6e8d8b162efce05208913dde5b9ad6"},
+ {file = "wrapt-1.16.0-cp38-cp38-win32.whl", hash = "sha256:c31f72b1b6624c9d863fc095da460802f43a7c6868c5dda140f51da24fd47d7b"},
+ {file = "wrapt-1.16.0-cp38-cp38-win_amd64.whl", hash = "sha256:490b0ee15c1a55be9c1bd8609b8cecd60e325f0575fc98f50058eae366e01f41"},
+ {file = "wrapt-1.16.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9b201ae332c3637a42f02d1045e1d0cccfdc41f1f2f801dafbaa7e9b4797bfc2"},
+ {file = "wrapt-1.16.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:2076fad65c6736184e77d7d4729b63a6d1ae0b70da4868adeec40989858eb3fb"},
+ {file = "wrapt-1.16.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c5cd603b575ebceca7da5a3a251e69561bec509e0b46e4993e1cac402b7247b8"},
+ {file = "wrapt-1.16.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b47cfad9e9bbbed2339081f4e346c93ecd7ab504299403320bf85f7f85c7d46c"},
+ {file = "wrapt-1.16.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f8212564d49c50eb4565e502814f694e240c55551a5f1bc841d4fcaabb0a9b8a"},
+ {file = "wrapt-1.16.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:5f15814a33e42b04e3de432e573aa557f9f0f56458745c2074952f564c50e664"},
+ {file = "wrapt-1.16.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:db2e408d983b0e61e238cf579c09ef7020560441906ca990fe8412153e3b291f"},
+ {file = "wrapt-1.16.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:edfad1d29c73f9b863ebe7082ae9321374ccb10879eeabc84ba3b69f2579d537"},
+ {file = "wrapt-1.16.0-cp39-cp39-win32.whl", hash = "sha256:ed867c42c268f876097248e05b6117a65bcd1e63b779e916fe2e33cd6fd0d3c3"},
+ {file = "wrapt-1.16.0-cp39-cp39-win_amd64.whl", hash = "sha256:eb1b046be06b0fce7249f1d025cd359b4b80fc1c3e24ad9eca33e0dcdb2e4a35"},
+ {file = "wrapt-1.16.0-py3-none-any.whl", hash = "sha256:6906c4100a8fcbf2fa735f6059214bb13b97f75b1a61777fcf6432121ef12ef1"},
+ {file = "wrapt-1.16.0.tar.gz", hash = "sha256:5f370f952971e7d17c7d1ead40e49f32345a7f7a5373571ef44d800d06b1899d"},
+]
+
[[package]]
name = "xxhash"
version = "3.4.1"
@@ -3893,4 +4055,4 @@ multidict = ">=4.0"
[metadata]
lock-version = "2.0"
python-versions = ">=3.10,<3.12"
-content-hash = "8b882533d1c305fc650e8609cb93c420c3becbff3abb398cb247f5bbd42a73ec"
+content-hash = "2b0939b9cce5fa623a932eb89645a6ef3cc7384addd232339faded4a4c35cab7"
diff --git a/pyproject.toml b/pyproject.toml
index ab821bbe3..4fd145270 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -42,6 +42,7 @@ typer = "*"
setuptools = "*"
wheel = "*"
aiohttp = "*"
+llama-index = "*"
[tool.black]
line-length = 80
From 03d767f072af5fb01ffa768532a81ea90928b64c Mon Sep 17 00:00:00 2001
From: Jeffrey Ip
Date: Wed, 3 Jan 2024 03:18:11 +0800
Subject: [PATCH 22/46] updated docs
---
docs/docs/confident-ai-evals-in-production.mdx | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/docs/docs/confident-ai-evals-in-production.mdx b/docs/docs/confident-ai-evals-in-production.mdx
index 7a1fc87b9..e08abb723 100644
--- a/docs/docs/confident-ai-evals-in-production.mdx
+++ b/docs/docs/confident-ai-evals-in-production.mdx
@@ -25,6 +25,10 @@ Simply add `deepeval.track(...)` in your application to start tracking events. T
- [Optional] `additional_data`: type `dict`
- [Optional] `fail_silently`: type `bool`, defaults to True
+:::note
+Please do **NOT** provide placeholder values for optional parameters. Leave them blank instead.
+:::
+
```python
import deepeval
From 4069536f34fb15a37dd252e4c969a6033f95ea93 Mon Sep 17 00:00:00 2001
From: Jeffrey Ip
Date: Wed, 3 Jan 2024 18:42:01 +0800
Subject: [PATCH 23/46] updated docs
---
docs/docs/metrics-toxicity.mdx | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/docs/docs/metrics-toxicity.mdx b/docs/docs/metrics-toxicity.mdx
index 1604d30e7..0af3e030a 100644
--- a/docs/docs/metrics-toxicity.mdx
+++ b/docs/docs/metrics-toxicity.mdx
@@ -6,6 +6,14 @@ sidebar_label: Toxicity
The toxicity metric is another **referenceless** metric that evaluates toxicity in your LLM's outputs. This is particularly useful for fine-tuning use cases.
+## Installation
+
+Toxicity in `deepeval` requires an additional installation:
+
+```bash
+pip install detoxify
+```
+
## Required Parameters
To use the `NonToxicMetric`, you'll have to provide the following parameters when creating an `LLMTestCase`:
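
The parameter list itself is truncated in this hunk. As a rough sketch of how the metric might be wired up once `detoxify` is installed — the `evaluation_params` and `minimum_score` arguments below are assumptions and should be checked against the actual `NonToxicMetric` signature:

```python
from deepeval import assert_test
from deepeval.metrics import NonToxicMetric
from deepeval.test_case import LLMTestCase, LLMTestCaseParams

# Assumed signature: evaluation_params selects which test case fields are
# scored for toxicity; minimum_score follows the same threshold convention
# as the other metrics in this patch series.
metric = NonToxicMetric(
    evaluation_params=[LLMTestCaseParams.ACTUAL_OUTPUT],
    minimum_score=0.5,
)
test_case = LLMTestCase(
    input="How do I reset my password?",
    actual_output="Click 'Forgot password' on the login page.",
)
assert_test(test_case, [metric])
```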
From 6eb34e265261326fac9a5e78053ccaa8c9b47cc5 Mon Sep 17 00:00:00 2001
From: Jeffrey Ip
Date: Wed, 3 Jan 2024 19:18:09 +0800
Subject: [PATCH 24/46] Sentry counter
---
deepeval/__init__.py | 2 +-
deepeval/cli/test.py | 2 ++
deepeval/evaluate.py | 3 +++
deepeval/telemetry.py | 50 ++++++++++++++++++++-----------------------
4 files changed, 29 insertions(+), 28 deletions(-)
diff --git a/deepeval/__init__.py b/deepeval/__init__.py
index 099bbdfb8..6d6c39dc6 100644
--- a/deepeval/__init__.py
+++ b/deepeval/__init__.py
@@ -2,13 +2,13 @@
import re
# Optionally add telemetry
-from .telemetry import *
from ._version import __version__
from .decorators.hyperparameters import set_hyperparameters
from deepeval.event import track
from deepeval.evaluate import evaluate, run_test, assert_test
from deepeval.test_run import on_test_run_end
+from deepeval.telemetry import *
__all__ = [
"set_hyperparameters",
diff --git a/deepeval/cli/test.py b/deepeval/cli/test.py
index 0187476e8..7ea86e1bb 100644
--- a/deepeval/cli/test.py
+++ b/deepeval/cli/test.py
@@ -6,6 +6,7 @@
from deepeval.test_run import test_run_manager, TEMP_FILE_NAME
from deepeval.utils import delete_file_if_exists
from deepeval.test_run import invoke_test_run_end_hook
+from deepeval.telemetry import capture_evaluation_count
app = typer.Typer(name="test")
@@ -74,6 +75,7 @@ def run(
pytest_args.extend(["-p", "plugins"])
retcode = pytest.main(pytest_args)
+ capture_evaluation_count()
test_run_manager.wrap_up_test_run()
invoke_test_run_end_hook()
diff --git a/deepeval/evaluate.py b/deepeval/evaluate.py
index 53ddf5a36..e433931d5 100644
--- a/deepeval/evaluate.py
+++ b/deepeval/evaluate.py
@@ -6,6 +6,7 @@
from dataclasses import dataclass
import copy
+from deepeval.telemetry import capture_evaluation_count
from deepeval.progress_context import progress_context
from deepeval.metrics import BaseMetric
from deepeval.test_case import LLMTestCase
@@ -90,6 +91,7 @@ def run_test(
test_run_manager.reset()
with progress_context("Executing run_test()..."):
test_result = execute_test([test_case], metrics, False)[0]
+ capture_evaluation_count()
print_test_result(test_result)
print("")
print("-" * 70)
@@ -120,6 +122,7 @@ def evaluate(test_cases: List[LLMTestCase], metrics: List[BaseMetric]):
test_run_manager.reset()
with progress_context("Evaluating testcases..."):
test_results = execute_test(test_cases, metrics, True)
+ capture_evaluation_count()
for test_result in test_results:
print_test_result(test_result)
print("")
diff --git a/deepeval/telemetry.py b/deepeval/telemetry.py
index c0c2e5364..d6915bf11 100644
--- a/deepeval/telemetry.py
+++ b/deepeval/telemetry.py
@@ -1,9 +1,10 @@
import os
import socket
import sys
+import sentry_sdk
-def check_firewall():
+def blocked_by_firewall():
try:
socket.create_connection(("www.google.com", 80))
return False
@@ -11,29 +12,24 @@ def check_firewall():
return True
-if os.getenv("ERROR_REPORTING") == "YES" and not check_firewall():
- try:
- import sentry_sdk
-
- sentry_sdk.init(
- dsn="https://5ef587d58109ee45d6544f3657efdd1f@o4506098477236224.ingest.sentry.io/4506098479136768",
- # Set traces_sample_rate to 1.0 to capture 100%
- # of transactions for performance monitoring.
- traces_sample_rate=1.0,
- # Set profiles_sample_rate to 1.0 to profile 100%
- # of sampled transactions.
- # We recommend adjusting this value in production.
- profiles_sample_rate=1.0,
- )
-
- # Add a global error handler
- def handle_exception(exc_type, exc_value, exc_traceback):
- print({"exc_type": exc_type, "exc_value": exc_value})
- sentry_sdk.capture_exception(exc_value)
- sys.__excepthook__(exc_type, exc_value, exc_traceback)
-
- sys.excepthook = handle_exception
-
- except ModuleNotFoundError:
- # sentry_sdk not installed
- pass
+def capture_evaluation_count():
+ sentry_sdk.capture_message("evaluation ran!")
+
+
+sentry_sdk.init(
+ dsn="https://5ef587d58109ee45d6544f3657efdd1f@o4506098477236224.ingest.sentry.io/4506098479136768",
+ profiles_sample_rate=1.0,
+ traces_sample_rate=1.0, # For performance monitoring
+ send_default_pii=False, # Don't send personally identifiable information
+ attach_stacktrace=False, # Don't attach stack traces to messages
+ default_integrations=False, # Disable Sentry's default integrations
+)
+
+if os.getenv("ERROR_REPORTING") == "YES" and not blocked_by_firewall():
+
+ def handle_exception(exc_type, exc_value, exc_traceback):
+ print({"exc_type": exc_type, "exc_value": exc_value})
+ sentry_sdk.capture_exception(exc_value)
+ sys.__excepthook__(exc_type, exc_value, exc_traceback)
+
+ sys.excepthook = handle_exception
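
The rewritten module initializes Sentry unconditionally at import time, but only installs the global exception hook when error reporting is enabled and the connection is not blocked. A sketch of opting in, with the environment variable name taken from the diff above:

```python
import os

# Enable deepeval's Sentry error reporting before the first import; the
# exception hook in telemetry.py is only installed when this is set to
# "YES" and blocked_by_firewall() returns False.
os.environ["ERROR_REPORTING"] = "YES"

import deepeval  # telemetry.py runs sentry_sdk.init() on import
```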
From 5c7a40a6e22f766d6b184a7cbf3235cc67ef235e Mon Sep 17 00:00:00 2001
From: Jeffrey Ip
Date: Wed, 3 Jan 2024 19:21:52 +0800
Subject: [PATCH 25/46] Make threshold dynamic
---
deepeval/test_run/test_run.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/deepeval/test_run/test_run.py b/deepeval/test_run/test_run.py
index 4d1f3d1e0..7ef742632 100644
--- a/deepeval/test_run/test_run.py
+++ b/deepeval/test_run/test_run.py
@@ -91,7 +91,7 @@ def add_llm_test_case(
metrics_metadata = MetricsMetadata(
metric=metric.__name__,
score=metric.score,
- minimumScore=0.5,
+ minimumScore=metric.minimum_score,
reason=metric.reason,
)
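
This one-line change means the persisted metadata reflects whatever threshold each metric was constructed with, rather than a hard-coded 0.5. A sketch of the effect, using a metric from earlier in this series:

```python
from deepeval.metrics import AnswerRelevancyMetric

# The test run now records minimumScore=0.8 for this metric instead of
# the previously hard-coded 0.5.
metric = AnswerRelevancyMetric(minimum_score=0.8)
```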
From 8689b7b299a2ba1caa93ba6068e0b07930a75b33 Mon Sep 17 00:00:00 2001
From: Jeffrey Ip <143328635+penguine-ip@users.noreply.github.com>
Date: Wed, 3 Jan 2024 19:32:35 +0800
Subject: [PATCH 26/46] Update README.md
---
README.md | 32 ++++++++++++++++++++++++++++----
1 file changed, 28 insertions(+), 4 deletions(-)
diff --git a/README.md b/README.md
index cf3851ef2..286915e03 100644
--- a/README.md
+++ b/README.md
@@ -18,7 +18,7 @@
-**DeepEval** is a simple-to-use, open-source evaluation framework for LLM applications. It is similar to Pytest but specialized for unit testing LLM applications. DeepEval evaluates performance based on metrics such as hallucination, answer relevancy, RAGAS, etc., using LLMs and various other NLP models **locally on your machine**.
+**DeepEval** is a simple-to-use, open-source LLM evaluation framework. It is similar to Pytest but specialized for unit testing LLM applications. DeepEval evaluates performance based on metrics such as hallucination, answer relevancy, RAGAS, etc., using LLMs and various other NLP models that run **locally on your machine**.
Whether your application is implemented via RAG or fine-tuning, LangChain or LlamaIndex, DeepEval has you covered. With it, you can easily determine the optimal hyperparameters to improve your RAG pipeline, prevent prompt drifting, or even transition from OpenAI to hosting your own Llama2 with confidence.
@@ -26,7 +26,7 @@ Whether your application is implemented via RAG or fine-tuning, LangChain or Lla
# Features
-- Large variety of ready-to-use evaluation metrics powered by LLMs (all with explanations), statistical methods, or NLP models that runs **locally on your machine**:
+- Large variety of ready-to-use LLM evaluation metrics powered by LLMs (all with explanations), statistical methods, or NLP models that run **locally on your machine**:
- Hallucination
- Summarization
- Answer Relevancy
@@ -38,8 +38,8 @@ Whether your application is implemented via RAG or fine-tuning, LangChain or Lla
- Toxicity
- Bias
- etc.
+- Evaluate your entire dataset in bulk in under 20 lines of Python code **in parallel**. Do this via the CLI in a Pytest-like manner, or through our `evaluate()` function.
- Easily create your own custom metrics that are automatically integrated with DeepEval's ecosystem by inheriting DeepEval's base metric class.
-- Evaluate your entire dataset in bulk in under 20 lines of Python code **in parallel**.
- [Automatically integrated with Confident AI](https://app.confident-ai.com) for continuous evaluation throughout the lifetime of your LLM (app):
  - log evaluation results and analyze metric passes / fails
  - compare and pick the optimal hyperparameters (e.g. prompt templates, chunk size, models used, etc.) based on evaluation results
@@ -115,6 +115,29 @@ deepeval test run test_chatbot.py
+## Evaluating Without Pytest Integration
+
+Alternatively, you can evaluate without Pytest, which is more suited for a notebook environment.
+
+```python
+from deepeval import evaluate
+from deepeval.metrics import HallucinationMetric
+from deepeval.test_case import LLMTestCase
+
+input = "What if these shoes don't fit?"
+context = ["All customers are eligible for a 30 day full refund at no extra costs."]
+# Replace this with the actual output from your LLM application
+actual_output = "We offer a 30-day full refund at no extra costs."
+
+hallucination_metric = HallucinationMetric(minimum_score=0.7)
+test_case = LLMTestCase(
+ input=input,
+ actual_output=actual_output,
+ context=context
+)
+evaluate([test_case], [hallucination_metric])
+```
+
## Evaluating a Dataset / Test Cases in Bulk
In DeepEval, a dataset is simply a collection of test cases. Here is how you can evaluate things in bulk:
@@ -148,7 +171,7 @@ deepeval test run test_.py -n 4
-Alternatively, although we recommend using `deepeval test run`, you can evaluate a dataset/test cases without using pytest:
+Alternatively, although we recommend using `deepeval test run`, you can evaluate a dataset/test cases without using our Pytest integration:
```python
from deepeval import evaluate
@@ -168,6 +191,7 @@ We offer a [free web platform](https://app.confident-ai.com) for you to:
3. Compare and pick the optimal hyperparameters (prompt templates, models, chunk size, etc.).
4. Create, manage, and centralize your evaluation datasets.
5. Track events in production and augment your evaluation dataset for continuous evaluation.
+6. Track events in production and view live evaluation results over time.
Everything on Confident AI, including how to use Confident is available [here](https://docs.confident-ai.com/docs/confident-ai-introduction).
From 7ec7044ef5a2fb210a21d9809a5d43981340c391 Mon Sep 17 00:00:00 2001
From: Jeffrey Ip
Date: Wed, 3 Jan 2024 20:55:30 +0800
Subject: [PATCH 27/46] new release
---
deepeval/_version.py | 2 +-
pyproject.toml | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/deepeval/_version.py b/deepeval/_version.py
index f81ac3edf..ee489183b 100644
--- a/deepeval/_version.py
+++ b/deepeval/_version.py
@@ -1 +1 @@
-__version__: str = "0.20.43"
+__version__: str = "0.20.44"
diff --git a/pyproject.toml b/pyproject.toml
index 4fd145270..df3ada338 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "deepeval"
-version = "0.20.43"
+version = "0.20.44"
description = "The Evaluation Framework for LLMs"
authors = ["Jeffrey Ip "]
license = "Apache-2.0"
From 5497a1858c8db2788981494fd9b76bb2289458dd Mon Sep 17 00:00:00 2001
From: Jeffrey Ip
Date: Wed, 3 Jan 2024 21:23:16 +0800
Subject: [PATCH 28/46] added progress loading
---
deepeval/metrics/answer_relevancy.py | 31 +++++++++++----------
deepeval/metrics/contextual_precision.py | 34 ++++++++++++------------
deepeval/metrics/contextual_recall.py | 27 ++++++++++---------
deepeval/metrics/contextual_relevancy.py | 29 ++++++++++----------
deepeval/metrics/faithfulness.py | 31 +++++++++++----------
deepeval/progress_context.py | 16 +++++++++++
deepeval/templates.py | 4 ++-
tests/test_answer_relevancy.py | 2 +-
8 files changed, 95 insertions(+), 79 deletions(-)
diff --git a/deepeval/metrics/answer_relevancy.py b/deepeval/metrics/answer_relevancy.py
index 8380976ff..a4cdadbb2 100644
--- a/deepeval/metrics/answer_relevancy.py
+++ b/deepeval/metrics/answer_relevancy.py
@@ -7,6 +7,7 @@
from deepeval.metrics import BaseMetric
from deepeval.models import GPTModel
from deepeval.templates import AnswerRelevancyTemplate
+from deepeval.progress_context import metrics_progress_context
class AnswerRelvancyVerdict(BaseModel):
@@ -35,24 +36,22 @@ def measure(self, test_case: LLMTestCase) -> float:
raise ValueError(
"Input, actual output, or retrieval context cannot be None"
)
- print(
- "✨ 🍰 ✨ You're using DeepEval's latest Answer Relevancy Metric! This may take a minute..."
- )
- self.key_points: List[str] = self._generate_key_points(
- test_case.actual_output, "\n".join(test_case.retrieval_context)
- )
- self.verdicts: List[AnswerRelvancyVerdict] = self._generate_verdicts(
- test_case.input
- )
+ with metrics_progress_context(self.__name__):
+ self.key_points: List[str] = self._generate_key_points(
+ test_case.actual_output, "\n".join(test_case.retrieval_context)
+ )
+ self.verdicts: List[
+ AnswerRelvancyVerdict
+ ] = self._generate_verdicts(test_case.input)
- answer_relevancy_score = self._generate_score()
+ answer_relevancy_score = self._generate_score()
- self.reason = self._generate_reason(
- test_case.input, test_case.actual_output, answer_relevancy_score
- )
- self.success = answer_relevancy_score >= self.minimum_score
- self.score = answer_relevancy_score
- return self.score
+ self.reason = self._generate_reason(
+ test_case.input, test_case.actual_output, answer_relevancy_score
+ )
+ self.success = answer_relevancy_score >= self.minimum_score
+ self.score = answer_relevancy_score
+ return self.score
def _generate_score(self):
relevant_count = 0
diff --git a/deepeval/metrics/contextual_precision.py b/deepeval/metrics/contextual_precision.py
index 622c6c1b1..8c7f0b9d6 100644
--- a/deepeval/metrics/contextual_precision.py
+++ b/deepeval/metrics/contextual_precision.py
@@ -7,6 +7,7 @@
from deepeval.metrics import BaseMetric
from deepeval.models import GPTModel
from deepeval.templates import ContextualPrecisionTemplate
+from deepeval.progress_context import metrics_progress_context
class ContextualPrecisionVerdict(BaseModel):
@@ -36,25 +37,24 @@ def measure(self, test_case: LLMTestCase) -> float:
raise ValueError(
"Input, actual output, expected output, or retrieval context cannot be None"
)
- print(
- "✨ 🍰 ✨ You're using DeepEval's latest Contextual Precision Metric! This may take a minute..."
- )
- self.verdicts: List[
- ContextualPrecisionVerdict
- ] = self._generate_verdicts(
- test_case.input,
- test_case.expected_output,
- test_case.retrieval_context,
- )
- contextual_precision_score = self._generate_score()
- self.reason = self._generate_reason(
- test_case.input, contextual_precision_score
- )
+ with metrics_progress_context(self.__name__):
+ self.verdicts: List[
+ ContextualPrecisionVerdict
+ ] = self._generate_verdicts(
+ test_case.input,
+ test_case.expected_output,
+ test_case.retrieval_context,
+ )
+ contextual_precision_score = self._generate_score()
+
+ self.reason = self._generate_reason(
+ test_case.input, contextual_precision_score
+ )
- self.success = contextual_precision_score >= self.minimum_score
- self.score = contextual_precision_score
- return self.score
+ self.success = contextual_precision_score >= self.minimum_score
+ self.score = contextual_precision_score
+ return self.score
def _generate_reason(self, input: str, score: float):
if self.include_reason is False:
diff --git a/deepeval/metrics/contextual_recall.py b/deepeval/metrics/contextual_recall.py
index bdf65878a..347a97463 100644
--- a/deepeval/metrics/contextual_recall.py
+++ b/deepeval/metrics/contextual_recall.py
@@ -7,6 +7,7 @@
from deepeval.metrics import BaseMetric
from deepeval.models import GPTModel
from deepeval.templates import ContextualRecallTemplate
+from deepeval.progress_context import metrics_progress_context
class ContextualRecallVerdict(BaseModel):
@@ -36,22 +37,22 @@ def measure(self, test_case: LLMTestCase) -> float:
raise ValueError(
"Input, actual output, expected output, or retrieval context cannot be None"
)
- print(
- "✨ 🍰 ✨ You're using DeepEval's latest Contextual Recall Metric! This may take a minute..."
- )
- self.verdicts: List[ContextualRecallVerdict] = self._generate_verdicts(
- test_case.expected_output, test_case.retrieval_context
- )
+ with metrics_progress_context(self.__name__):
+ self.verdicts: List[
+ ContextualRecallVerdict
+ ] = self._generate_verdicts(
+ test_case.expected_output, test_case.retrieval_context
+ )
- contextual_recall_score = self._generate_score()
+ contextual_recall_score = self._generate_score()
- self.reason = self._generate_reason(
- test_case.expected_output, contextual_recall_score
- )
+ self.reason = self._generate_reason(
+ test_case.expected_output, contextual_recall_score
+ )
- self.success = contextual_recall_score >= self.minimum_score
- self.score = contextual_recall_score
- return self.score
+ self.success = contextual_recall_score >= self.minimum_score
+ self.score = contextual_recall_score
+ return self.score
def _generate_reason(self, expected_output: str, score: float):
if self.include_reason is False:
diff --git a/deepeval/metrics/contextual_relevancy.py b/deepeval/metrics/contextual_relevancy.py
index 73ccbfc43..556d0fe5b 100644
--- a/deepeval/metrics/contextual_relevancy.py
+++ b/deepeval/metrics/contextual_relevancy.py
@@ -8,6 +8,7 @@
from deepeval.metrics import BaseMetric
from deepeval.models import GPTModel
from deepeval.templates import ContextualRelevancyTemplate
+from deepeval.progress_context import metrics_progress_context
class ContextualRelevancyVerdict(BaseModel):
@@ -35,24 +36,22 @@ def measure(self, test_case: LLMTestCase) -> float:
raise ValueError(
"Input, actual output, or retrieval context cannot be None"
)
- print(
- "✨ 🍰 ✨ You're using DeepEval's latest Contextual Relevancy Metric! This may take a minute..."
- )
- self.verdicts_list: List[
- List[ContextualRelevancyVerdict]
- ] = self._generate_verdicts_list(
- test_case.input, test_case.retrieval_context
- )
- contextual_recall_score = self._generate_score()
+ with metrics_progress_context(self.__name__):
+ self.verdicts_list: List[
+ List[ContextualRelevancyVerdict]
+ ] = self._generate_verdicts_list(
+ test_case.input, test_case.retrieval_context
+ )
+ contextual_recall_score = self._generate_score()
- self.reason = self._generate_reason(
- test_case.input, contextual_recall_score
- )
+ self.reason = self._generate_reason(
+ test_case.input, contextual_recall_score
+ )
- self.success = contextual_recall_score >= self.minimum_score
- self.score = contextual_recall_score
+ self.success = contextual_recall_score >= self.minimum_score
+ self.score = contextual_recall_score
- return self.score
+ return self.score
def _generate_reason(self, input: str, score: float):
if self.include_reason is False:
diff --git a/deepeval/metrics/faithfulness.py b/deepeval/metrics/faithfulness.py
index 643ef2c40..74a5ddc45 100644
--- a/deepeval/metrics/faithfulness.py
+++ b/deepeval/metrics/faithfulness.py
@@ -8,6 +8,7 @@
from deepeval.utils import trimToJson
from deepeval.models import GPTModel
from deepeval.templates import FaithfulnessTemplate
+from deepeval.progress_context import metrics_progress_context
class FaithfulnessVerdict(BaseModel):
@@ -37,22 +38,20 @@ def measure(self, test_case: LLMTestCase):
raise ValueError(
"Input, actual output, or retrieval context cannot be None"
)
- print(
- "✨ 🍰 ✨ You're using DeepEval's latest Faithfulness Metric! This may take a minute..."
- )
- self.truths_list: List[List[str]] = self._generate_truths_list(
- test_case.retrieval_context
- )
- self.verdicts_list: List[
- List[FaithfulnessVerdict]
- ] = self._generate_verdicts_list(
- self.truths_list, test_case.actual_output
- )
- faithfulness_score = self._generate_score()
- self.reason = self._generate_reason(faithfulness_score)
- self.success = faithfulness_score >= self.minimum_score
- self.score = faithfulness_score
- return self.score
+ with metrics_progress_context(self.__name__):
+ self.truths_list: List[List[str]] = self._generate_truths_list(
+ test_case.retrieval_context
+ )
+ self.verdicts_list: List[
+ List[FaithfulnessVerdict]
+ ] = self._generate_verdicts_list(
+ self.truths_list, test_case.actual_output
+ )
+ faithfulness_score = self._generate_score()
+ self.reason = self._generate_reason(faithfulness_score)
+ self.success = faithfulness_score >= self.minimum_score
+ self.score = faithfulness_score
+ return self.score
def _generate_score(self):
total_verdicts = 0
diff --git a/deepeval/progress_context.py b/deepeval/progress_context.py
index ed1b3c523..f7986a602 100644
--- a/deepeval/progress_context.py
+++ b/deepeval/progress_context.py
@@ -17,3 +17,19 @@ def progress_context(
) as progress:
progress.add_task(description=description, total=total)
yield
+
+
+@contextmanager
+def metrics_progress_context(
+ metric_name: str, total: int = 9999, transient: bool = True
+):
+ description = f"✨ 🍰 ✨ You're using DeepEval's latest {metric_name} Metric! This may take a minute..."
+ console = Console(file=sys.stderr) # Direct output to standard error
+ with Progress(
+ SpinnerColumn(),
+ TextColumn("[progress.description]{task.description}"),
+ console=console, # Use the custom console
+ transient=transient,
+ ) as progress:
+ progress.add_task(description=description, total=total)
+ yield
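
Each metric above now wraps its `measure()` body in this context manager instead of calling `print()`. A standalone sketch of the behavior:

```python
import time

from deepeval.progress_context import metrics_progress_context

# Renders the transient "✨ 🍰 ✨ ..." spinner on stderr while the body
# runs, then clears it (transient=True), keeping stdout free for test output.
with metrics_progress_context("Answer Relevancy"):
    time.sleep(2)  # stand-in for the LLM calls a real metric makes
```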
diff --git a/deepeval/templates.py b/deepeval/templates.py
index b4ae5d584..f32f4b10a 100644
--- a/deepeval/templates.py
+++ b/deepeval/templates.py
@@ -97,6 +97,7 @@ def generate_verdicts(truths, text):
You should NOT incorporate any prior knowledge you have and take each context at face value. Since you are going to generate a verdict for each context, the number of 'verdicts' SHOULD BE STRICTLY EQUAL to that of contexts.
You DON'T have to provide a reason if the answer is 'yes'.
+You should ONLY provide a 'no' answer if IT IS A CONTRADICTION.
**
Retrieval Contexts:
@@ -113,7 +114,7 @@ def generate_reason(score, contradictions):
return f"""Below is a list of Contradictions. It is a list of JSON with the `contradiction` and `rank` key.
The `contradiction` explains why the 'actual output' does not align with a certain node in the 'retrieval context'. Contradictions happen in the 'actual output', NOT the 'retrieval context'.
The `rank` tells you which node in the 'retrieval context' the actual output contradicted with.
-Given the faithfulness score, which is a 0-1 score indicating how faithful the `actual output` is to the retrieval context (higher the better), concisely summarize the contradictions to justify the score.
+Given the faithfulness score, which is a 0-1 score indicating how faithful the `actual output` is to the retrieval context (higher the better), CONCISELY summarize the contradictions to justify the score.
Faithfulness Score:
{score}
@@ -128,6 +129,7 @@ def generate_reason(score, contradictions):
IMPORTANT:
If there are no contradictions, just say something positive with an upbeat encouraging tone (but don't overdo it otherwise it gets annoying).
Your reason MUST use information in `contradiction` and the node RANK (eg., first node of the retrieval context) in your reason.
+Be confident in your reason, as if you know what the actual output is from the contradictions.
**
Reason:
diff --git a/tests/test_answer_relevancy.py b/tests/test_answer_relevancy.py
index 8d828cc82..c1de4e866 100644
--- a/tests/test_answer_relevancy.py
+++ b/tests/test_answer_relevancy.py
@@ -2,7 +2,7 @@
"""
import pytest
from deepeval.test_case import LLMTestCase
-from deepeval.metrics import AnswerRelevancyMetric
+from deepeval.metrics import AnswerRelevancyMetric, FaithfulnessMetric
from deepeval import assert_test
question = "What are the primary benefits of meditation?"
From 7ea687e9eec24fd3836bf06f043099d254c83d40 Mon Sep 17 00:00:00 2001
From: Jeffrey Ip
Date: Wed, 3 Jan 2024 21:50:50 +0800
Subject: [PATCH 29/46] Remove import
---
tests/test_answer_relevancy.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tests/test_answer_relevancy.py b/tests/test_answer_relevancy.py
index c1de4e866..8d828cc82 100644
--- a/tests/test_answer_relevancy.py
+++ b/tests/test_answer_relevancy.py
@@ -2,7 +2,7 @@
"""
import pytest
from deepeval.test_case import LLMTestCase
-from deepeval.metrics import AnswerRelevancyMetric, FaithfulnessMetric
+from deepeval.metrics import AnswerRelevancyMetric
from deepeval import assert_test
question = "What are the primary benefits of meditation?"
From f476ea61332910a82b952f99abbf73e26ed112eb Mon Sep 17 00:00:00 2001
From: Jeffrey Ip
Date: Wed, 3 Jan 2024 22:35:53 +0800
Subject: [PATCH 30/46] updated docs
---
docs/docs/confident-ai-evals-in-production.mdx | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/docs/docs/confident-ai-evals-in-production.mdx b/docs/docs/confident-ai-evals-in-production.mdx
index e08abb723..98db35258 100644
--- a/docs/docs/confident-ai-evals-in-production.mdx
+++ b/docs/docs/confident-ai-evals-in-production.mdx
@@ -23,7 +23,8 @@ Simply add `deepeval.track(...)` in your application to start tracking events. T
- [Optional] `token_usage`: type `float`
- [Optional] `token_cost`: type `float`
- [Optional] `additional_data`: type `dict`
-- [Optional] `fail_silently`: type `bool`, defaults to True
+- [Optional] `fail_silently`: type `bool`, defaults to True. Set this to `False` to surface tracking errors if your events are not logging properly.
+- [Optional] `run_on_background_thread`: type `bool`, defaults to True. Set this to `False` to send events synchronously if your events are not logging properly.
:::note
Please do **NOT** provide placeholder values for optional parameters. Leave them blank instead.
@@ -48,6 +49,7 @@ deepeval.track(
token_cost=0.23,
additional_data={"example": "example"},
-    fail_silently=True
+    fail_silently=True,
+    run_on_background_thread=True
)
```
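
If events are not appearing, a hypothetical debugging call with both flags disabled might look like the following; the event fields shown are assumptions based on the snippet above, not a definitive signature:

```python
import deepeval

# Illustrative values; both flags default to True as documented above.
deepeval.track(
    event_name="chatbot",  # assumed identifier field for the event
    model="gpt-4",
    input="What's your refund policy?",
    response="We offer a 30-day full refund at no extra cost.",
    fail_silently=False,             # surface tracking errors instead of swallowing them
    run_on_background_thread=False,  # send synchronously so failures appear immediately
)
```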
From 6fad5cb29f669b3ed7a9eacc1486e24c79021916 Mon Sep 17 00:00:00 2001
From: Jeffrey Ip
Date: Tue, 9 Jan 2024 23:05:46 -0800
Subject: [PATCH 31/46] Add maximum score base metric
---
deepeval/metrics/answer_relevancy.py | 1 +
deepeval/metrics/base_metric.py | 26 +++++++++++----------
deepeval/metrics/contextual_precision.py | 1 +
deepeval/metrics/contextual_recall.py | 1 +
deepeval/metrics/contextual_relevancy.py | 1 +
deepeval/metrics/faithfulness.py | 1 +
deepeval/metrics/hallucination_metric.py | 3 ++-
deepeval/metrics/judgemental_gpt.py | 3 ++-
deepeval/metrics/llm_eval_metric.py | 3 ++-
deepeval/metrics/non_toxic_metric.py | 3 ++-
deepeval/metrics/summarization.py | 1 +
deepeval/metrics/unbias_metric.py | 3 ++-
deepeval/models/summac_model.py | 18 +++++++--------
deepeval/test_case.py | 4 ++++
deepeval/test_run/test_run.py | 2 +-
tests/test_custom_execution_time.py | 29 ++++++++++++++++++++++++
16 files changed, 73 insertions(+), 27 deletions(-)
create mode 100644 tests/test_custom_execution_time.py
diff --git a/deepeval/metrics/answer_relevancy.py b/deepeval/metrics/answer_relevancy.py
index a4cdadbb2..2d26e8edb 100644
--- a/deepeval/metrics/answer_relevancy.py
+++ b/deepeval/metrics/answer_relevancy.py
@@ -115,6 +115,7 @@ def _generate_key_points(
return data["key_points"]
def is_successful(self) -> bool:
+ self.success = self.score >= self.minimum_score
return self.success
@property
diff --git a/deepeval/metrics/base_metric.py b/deepeval/metrics/base_metric.py
index ce2cdb866..8a541d842 100644
--- a/deepeval/metrics/base_metric.py
+++ b/deepeval/metrics/base_metric.py
@@ -9,6 +9,8 @@ class BaseMetric:
score: float = 0
score_metadata: Dict = None
reason: Optional[str] = None
+ # max_execution_time: Optional[float] = None
+ # max_cost: Optional[float] = None
@property
def minimum_score(self) -> float:
@@ -20,18 +22,18 @@ def minimum_score(self, value: float):
# Measure function signature is subject to be different - not sure
# how applicable this is - might need a better abstraction
- @abstractmethod
- def measure(self, test_case: LLMTestCase, *args, **kwargs) -> float:
- raise NotImplementedError
-
- def _get_init_values(self):
- # We use this method for sending useful metadata
- init_values = {
- param: getattr(self, param)
- for param in vars(self)
- if isinstance(getattr(self, param), (str, int, float))
- }
- return init_values
+ # @abstractmethod
+ # def measure(self, test_case: LLMTestCase, *args, **kwargs) -> float:
+ # raise NotImplementedError
+
+ # def _get_init_values(self):
+ # # We use this method for sending useful metadata
+ # init_values = {
+ # param: getattr(self, param)
+ # for param in vars(self)
+ # if isinstance(getattr(self, param), (str, int, float))
+ # }
+ # return init_values
@abstractmethod
def is_successful(self) -> bool:
diff --git a/deepeval/metrics/contextual_precision.py b/deepeval/metrics/contextual_precision.py
index 8c7f0b9d6..d7d8e4b7b 100644
--- a/deepeval/metrics/contextual_precision.py
+++ b/deepeval/metrics/contextual_precision.py
@@ -136,6 +136,7 @@ def _generate_verdicts(
return verdicts
def is_successful(self) -> bool:
+ self.success = self.score >= self.minimum_score
return self.success
@property
diff --git a/deepeval/metrics/contextual_recall.py b/deepeval/metrics/contextual_recall.py
index 347a97463..bc2fa99b0 100644
--- a/deepeval/metrics/contextual_recall.py
+++ b/deepeval/metrics/contextual_recall.py
@@ -102,6 +102,7 @@ def _generate_verdicts(
return verdicts
def is_successful(self) -> bool:
+ self.success = self.score >= self.minimum_score
return self.success
@property
diff --git a/deepeval/metrics/contextual_relevancy.py b/deepeval/metrics/contextual_relevancy.py
index 556d0fe5b..b5860c4a0 100644
--- a/deepeval/metrics/contextual_relevancy.py
+++ b/deepeval/metrics/contextual_relevancy.py
@@ -131,6 +131,7 @@ def _generate_verdicts_list(
return verdicts_list
def is_successful(self) -> bool:
+ self.success = self.score >= self.minimum_score
return self.success
@property
diff --git a/deepeval/metrics/faithfulness.py b/deepeval/metrics/faithfulness.py
index 74a5ddc45..8e1f96feb 100644
--- a/deepeval/metrics/faithfulness.py
+++ b/deepeval/metrics/faithfulness.py
@@ -172,6 +172,7 @@ def _generate_verdicts_list(
return verdicts_list
def is_successful(self) -> bool:
+ self.success = self.score >= self.minimum_score
return self.success
@property
diff --git a/deepeval/metrics/hallucination_metric.py b/deepeval/metrics/hallucination_metric.py
index 3f63c6910..67810415a 100644
--- a/deepeval/metrics/hallucination_metric.py
+++ b/deepeval/metrics/hallucination_metric.py
@@ -25,11 +25,12 @@ def measure(self, test_case: LLMTestCase):
if score > max_score:
max_score = score
- self.success = max_score > self.minimum_score
+ self.success = max_score >= self.minimum_score
self.score = max_score
return max_score
def is_successful(self) -> bool:
+ self.success = self.score >= self.minimum_score
return self.success
@property
diff --git a/deepeval/metrics/judgemental_gpt.py b/deepeval/metrics/judgemental_gpt.py
index 30f44836f..5054b8b8c 100644
--- a/deepeval/metrics/judgemental_gpt.py
+++ b/deepeval/metrics/judgemental_gpt.py
@@ -74,5 +74,6 @@ def measure(self, test_case: LLMTestCase):
return self.score
- def is_successful(self):
+ def is_successful(self) -> bool:
+ self.success = self.score >= self.minimum_score
return self.success
diff --git a/deepeval/metrics/llm_eval_metric.py b/deepeval/metrics/llm_eval_metric.py
index 4d08674f5..461823912 100644
--- a/deepeval/metrics/llm_eval_metric.py
+++ b/deepeval/metrics/llm_eval_metric.py
@@ -76,7 +76,8 @@ def measure(self, test_case: LLMTestCase):
self.success = score >= self.minimum_score
return self.score
- def is_successful(self):
+ def is_successful(self) -> bool:
+ self.success = self.score >= self.minimum_score
return self.success
def generate_evaluation_steps(self):
diff --git a/deepeval/metrics/non_toxic_metric.py b/deepeval/metrics/non_toxic_metric.py
index 96318bef2..1b41aa414 100644
--- a/deepeval/metrics/non_toxic_metric.py
+++ b/deepeval/metrics/non_toxic_metric.py
@@ -62,7 +62,8 @@ def measure(self, test_case: LLMTestCase):
self.score = average_score
return self.score
- def is_successful(self):
+ def is_successful(self) -> bool:
+ self.success = self.score >= self.minimum_score
return self.success
@property
diff --git a/deepeval/metrics/summarization.py b/deepeval/metrics/summarization.py
index 9a005e6fd..d3ee73101 100644
--- a/deepeval/metrics/summarization.py
+++ b/deepeval/metrics/summarization.py
@@ -134,6 +134,7 @@ def get_answer(self, question: str, text: str) -> str:
return res.content
def is_successful(self) -> bool:
+ self.success = self.score >= self.minimum_score
return self.success
@property
diff --git a/deepeval/metrics/unbias_metric.py b/deepeval/metrics/unbias_metric.py
index 945d07c67..9511edbad 100644
--- a/deepeval/metrics/unbias_metric.py
+++ b/deepeval/metrics/unbias_metric.py
@@ -69,7 +69,8 @@ def measure(self, test_case: LLMTestCase, return_all_scores: bool = False):
return average_score
- def is_successful(self):
+ def is_successful(self) -> bool:
+ self.success = self.score >= self.minimum_score
return self.success
@property
diff --git a/deepeval/models/summac_model.py b/deepeval/models/summac_model.py
index 7df978794..daa6ab258 100644
--- a/deepeval/models/summac_model.py
+++ b/deepeval/models/summac_model.py
@@ -1,5 +1,5 @@
import torch
-from typing import Union, List
+from typing import Union, List, Optional
from typing import List, Union, get_origin
from deepeval.models.base import DeepEvalBaseModel
from deepeval.models._summac_model import _SummaCZS
@@ -8,9 +8,9 @@
class SummaCModels(DeepEvalBaseModel):
def __init__(
self,
- model_name: str | None = None,
- granularity: str | None = None,
- device: str | None = None,
+ model_name: Optional[str] = None,
+ granularity: Optional[str] = None,
+ device: Optional[str] = None,
*args,
**kwargs
):
@@ -27,11 +27,11 @@ def __init__(
def load_model(
self,
- op1: str | None = "max",
- op2: str | None = "mean",
- use_ent: bool | None = True,
- use_con: bool | None = True,
- image_load_cache: bool | None = True,
+ op1: Optional[str] = "max",
+ op2: Optional[str] = "mean",
+ use_ent: Optional[bool] = True,
+ use_con: Optional[bool] = True,
+ image_load_cache: Optional[bool] = True,
**kwargs
):
return _SummaCZS(
diff --git a/deepeval/test_case.py b/deepeval/test_case.py
index ca3b2757d..48e17647c 100644
--- a/deepeval/test_case.py
+++ b/deepeval/test_case.py
@@ -20,6 +20,8 @@ def __init__(
expected_output: Optional[str] = None,
context: Optional[List[str]] = None,
retrieval_context: Optional[List[str]] = None,
+ execution_time: Optional[float] = None,
+ cost: Optional[float] = None,
id: Optional[str] = None,
):
self.id = id
@@ -28,3 +30,5 @@ def __init__(
self.expected_output = expected_output
self.context = context
self.retrieval_context = retrieval_context
+ self.execution_time = execution_time
+ self.cost = cost
diff --git a/deepeval/test_run/test_run.py b/deepeval/test_run/test_run.py
index 7ef742632..3fe5a0092 100644
--- a/deepeval/test_run/test_run.py
+++ b/deepeval/test_run/test_run.py
@@ -100,7 +100,7 @@ def add_llm_test_case(
existing_test_case.metrics_metadata.append(metrics_metadata)
success = all(
[
- metric.score >= metric.minimum_score
+ metric.is_successful()
for metric in existing_test_case.metrics_metadata
]
)
diff --git a/tests/test_custom_execution_time.py b/tests/test_custom_execution_time.py
new file mode 100644
index 000000000..4a9d50da2
--- /dev/null
+++ b/tests/test_custom_execution_time.py
@@ -0,0 +1,29 @@
+# from deepeval.metrics import BaseMetric
+# from deepeval.test_case import LLMTestCase
+# from deepeval import assert_test
+
+# class ExecutionTimeMetric(BaseMetric):
+# def __init__(self, max_execution_time: float):
+# self.max_execution_time = max_execution_time
+
+# def measure(self, test_case: LLMTestCase):
+# self.success = test_case.execution_time <= self.max_execution_time
+# if self.success:
+# self.score = 1
+# else:
+# self.score = 0
+
+# return self.score
+
+# def is_successful(self):
+# return self.success
+
+# @property
+# def name(self):
+# return "Execution Time"
+
+
+# def test_execution_time():
+# test_case = LLMTestCase(input="...", actual_output="...", execution_time=4.57)
+# execution_time_metric = ExecutionTimeMetric(max_execution_time=5)
+# assert_test(test_case, [execution_time_metric])
\ No newline at end of file
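
Uncommented, the new test file amounts to the following runnable sketch of a custom metric built on the new `execution_time` field (same public API as above, lightly tidied):

```python
from deepeval.metrics import BaseMetric
from deepeval.test_case import LLMTestCase
from deepeval import assert_test

class ExecutionTimeMetric(BaseMetric):
    def __init__(self, max_execution_time: float):
        self.max_execution_time = max_execution_time

    def measure(self, test_case: LLMTestCase) -> float:
        # Pass when the recorded execution time is within budget
        self.success = test_case.execution_time <= self.max_execution_time
        self.score = 1 if self.success else 0
        return self.score

    def is_successful(self) -> bool:
        return self.success

    @property
    def name(self):
        return "Execution Time"

def test_execution_time():
    test_case = LLMTestCase(input="...", actual_output="...", execution_time=4.57)
    assert_test(test_case, [ExecutionTimeMetric(max_execution_time=5)])
```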
From f0d6d3eefc024094746eea0e90e8f949d6e801b8 Mon Sep 17 00:00:00 2001
From: Jeffrey Ip
Date: Tue, 9 Jan 2024 23:42:26 -0800
Subject: [PATCH 32/46] reformat
---
deepeval/metrics/base_metric.py | 20 +++-----------------
tests/test_custom_execution_time.py | 8 ++++----
2 files changed, 7 insertions(+), 21 deletions(-)
diff --git a/deepeval/metrics/base_metric.py b/deepeval/metrics/base_metric.py
index 8a541d842..24aa8abe6 100644
--- a/deepeval/metrics/base_metric.py
+++ b/deepeval/metrics/base_metric.py
@@ -5,12 +5,9 @@
class BaseMetric:
- # set an arbitrary minimum score that will get over-ridden later
score: float = 0
score_metadata: Dict = None
reason: Optional[str] = None
- # max_execution_time: Optional[float] = None
- # max_cost: Optional[float] = None
@property
def minimum_score(self) -> float:
@@ -20,20 +17,9 @@ def minimum_score(self) -> float:
def minimum_score(self, value: float):
self._minimum_score = value
- # Measure function signature is subject to be different - not sure
- # how applicable this is - might need a better abstraction
- # @abstractmethod
- # def measure(self, test_case: LLMTestCase, *args, **kwargs) -> float:
- # raise NotImplementedError
-
- # def _get_init_values(self):
- # # We use this method for sending useful metadata
- # init_values = {
- # param: getattr(self, param)
- # for param in vars(self)
- # if isinstance(getattr(self, param), (str, int, float))
- # }
- # return init_values
+ @abstractmethod
+ def measure(self, test_case: LLMTestCase, *args, **kwargs) -> float:
+ raise NotImplementedError
@abstractmethod
def is_successful(self) -> bool:
diff --git a/tests/test_custom_execution_time.py b/tests/test_custom_execution_time.py
index 4a9d50da2..488432cad 100644
--- a/tests/test_custom_execution_time.py
+++ b/tests/test_custom_execution_time.py
@@ -5,7 +5,7 @@
# class ExecutionTimeMetric(BaseMetric):
# def __init__(self, max_execution_time: float):
# self.max_execution_time = max_execution_time
-
+
# def measure(self, test_case: LLMTestCase):
# self.success = test_case.execution_time <= self.max_execution_time
# if self.success:
@@ -14,16 +14,16 @@
# self.score = 0
# return self.score
-
+
# def is_successful(self):
# return self.success
# @property
# def name(self):
# return "Execution Time"
-
+
# def test_execution_time():
# test_case = LLMTestCase(input="...", actual_output="...", execution_time=4.57)
# execution_time_metric = ExecutionTimeMetric(max_execution_time=5)
-# assert_test(test_case, [execution_time_metric])
\ No newline at end of file
+# assert_test(test_case, [execution_time_metric])
From 28d1072cbc2ef0a114059d0dcff1c8998630ffb3 Mon Sep 17 00:00:00 2001
From: Jeffrey Ip
Date: Wed, 10 Jan 2024 11:19:41 -0800
Subject: [PATCH 33/46] Fix langchain azure
---
tests/test_faithfulness.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tests/test_faithfulness.py b/tests/test_faithfulness.py
index 7b3a5c27c..d3595be69 100644
--- a/tests/test_faithfulness.py
+++ b/tests/test_faithfulness.py
@@ -38,7 +38,7 @@
"""
-@pytest.mark.skip(reason="openai is expensive")
+# @pytest.mark.skip(reason="openai is expensive")
def test_faithfulness():
test_case = LLMTestCase(
input="What is the primary difference between a comet and an asteroid?",
From 8d8c291cfff4a211175ba7ae218888b5d0a28b4a Mon Sep 17 00:00:00 2001
From: Jeffrey Ip
Date: Wed, 10 Jan 2024 11:21:36 -0800
Subject: [PATCH 34/46] Fix docs
---
docs/docs/metrics-toxicity.mdx | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/docs/metrics-toxicity.mdx b/docs/docs/metrics-toxicity.mdx
index 0af3e030a..90d34aec6 100644
--- a/docs/docs/metrics-toxicity.mdx
+++ b/docs/docs/metrics-toxicity.mdx
@@ -33,7 +33,7 @@ from deepeval.test_case import LLMTestCase, LLMTestCaseParams
# Replace this with the actual output from your LLM application
actual_output = "We offer a 30-day full refund at no extra cost."
-metric = UnBiasedMetric(
+metric = NonToxicMetric(
evaluation_params=[LLMTestCaseParams.INPUT, LLMTestCaseParams.ACTUAL_OUTPUT],
minimum_score=0.5
)
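
To round out the corrected snippet, a usage sketch (the `input` value is illustrative):

```python
# Continuing the snippet above: run the metric against a test case
test_case = LLMTestCase(
    input="What if these shoes don't fit?",
    actual_output=actual_output,
)

metric.measure(test_case)
print(metric.score)            # average non-toxicity score in [0, 1]
print(metric.is_successful())  # True when score >= minimum_score
```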
From af29c18cee1d3bec48343ed620cff9df8e9ab314 Mon Sep 17 00:00:00 2001
From: Jeffrey Ip
Date: Thu, 11 Jan 2024 14:17:35 -0800
Subject: [PATCH 35/46] Migrate minimum score to threshold
---
.DS_Store | Bin 8196 -> 6148 bytes
README.md | 12 ++--
deepeval/evaluate.py | 6 +-
deepeval/metrics/__init__.py | 1 +
deepeval/metrics/answer_relevancy.py | 8 +--
deepeval/metrics/base_metric.py | 10 +--
deepeval/metrics/contextual_precision.py | 8 +--
deepeval/metrics/contextual_recall.py | 8 +--
deepeval/metrics/contextual_relevancy.py | 8 +--
deepeval/metrics/faithfulness.py | 8 +--
deepeval/metrics/hallucination_metric.py | 8 +--
deepeval/metrics/judgemental_gpt.py | 8 +--
deepeval/metrics/llm_eval_metric.py | 8 +--
deepeval/metrics/non_toxic_metric.py | 8 +--
deepeval/metrics/ragas_metric.py | 66 +++++++++----------
deepeval/metrics/summarization.py | 8 +--
deepeval/metrics/unbias_metric.py | 10 +--
deepeval/test_run/api.py | 2 +-
deepeval/test_run/test_run.py | 21 +++---
docs/docs/confident-ai-debug-evaluations.mdx | 2 +-
docs/docs/confident-ai-evaluate-datasets.mdx | 4 +-
docs/docs/evaluation-datasets.mdx | 8 +--
docs/docs/evaluation-test-cases.mdx | 8 +--
docs/docs/getting-started.mdx | 18 ++---
docs/docs/metrics-answer-relevancy.mdx | 2 +-
docs/docs/metrics-bias.mdx | 2 +-
docs/docs/metrics-contextual-precision.mdx | 2 +-
docs/docs/metrics-contextual-recall.mdx | 2 +-
docs/docs/metrics-contextual-relevancy.mdx | 2 +-
docs/docs/metrics-custom.mdx | 6 +-
docs/docs/metrics-faithfulness.mdx | 2 +-
docs/docs/metrics-hallucination.mdx | 2 +-
docs/docs/metrics-introduction.mdx | 8 +--
docs/docs/metrics-judgemental.mdx | 4 +-
docs/docs/metrics-llm-evals.mdx | 4 +-
docs/docs/metrics-ragas.mdx | 2 +-
docs/docs/metrics-summarization.mdx | 4 +-
docs/docs/metrics-toxicity.mdx | 2 +-
examples/getting_started/test_example.py | 12 ++--
examples/tracing/test_chatbot.py | 2 +-
tests/test_answer_relevancy.py | 2 +-
tests/test_contextual_precision.py | 2 +-
tests/test_custom_metric.py | 6 +-
tests/test_dataset.py | 2 +-
tests/test_hallucination_metric.py | 6 +-
tests/test_judgemental.py | 2 +-
tests/test_llm_metric.py | 4 +-
tests/test_quickstart.py | 4 +-
tests/test_ragas.py | 48 +++++++-------
49 files changed, 191 insertions(+), 191 deletions(-)
diff --git a/.DS_Store b/.DS_Store
index 03d385f228cccda13efc3ef9b54b507c402f9232..b4ec85712a36e2cb9a2c5164239c2a4239bd311c 100644
GIT binary patch
[.DS_Store binary deltas omitted]
diff --git a/deepeval/metrics/answer_relevancy.py b/deepeval/metrics/answer_relevancy.py
@@ def measure(self, test_case: LLMTestCase) -> float:
self.reason = self._generate_reason(
test_case.input, test_case.actual_output, answer_relevancy_score
)
- self.success = answer_relevancy_score >= self.minimum_score
+ self.success = answer_relevancy_score >= self.threshold
self.score = answer_relevancy_score
return self.score
@@ -115,7 +115,7 @@ def _generate_key_points(
return data["key_points"]
def is_successful(self) -> bool:
- self.success = self.score >= self.minimum_score
+ self.success = self.score >= self.threshold
return self.success
@property
diff --git a/deepeval/metrics/base_metric.py b/deepeval/metrics/base_metric.py
index 24aa8abe6..f73a60143 100644
--- a/deepeval/metrics/base_metric.py
+++ b/deepeval/metrics/base_metric.py
@@ -10,12 +10,12 @@ class BaseMetric:
reason: Optional[str] = None
@property
- def minimum_score(self) -> float:
- return self._minimum_score
+ def threshold(self) -> float:
+ return self._threshold
- @minimum_score.setter
- def minimum_score(self, value: float):
- self._minimum_score = value
+ @threshold.setter
+ def threshold(self, value: float):
+ self._threshold = value
@abstractmethod
def measure(self, test_case: LLMTestCase, *args, **kwargs) -> float:
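
The rename keeps the property pattern intact; in isolation, the getter/setter pair behaves like this minimal standalone sketch (`DummyMetric` is hypothetical):

```python
class DummyMetric:
    @property
    def threshold(self) -> float:
        return self._threshold

    @threshold.setter
    def threshold(self, value: float):
        self._threshold = value  # stored on a private attribute

m = DummyMetric()
m.threshold = 0.5
print(m.threshold)  # 0.5
```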
diff --git a/deepeval/metrics/contextual_precision.py b/deepeval/metrics/contextual_precision.py
index d7d8e4b7b..5591ef50d 100644
--- a/deepeval/metrics/contextual_precision.py
+++ b/deepeval/metrics/contextual_precision.py
@@ -19,11 +19,11 @@ class ContextualPrecisionVerdict(BaseModel):
class ContextualPrecisionMetric(BaseMetric):
def __init__(
self,
- minimum_score: float = 0.5,
+ threshold: float = 0.5,
model: Optional[str] = None,
include_reason: bool = True,
):
- self.minimum_score = minimum_score
+ self.threshold = threshold
self.include_reason = include_reason
self.model = model
@@ -52,7 +52,7 @@ def measure(self, test_case: LLMTestCase) -> float:
test_case.input, contextual_precision_score
)
- self.success = contextual_precision_score >= self.minimum_score
+ self.success = contextual_precision_score >= self.threshold
self.score = contextual_precision_score
return self.score
@@ -136,7 +136,7 @@ def _generate_verdicts(
return verdicts
def is_successful(self) -> bool:
- self.success = self.score >= self.minimum_score
+ self.success = self.score >= self.threshold
return self.success
@property
diff --git a/deepeval/metrics/contextual_recall.py b/deepeval/metrics/contextual_recall.py
index bc2fa99b0..6cf2aeec5 100644
--- a/deepeval/metrics/contextual_recall.py
+++ b/deepeval/metrics/contextual_recall.py
@@ -18,11 +18,11 @@ class ContextualRecallVerdict(BaseModel):
class ContextualRecallMetric(BaseMetric):
def __init__(
self,
- minimum_score: float = 0.5,
+ threshold: float = 0.5,
model: Optional[str] = None,
include_reason: bool = True,
):
- self.minimum_score = minimum_score
+ self.threshold = threshold
self.model = model
self.include_reason = include_reason
self.n = 5
@@ -50,7 +50,7 @@ def measure(self, test_case: LLMTestCase) -> float:
test_case.expected_output, contextual_recall_score
)
- self.success = contextual_recall_score >= self.minimum_score
+ self.success = contextual_recall_score >= self.threshold
self.score = contextual_recall_score
return self.score
@@ -102,7 +102,7 @@ def _generate_verdicts(
return verdicts
def is_successful(self) -> bool:
- self.success = self.score >= self.minimum_score
+ self.success = self.score >= self.threshold
return self.success
@property
diff --git a/deepeval/metrics/contextual_relevancy.py b/deepeval/metrics/contextual_relevancy.py
index b5860c4a0..aa717fea4 100644
--- a/deepeval/metrics/contextual_relevancy.py
+++ b/deepeval/metrics/contextual_relevancy.py
@@ -19,11 +19,11 @@ class ContextualRelevancyVerdict(BaseModel):
class ContextualRelevancyMetric(BaseMetric):
def __init__(
self,
- minimum_score: float = 0.5,
+ threshold: float = 0.5,
model: Optional[str] = "gpt-4",
include_reason: bool = True,
):
- self.minimum_score = minimum_score
+ self.threshold = threshold
self.model = model
self.include_reason = include_reason
@@ -48,7 +48,7 @@ def measure(self, test_case: LLMTestCase) -> float:
test_case.input, contextual_recall_score
)
- self.success = contextual_recall_score >= self.minimum_score
+ self.success = contextual_recall_score >= self.threshold
self.score = contextual_recall_score
return self.score
@@ -131,7 +131,7 @@ def _generate_verdicts_list(
return verdicts_list
def is_successful(self) -> bool:
- self.success = self.score >= self.minimum_score
+ self.success = self.score >= self.threshold
return self.success
@property
diff --git a/deepeval/metrics/faithfulness.py b/deepeval/metrics/faithfulness.py
index 8e1f96feb..85409fd5f 100644
--- a/deepeval/metrics/faithfulness.py
+++ b/deepeval/metrics/faithfulness.py
@@ -20,11 +20,11 @@ class FaithfulnessVerdict(BaseModel):
class FaithfulnessMetric(BaseMetric):
def __init__(
self,
- minimum_score: float = 0.5,
+ threshold: float = 0.5,
model: Optional[str] = None,
include_reason: bool = True,
):
- self.minimum_score = minimum_score
+ self.threshold = threshold
# Don't set self.chat_model when using threading
self.model = model
self.include_reason = include_reason
@@ -49,7 +49,7 @@ def measure(self, test_case: LLMTestCase):
)
faithfulness_score = self._generate_score()
self.reason = self._generate_reason(faithfulness_score)
- self.success = faithfulness_score >= self.minimum_score
+ self.success = faithfulness_score >= self.threshold
self.score = faithfulness_score
return self.score
@@ -172,7 +172,7 @@ def _generate_verdicts_list(
return verdicts_list
def is_successful(self) -> bool:
- self.success = self.score >= self.minimum_score
+ self.success = self.score >= self.threshold
return self.success
@property
diff --git a/deepeval/metrics/hallucination_metric.py b/deepeval/metrics/hallucination_metric.py
index 67810415a..b9eb3346c 100644
--- a/deepeval/metrics/hallucination_metric.py
+++ b/deepeval/metrics/hallucination_metric.py
@@ -7,9 +7,9 @@
class HallucinationMetric(BaseMetric, metaclass=Singleton):
def __init__(
self,
- minimum_score: float = 0.5,
+ threshold: float = 0.5,
):
- self.minimum_score = minimum_score
+ self.threshold = threshold
def measure(self, test_case: LLMTestCase):
if test_case.actual_output is None or test_case.context is None:
@@ -25,12 +25,12 @@ def measure(self, test_case: LLMTestCase):
if score > max_score:
max_score = score
- self.success = max_score >= self.minimum_score
+ self.success = max_score >= self.threshold
self.score = max_score
return max_score
def is_successful(self) -> bool:
- self.success = self.score >= self.minimum_score
+ self.success = self.score >= self.threshold
return self.success
@property
diff --git a/deepeval/metrics/judgemental_gpt.py b/deepeval/metrics/judgemental_gpt.py
index 5054b8b8c..d707adade 100644
--- a/deepeval/metrics/judgemental_gpt.py
+++ b/deepeval/metrics/judgemental_gpt.py
@@ -24,7 +24,7 @@ def __init__(
criteria: str,
evaluation_params: List[LLMTestCaseParams],
language: Languages = Languages.ENGLISH,
- minimum_score: float = 0.5,
+ threshold: float = 0.5,
):
if not isinstance(language, Languages):
raise TypeError("'language' must be an instance of Languages.")
@@ -33,7 +33,7 @@ def __init__(
self.name = name
self.evaluation_params = evaluation_params
self.language = language.value
- self.minimum_score = minimum_score
+ self.threshold = threshold
self.success = None
self.reason = None
@@ -70,10 +70,10 @@ def measure(self, test_case: LLMTestCase):
)
self.reason = response.reason
self.score = response.score / 10
- self.success = self.score >= self.minimum_score
+ self.success = self.score >= self.threshold
return self.score
def is_successful(self) -> bool:
- self.success = self.score >= self.minimum_score
+ self.success = self.score >= self.threshold
return self.success
diff --git a/deepeval/metrics/llm_eval_metric.py b/deepeval/metrics/llm_eval_metric.py
index 461823912..8726f9a79 100644
--- a/deepeval/metrics/llm_eval_metric.py
+++ b/deepeval/metrics/llm_eval_metric.py
@@ -26,7 +26,7 @@ def __init__(
criteria: Optional[str] = None,
evaluation_steps: Optional[List[str]] = None,
model: Optional[str] = None,
- minimum_score: float = 0.5,
+ threshold: float = 0.5,
):
self.name = name
self.evaluation_params = evaluation_params
@@ -50,7 +50,7 @@ def __init__(
self.criteria = criteria
self.model = model
self.evaluation_steps = evaluation_steps
- self.minimum_score = minimum_score
+ self.threshold = threshold
def measure(self, test_case: LLMTestCase):
"""LLM evaluated metric based on the GEval framework: https://arxiv.org/pdf/2303.16634.pdf"""
@@ -73,11 +73,11 @@ def measure(self, test_case: LLMTestCase):
score, reason = self.evaluate(test_case)
self.reason = reason
self.score = float(score) / 10
- self.success = score >= self.minimum_score
+ self.success = score >= self.threshold
return self.score
def is_successful(self) -> bool:
- self.success = self.score >= self.minimum_score
+ self.success = self.score >= self.threshold
return self.success
def generate_evaluation_steps(self):
diff --git a/deepeval/metrics/non_toxic_metric.py b/deepeval/metrics/non_toxic_metric.py
index 1b41aa414..20c9f78f2 100644
--- a/deepeval/metrics/non_toxic_metric.py
+++ b/deepeval/metrics/non_toxic_metric.py
@@ -13,13 +13,13 @@ def __init__(
self,
evaluation_params: List[LLMTestCaseParams],
model_name: str = "original",
- minimum_score: float = 0.5,
+ threshold: float = 0.5,
):
if not evaluation_params:
raise ValueError("evaluation_params cannot be empty or None")
self.evaluation_params = evaluation_params
- self.minimum_score, self.model_name = minimum_score, model_name
+ self.threshold, self.model_name = threshold, model_name
def __call__(self, test_case: LLMTestCase):
score = self.measure(test_case.actual_output)
@@ -58,12 +58,12 @@ def measure(self, test_case: LLMTestCase):
average_score = sum(total_scores) / len(total_scores)
# Check if the average score meets the minimum requirement
- self.success = average_score >= self.minimum_score
+ self.success = average_score >= self.threshold
self.score = average_score
return self.score
def is_successful(self) -> bool:
- self.success = self.score >= self.minimum_score
+ self.success = self.score >= self.threshold
return self.success
@property
diff --git a/deepeval/metrics/ragas_metric.py b/deepeval/metrics/ragas_metric.py
index cf27ce146..a1e1e4309 100644
--- a/deepeval/metrics/ragas_metric.py
+++ b/deepeval/metrics/ragas_metric.py
@@ -17,10 +17,10 @@ class RAGASContextualPrecisionMetric(BaseMetric):
def __init__(
self,
- minimum_score: float = 0.3,
+ threshold: float = 0.3,
model: Optional[str] = "gpt-3.5-turbo",
):
- self.minimum_score = minimum_score
+ self.threshold = threshold
self.model = model
def measure(self, test_case: LLMTestCase):
@@ -57,7 +57,7 @@ def measure(self, test_case: LLMTestCase):
# Ragas only does dataset-level comparisons
context_precision_score = scores["context_precision"]
- self.success = context_precision_score >= self.minimum_score
+ self.success = context_precision_score >= self.threshold
self.score = context_precision_score
return self.score
@@ -74,10 +74,10 @@ class RAGASContextualRelevancyMetric(BaseMetric):
def __init__(
self,
- minimum_score: float = 0.3,
+ threshold: float = 0.3,
model: Optional[str] = "gpt-3.5-turbo",
):
- self.minimum_score = minimum_score
+ self.threshold = threshold
self.model = model
def measure(self, test_case: LLMTestCase):
@@ -113,7 +113,7 @@ def measure(self, test_case: LLMTestCase):
# Ragas only does dataset-level comparisons
context_relevancy_score = scores["context_relevancy"]
- self.success = context_relevancy_score >= self.minimum_score
+ self.success = context_relevancy_score >= self.threshold
self.score = context_relevancy_score
return self.score
@@ -130,10 +130,10 @@ class RAGASAnswerRelevancyMetric(BaseMetric):
def __init__(
self,
- minimum_score: float = 0.3,
+ threshold: float = 0.3,
model: Optional[str] = "gpt-3.5-turbo",
):
- self.minimum_score = minimum_score
+ self.threshold = threshold
self.model = model
def measure(self, test_case: LLMTestCase):
@@ -165,7 +165,7 @@ def measure(self, test_case: LLMTestCase):
dataset = Dataset.from_dict(data)
scores = evaluate(dataset, metrics=[answer_relevancy])
answer_relevancy_score = scores["answer_relevancy"]
- self.success = answer_relevancy_score >= self.minimum_score
+ self.success = answer_relevancy_score >= self.threshold
self.score = answer_relevancy_score
return self.score
@@ -180,10 +180,10 @@ def __name__(self):
class RAGASFaithfulnessMetric(BaseMetric):
def __init__(
self,
- minimum_score: float = 0.3,
+ threshold: float = 0.3,
model: Optional[str] = "gpt-3.5-turbo",
):
- self.minimum_score = minimum_score
+ self.threshold = threshold
self.model = model
def measure(self, test_case: LLMTestCase):
@@ -215,7 +215,7 @@ def measure(self, test_case: LLMTestCase):
dataset = Dataset.from_dict(data)
scores = evaluate(dataset, metrics=[faithfulness])
faithfulness_score = scores["faithfulness"]
- self.success = faithfulness_score >= self.minimum_score
+ self.success = faithfulness_score >= self.threshold
self.score = faithfulness_score
return self.score
@@ -232,10 +232,10 @@ class RAGASContextualRecallMetric(BaseMetric):
def __init__(
self,
- minimum_score: float = 0.3,
+ threshold: float = 0.3,
model: Optional[str] = "gpt-3.5-turbo",
):
- self.minimum_score = minimum_score
+ self.threshold = threshold
self.model = model
def measure(self, test_case: LLMTestCase):
@@ -267,7 +267,7 @@ def measure(self, test_case: LLMTestCase):
dataset = Dataset.from_dict(data)
scores = evaluate(dataset, [context_recall])
context_recall_score = scores["context_recall"]
- self.success = context_recall_score >= self.minimum_score
+ self.success = context_recall_score >= self.threshold
self.score = context_recall_score
return self.score
@@ -284,10 +284,10 @@ class RAGASHarmfulnessMetric(BaseMetric):
def __init__(
self,
- minimum_score: float = 0.3,
+ threshold: float = 0.3,
model: Optional[str] = "gpt-3.5-turbo",
):
- self.minimum_score = minimum_score
+ self.threshold = threshold
self.model = model
def measure(self, test_case: LLMTestCase):
@@ -320,7 +320,7 @@ def measure(self, test_case: LLMTestCase):
dataset = Dataset.from_dict(data)
scores = evaluate(dataset, [harmfulness])
harmfulness_score = scores["harmfulness"]
- self.success = harmfulness_score >= self.minimum_score
+ self.success = harmfulness_score >= self.threshold
self.score = harmfulness_score
return self.score
@@ -337,10 +337,10 @@ class RAGASCoherenceMetric(BaseMetric):
def __init__(
self,
- minimum_score: float = 0.3,
+ threshold: float = 0.3,
model: Optional[str] = "gpt-3.5-turbo",
):
- self.minimum_score = minimum_score
+ self.threshold = threshold
self.model = model
def measure(self, test_case: LLMTestCase):
@@ -371,7 +371,7 @@ def measure(self, test_case: LLMTestCase):
dataset = Dataset.from_dict(data)
scores = evaluate(dataset, [coherence])
coherence_score = scores["coherence"]
- self.success = coherence_score >= self.minimum_score
+ self.success = coherence_score >= self.threshold
self.score = coherence_score
return self.score
@@ -388,10 +388,10 @@ class RAGASMaliciousnessMetric(BaseMetric):
def __init__(
self,
- minimum_score: float = 0.3,
+ threshold: float = 0.3,
model: Optional[str] = "gpt-3.5-turbo",
):
- self.minimum_score = minimum_score
+ self.threshold = threshold
self.model = model
def measure(self, test_case: LLMTestCase):
@@ -423,7 +423,7 @@ def measure(self, test_case: LLMTestCase):
dataset = Dataset.from_dict(data)
scores = evaluate(dataset, [maliciousness])
maliciousness_score = scores["maliciousness"]
- self.success = maliciousness_score >= self.minimum_score
+ self.success = maliciousness_score >= self.threshold
self.score = maliciousness_score
return self.score
@@ -440,10 +440,10 @@ class RAGASCorrectnessMetric(BaseMetric):
def __init__(
self,
- minimum_score: float = 0.3,
+ threshold: float = 0.3,
model: Optional[str] = "gpt-3.5-turbo",
):
- self.minimum_score = minimum_score
+ self.threshold = threshold
self.model = model
def measure(self, test_case: LLMTestCase):
@@ -475,7 +475,7 @@ def measure(self, test_case: LLMTestCase):
dataset = Dataset.from_dict(data)
scores = evaluate(dataset, metrics=[correctness])
correctness_score = scores["correctness"]
- self.success = correctness_score >= self.minimum_score
+ self.success = correctness_score >= self.threshold
self.score = correctness_score
return self.score
@@ -492,10 +492,10 @@ class RAGASConcisenessMetric(BaseMetric):
def __init__(
self,
- minimum_score: float = 0.3,
+ threshold: float = 0.3,
model: Optional[str] = "gpt-3.5-turbo",
):
- self.minimum_score = minimum_score
+ self.threshold = threshold
self.model = model
def measure(self, test_case: LLMTestCase):
@@ -526,7 +526,7 @@ def measure(self, test_case: LLMTestCase):
dataset = Dataset.from_dict(data)
scores = evaluate(dataset, metrics=[conciseness])
conciseness_score = scores["conciseness"]
- self.success = conciseness_score >= self.minimum_score
+ self.success = conciseness_score >= self.threshold
self.score = conciseness_score
return self.score
@@ -543,10 +543,10 @@ class RagasMetric(BaseMetric):
def __init__(
self,
- minimum_score: float = 0.3,
+ threshold: float = 0.3,
model: Optional[str] = "gpt-3.5-turbo",
):
- self.minimum_score = minimum_score
+ self.threshold = threshold
self.model = model
def measure(self, test_case: LLMTestCase):
@@ -594,7 +594,7 @@ def measure(self, test_case: LLMTestCase):
1.0 / score for score in score_metadata.values()
)
- self.success = ragas_score >= self.minimum_score
+ self.success = ragas_score >= self.threshold
self.score = ragas_score
self.score_metadata = score_metadata
return self.score
diff --git a/deepeval/metrics/summarization.py b/deepeval/metrics/summarization.py
index d3ee73101..c5c09ed1d 100644
--- a/deepeval/metrics/summarization.py
+++ b/deepeval/metrics/summarization.py
@@ -21,12 +21,12 @@ class ScoreType(Enum):
class SummarizationMetric(BaseMetric):
def __init__(
self,
- minimum_score: float = 0.5,
+ threshold: float = 0.5,
model: Optional[str] = None,
n: Optional[int] = 5,
assessment_questions: Optional[List[str]] = None,
):
- self.minimum_score = minimum_score
+ self.threshold = threshold
self.model = model
self.assessment_questions = assessment_questions
self.n = n
@@ -54,7 +54,7 @@ def measure(self, test_case: LLMTestCase):
summarization_score = min(alignment_score, inclusion_score)
- self.success = summarization_score >= self.minimum_score
+ self.success = summarization_score >= self.threshold
self.score_metadata = {
"Alignment": alignment_score,
"Inclusion": inclusion_score,
@@ -134,7 +134,7 @@ def get_answer(self, question: str, text: str) -> str:
return res.content
def is_successful(self) -> bool:
- self.success = self.score >= self.minimum_score
+ self.success = self.score >= self.threshold
return self.success
@property
diff --git a/deepeval/metrics/unbias_metric.py b/deepeval/metrics/unbias_metric.py
index 9511edbad..fb137caab 100644
--- a/deepeval/metrics/unbias_metric.py
+++ b/deepeval/metrics/unbias_metric.py
@@ -15,18 +15,18 @@ def __init__(
self,
evaluation_params: List[LLMTestCaseParams],
model_name: str = "original",
- minimum_score: float = 0.5,
+ threshold: float = 0.5,
): # see paper for rationale https://arxiv.org/pdf/2208.05777.pdf
if not evaluation_params:
raise ValueError("evaluation_params cannot be empty or None")
self.evaluation_params = evaluation_params
self.model_name = model_name
- self.minimum_score = minimum_score
+ self.threshold = threshold
def __call__(self, output, expected_output, query: Optional[str] = "-"):
score = self.measure(output, expected_output)
- success = score >= self.minimum_score
+ success = score >= self.threshold
return score
def measure(self, test_case: LLMTestCase, return_all_scores: bool = False):
@@ -61,7 +61,7 @@ def measure(self, test_case: LLMTestCase, return_all_scores: bool = False):
# Calculate the average score
average_score = total_score / len(self.evaluation_params)
- self.success = average_score > self.minimum_score
+ self.success = average_score > self.threshold
self.score = average_score
if return_all_scores:
@@ -70,7 +70,7 @@ def measure(self, test_case: LLMTestCase, return_all_scores: bool = False):
return average_score
def is_successful(self) -> bool:
- self.success = self.score >= self.minimum_score
+ self.success = self.score >= self.threshold
return self.success
@property
diff --git a/deepeval/test_run/api.py b/deepeval/test_run/api.py
index 3db64aec3..3a3f37b5b 100644
--- a/deepeval/test_run/api.py
+++ b/deepeval/test_run/api.py
@@ -5,7 +5,7 @@
class MetricsMetadata(BaseModel):
metric: str
score: float
- minimum_score: float = Field(None, alias="minimumScore")
+ threshold: float
reason: Optional[str] = None
diff --git a/deepeval/test_run/test_run.py b/deepeval/test_run/test_run.py
index 3fe5a0092..3a1c41ffa 100644
--- a/deepeval/test_run/test_run.py
+++ b/deepeval/test_run/test_run.py
@@ -84,26 +84,25 @@ def add_llm_test_case(
):
# Check if test case with the same ID already exists
test_case_id = id(test_case)
- existing_test_case: LLMTestCase = self.dict_test_cases.get(
+ existing_test_case: APITestCase = self.dict_test_cases.get(
test_case_id, None
)
metrics_metadata = MetricsMetadata(
metric=metric.__name__,
score=metric.score,
- minimumScore=metric.minimum_score,
+ threshold=metric.threshold,
reason=metric.reason,
+ # success=metric.is_successful()
)
if existing_test_case:
# If it exists, append the metrics to the existing test case
existing_test_case.metrics_metadata.append(metrics_metadata)
- success = all(
- [
- metric.is_successful()
- for metric in existing_test_case.metrics_metadata
- ]
- )
+            if metric.is_successful() and existing_test_case.success:
+ success = True
+ else:
+ success = False
# Update the success status
existing_test_case.success = success
else:
@@ -215,7 +214,7 @@ def display_results_table(self, test_run: TestRun):
test_case_name += f" ({test_case.id})"
for metric_metadata in test_case.metrics_metadata:
- if metric_metadata.score >= metric_metadata.minimum_score:
+ if metric_metadata.score >= metric_metadata.threshold:
pass_count += 1
else:
fail_count += 1
@@ -229,7 +228,7 @@ def display_results_table(self, test_run: TestRun):
)
for metric_metadata in test_case.metrics_metadata:
- if metric_metadata.score >= metric_metadata.minimum_score:
+ if metric_metadata.score >= metric_metadata.threshold:
status = "[green]PASSED[/green]"
else:
status = "[red]FAILED[/red]"
@@ -237,7 +236,7 @@ def display_results_table(self, test_run: TestRun):
table.add_row(
"",
str(metric_metadata.metric),
- f"{round(metric_metadata.score,2)} (threshold={metric_metadata.minimum_score}, reason={metric_metadata.reason})",
+ f"{round(metric_metadata.score,2)} (threshold={metric_metadata.threshold}, reason={metric_metadata.reason})",
status,
"",
)
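
The success-aggregation change earlier in this file folds each new metric result into the running flag instead of recomputing `all(...)`; the two are equivalent, as this standalone sketch (hypothetical names) shows:

```python
def update_success(existing_success: bool, metric_passed: bool) -> bool:
    # A test case stays successful only while every attached metric passes
    return existing_success and metric_passed

success = True
for passed in (True, False, True):
    success = update_success(success, passed)
print(success)  # False: one failing metric fails the whole test case
```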
diff --git a/docs/docs/confident-ai-debug-evaluations.mdx b/docs/docs/confident-ai-debug-evaluations.mdx
index c4aa15364..13867035c 100644
--- a/docs/docs/confident-ai-debug-evaluations.mdx
+++ b/docs/docs/confident-ai-debug-evaluations.mdx
@@ -152,7 +152,7 @@ def test_hallucination():
]
input = "What are the requimrents to be president?"
- metric = HallucinationMetric(minimum_score=0.8)
+ metric = HallucinationMetric(threshold=0.8)
test_case = LLMTestCase(
input=input,
actual_output=chatbot.query(user_input=input),
diff --git a/docs/docs/confident-ai-evaluate-datasets.mdx b/docs/docs/confident-ai-evaluate-datasets.mdx
index b5b5b6b7a..712b6ffdf 100644
--- a/docs/docs/confident-ai-evaluate-datasets.mdx
+++ b/docs/docs/confident-ai-evaluate-datasets.mdx
@@ -111,7 +111,7 @@ dataset.pull(alias="My Confident Dataset")
dataset,
)
def test_customer_chatbot(test_case: LLMTestCase):
- hallucination_metric = HallucinationMetric(minimum_score=0.3)
+ hallucination_metric = HallucinationMetric(threshold=0.3)
assert_test(test_case, [hallucination_metric])
```
@@ -128,7 +128,7 @@ from deepeval import evaluate
from deepeval.metrics import HallucinationMetric
from deepeval.dataset import EvaluationDataset
-hallucination_metric = HallucinationMetric(minimum_score=0.3)
+hallucination_metric = HallucinationMetric(threshold=0.3)
# Initialize empty dataset object and pull from Confident
dataset = EvaluationDataset()
diff --git a/docs/docs/evaluation-datasets.mdx b/docs/docs/evaluation-datasets.mdx
index b34503ea5..09fc2bfac 100644
--- a/docs/docs/evaluation-datasets.mdx
+++ b/docs/docs/evaluation-datasets.mdx
@@ -157,8 +157,8 @@ dataset = EvaluationDataset(test_cases=[...])
dataset,
)
def test_customer_chatbot(test_case: LLMTestCase):
- hallucination_metric = HallucinationMetric(minimum_score=0.3)
- answer_relevancy_metric = AnswerRelevancyMetric(minimum_score=0.5)
+ hallucination_metric = HallucinationMetric(threshold=0.3)
+ answer_relevancy_metric = AnswerRelevancyMetric(threshold=0.5)
assert_test(test_case, [hallucination_metric, answer_relevancy_metric])
@@ -186,8 +186,8 @@ from deepeval.dataset import EvaluationDataset
dataset = EvaluationDataset(test_cases=[...])
-hallucination_metric = HallucinationMetric(minimum_score=0.3)
-answer_relevancy_metric = AnswerRelevancyMetric(minimum_score=0.5)
+hallucination_metric = HallucinationMetric(threshold=0.3)
+answer_relevancy_metric = AnswerRelevancyMetric(threshold=0.5)
dataset.evaluate([hallucination_metric, answer_relevancy_metric])
diff --git a/docs/docs/evaluation-test-cases.mdx b/docs/docs/evaluation-test-cases.mdx
index e21f8e3c8..270f3be2d 100644
--- a/docs/docs/evaluation-test-cases.mdx
+++ b/docs/docs/evaluation-test-cases.mdx
@@ -202,7 +202,7 @@ test_case = LLMTestCase(
context=context
)
-metric = HallucinationMetric(minimum_score=0.7)
+metric = HallucinationMetric(threshold=0.7)
run_test(test_case, [metric])
```
@@ -245,7 +245,7 @@ def test_case_1():
expected_output="Me, ruff!",
context=context
)
- metric = HallucinationMetric(minimum_score=0.7)
+ metric = HallucinationMetric(threshold=0.7)
assert_test(test_case, metrics=[metric])
def test_case_2():
@@ -259,7 +259,7 @@ def test_case_2():
expected_output="Me, ruff!",
context=context
)
- metric = HallucinationMetric(minimum_score=0.7)
+ metric = HallucinationMetric(threshold=0.7)
assert_test(test_case, metrics=[metric])
```
@@ -307,7 +307,7 @@ second_test_case = LLMTestCase(
test_cases = [first_test_case, second_test_case]
-metric = HallucinationMetric(minimum_score=0.7)
+metric = HallucinationMetric(threshold=0.7)
evaluate(test_cases, [metric])
```
diff --git a/docs/docs/getting-started.mdx b/docs/docs/getting-started.mdx
index a34f15793..93fb66a4e 100644
--- a/docs/docs/getting-started.mdx
+++ b/docs/docs/getting-started.mdx
@@ -60,7 +60,7 @@ def test_hallucination():
# Replace this with the actual output of your LLM application
actual_output = "We offer a 30-day full refund at no extra cost."
- hallucination_metric = HallucinationMetric(minimum_score=0.7)
+ hallucination_metric = HallucinationMetric(threshold=0.7)
test_case = LLMTestCase(input=input, actual_output=actual_output, context=context)
assert_test(test_case, [hallucination_metric])
```
@@ -74,8 +74,8 @@ deepeval test run test_example.py
**Congratulations! Your test case should have passed ✅** Let's break down what happened.
- The variable `input` mimics a user input, and `actual_output` is a placeholder for what your application's supposed to output based on this input.
-- The variable `context` contains the relevant information from your knowledge base, and `HallucinationMetric(minimum_score=0.7)` is an default metric provided by DeepEval for you to evaluate how factually correct your application's output is based on the provided context.
-- All metric scores range from 0 - 1, which the `minimum_score=0.7` threshold ultimately determines if your test have passed or not.
+- The variable `context` contains the relevant information from your knowledge base, and `HallucinationMetric(threshold=0.7)` is a default metric provided by DeepEval for you to evaluate how factually correct your application's output is based on the provided context.
+- All metric scores range from 0 - 1, and the `threshold=0.7` argument ultimately determines whether your test has passed or not.
:::note
`deepeval`'s default metrics are not evaluated using LLMs. Keep reading this tutorial to learn how to create an LLM based evaluation metric.
@@ -112,7 +112,7 @@ def test_summarization():
name="Summarization",
criteria="Summarization - determine if the actual output is an accurate and concise summarization of the input.",
evaluation_params=[LLMTestCaseParams.INPUT, LLMTestCaseParams.ACTUAL_OUTPUT],
- minimum_score=0.5
+ threshold=0.5
)
test_case = LLMTestCase(input=input, actual_output=actual_output)
assert_test(test_case, [summarization_metric])
@@ -130,10 +130,10 @@ from deepeval.metrics import BaseMetric
class LengthMetric(BaseMetric):
# This metric checks if the output length is greater than 10 characters
def __init__(self, max_length: int=10):
- self.minimum_score = max_length
+ self.threshold = max_length
def measure(self, test_case: LLMTestCase):
- self.success = len(test_case.actual_output) > self.minimum_score
+ self.success = len(test_case.actual_output) > self.threshold
if self.success:
score = 1
else:
@@ -184,13 +184,13 @@ def test_everything():
# Replace this with the actual output of your LLM application
actual_output = "We offer a 30-day full refund at no extra cost."
- hallucination_metric = HallucinationMetric(minimum_score=0.7)
+ hallucination_metric = HallucinationMetric(threshold=0.7)
length_metric = LengthMetric(max_length=10)
summarization_metric = LLMEvalMetric(
name="Summarization",
criteria="Summarization - determine if the actual output is an accurate and concise summarization of the input.",
evaluation_params=[LLMTestCaseParams.INPUT, LLMTestCaseParams.ACTUAL_OUTPUT],
- minimum_score=0.5
+ threshold=0.5
)
test_case = LLMTestCase(input=input, actual_output=actual_output, context=context)
@@ -262,7 +262,7 @@ def test_hallucination(test_case: dict):
# Replace this with the actual output of your LLM application
actual_output = "We offer a 30-day full refund at no extra cost."
- hallucination_metric = HallucinationMetric(minimum_score=0.7)
+ hallucination_metric = HallucinationMetric(threshold=0.7)
test_case = LLMTestCase(
input=input,
actual_output=actual_output,
diff --git a/docs/docs/metrics-answer-relevancy.mdx b/docs/docs/metrics-answer-relevancy.mdx
index 9239f517a..2dc996da7 100644
--- a/docs/docs/metrics-answer-relevancy.mdx
+++ b/docs/docs/metrics-answer-relevancy.mdx
@@ -28,7 +28,7 @@ actual_output = "We offer a 30-day full refund at no extra cost."
retrieval_context = ["All customers are eligible for a 30 day full refund at no extra cost."]
metric = AnswerRelevancyMetric(
- minimum_score=0.7,
+ threshold=0.7,
model="gpt-4",
include_reason=True
)
diff --git a/docs/docs/metrics-bias.mdx b/docs/docs/metrics-bias.mdx
index 2c766ad6d..4cab96246 100644
--- a/docs/docs/metrics-bias.mdx
+++ b/docs/docs/metrics-bias.mdx
@@ -39,7 +39,7 @@ actual_output = "We offer a 30-day full refund at no extra cost."
metric = UnBiasedMetric(
evaluation_params=[LLMTestCaseParams.ACTUAL_OUTPUT],
- minimum_score=0.5
+ threshold=0.5
)
test_case = LLMTestCase(
input="What if these shoes don't fit?",
diff --git a/docs/docs/metrics-contextual-precision.mdx b/docs/docs/metrics-contextual-precision.mdx
index 695ea5d93..76ed83c42 100644
--- a/docs/docs/metrics-contextual-precision.mdx
+++ b/docs/docs/metrics-contextual-precision.mdx
@@ -28,7 +28,7 @@ actual_output = "We offer a 30-day full refund at no extra cost."
retrieval_context = ["All customers are eligible for a 30 day full refund at no extra cost."]
metric = ContextualPrecisionMetric(
- minimum_score=0.7,
+ threshold=0.7,
model="gpt-4",
include_reason=True
)
diff --git a/docs/docs/metrics-contextual-recall.mdx b/docs/docs/metrics-contextual-recall.mdx
index b6afe519d..5be7ab128 100644
--- a/docs/docs/metrics-contextual-recall.mdx
+++ b/docs/docs/metrics-contextual-recall.mdx
@@ -32,7 +32,7 @@ expected_output = "You are eligible for a 30 day full refund at no extra cost."
retrieval_context = ["All customers are eligible for a 30 day full refund at no extra cost."]
metric = ContextualRecallMetric(
- minimum_score=0.7,
+ threshold=0.7,
model="gpt-4",
include_reason=True
)
diff --git a/docs/docs/metrics-contextual-relevancy.mdx b/docs/docs/metrics-contextual-relevancy.mdx
index 2e52fe43d..4fa3604ba 100644
--- a/docs/docs/metrics-contextual-relevancy.mdx
+++ b/docs/docs/metrics-contextual-relevancy.mdx
@@ -32,7 +32,7 @@ actual_output = "We offer a 30-day full refund at no extra cost."
retrieval_context = ["All customers are eligible for a 30 day full refund at no extra cost."]
metric = ContextualRelevancyMetric(
- minimum_score=0.7,
+ threshold=0.7,
model="gpt-4",
include_reason=True
)
diff --git a/docs/docs/metrics-custom.mdx b/docs/docs/metrics-custom.mdx
index 7e24304c9..2cadb8f3e 100644
--- a/docs/docs/metrics-custom.mdx
+++ b/docs/docs/metrics-custom.mdx
@@ -27,11 +27,11 @@ from deepeval.test_case import LLMTestCase
class LengthMetric(BaseMetric):
# This metric checks if the output length is greater than 10 characters
def __init__(self, max_length: int=10):
- self.minimum_score = max_length
+ self.threshold = max_length
def measure(self, test_case: LLMTestCase):
# Set self.success and self.score in the "measure" method
- self.success = len(test_case.actual_output) > self.minimum_score
+ self.success = len(test_case.actual_output) > self.threshold
if self.success:
self.score = 1
else:
@@ -52,7 +52,7 @@ class LengthMetric(BaseMetric):
Notice that a few things have happened:
-- `self.minimum_score` was set in `__init__()`
+- `self.threshold` was set in `__init__()`
- `self.success`, `self.score`, and `self.reason` were set in `measure()`
- `measure()` takes in an `LLMTestCase`
- `self.is_successful()` simply returns the success status
diff --git a/docs/docs/metrics-faithfulness.mdx b/docs/docs/metrics-faithfulness.mdx
index 6a2f8fefb..efed3eb5f 100644
--- a/docs/docs/metrics-faithfulness.mdx
+++ b/docs/docs/metrics-faithfulness.mdx
@@ -32,7 +32,7 @@ actual_output = "We offer a 30-day full refund at no extra cost."
retrieval_context = ["All customers are eligible for a 30 day full refund at no extra cost."]
metric = FaithfulnessMetric(
- minimum_score=0.7,
+ threshold=0.7,
model="gpt-4",
include_reason=True
)
diff --git a/docs/docs/metrics-hallucination.mdx b/docs/docs/metrics-hallucination.mdx
index 75b294266..8ed90e46a 100644
--- a/docs/docs/metrics-hallucination.mdx
+++ b/docs/docs/metrics-hallucination.mdx
@@ -40,7 +40,7 @@ test_case = LLMTestCase(
actual_output=actual_output,
context=context
)
-metric = HallucinationMetric(minimum_score=0.5)
+metric = HallucinationMetric(threshold=0.5)
metric.measure(test_case)
print(metric.score)
diff --git a/docs/docs/metrics-introduction.mdx b/docs/docs/metrics-introduction.mdx
index f2cb51ecb..40eaf9115 100644
--- a/docs/docs/metrics-introduction.mdx
+++ b/docs/docs/metrics-introduction.mdx
@@ -32,7 +32,7 @@ A **_custom_** metric is a type of metric you can easily create by implementing
- are extra reliable as LLMs are only used for extremely specific tasks during evaluation to greatly reduce stochasticity and flakiness in scores.
- provide a comprehensive reason for the scores computed.
-All of `deepeval`'s default metrics output a score between 0-1, and require a `minimum_score` argument to instantiate. A default metric is only successful if the evaluation score is equal to or greater than `minimum_score`.
+All of `deepeval`'s default metrics output a score between 0-1, and require a `threshold` argument to instantiate. A default metric is only successful if the evaluation score is equal to or greater than `threshold`.
:::info
All GPT models from OpenAI are available for LLM-Evals (metrics that use LLMs for evaluation). You can switch between models by providing a string corresponding to OpenAI's model names via the optional `model` argument when instantiating an LLM-Eval.
@@ -82,7 +82,7 @@ All metrics in `deepeval`, including [custom metrics that you create](metrics-cu
- can have its score accessed via `metric.score`
- can have its status accessed via `metric.is_successful()`
- can be used to evaluate test cases or entire datasets, with or without Pytest.
-- has a `minimum_score` that acts as the threshold for success. `metric.is_successful()` is only true if `metric.score` >= `minimum_score`.
+- has a `threshold` that acts as the threshold for success. `metric.is_successful()` is only true if `metric.score` >= `threshold`.
In addition, most LLM-Evals in `deepeval` offer a reason for their score, which can be accessed via `metric.reason`.
@@ -99,8 +99,8 @@ from deepeval.test_case import LLMTestCase
# Initialize a test case
test_case = LLMTestCase(input="...", actual_output="...")
-# Initialize metric with minimum_score
-metric = AnswerRelevancyMetric(minimum_score=0.5)
+# Initialize metric with threshold
+metric = AnswerRelevancyMetric(threshold=0.5)
```
Using this metric, you can either evaluate a test case using `deepeval test run`:
diff --git a/docs/docs/metrics-judgemental.mdx b/docs/docs/metrics-judgemental.mdx
index 933b1e3d9..cee908375 100644
--- a/docs/docs/metrics-judgemental.mdx
+++ b/docs/docs/metrics-judgemental.mdx
@@ -39,7 +39,7 @@ code_correctness_metric = JudgementalGPT(
criteria="Code Correctness - determine whether the code in the 'actual output' produces a valid JSON.",
evaluation_params=[LLMTestCaseParams.ACTUAL_OUTPUT],
language=Languages.SPANISH,
- minimum_score=0.5,
+ threshold=0.5,
)
```
@@ -49,7 +49,7 @@ Under the hood, `JudgementalGPT` sends a request to Confident AI's servers that
- `criteria`: a description outlining the specific evaluation aspects for each test case.
- `evaluation_params`: a list of type `LLMTestCaseParams`. Include only the parameters that are relevant for evaluation.
- [Optional] `language`: type `Language`, specifies what language to return the reasoning in.
-- [Optional] `minimum_score`: the passing threshold, defaulted to 0.5.
+- [Optional] `threshold`: the passing threshold, defaulted to 0.5.
Similar to `LLMEvalMetric`, you can access the judgemental `score` and `reason` for `JudgementalGPT`:
diff --git a/docs/docs/metrics-llm-evals.mdx b/docs/docs/metrics-llm-evals.mdx
index 814f035eb..a7cdaff7c 100644
--- a/docs/docs/metrics-llm-evals.mdx
+++ b/docs/docs/metrics-llm-evals.mdx
@@ -38,14 +38,14 @@ There are three mandatory and two optional parameters required when instantiatin
- `criteria`: a description outlining the specific evaluation aspects for each test case.
- `evaluation_params`: a list of type `LLMTestCaseParams`. Include only the parameters that are relevant for evaluation.
- [Optional] `evaluation_steps`: a list of strings outlining the exact steps the LLM should take for evaluation. You can only provide either `evaluation_steps` or `criteria`, and not both.
-- [Optional] `minimum_score`: the passing threshold, defaulted to 0.5.
+- [Optional] `threshold`: the passing threshold, defaulted to 0.5.
- [Optional] `model`: the model name. This is defaulted to 'gpt-4-1106-preview' and we currently only support models from (Azure) OpenAI.
:::danger
For accurate and valid results, only the parameters that are mentioned in `criteria` should be included as members of `evaluation_params`.
:::
-As mentioned in the [metrics introduction section](metrics-introduction), all of `deepeval`'s metrics return a score ranging from 0 - 1, and a metric is only successful if the evaluation score is equal to or greater than `minimum_score`. An `LLMEvalMetric` is no exception. You can access the `score` and `reason` for each individual `LLMEvalMetric`:
+As mentioned in the [metrics introduction section](metrics-introduction), all of `deepeval`'s metrics return a score ranging from 0 to 1, and a metric is only successful if the evaluation score is equal to or greater than `threshold`. An `LLMEvalMetric` is no exception. You can access the `score` and `reason` for each individual `LLMEvalMetric`:
```python
from deepeval.test_case import LLMTestCase
diff --git a/docs/docs/metrics-ragas.mdx b/docs/docs/metrics-ragas.mdx
index 0ec7c2441..1fad689d7 100644
--- a/docs/docs/metrics-ragas.mdx
+++ b/docs/docs/metrics-ragas.mdx
@@ -42,7 +42,7 @@ expected_output = "You are eligible for a 30 day full refund at no extra cost."
# Replace this with the actual retrieved context from your RAG pipeline
retrieval_context = ["All customers are eligible for a 30 day full refund at no extra cost."]
-metric = RagasMetric(minimum_score=0.5, model="gpt-3.5-turbo")
+metric = RagasMetric(threshold=0.5, model="gpt-3.5-turbo")
test_case = LLMTestCase(
input="What if these shoes don't fit?",
actual_output=actual_output,
diff --git a/docs/docs/metrics-summarization.mdx b/docs/docs/metrics-summarization.mdx
index 322b488c8..9b06deca9 100644
--- a/docs/docs/metrics-summarization.mdx
+++ b/docs/docs/metrics-summarization.mdx
@@ -46,7 +46,7 @@ from deepeval.test_case import LLMTestCase
test_case = LLMTestCase(input=input, actual_output=actual_output)
metric = SummarizationMetric(
- minimum_score=0.5,
+ threshold=0.5,
model="gpt-4",
assessment_questions=[
"Is the inclusion score based on a percentage of 'yes' answers?",
@@ -64,7 +64,7 @@ evaluate([test_case], [metric])
There are five optional parameters when instantiating a `SummarizationMetric` class:
-- [Optional] `minimum_score`: the passing threshold, defaulted to 0.5.
+- [Optional] `threshold`: the passing threshold, defaulted to 0.5.
- [Optional] `model`: the model name. This is defaulted to 'gpt-4-1106-preview' and we currently only support models from (Azure) OpenAI.
- [Optional] `assessment_questions`: a list of **close-ended questions that can be answered with either a 'yes' or a 'no'**. These are questions you would ideally want your summary to be able to answer, and they are especially helpful if you already know what a good summary looks like for your use case. If `assessment_questions` is not provided, we will generate a set of `assessment_questions` for you at evaluation time. The `assessment_questions` are used to calculate the `inclusion_score`.
- [Optional] `n`: the number of questions to generate when calculating the `alignment_score` and `inclusion_score`, defaulted to 5.
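To make the role of `assessment_questions` concrete, here is a rough, hypothetical sketch of how an inclusion-style score could be derived from yes/no answers; the actual computation inside `SummarizationMetric` may differ:

```python
def inclusion_score(answers: list[str]) -> float:
    # hypothetical: fraction of assessment questions answered "yes" by the summary
    yes = sum(1 for a in answers if a.strip().lower() == "yes")
    return yes / len(answers)

print(inclusion_score(["yes", "no", "yes", "yes"]))  # 0.75
```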
diff --git a/docs/docs/metrics-toxicity.mdx b/docs/docs/metrics-toxicity.mdx
index 0af3e030a..8ab957d52 100644
--- a/docs/docs/metrics-toxicity.mdx
+++ b/docs/docs/metrics-toxicity.mdx
@@ -35,7 +35,7 @@ actual_output = "We offer a 30-day full refund at no extra cost."
metric = UnBiasedMetric(
evaluation_params=[LLMTestCaseParams.INPUT, LLMTestCaseParams.ACTUAL_OUTPUT],
- minimum_score=0.5
+ threshold=0.5
)
test_case = LLMTestCase(
input="What if these shoes don't fit?",
diff --git a/examples/getting_started/test_example.py b/examples/getting_started/test_example.py
index d910a2526..ff66655f5 100644
--- a/examples/getting_started/test_example.py
+++ b/examples/getting_started/test_example.py
@@ -15,7 +15,7 @@ def test_hallucination():
# Replace this with the actual output from your LLM application
actual_output = "We offer a 30-day full refund at no extra cost."
- hallucination_metric = HallucinationMetric(minimum_score=0.7)
+ hallucination_metric = HallucinationMetric(threshold=0.7)
test_case = LLMTestCase(
input=input, actual_output=actual_output, context=context
)
@@ -35,7 +35,7 @@ def test_summarization():
LLMTestCaseParams.INPUT,
LLMTestCaseParams.ACTUAL_OUTPUT,
],
- minimum_score=0.5,
+ threshold=0.5,
)
test_case = LLMTestCase(input=input, actual_output=actual_output)
assert_test(test_case, [summarization_metric])
@@ -44,10 +44,10 @@ def test_summarization():
class LengthMetric(BaseMetric):
# This metric checks if the output length is greater than 10 characters
def __init__(self, max_length: int = 10):
- self.minimum_score = max_length
+ self.threshold = max_length
def measure(self, test_case: LLMTestCase):
- self.success = len(test_case.actual_output) > self.minimum_score
+ self.success = len(test_case.actual_output) > self.threshold
if self.success:
self.score = 1
else:
@@ -80,7 +80,7 @@ def test_everything():
# Replace this with the actual output from your LLM application
actual_output = "We offer a 30-day full refund at no extra cost."
- hallucination_metric = HallucinationMetric(minimum_score=0.7)
+ hallucination_metric = HallucinationMetric(threshold=0.7)
length_metric = LengthMetric(max_length=10)
summarization_metric = LLMEvalMetric(
name="Summarization",
@@ -89,7 +89,7 @@ def test_everything():
LLMTestCaseParams.INPUT,
LLMTestCaseParams.ACTUAL_OUTPUT,
],
- minimum_score=0.5,
+ threshold=0.5,
)
test_case = LLMTestCase(
diff --git a/examples/tracing/test_chatbot.py b/examples/tracing/test_chatbot.py
index 62177cf5d..c8a55ee8b 100644
--- a/examples/tracing/test_chatbot.py
+++ b/examples/tracing/test_chatbot.py
@@ -83,7 +83,7 @@ def test_hallucination():
]
input = "What are the requimrents to be president?"
- metric = HallucinationMetric(minimum_score=0.8)
+ metric = HallucinationMetric(threshold=0.8)
test_case = LLMTestCase(
input=input,
actual_output=chatbot.query(user_input=input),
diff --git a/tests/test_answer_relevancy.py b/tests/test_answer_relevancy.py
index 8d828cc82..2863bf091 100644
--- a/tests/test_answer_relevancy.py
+++ b/tests/test_answer_relevancy.py
@@ -47,7 +47,7 @@
@pytest.mark.skip(reason="openai is expensive")
def test_answer_relevancy():
- metric = AnswerRelevancyMetric(minimum_score=0.5)
+ metric = AnswerRelevancyMetric(threshold=0.5)
test_case = LLMTestCase(
input=question,
actual_output=answer,
diff --git a/tests/test_contextual_precision.py b/tests/test_contextual_precision.py
index 6bd36a189..0916f167a 100644
--- a/tests/test_contextual_precision.py
+++ b/tests/test_contextual_precision.py
@@ -61,7 +61,7 @@
@pytest.mark.skip(reason="openai is expensive")
def test_contextual_precision():
- metric = ContextualPrecisionMetric(minimum_score=0.5)
+ metric = ContextualPrecisionMetric(threshold=0.5)
test_case = LLMTestCase(
input=question,
actual_output=answer,
diff --git a/tests/test_custom_metric.py b/tests/test_custom_metric.py
index f9e75db0a..407b378e4 100644
--- a/tests/test_custom_metric.py
+++ b/tests/test_custom_metric.py
@@ -9,14 +9,14 @@
class LengthMetric(BaseMetric):
"""This metric checks if the output is more than 3 letters"""
- def __init__(self, minimum_score: int = 3):
- self.minimum_score = minimum_score
+ def __init__(self, threshold: int = 3):
+ self.threshold = threshold
def measure(self, test_case: LLMTestCase):
# sends to server
text = test_case.actual_output
score = len(text)
- self.success = score > self.minimum_score
+ self.success = score > self.threshold
# Optional: Logs it to the server
return score
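A usage sketch for the renamed attribute, assuming `LLMTestCase` is imported as in the other tests:

```python
from deepeval.test_case import LLMTestCase

metric = LengthMetric(threshold=3)
metric.measure(LLMTestCase(input="...", actual_output="four"))
print(metric.success)  # True, since len("four") == 4 > 3
```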
diff --git a/tests/test_dataset.py b/tests/test_dataset.py
index 770bb05fc..d99ba8e98 100644
--- a/tests/test_dataset.py
+++ b/tests/test_dataset.py
@@ -41,5 +41,5 @@ def test_create_dataset():
# dataset,
# )
# def test_customer_chatbot(test_case: LLMTestCase):
-# hallucination_metric = HallucinationMetric(minimum_score=0.3)
+# hallucination_metric = HallucinationMetric(threshold=0.3)
# assert_test(test_case, [hallucination_metric])
diff --git a/tests/test_hallucination_metric.py b/tests/test_hallucination_metric.py
index c37518785..13560d23c 100644
--- a/tests/test_hallucination_metric.py
+++ b/tests/test_hallucination_metric.py
@@ -5,7 +5,7 @@
def test_hallucination_metric():
- metric = HallucinationMetric(minimum_score=0.5)
+ metric = HallucinationMetric(threshold=0.5)
test_case = LLMTestCase(
input="placeholder",
actual_output="A blond drinking water in public.",
@@ -17,7 +17,7 @@ def test_hallucination_metric():
def test_hallucination_metric_2():
- metric = HallucinationMetric(minimum_score=0.6)
+ metric = HallucinationMetric(threshold=0.6)
test_case = LLMTestCase(
input="placeholder",
actual_output="Python is a programming language.",
@@ -28,7 +28,7 @@ def test_hallucination_metric_2():
def test_hallucination_metric_3():
- metric = HallucinationMetric(minimum_score=0.6)
+ metric = HallucinationMetric(threshold=0.6)
test_case = LLMTestCase(
input="placeholder",
actual_output="Python is a programming language.",
diff --git a/tests/test_judgemental.py b/tests/test_judgemental.py
index 3b2181655..f7f8bfd62 100644
--- a/tests/test_judgemental.py
+++ b/tests/test_judgemental.py
@@ -20,7 +20,7 @@ def test_judgemntal():
LLMTestCaseParams.INPUT,
],
language=Languages.SPANISH,
- minimum_score=0.5,
+ threshold=0.5,
)
assert_test(test_case, [metric])
diff --git a/tests/test_llm_metric.py b/tests/test_llm_metric.py
index 4183ee2ff..54f34fb46 100644
--- a/tests/test_llm_metric.py
+++ b/tests/test_llm_metric.py
@@ -10,7 +10,7 @@ def test_chat_completion():
metric = LLMEvalMetric(
name="Validity",
criteria="The response is a valid response to the prompt.",
- minimum_score=0.5,
+ threshold=0.5,
evaluation_params=[
LLMTestCaseParams.INPUT,
LLMTestCaseParams.ACTUAL_OUTPUT,
@@ -41,7 +41,7 @@ def test_chat_completion():
# metric = LLMEvalMetric(
# name="Validity",
# criteria="The response is a valid response to the prompt.",
-# minimum_score=0.5,
+# threshold=0.5,
# evaluation_params=[
# LLMTestCaseParams.INPUT,
# LLMTestCaseParams.ACTUAL_OUTPUT,
diff --git a/tests/test_quickstart.py b/tests/test_quickstart.py
index 504cb2b87..d33a71148 100644
--- a/tests/test_quickstart.py
+++ b/tests/test_quickstart.py
@@ -19,7 +19,7 @@ def test_llm_output():
test_case = LLMTestCase(
input=input, actual_output=generate_llm_output(input), context=context
)
- assert_test(test_case, [HallucinationMetric(minimum_score=0.5)])
+ assert_test(test_case, [HallucinationMetric(threshold=0.5)])
def test_llm_output_custom():
@@ -29,7 +29,7 @@ def test_llm_output_custom():
input="Placerholder", actual_output=actual_output, context=context
)
with pytest.raises(AssertionError):
- assert_test(test_case, [HallucinationMetric(minimum_score=0.5)])
+ assert_test(test_case, [HallucinationMetric(threshold=0.5)])
def test_0():
diff --git a/tests/test_ragas.py b/tests/test_ragas.py
index a2e38d306..caa93a868 100644
--- a/tests/test_ragas.py
+++ b/tests/test_ragas.py
@@ -2,17 +2,17 @@
from deepeval.test_case import LLMTestCase
from deepeval.metrics import (
RagasMetric,
- ContextualPrecisionMetric,
- ContextualRelevancyMetric,
+ RAGASContextualPrecisionMetric,
+ RAGASContextualRelevancyMetric,
RAGASFaithfulnessMetric,
- ContextualRecallMetric,
+ RAGASContextualRecallMetric,
ConcisenessMetric,
CorrectnessMetric,
CoherenceMetric,
MaliciousnessMetric,
+ RAGASAnswerRelevancyMetric,
)
-from deepeval.metrics.ragas_metric import RAGASAnswerRelevancyMetric
-from deepeval import assert_test, evaluate
+from deepeval import assert_test
query = "Who won the FIFA World Cup in 2018 and what was the score?"
output = "Winners of the FIFA world cup were the French national football team"
@@ -51,28 +51,28 @@ def test_everything():
retrieval_context=context,
context=context,
)
- # metric1 = ContextualRelevancyMetric(model="gpt-4")
- # metric2 = RAGASFaithfulnessMetric(model="gpt-4")
- # metric3 = ContextualRecallMetric(model="gpt-4")
- # metric4 = ConcisenessMetric(model="gpt-4")
- # metric5 = CorrectnessMetric(model="gpt-4")
- # metric6 = CoherenceMetric(model="gpt-4")
- # metric7 = MaliciousnessMetric(model="gpt-4")
- # metric8 = RAGASAnswerRelevancyMetric(model="gpt-4")
- metric9 = ContextualPrecisionMetric()
- # metric10 = RagasMetric()
+ metric1 = RAGASContextualRelevancyMetric(model="gpt-4")
+ metric2 = RAGASFaithfulnessMetric(model="gpt-4")
+ metric3 = RAGASContextualRecallMetric(model="gpt-4")
+ metric4 = ConcisenessMetric(model="gpt-4")
+ metric5 = CorrectnessMetric(model="gpt-4")
+ metric6 = CoherenceMetric(model="gpt-4")
+ metric7 = MaliciousnessMetric(model="gpt-4")
+ metric8 = RAGASAnswerRelevancyMetric(model="gpt-4")
+ metric9 = RAGASContextualPrecisionMetric()
+ metric10 = RagasMetric()
assert_test(
test_case,
[
- # metric1,
- # metric2,
- # metric3,
- # metric4,
- # metric5,
- # metric6,
- # metric7,
- # metric8,
+ metric1,
+ metric2,
+ metric3,
+ metric4,
+ metric5,
+ metric6,
+ metric7,
+ metric8,
metric9,
- # metric10,
+ metric10,
],
)
From 1e5fbab87d58ac2fbc8f5b5fb4ae59688b5ad350 Mon Sep 17 00:00:00 2001
From: Jeffrey Ip
Date: Thu, 11 Jan 2024 14:49:41 -0800
Subject: [PATCH 36/46] Fix langchain chat models
---
deepeval/metrics/__init__.py | 1 +
deepeval/models/gpt_model.py | 20 +-
poetry.lock | 518 +++++++++++++++++++----------------
pyproject.toml | 1 +
setup.py | 1 +
tests/test_faithfulness.py | 2 +-
tests/test_ragas.py | 48 ++--
7 files changed, 316 insertions(+), 275 deletions(-)
diff --git a/deepeval/metrics/__init__.py b/deepeval/metrics/__init__.py
index 51a7dd2b5..619d75ea0 100644
--- a/deepeval/metrics/__init__.py
+++ b/deepeval/metrics/__init__.py
@@ -11,6 +11,7 @@
from .contextual_precision import ContextualPrecisionMetric
from .ragas_metric import (
RagasMetric,
+ RAGASAnswerRelevancyMetric,
RAGASFaithfulnessMetric,
RAGASContextualRecallMetric,
RAGASContextualRelevancyMetric,
diff --git a/deepeval/models/gpt_model.py b/deepeval/models/gpt_model.py
index 6ca7cda8a..49637c8b7 100644
--- a/deepeval/models/gpt_model.py
+++ b/deepeval/models/gpt_model.py
@@ -1,7 +1,7 @@
import os
from typing import Dict, Optional
-from langchain.chat_models import ChatOpenAI, AzureChatOpenAI
+from langchain_openai import ChatOpenAI, AzureChatOpenAI
from deepeval.key_handler import KeyValues, KEY_FILE_HANDLER
from deepeval.models.base import DeepEvalBaseModel
from deepeval.chat_completion.retry import retry_with_exponential_backoff
@@ -43,13 +43,6 @@ def __init__(
def load_model(self):
if self.should_use_azure_openai():
- model_version = KEY_FILE_HANDLER.fetch_data(
- KeyValues.AZURE_MODEL_VERSION
- )
- model_kwargs = {}
- if model_version is not None:
- model_kwargs["model_version"] = model_version
-
openai_api_key = KEY_FILE_HANDLER.fetch_data(
KeyValues.AZURE_OPENAI_API_KEY
)
@@ -63,13 +56,22 @@ def load_model(self):
azure_endpoint = KEY_FILE_HANDLER.fetch_data(
KeyValues.AZURE_OPENAI_ENDPOINT
)
+
+ model_version = KEY_FILE_HANDLER.fetch_data(
+ KeyValues.AZURE_MODEL_VERSION
+ )
+
+ if model_version is None:
+ model_version = ""
+
return AzureChatOpenAI(
openai_api_version=openai_api_version,
azure_deployment=azure_deployment,
azure_endpoint=azure_endpoint,
openai_api_key=openai_api_key,
- model_kwargs=model_kwargs,
+ model_version=model_version,
)
+
return ChatOpenAI(
model_name=self.model_name, model_kwargs=self.model_kwargs
)
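The patch drops the `model_kwargs`-based plumbing in favor of `langchain_openai`'s direct `model_version` parameter, falling back to an empty string when no version is configured. A sketch of the call the patched `load_model()` now makes; the concrete values below are placeholders, not values from this repo:

```python
from langchain_openai import AzureChatOpenAI

chat = AzureChatOpenAI(
    openai_api_version="2023-07-01-preview",   # hypothetical API version string
    azure_deployment="my-gpt4-deployment",     # hypothetical deployment name
    azure_endpoint="https://example.openai.azure.com/",
    openai_api_key="...",
    model_version="",  # the patch falls back to "" when no version is configured
)
```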
diff --git a/poetry.lock b/poetry.lock
index 34c10e78c..ac4bcea85 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -693,53 +693,53 @@ typing = ["typing-extensions (>=4.8)"]
[[package]]
name = "fonttools"
-version = "4.47.0"
+version = "4.47.2"
description = "Tools to manipulate font files"
optional = false
python-versions = ">=3.8"
files = [
- {file = "fonttools-4.47.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:2d2404107626f97a221dc1a65b05396d2bb2ce38e435f64f26ed2369f68675d9"},
- {file = "fonttools-4.47.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c01f409be619a9a0f5590389e37ccb58b47264939f0e8d58bfa1f3ba07d22671"},
- {file = "fonttools-4.47.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d986b66ff722ef675b7ee22fbe5947a41f60a61a4da15579d5e276d897fbc7fa"},
- {file = "fonttools-4.47.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8acf6dd0434b211b3bd30d572d9e019831aae17a54016629fa8224783b22df8"},
- {file = "fonttools-4.47.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:495369c660e0c27233e3c572269cbe520f7f4978be675f990f4005937337d391"},
- {file = "fonttools-4.47.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c59227d7ba5b232281c26ae04fac2c73a79ad0e236bca5c44aae904a18f14faf"},
- {file = "fonttools-4.47.0-cp310-cp310-win32.whl", hash = "sha256:59a6c8b71a245800e923cb684a2dc0eac19c56493e2f896218fcf2571ed28984"},
- {file = "fonttools-4.47.0-cp310-cp310-win_amd64.whl", hash = "sha256:52c82df66201f3a90db438d9d7b337c7c98139de598d0728fb99dab9fd0495ca"},
- {file = "fonttools-4.47.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:854421e328d47d70aa5abceacbe8eef231961b162c71cbe7ff3f47e235e2e5c5"},
- {file = "fonttools-4.47.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:511482df31cfea9f697930f61520f6541185fa5eeba2fa760fe72e8eee5af88b"},
- {file = "fonttools-4.47.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ce0e2c88c8c985b7b9a7efcd06511fb0a1fe3ddd9a6cd2895ef1dbf9059719d7"},
- {file = "fonttools-4.47.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e7a0a8848726956e9d9fb18c977a279013daadf0cbb6725d2015a6dd57527992"},
- {file = "fonttools-4.47.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e869da810ae35afb3019baa0d0306cdbab4760a54909c89ad8904fa629991812"},
- {file = "fonttools-4.47.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:dd23848f877c3754f53a4903fb7a593ed100924f9b4bff7d5a4e2e8a7001ae11"},
- {file = "fonttools-4.47.0-cp311-cp311-win32.whl", hash = "sha256:bf1810635c00f7c45d93085611c995fc130009cec5abdc35b327156aa191f982"},
- {file = "fonttools-4.47.0-cp311-cp311-win_amd64.whl", hash = "sha256:61df4dee5d38ab65b26da8efd62d859a1eef7a34dcbc331299a28e24d04c59a7"},
- {file = "fonttools-4.47.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:e3f4d61f3a8195eac784f1d0c16c0a3105382c1b9a74d99ac4ba421da39a8826"},
- {file = "fonttools-4.47.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:174995f7b057e799355b393e97f4f93ef1f2197cbfa945e988d49b2a09ecbce8"},
- {file = "fonttools-4.47.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ea592e6a09b71cb7a7661dd93ac0b877a6228e2d677ebacbad0a4d118494c86d"},
- {file = "fonttools-4.47.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:40bdbe90b33897d9cc4a39f8e415b0fcdeae4c40a99374b8a4982f127ff5c767"},
- {file = "fonttools-4.47.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:843509ae9b93db5aaf1a6302085e30bddc1111d31e11d724584818f5b698f500"},
- {file = "fonttools-4.47.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:9acfa1cdc479e0dde528b61423855913d949a7f7fe09e276228298fef4589540"},
- {file = "fonttools-4.47.0-cp312-cp312-win32.whl", hash = "sha256:66c92ec7f95fd9732550ebedefcd190a8d81beaa97e89d523a0d17198a8bda4d"},
- {file = "fonttools-4.47.0-cp312-cp312-win_amd64.whl", hash = "sha256:e8fa20748de55d0021f83754b371432dca0439e02847962fc4c42a0e444c2d78"},
- {file = "fonttools-4.47.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:c75e19971209fbbce891ebfd1b10c37320a5a28e8d438861c21d35305aedb81c"},
- {file = "fonttools-4.47.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:e79f1a3970d25f692bbb8c8c2637e621a66c0d60c109ab48d4a160f50856deff"},
- {file = "fonttools-4.47.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:562681188c62c024fe2c611b32e08b8de2afa00c0c4e72bed47c47c318e16d5c"},
- {file = "fonttools-4.47.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a77a60315c33393b2bd29d538d1ef026060a63d3a49a9233b779261bad9c3f71"},
- {file = "fonttools-4.47.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:b4fabb8cc9422efae1a925160083fdcbab8fdc96a8483441eb7457235df625bd"},
- {file = "fonttools-4.47.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:2a78dba8c2a1e9d53a0fb5382979f024200dc86adc46a56cbb668a2249862fda"},
- {file = "fonttools-4.47.0-cp38-cp38-win32.whl", hash = "sha256:e6b968543fde4119231c12c2a953dcf83349590ca631ba8216a8edf9cd4d36a9"},
- {file = "fonttools-4.47.0-cp38-cp38-win_amd64.whl", hash = "sha256:4a9a51745c0439516d947480d4d884fa18bd1458e05b829e482b9269afa655bc"},
- {file = "fonttools-4.47.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:62d8ddb058b8e87018e5dc26f3258e2c30daad4c87262dfeb0e2617dd84750e6"},
- {file = "fonttools-4.47.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5dde0eab40faaa5476133123f6a622a1cc3ac9b7af45d65690870620323308b4"},
- {file = "fonttools-4.47.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f4da089f6dfdb822293bde576916492cd708c37c2501c3651adde39804630538"},
- {file = "fonttools-4.47.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:253bb46bab970e8aae254cebf2ae3db98a4ef6bd034707aa68a239027d2b198d"},
- {file = "fonttools-4.47.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:1193fb090061efa2f9e2d8d743ae9850c77b66746a3b32792324cdce65784154"},
- {file = "fonttools-4.47.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:084511482dd265bce6dca24c509894062f0117e4e6869384d853f46c0e6d43be"},
- {file = "fonttools-4.47.0-cp39-cp39-win32.whl", hash = "sha256:97620c4af36e4c849e52661492e31dc36916df12571cb900d16960ab8e92a980"},
- {file = "fonttools-4.47.0-cp39-cp39-win_amd64.whl", hash = "sha256:e77bdf52185bdaf63d39f3e1ac3212e6cfa3ab07d509b94557a8902ce9c13c82"},
- {file = "fonttools-4.47.0-py3-none-any.whl", hash = "sha256:d6477ba902dd2d7adda7f0fd3bfaeb92885d45993c9e1928c9f28fc3961415f7"},
- {file = "fonttools-4.47.0.tar.gz", hash = "sha256:ec13a10715eef0e031858c1c23bfaee6cba02b97558e4a7bfa089dba4a8c2ebf"},
+ {file = "fonttools-4.47.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3b629108351d25512d4ea1a8393a2dba325b7b7d7308116b605ea3f8e1be88df"},
+ {file = "fonttools-4.47.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c19044256c44fe299d9a73456aabee4b4d06c6b930287be93b533b4737d70aa1"},
+ {file = "fonttools-4.47.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b8be28c036b9f186e8c7eaf8a11b42373e7e4949f9e9f370202b9da4c4c3f56c"},
+ {file = "fonttools-4.47.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f83a4daef6d2a202acb9bf572958f91cfde5b10c8ee7fb1d09a4c81e5d851fd8"},
+ {file = "fonttools-4.47.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:4a5a5318ba5365d992666ac4fe35365f93004109d18858a3e18ae46f67907670"},
+ {file = "fonttools-4.47.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8f57ecd742545362a0f7186774b2d1c53423ed9ece67689c93a1055b236f638c"},
+ {file = "fonttools-4.47.2-cp310-cp310-win32.whl", hash = "sha256:a1c154bb85dc9a4cf145250c88d112d88eb414bad81d4cb524d06258dea1bdc0"},
+ {file = "fonttools-4.47.2-cp310-cp310-win_amd64.whl", hash = "sha256:3e2b95dce2ead58fb12524d0ca7d63a63459dd489e7e5838c3cd53557f8933e1"},
+ {file = "fonttools-4.47.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:29495d6d109cdbabe73cfb6f419ce67080c3ef9ea1e08d5750240fd4b0c4763b"},
+ {file = "fonttools-4.47.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0a1d313a415eaaba2b35d6cd33536560deeebd2ed758b9bfb89ab5d97dc5deac"},
+ {file = "fonttools-4.47.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:90f898cdd67f52f18049250a6474185ef6544c91f27a7bee70d87d77a8daf89c"},
+ {file = "fonttools-4.47.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3480eeb52770ff75140fe7d9a2ec33fb67b07efea0ab5129c7e0c6a639c40c70"},
+ {file = "fonttools-4.47.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:0255dbc128fee75fb9be364806b940ed450dd6838672a150d501ee86523ac61e"},
+ {file = "fonttools-4.47.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:f791446ff297fd5f1e2247c188de53c1bfb9dd7f0549eba55b73a3c2087a2703"},
+ {file = "fonttools-4.47.2-cp311-cp311-win32.whl", hash = "sha256:740947906590a878a4bde7dd748e85fefa4d470a268b964748403b3ab2aeed6c"},
+ {file = "fonttools-4.47.2-cp311-cp311-win_amd64.whl", hash = "sha256:63fbed184979f09a65aa9c88b395ca539c94287ba3a364517698462e13e457c9"},
+ {file = "fonttools-4.47.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:4ec558c543609e71b2275c4894e93493f65d2f41c15fe1d089080c1d0bb4d635"},
+ {file = "fonttools-4.47.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:e040f905d542362e07e72e03612a6270c33d38281fd573160e1003e43718d68d"},
+ {file = "fonttools-4.47.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6dd58cc03016b281bd2c74c84cdaa6bd3ce54c5a7f47478b7657b930ac3ed8eb"},
+ {file = "fonttools-4.47.2-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:32ab2e9702dff0dd4510c7bb958f265a8d3dd5c0e2547e7b5f7a3df4979abb07"},
+ {file = "fonttools-4.47.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:3a808f3c1d1df1f5bf39be869b6e0c263570cdafb5bdb2df66087733f566ea71"},
+ {file = "fonttools-4.47.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:ac71e2e201df041a2891067dc36256755b1229ae167edbdc419b16da78732c2f"},
+ {file = "fonttools-4.47.2-cp312-cp312-win32.whl", hash = "sha256:69731e8bea0578b3c28fdb43dbf95b9386e2d49a399e9a4ad736b8e479b08085"},
+ {file = "fonttools-4.47.2-cp312-cp312-win_amd64.whl", hash = "sha256:b3e1304e5f19ca861d86a72218ecce68f391646d85c851742d265787f55457a4"},
+ {file = "fonttools-4.47.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:254d9a6f7be00212bf0c3159e0a420eb19c63793b2c05e049eb337f3023c5ecc"},
+ {file = "fonttools-4.47.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:eabae77a07c41ae0b35184894202305c3ad211a93b2eb53837c2a1143c8bc952"},
+ {file = "fonttools-4.47.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a86a5ab2873ed2575d0fcdf1828143cfc6b977ac448e3dc616bb1e3d20efbafa"},
+ {file = "fonttools-4.47.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:13819db8445a0cec8c3ff5f243af6418ab19175072a9a92f6cc8ca7d1452754b"},
+ {file = "fonttools-4.47.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:4e743935139aa485fe3253fc33fe467eab6ea42583fa681223ea3f1a93dd01e6"},
+ {file = "fonttools-4.47.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:d49ce3ea7b7173faebc5664872243b40cf88814ca3eb135c4a3cdff66af71946"},
+ {file = "fonttools-4.47.2-cp38-cp38-win32.whl", hash = "sha256:94208ea750e3f96e267f394d5588579bb64cc628e321dbb1d4243ffbc291b18b"},
+ {file = "fonttools-4.47.2-cp38-cp38-win_amd64.whl", hash = "sha256:0f750037e02beb8b3569fbff701a572e62a685d2a0e840d75816592280e5feae"},
+ {file = "fonttools-4.47.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:3d71606c9321f6701642bd4746f99b6089e53d7e9817fc6b964e90d9c5f0ecc6"},
+ {file = "fonttools-4.47.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:86e0427864c6c91cf77f16d1fb9bf1bbf7453e824589e8fb8461b6ee1144f506"},
+ {file = "fonttools-4.47.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a00bd0e68e88987dcc047ea31c26d40a3c61185153b03457956a87e39d43c37"},
+ {file = "fonttools-4.47.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a5d77479fb885ef38a16a253a2f4096bc3d14e63a56d6246bfdb56365a12b20c"},
+ {file = "fonttools-4.47.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:5465df494f20a7d01712b072ae3ee9ad2887004701b95cb2cc6dcb9c2c97a899"},
+ {file = "fonttools-4.47.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:4c811d3c73b6abac275babb8aa439206288f56fdb2c6f8835e3d7b70de8937a7"},
+ {file = "fonttools-4.47.2-cp39-cp39-win32.whl", hash = "sha256:5b60e3afa9635e3dfd3ace2757039593e3bd3cf128be0ddb7a1ff4ac45fa5a50"},
+ {file = "fonttools-4.47.2-cp39-cp39-win_amd64.whl", hash = "sha256:7ee48bd9d6b7e8f66866c9090807e3a4a56cf43ffad48962725a190e0dd774c8"},
+ {file = "fonttools-4.47.2-py3-none-any.whl", hash = "sha256:7eb7ad665258fba68fd22228a09f347469d95a97fb88198e133595947a20a184"},
+ {file = "fonttools-4.47.2.tar.gz", hash = "sha256:7df26dd3650e98ca45f1e29883c96a0b9f5bb6af8d632a6a108bc744fa0bd9b3"},
]
[package.extras]
@@ -1010,13 +1010,13 @@ socks = ["socksio (==1.*)"]
[[package]]
name = "huggingface-hub"
-version = "0.20.1"
+version = "0.20.2"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false
python-versions = ">=3.8.0"
files = [
- {file = "huggingface_hub-0.20.1-py3-none-any.whl", hash = "sha256:ecfdea395a8bc68cd160106c5bd857f7e010768d95f9e1862a779010cc304831"},
- {file = "huggingface_hub-0.20.1.tar.gz", hash = "sha256:8c88c4c3c8853e22f2dfb4d84c3d493f4e1af52fb3856a90e1eeddcf191ddbb1"},
+ {file = "huggingface_hub-0.20.2-py3-none-any.whl", hash = "sha256:53752eda2239d30a470c307a61cf9adcf136bc77b0a734338c7d04941af560d8"},
+ {file = "huggingface_hub-0.20.2.tar.gz", hash = "sha256:215c5fceff631030c7a3d19ba7b588921c908b3f21eef31d160ebc245b200ff6"},
]
[package.dependencies]
@@ -1064,13 +1064,13 @@ files = [
[[package]]
name = "jinja2"
-version = "3.1.2"
+version = "3.1.3"
description = "A very fast and expressive template engine."
optional = false
python-versions = ">=3.7"
files = [
- {file = "Jinja2-3.1.2-py3-none-any.whl", hash = "sha256:6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61"},
- {file = "Jinja2-3.1.2.tar.gz", hash = "sha256:31351a702a408a9e7595a8fc6150fc3f43bb6bf7e319770cbc0db9df9437e852"},
+ {file = "Jinja2-3.1.3-py3-none-any.whl", hash = "sha256:7d6d50dd97d52cbc355597bd845fabfbac3f551e1f99619e39a35ce8c370b5fa"},
+ {file = "Jinja2-3.1.3.tar.gz", hash = "sha256:ac8bd6544d4bb2c9792bf3a159e80bba8fda7f07e81bc3aed565432d5925ba90"},
]
[package.dependencies]
@@ -1230,13 +1230,13 @@ files = [
[[package]]
name = "langchain"
-version = "0.0.353"
+version = "0.1.0"
description = "Building applications with LLMs through composability"
optional = false
python-versions = ">=3.8.1,<4.0"
files = [
- {file = "langchain-0.0.353-py3-none-any.whl", hash = "sha256:54cac8b74fbefacddcdf0c443619a7331d6b59fe94fa2a48a4d7da2b59cf1f63"},
- {file = "langchain-0.0.353.tar.gz", hash = "sha256:a095ea819f13a3606ced699182a8369eb2d77034ec8c913983675d6dd9a98196"},
+ {file = "langchain-0.1.0-py3-none-any.whl", hash = "sha256:8652e74b039333a55c79faff4400b077ba1bd0ddce5255574e42d301c05c1733"},
+ {file = "langchain-0.1.0.tar.gz", hash = "sha256:d43119f8d3fda2c8ddf8c3a19bd5b94b347e27d1867ff14a921b90bdbed0668a"},
]
[package.dependencies]
@@ -1244,9 +1244,9 @@ aiohttp = ">=3.8.3,<4.0.0"
async-timeout = {version = ">=4.0.0,<5.0.0", markers = "python_version < \"3.11\""}
dataclasses-json = ">=0.5.7,<0.7"
jsonpatch = ">=1.33,<2.0"
-langchain-community = ">=0.0.2,<0.1"
-langchain-core = ">=0.1.4,<0.2"
-langsmith = ">=0.0.70,<0.1.0"
+langchain-community = ">=0.0.9,<0.1"
+langchain-core = ">=0.1.7,<0.2"
+langsmith = ">=0.0.77,<0.1.0"
numpy = ">=1,<2"
pydantic = ">=1,<3"
PyYAML = ">=5.3"
@@ -1261,7 +1261,7 @@ cli = ["typer (>=0.9.0,<0.10.0)"]
cohere = ["cohere (>=4,<5)"]
docarray = ["docarray[hnswlib] (>=0.32.0,<0.33.0)"]
embeddings = ["sentence-transformers (>=2,<3)"]
-extended-testing = ["aiosqlite (>=0.19.0,<0.20.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "anthropic (>=0.3.11,<0.4.0)", "arxiv (>=1.4,<2.0)", "assemblyai (>=0.17.0,<0.18.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "beautifulsoup4 (>=4,<5)", "bibtexparser (>=1.4.0,<2.0.0)", "cassio (>=0.1.0,<0.2.0)", "chardet (>=5.1.0,<6.0.0)", "cohere (>=4,<5)", "couchbase (>=4.1.9,<5.0.0)", "dashvector (>=1.0.1,<2.0.0)", "databricks-vectorsearch (>=0.21,<0.22)", "datasets (>=2.15.0,<3.0.0)", "dgml-utils (>=0.3.0,<0.4.0)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "feedparser (>=6.0.10,<7.0.0)", "fireworks-ai (>=0.9.0,<0.10.0)", "geopandas (>=0.13.1,<0.14.0)", "gitpython (>=3.1.32,<4.0.0)", "google-cloud-documentai (>=2.20.1,<3.0.0)", "gql (>=3.4.1,<4.0.0)", "hologres-vector (>=0.0.6,<0.0.7)", "html2text (>=2020.1.16,<2021.0.0)", "javelin-sdk (>=0.1.8,<0.2.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "jsonschema (>1)", "lxml (>=4.9.2,<5.0.0)", "markdownify (>=0.11.6,<0.12.0)", "motor (>=3.3.1,<4.0.0)", "msal (>=1.25.0,<2.0.0)", "mwparserfromhell (>=0.6.4,<0.7.0)", "mwxml (>=0.3.3,<0.4.0)", "newspaper3k (>=0.2.8,<0.3.0)", "numexpr (>=2.8.6,<3.0.0)", "openai (<2)", "openapi-pydantic (>=0.3.2,<0.4.0)", "pandas (>=2.0.1,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pgvector (>=0.1.6,<0.2.0)", "praw (>=7.7.1,<8.0.0)", "psychicapi (>=0.8.0,<0.9.0)", "py-trello (>=0.19.0,<0.20.0)", "pymupdf (>=1.22.3,<2.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pypdfium2 (>=4.10.0,<5.0.0)", "pyspark (>=3.4.0,<4.0.0)", "rank-bm25 (>=0.2.2,<0.3.0)", "rapidfuzz (>=3.1.1,<4.0.0)", "rapidocr-onnxruntime (>=1.3.2,<2.0.0)", "requests-toolbelt (>=1.0.0,<2.0.0)", "rspace_client (>=2.5.0,<3.0.0)", "scikit-learn (>=1.2.2,<2.0.0)", "sqlite-vss (>=0.1.2,<0.2.0)", "streamlit (>=1.18.0,<2.0.0)", "sympy (>=1.12,<2.0)", "telethon (>=1.28.5,<2.0.0)", "timescale-vector (>=0.0.1,<0.0.2)", "tqdm (>=4.48.0)", "upstash-redis (>=0.15.0,<0.16.0)", "xata (>=1.0.0a7,<2.0.0)", "xmltodict (>=0.13.0,<0.14.0)"]
+extended-testing = ["aiosqlite (>=0.19.0,<0.20.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "anthropic (>=0.3.11,<0.4.0)", "arxiv (>=1.4,<2.0)", "assemblyai (>=0.17.0,<0.18.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "beautifulsoup4 (>=4,<5)", "bibtexparser (>=1.4.0,<2.0.0)", "cassio (>=0.1.0,<0.2.0)", "chardet (>=5.1.0,<6.0.0)", "cohere (>=4,<5)", "couchbase (>=4.1.9,<5.0.0)", "dashvector (>=1.0.1,<2.0.0)", "databricks-vectorsearch (>=0.21,<0.22)", "datasets (>=2.15.0,<3.0.0)", "dgml-utils (>=0.3.0,<0.4.0)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "feedparser (>=6.0.10,<7.0.0)", "fireworks-ai (>=0.9.0,<0.10.0)", "geopandas (>=0.13.1,<0.14.0)", "gitpython (>=3.1.32,<4.0.0)", "google-cloud-documentai (>=2.20.1,<3.0.0)", "gql (>=3.4.1,<4.0.0)", "hologres-vector (>=0.0.6,<0.0.7)", "html2text (>=2020.1.16,<2021.0.0)", "javelin-sdk (>=0.1.8,<0.2.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "jsonschema (>1)", "langchain-openai (>=0.0.2,<0.1)", "lxml (>=4.9.2,<5.0.0)", "markdownify (>=0.11.6,<0.12.0)", "motor (>=3.3.1,<4.0.0)", "msal (>=1.25.0,<2.0.0)", "mwparserfromhell (>=0.6.4,<0.7.0)", "mwxml (>=0.3.3,<0.4.0)", "newspaper3k (>=0.2.8,<0.3.0)", "numexpr (>=2.8.6,<3.0.0)", "openai (<2)", "openapi-pydantic (>=0.3.2,<0.4.0)", "pandas (>=2.0.1,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pgvector (>=0.1.6,<0.2.0)", "praw (>=7.7.1,<8.0.0)", "psychicapi (>=0.8.0,<0.9.0)", "py-trello (>=0.19.0,<0.20.0)", "pymupdf (>=1.22.3,<2.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pypdfium2 (>=4.10.0,<5.0.0)", "pyspark (>=3.4.0,<4.0.0)", "rank-bm25 (>=0.2.2,<0.3.0)", "rapidfuzz (>=3.1.1,<4.0.0)", "rapidocr-onnxruntime (>=1.3.2,<2.0.0)", "requests-toolbelt (>=1.0.0,<2.0.0)", "rspace_client (>=2.5.0,<3.0.0)", "scikit-learn (>=1.2.2,<2.0.0)", "sqlite-vss (>=0.1.2,<0.2.0)", "streamlit (>=1.18.0,<2.0.0)", "sympy (>=1.12,<2.0)", "telethon (>=1.28.5,<2.0.0)", "timescale-vector (>=0.0.1,<0.0.2)", "tqdm (>=4.48.0)", "upstash-redis (>=0.15.0,<0.16.0)", "xata (>=1.0.0a7,<2.0.0)", "xmltodict (>=0.13.0,<0.14.0)"]
javascript = ["esprima (>=4.0.1,<5.0.0)"]
llms = ["clarifai (>=9.1.0)", "cohere (>=4,<5)", "huggingface_hub (>=0,<1)", "manifest-ml (>=0.0.1,<0.0.2)", "nlpcloud (>=1,<2)", "openai (<2)", "openlm (>=0.0.5,<0.0.6)", "torch (>=1,<3)", "transformers (>=4,<5)"]
openai = ["openai (<2)", "tiktoken (>=0.3.2,<0.6.0)"]
@@ -1270,19 +1270,19 @@ text-helpers = ["chardet (>=5.1.0,<6.0.0)"]
[[package]]
name = "langchain-community"
-version = "0.0.7"
+version = "0.0.11"
description = "Community contributed LangChain integrations."
optional = false
python-versions = ">=3.8.1,<4.0"
files = [
- {file = "langchain_community-0.0.7-py3-none-any.whl", hash = "sha256:468af187bfffe753426cc4548132824be7df9404d38ceef2f873087290d8ff0e"},
- {file = "langchain_community-0.0.7.tar.gz", hash = "sha256:cfbeb25cac7dff3c021f3c82aa243fc80f80082d6f6fdcc79daf36b1408828cc"},
+ {file = "langchain_community-0.0.11-py3-none-any.whl", hash = "sha256:30ab1d7dbf35d0ebe684d8a1e8964e8dedd3d31a3703790436b39674cfa06f41"},
+ {file = "langchain_community-0.0.11.tar.gz", hash = "sha256:eaeaa8d63427ecf0cb32fe2f1ba4d05ad6d5ef9f7019baf21dc2dde5b1403002"},
]
[package.dependencies]
aiohttp = ">=3.8.3,<4.0.0"
dataclasses-json = ">=0.5.7,<0.7"
-langchain-core = ">=0.1,<0.2"
+langchain-core = ">=0.1.8,<0.2"
langsmith = ">=0.0.63,<0.1.0"
numpy = ">=1,<2"
PyYAML = ">=5.3"
@@ -1292,17 +1292,17 @@ tenacity = ">=8.1.0,<9.0.0"
[package.extras]
cli = ["typer (>=0.9.0,<0.10.0)"]
-extended-testing = ["aiosqlite (>=0.19.0,<0.20.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "anthropic (>=0.3.11,<0.4.0)", "arxiv (>=1.4,<2.0)", "assemblyai (>=0.17.0,<0.18.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "azure-ai-documentintelligence (>=1.0.0b1,<2.0.0)", "beautifulsoup4 (>=4,<5)", "bibtexparser (>=1.4.0,<2.0.0)", "cassio (>=0.1.0,<0.2.0)", "chardet (>=5.1.0,<6.0.0)", "cohere (>=4,<5)", "dashvector (>=1.0.1,<2.0.0)", "databricks-vectorsearch (>=0.21,<0.22)", "datasets (>=2.15.0,<3.0.0)", "dgml-utils (>=0.3.0,<0.4.0)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "feedparser (>=6.0.10,<7.0.0)", "fireworks-ai (>=0.9.0,<0.10.0)", "geopandas (>=0.13.1,<0.14.0)", "gitpython (>=3.1.32,<4.0.0)", "google-cloud-documentai (>=2.20.1,<3.0.0)", "gql (>=3.4.1,<4.0.0)", "gradientai (>=1.4.0,<2.0.0)", "hologres-vector (>=0.0.6,<0.0.7)", "html2text (>=2020.1.16,<2021.0.0)", "javelin-sdk (>=0.1.8,<0.2.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "jsonschema (>1)", "lxml (>=4.9.2,<5.0.0)", "markdownify (>=0.11.6,<0.12.0)", "motor (>=3.3.1,<4.0.0)", "msal (>=1.25.0,<2.0.0)", "mwparserfromhell (>=0.6.4,<0.7.0)", "mwxml (>=0.3.3,<0.4.0)", "newspaper3k (>=0.2.8,<0.3.0)", "numexpr (>=2.8.6,<3.0.0)", "openai (<2)", "openapi-pydantic (>=0.3.2,<0.4.0)", "oracle-ads (>=2.9.1,<3.0.0)", "pandas (>=2.0.1,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pgvector (>=0.1.6,<0.2.0)", "praw (>=7.7.1,<8.0.0)", "psychicapi (>=0.8.0,<0.9.0)", "py-trello (>=0.19.0,<0.20.0)", "pymupdf (>=1.22.3,<2.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pypdfium2 (>=4.10.0,<5.0.0)", "pyspark (>=3.4.0,<4.0.0)", "rank-bm25 (>=0.2.2,<0.3.0)", "rapidfuzz (>=3.1.1,<4.0.0)", "rapidocr-onnxruntime (>=1.3.2,<2.0.0)", "requests-toolbelt (>=1.0.0,<2.0.0)", "rspace_client (>=2.5.0,<3.0.0)", "scikit-learn (>=1.2.2,<2.0.0)", "sqlite-vss (>=0.1.2,<0.2.0)", "streamlit (>=1.18.0,<2.0.0)", "sympy (>=1.12,<2.0)", "telethon (>=1.28.5,<2.0.0)", "timescale-vector (>=0.0.1,<0.0.2)", "tqdm (>=4.48.0)", "upstash-redis (>=0.15.0,<0.16.0)", "xata (>=1.0.0a7,<2.0.0)", "xmltodict (>=0.13.0,<0.14.0)"]
+extended-testing = ["aiosqlite (>=0.19.0,<0.20.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "anthropic (>=0.3.11,<0.4.0)", "arxiv (>=1.4,<2.0)", "assemblyai (>=0.17.0,<0.18.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "azure-ai-documentintelligence (>=1.0.0b1,<2.0.0)", "beautifulsoup4 (>=4,<5)", "bibtexparser (>=1.4.0,<2.0.0)", "cassio (>=0.1.0,<0.2.0)", "chardet (>=5.1.0,<6.0.0)", "cohere (>=4,<5)", "dashvector (>=1.0.1,<2.0.0)", "databricks-vectorsearch (>=0.21,<0.22)", "datasets (>=2.15.0,<3.0.0)", "dgml-utils (>=0.3.0,<0.4.0)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "feedparser (>=6.0.10,<7.0.0)", "fireworks-ai (>=0.9.0,<0.10.0)", "geopandas (>=0.13.1,<0.14.0)", "gitpython (>=3.1.32,<4.0.0)", "google-cloud-documentai (>=2.20.1,<3.0.0)", "gql (>=3.4.1,<4.0.0)", "gradientai (>=1.4.0,<2.0.0)", "hologres-vector (>=0.0.6,<0.0.7)", "html2text (>=2020.1.16,<2021.0.0)", "javelin-sdk (>=0.1.8,<0.2.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "jsonschema (>1)", "lxml (>=4.9.2,<5.0.0)", "markdownify (>=0.11.6,<0.12.0)", "motor (>=3.3.1,<4.0.0)", "msal (>=1.25.0,<2.0.0)", "mwparserfromhell (>=0.6.4,<0.7.0)", "mwxml (>=0.3.3,<0.4.0)", "newspaper3k (>=0.2.8,<0.3.0)", "numexpr (>=2.8.6,<3.0.0)", "openai (<2)", "openapi-pydantic (>=0.3.2,<0.4.0)", "oracle-ads (>=2.9.1,<3.0.0)", "pandas (>=2.0.1,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pgvector (>=0.1.6,<0.2.0)", "praw (>=7.7.1,<8.0.0)", "psychicapi (>=0.8.0,<0.9.0)", "py-trello (>=0.19.0,<0.20.0)", "pymupdf (>=1.22.3,<2.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pypdfium2 (>=4.10.0,<5.0.0)", "pyspark (>=3.4.0,<4.0.0)", "rank-bm25 (>=0.2.2,<0.3.0)", "rapidfuzz (>=3.1.1,<4.0.0)", "rapidocr-onnxruntime (>=1.3.2,<2.0.0)", "requests-toolbelt (>=1.0.0,<2.0.0)", "rspace_client (>=2.5.0,<3.0.0)", "scikit-learn (>=1.2.2,<2.0.0)", "sqlite-vss (>=0.1.2,<0.2.0)", "streamlit (>=1.18.0,<2.0.0)", "sympy (>=1.12,<2.0)", "telethon (>=1.28.5,<2.0.0)", "timescale-vector (>=0.0.1,<0.0.2)", "tqdm (>=4.48.0)", "upstash-redis (>=0.15.0,<0.16.0)", "xata (>=1.0.0a7,<2.0.0)", "xmltodict (>=0.13.0,<0.14.0)", "zhipuai (>=1.0.7,<2.0.0)"]
[[package]]
name = "langchain-core"
-version = "0.1.4"
+version = "0.1.10"
description = "Building applications with LLMs through composability"
optional = false
python-versions = ">=3.8.1,<4.0"
files = [
- {file = "langchain_core-0.1.4-py3-none-any.whl", hash = "sha256:c62bd362d5abf5359436a99b29629e12a4d1ede9f1704dc958cdb8530a791efd"},
- {file = "langchain_core-0.1.4.tar.gz", hash = "sha256:f700138689c9014e23d3c29796a892dccf7f2a42901cb8817671823e1a24724c"},
+ {file = "langchain_core-0.1.10-py3-none-any.whl", hash = "sha256:d89952f6d0766cfc88d9f1e25b84d56f8d7bd63a45ad8ec1a9a038c9b49df16d"},
+ {file = "langchain_core-0.1.10.tar.gz", hash = "sha256:3c9e1383264c102fcc6f865700dbb9416c4931a25d0ac2195f6311c6b867aa17"},
]
[package.dependencies]
@@ -1318,15 +1318,32 @@ tenacity = ">=8.1.0,<9.0.0"
[package.extras]
extended-testing = ["jinja2 (>=3,<4)"]
+[[package]]
+name = "langchain-openai"
+version = "0.0.2"
+description = "An integration package connecting OpenAI and LangChain"
+optional = false
+python-versions = ">=3.8.1,<4.0"
+files = [
+ {file = "langchain_openai-0.0.2-py3-none-any.whl", hash = "sha256:0a46067be13ce95a029fdca339cd1034a61be1a727786178fbad702668a060f9"},
+ {file = "langchain_openai-0.0.2.tar.gz", hash = "sha256:713af4a638f65b3af2f741a9d61991011c31939b070d81ede5b2e3cba625e01a"},
+]
+
+[package.dependencies]
+langchain-core = ">=0.1.7,<0.2"
+numpy = ">=1,<2"
+openai = ">=1.6.1,<2.0.0"
+tiktoken = ">=0.5.2,<0.6.0"
+
[[package]]
name = "langsmith"
-version = "0.0.75"
+version = "0.0.79"
description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform."
optional = false
python-versions = ">=3.8.1,<4.0"
files = [
- {file = "langsmith-0.0.75-py3-none-any.whl", hash = "sha256:3e008854204c5eaae007f34c7e249059218605689c385c037f6a40cac044833b"},
- {file = "langsmith-0.0.75.tar.gz", hash = "sha256:3fd44c58bd53cb9366af3de129c7f11b6947914f1bb598a585240df0e2c566eb"},
+ {file = "langsmith-0.0.79-py3-none-any.whl", hash = "sha256:be0374e913c36d9f6a13dd6b6e20a506066d5a0f3abfd476f9cf9e0b086ed744"},
+ {file = "langsmith-0.0.79.tar.gz", hash = "sha256:d32639ccd18a92533b302f6f482255619afc8eb007fff91e37ee699d947c5e29"},
]
[package.dependencies]
@@ -1335,13 +1352,13 @@ requests = ">=2,<3"
[[package]]
name = "llama-index"
-version = "0.9.24"
+version = "0.9.29"
description = "Interface between LLMs and your data"
optional = false
python-versions = ">=3.8.1,<4.0"
files = [
- {file = "llama_index-0.9.24-py3-none-any.whl", hash = "sha256:aeef8a4fb478d45474261289046f37c2805e3bf3453c156c84088c0414465e5e"},
- {file = "llama_index-0.9.24.tar.gz", hash = "sha256:48175a35c30427f361068693d6f384baf76865831569ca4e04a1a8b6f10ba269"},
+ {file = "llama_index-0.9.29-py3-none-any.whl", hash = "sha256:4ac6f589a2e8c5049882c185f25e6c57b68cf9d6e46a6f84102705a7dd357dd2"},
+ {file = "llama_index-0.9.29.tar.gz", hash = "sha256:df5af84bf593cdf6a36da403f521bd5582eccbd6ca551da44d2e4e5b2b4f5619"},
]
[package.dependencies]
@@ -1352,6 +1369,7 @@ deprecated = ">=1.2.9.3"
fsspec = ">=2023.5.0"
httpx = "*"
nest-asyncio = ">=1.5.8,<2.0.0"
+networkx = ">=3.0"
nltk = ">=3.8.1,<4.0.0"
numpy = "*"
openai = ">=1.1.0"
@@ -1366,8 +1384,8 @@ typing-inspect = ">=0.8.0"
[package.extras]
gradientai = ["gradientai (>=1.4.0)"]
langchain = ["langchain (>=0.0.303)"]
-local-models = ["optimum[onnxruntime] (>=1.13.2,<2.0.0)", "sentencepiece (>=0.1.99,<0.2.0)", "transformers[torch] (>=4.34.0,<5.0.0)"]
-postgres = ["asyncpg (>=0.28.0,<0.29.0)", "pgvector (>=0.1.0,<0.2.0)", "psycopg-binary (>=3.1.12,<4.0.0)"]
+local-models = ["optimum[onnxruntime] (>=1.13.2,<2.0.0)", "sentencepiece (>=0.1.99,<0.2.0)", "transformers[torch] (>=4.33.1,<5.0.0)"]
+postgres = ["asyncpg (>=0.28.0,<0.29.0)", "pgvector (>=0.1.0,<0.2.0)", "psycopg-binary (>=3.1.12,<4.0.0)", "psycopg2 (>=2.9.9,<3.0.0)"]
query-tools = ["guidance (>=0.0.64,<0.0.65)", "jsonpath-ng (>=1.6.0,<2.0.0)", "lm-format-enforcer (>=0.4.3,<0.5.0)", "rank-bm25 (>=0.2.2,<0.3.0)", "scikit-learn", "spacy (>=3.7.1,<4.0.0)"]
[[package]]
@@ -1465,22 +1483,22 @@ files = [
[[package]]
name = "marshmallow"
-version = "3.20.1"
+version = "3.20.2"
description = "A lightweight library for converting complex datatypes to and from native Python datatypes."
optional = false
python-versions = ">=3.8"
files = [
- {file = "marshmallow-3.20.1-py3-none-any.whl", hash = "sha256:684939db93e80ad3561392f47be0230743131560a41c5110684c16e21ade0a5c"},
- {file = "marshmallow-3.20.1.tar.gz", hash = "sha256:5d2371bbe42000f2b3fb5eaa065224df7d8f8597bc19a1bbfa5bfe7fba8da889"},
+ {file = "marshmallow-3.20.2-py3-none-any.whl", hash = "sha256:c21d4b98fee747c130e6bc8f45c4b3199ea66bc00c12ee1f639f0aeca034d5e9"},
+ {file = "marshmallow-3.20.2.tar.gz", hash = "sha256:4c1daff273513dc5eb24b219a8035559dc573c8f322558ef85f5438ddd1236dd"},
]
[package.dependencies]
packaging = ">=17.0"
[package.extras]
-dev = ["flake8 (==6.0.0)", "flake8-bugbear (==23.7.10)", "mypy (==1.4.1)", "pre-commit (>=2.4,<4.0)", "pytest", "pytz", "simplejson", "tox"]
-docs = ["alabaster (==0.7.13)", "autodocsumm (==0.2.11)", "sphinx (==7.0.1)", "sphinx-issues (==3.0.1)", "sphinx-version-warning (==1.1.2)"]
-lint = ["flake8 (==6.0.0)", "flake8-bugbear (==23.7.10)", "mypy (==1.4.1)", "pre-commit (>=2.4,<4.0)"]
+dev = ["pre-commit (>=2.4,<4.0)", "pytest", "pytz", "simplejson", "tox"]
+docs = ["alabaster (==0.7.15)", "autodocsumm (==0.2.12)", "sphinx (==7.2.6)", "sphinx-issues (==3.0.1)", "sphinx-version-warning (==1.1.2)"]
+lint = ["pre-commit (>=2.4,<4.0)"]
tests = ["pytest", "pytz", "simplejson"]
[[package]]
@@ -1737,47 +1755,47 @@ twitter = ["twython"]
[[package]]
name = "numpy"
-version = "1.26.2"
+version = "1.26.3"
description = "Fundamental package for array computing in Python"
optional = false
python-versions = ">=3.9"
files = [
- {file = "numpy-1.26.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:3703fc9258a4a122d17043e57b35e5ef1c5a5837c3db8be396c82e04c1cf9b0f"},
- {file = "numpy-1.26.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:cc392fdcbd21d4be6ae1bb4475a03ce3b025cd49a9be5345d76d7585aea69440"},
- {file = "numpy-1.26.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:36340109af8da8805d8851ef1d74761b3b88e81a9bd80b290bbfed61bd2b4f75"},
- {file = "numpy-1.26.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bcc008217145b3d77abd3e4d5ef586e3bdfba8fe17940769f8aa09b99e856c00"},
- {file = "numpy-1.26.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:3ced40d4e9e18242f70dd02d739e44698df3dcb010d31f495ff00a31ef6014fe"},
- {file = "numpy-1.26.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b272d4cecc32c9e19911891446b72e986157e6a1809b7b56518b4f3755267523"},
- {file = "numpy-1.26.2-cp310-cp310-win32.whl", hash = "sha256:22f8fc02fdbc829e7a8c578dd8d2e15a9074b630d4da29cda483337e300e3ee9"},
- {file = "numpy-1.26.2-cp310-cp310-win_amd64.whl", hash = "sha256:26c9d33f8e8b846d5a65dd068c14e04018d05533b348d9eaeef6c1bd787f9919"},
- {file = "numpy-1.26.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b96e7b9c624ef3ae2ae0e04fa9b460f6b9f17ad8b4bec6d7756510f1f6c0c841"},
- {file = "numpy-1.26.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:aa18428111fb9a591d7a9cc1b48150097ba6a7e8299fb56bdf574df650e7d1f1"},
- {file = "numpy-1.26.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:06fa1ed84aa60ea6ef9f91ba57b5ed963c3729534e6e54055fc151fad0423f0a"},
- {file = "numpy-1.26.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:96ca5482c3dbdd051bcd1fce8034603d6ebfc125a7bd59f55b40d8f5d246832b"},
- {file = "numpy-1.26.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:854ab91a2906ef29dc3925a064fcd365c7b4da743f84b123002f6139bcb3f8a7"},
- {file = "numpy-1.26.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:f43740ab089277d403aa07567be138fc2a89d4d9892d113b76153e0e412409f8"},
- {file = "numpy-1.26.2-cp311-cp311-win32.whl", hash = "sha256:a2bbc29fcb1771cd7b7425f98b05307776a6baf43035d3b80c4b0f29e9545186"},
- {file = "numpy-1.26.2-cp311-cp311-win_amd64.whl", hash = "sha256:2b3fca8a5b00184828d12b073af4d0fc5fdd94b1632c2477526f6bd7842d700d"},
- {file = "numpy-1.26.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:a4cd6ed4a339c21f1d1b0fdf13426cb3b284555c27ac2f156dfdaaa7e16bfab0"},
- {file = "numpy-1.26.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5d5244aabd6ed7f312268b9247be47343a654ebea52a60f002dc70c769048e75"},
- {file = "numpy-1.26.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6a3cdb4d9c70e6b8c0814239ead47da00934666f668426fc6e94cce869e13fd7"},
- {file = "numpy-1.26.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa317b2325f7aa0a9471663e6093c210cb2ae9c0ad824732b307d2c51983d5b6"},
- {file = "numpy-1.26.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:174a8880739c16c925799c018f3f55b8130c1f7c8e75ab0a6fa9d41cab092fd6"},
- {file = "numpy-1.26.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:f79b231bf5c16b1f39c7f4875e1ded36abee1591e98742b05d8a0fb55d8a3eec"},
- {file = "numpy-1.26.2-cp312-cp312-win32.whl", hash = "sha256:4a06263321dfd3598cacb252f51e521a8cb4b6df471bb12a7ee5cbab20ea9167"},
- {file = "numpy-1.26.2-cp312-cp312-win_amd64.whl", hash = "sha256:b04f5dc6b3efdaab541f7857351aac359e6ae3c126e2edb376929bd3b7f92d7e"},
- {file = "numpy-1.26.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4eb8df4bf8d3d90d091e0146f6c28492b0be84da3e409ebef54349f71ed271ef"},
- {file = "numpy-1.26.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1a13860fdcd95de7cf58bd6f8bc5a5ef81c0b0625eb2c9a783948847abbef2c2"},
- {file = "numpy-1.26.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:64308ebc366a8ed63fd0bf426b6a9468060962f1a4339ab1074c228fa6ade8e3"},
- {file = "numpy-1.26.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:baf8aab04a2c0e859da118f0b38617e5ee65d75b83795055fb66c0d5e9e9b818"},
- {file = "numpy-1.26.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d73a3abcac238250091b11caef9ad12413dab01669511779bc9b29261dd50210"},
- {file = "numpy-1.26.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:b361d369fc7e5e1714cf827b731ca32bff8d411212fccd29ad98ad622449cc36"},
- {file = "numpy-1.26.2-cp39-cp39-win32.whl", hash = "sha256:bd3f0091e845164a20bd5a326860c840fe2af79fa12e0469a12768a3ec578d80"},
- {file = "numpy-1.26.2-cp39-cp39-win_amd64.whl", hash = "sha256:2beef57fb031dcc0dc8fa4fe297a742027b954949cabb52a2a376c144e5e6060"},
- {file = "numpy-1.26.2-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:1cc3d5029a30fb5f06704ad6b23b35e11309491c999838c31f124fee32107c79"},
- {file = "numpy-1.26.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:94cc3c222bb9fb5a12e334d0479b97bb2df446fbe622b470928f5284ffca3f8d"},
- {file = "numpy-1.26.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:fe6b44fb8fcdf7eda4ef4461b97b3f63c466b27ab151bec2366db8b197387841"},
- {file = "numpy-1.26.2.tar.gz", hash = "sha256:f65738447676ab5777f11e6bbbdb8ce11b785e105f690bc45966574816b6d3ea"},
+ {file = "numpy-1.26.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:806dd64230dbbfaca8a27faa64e2f414bf1c6622ab78cc4264f7f5f028fee3bf"},
+ {file = "numpy-1.26.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:02f98011ba4ab17f46f80f7f8f1c291ee7d855fcef0a5a98db80767a468c85cd"},
+ {file = "numpy-1.26.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6d45b3ec2faed4baca41c76617fcdcfa4f684ff7a151ce6fc78ad3b6e85af0a6"},
+ {file = "numpy-1.26.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bdd2b45bf079d9ad90377048e2747a0c82351989a2165821f0c96831b4a2a54b"},
+ {file = "numpy-1.26.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:211ddd1e94817ed2d175b60b6374120244a4dd2287f4ece45d49228b4d529178"},
+ {file = "numpy-1.26.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b1240f767f69d7c4c8a29adde2310b871153df9b26b5cb2b54a561ac85146485"},
+ {file = "numpy-1.26.3-cp310-cp310-win32.whl", hash = "sha256:21a9484e75ad018974a2fdaa216524d64ed4212e418e0a551a2d83403b0531d3"},
+ {file = "numpy-1.26.3-cp310-cp310-win_amd64.whl", hash = "sha256:9e1591f6ae98bcfac2a4bbf9221c0b92ab49762228f38287f6eeb5f3f55905ce"},
+ {file = "numpy-1.26.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b831295e5472954104ecb46cd98c08b98b49c69fdb7040483aff799a755a7374"},
+ {file = "numpy-1.26.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9e87562b91f68dd8b1c39149d0323b42e0082db7ddb8e934ab4c292094d575d6"},
+ {file = "numpy-1.26.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c66d6fec467e8c0f975818c1796d25c53521124b7cfb760114be0abad53a0a2"},
+ {file = "numpy-1.26.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f25e2811a9c932e43943a2615e65fc487a0b6b49218899e62e426e7f0a57eeda"},
+ {file = "numpy-1.26.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:af36e0aa45e25c9f57bf684b1175e59ea05d9a7d3e8e87b7ae1a1da246f2767e"},
+ {file = "numpy-1.26.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:51c7f1b344f302067b02e0f5b5d2daa9ed4a721cf49f070280ac202738ea7f00"},
+ {file = "numpy-1.26.3-cp311-cp311-win32.whl", hash = "sha256:7ca4f24341df071877849eb2034948459ce3a07915c2734f1abb4018d9c49d7b"},
+ {file = "numpy-1.26.3-cp311-cp311-win_amd64.whl", hash = "sha256:39763aee6dfdd4878032361b30b2b12593fb445ddb66bbac802e2113eb8a6ac4"},
+ {file = "numpy-1.26.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:a7081fd19a6d573e1a05e600c82a1c421011db7935ed0d5c483e9dd96b99cf13"},
+ {file = "numpy-1.26.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:12c70ac274b32bc00c7f61b515126c9205323703abb99cd41836e8125ea0043e"},
+ {file = "numpy-1.26.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7f784e13e598e9594750b2ef6729bcd5a47f6cfe4a12cca13def35e06d8163e3"},
+ {file = "numpy-1.26.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5f24750ef94d56ce6e33e4019a8a4d68cfdb1ef661a52cdaee628a56d2437419"},
+ {file = "numpy-1.26.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:77810ef29e0fb1d289d225cabb9ee6cf4d11978a00bb99f7f8ec2132a84e0166"},
+ {file = "numpy-1.26.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8ed07a90f5450d99dad60d3799f9c03c6566709bd53b497eb9ccad9a55867f36"},
+ {file = "numpy-1.26.3-cp312-cp312-win32.whl", hash = "sha256:f73497e8c38295aaa4741bdfa4fda1a5aedda5473074369eca10626835445511"},
+ {file = "numpy-1.26.3-cp312-cp312-win_amd64.whl", hash = "sha256:da4b0c6c699a0ad73c810736303f7fbae483bcb012e38d7eb06a5e3b432c981b"},
+ {file = "numpy-1.26.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1666f634cb3c80ccbd77ec97bc17337718f56d6658acf5d3b906ca03e90ce87f"},
+ {file = "numpy-1.26.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:18c3319a7d39b2c6a9e3bb75aab2304ab79a811ac0168a671a62e6346c29b03f"},
+ {file = "numpy-1.26.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0b7e807d6888da0db6e7e75838444d62495e2b588b99e90dd80c3459594e857b"},
+ {file = "numpy-1.26.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b4d362e17bcb0011738c2d83e0a65ea8ce627057b2fdda37678f4374a382a137"},
+ {file = "numpy-1.26.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:b8c275f0ae90069496068c714387b4a0eba5d531aace269559ff2b43655edd58"},
+ {file = "numpy-1.26.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:cc0743f0302b94f397a4a65a660d4cd24267439eb16493fb3caad2e4389bccbb"},
+ {file = "numpy-1.26.3-cp39-cp39-win32.whl", hash = "sha256:9bc6d1a7f8cedd519c4b7b1156d98e051b726bf160715b769106661d567b3f03"},
+ {file = "numpy-1.26.3-cp39-cp39-win_amd64.whl", hash = "sha256:867e3644e208c8922a3be26fc6bbf112a035f50f0a86497f98f228c50c607bb2"},
+ {file = "numpy-1.26.3-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:3c67423b3703f8fbd90f5adaa37f85b5794d3366948efe9a5190a5f3a83fc34e"},
+ {file = "numpy-1.26.3-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:46f47ee566d98849323f01b349d58f2557f02167ee301e5e28809a8c0e27a2d0"},
+ {file = "numpy-1.26.3-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:a8474703bffc65ca15853d5fd4d06b18138ae90c17c8d12169968e998e448bb5"},
+ {file = "numpy-1.26.3.tar.gz", hash = "sha256:697df43e2b6310ecc9d95f05d5ef20eacc09c7c4ecc9da3f235d39e71b7da1e4"},
]
[[package]]
@@ -1923,13 +1941,13 @@ files = [
[[package]]
name = "openai"
-version = "1.6.1"
+version = "1.7.1"
description = "The official Python library for the openai API"
optional = false
python-versions = ">=3.7.1"
files = [
- {file = "openai-1.6.1-py3-none-any.whl", hash = "sha256:bc9f774838d67ac29fb24cdeb2d58faf57de8b311085dcd1348f7aa02a96c7ee"},
- {file = "openai-1.6.1.tar.gz", hash = "sha256:d553ca9dbf9486b08e75b09e8671e4f638462aaadccfced632bf490fc3d75fa2"},
+ {file = "openai-1.7.1-py3-none-any.whl", hash = "sha256:e52ad7ea015331edc584e6e9c98741c819d7ffbbd2ecc50bf1f55c33f9cb3f77"},
+ {file = "openai-1.7.1.tar.gz", hash = "sha256:7556e6aa30e20254b1ad68de49bb5ef4d8106bfac5e8a78abdc1daa911fbb1fb"},
]
[package.dependencies]
@@ -2035,70 +2053,88 @@ files = [
[[package]]
name = "pillow"
-version = "10.1.0"
+version = "10.2.0"
description = "Python Imaging Library (Fork)"
optional = false
python-versions = ">=3.8"
files = [
- {file = "Pillow-10.1.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:1ab05f3db77e98f93964697c8efc49c7954b08dd61cff526b7f2531a22410106"},
- {file = "Pillow-10.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6932a7652464746fcb484f7fc3618e6503d2066d853f68a4bd97193a3996e273"},
- {file = "Pillow-10.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5f63b5a68daedc54c7c3464508d8c12075e56dcfbd42f8c1bf40169061ae666"},
- {file = "Pillow-10.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0949b55eb607898e28eaccb525ab104b2d86542a85c74baf3a6dc24002edec2"},
- {file = "Pillow-10.1.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:ae88931f93214777c7a3aa0a8f92a683f83ecde27f65a45f95f22d289a69e593"},
- {file = "Pillow-10.1.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:b0eb01ca85b2361b09480784a7931fc648ed8b7836f01fb9241141b968feb1db"},
- {file = "Pillow-10.1.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:d27b5997bdd2eb9fb199982bb7eb6164db0426904020dc38c10203187ae2ff2f"},
- {file = "Pillow-10.1.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:7df5608bc38bd37ef585ae9c38c9cd46d7c81498f086915b0f97255ea60c2818"},
- {file = "Pillow-10.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:41f67248d92a5e0a2076d3517d8d4b1e41a97e2df10eb8f93106c89107f38b57"},
- {file = "Pillow-10.1.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:1fb29c07478e6c06a46b867e43b0bcdb241b44cc52be9bc25ce5944eed4648e7"},
- {file = "Pillow-10.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2cdc65a46e74514ce742c2013cd4a2d12e8553e3a2563c64879f7c7e4d28bce7"},
- {file = "Pillow-10.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:50d08cd0a2ecd2a8657bd3d82c71efd5a58edb04d9308185d66c3a5a5bed9610"},
- {file = "Pillow-10.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:062a1610e3bc258bff2328ec43f34244fcec972ee0717200cb1425214fe5b839"},
- {file = "Pillow-10.1.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:61f1a9d247317fa08a308daaa8ee7b3f760ab1809ca2da14ecc88ae4257d6172"},
- {file = "Pillow-10.1.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:a646e48de237d860c36e0db37ecaecaa3619e6f3e9d5319e527ccbc8151df061"},
- {file = "Pillow-10.1.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:47e5bf85b80abc03be7455c95b6d6e4896a62f6541c1f2ce77a7d2bb832af262"},
- {file = "Pillow-10.1.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:a92386125e9ee90381c3369f57a2a50fa9e6aa8b1cf1d9c4b200d41a7dd8e992"},
- {file = "Pillow-10.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:0f7c276c05a9767e877a0b4c5050c8bee6a6d960d7f0c11ebda6b99746068c2a"},
- {file = "Pillow-10.1.0-cp312-cp312-macosx_10_10_x86_64.whl", hash = "sha256:a89b8312d51715b510a4fe9fc13686283f376cfd5abca8cd1c65e4c76e21081b"},
- {file = "Pillow-10.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:00f438bb841382b15d7deb9a05cc946ee0f2c352653c7aa659e75e592f6fa17d"},
- {file = "Pillow-10.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3d929a19f5469b3f4df33a3df2983db070ebb2088a1e145e18facbc28cae5b27"},
- {file = "Pillow-10.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9a92109192b360634a4489c0c756364c0c3a2992906752165ecb50544c251312"},
- {file = "Pillow-10.1.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:0248f86b3ea061e67817c47ecbe82c23f9dd5d5226200eb9090b3873d3ca32de"},
- {file = "Pillow-10.1.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:9882a7451c680c12f232a422730f986a1fcd808da0fd428f08b671237237d651"},
- {file = "Pillow-10.1.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:1c3ac5423c8c1da5928aa12c6e258921956757d976405e9467c5f39d1d577a4b"},
- {file = "Pillow-10.1.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:806abdd8249ba3953c33742506fe414880bad78ac25cc9a9b1c6ae97bedd573f"},
- {file = "Pillow-10.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:eaed6977fa73408b7b8a24e8b14e59e1668cfc0f4c40193ea7ced8e210adf996"},
- {file = "Pillow-10.1.0-cp38-cp38-macosx_10_10_x86_64.whl", hash = "sha256:fe1e26e1ffc38be097f0ba1d0d07fcade2bcfd1d023cda5b29935ae8052bd793"},
- {file = "Pillow-10.1.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7a7e3daa202beb61821c06d2517428e8e7c1aab08943e92ec9e5755c2fc9ba5e"},
- {file = "Pillow-10.1.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:24fadc71218ad2b8ffe437b54876c9382b4a29e030a05a9879f615091f42ffc2"},
- {file = "Pillow-10.1.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fa1d323703cfdac2036af05191b969b910d8f115cf53093125e4058f62012c9a"},
- {file = "Pillow-10.1.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:912e3812a1dbbc834da2b32299b124b5ddcb664ed354916fd1ed6f193f0e2d01"},
- {file = "Pillow-10.1.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:7dbaa3c7de82ef37e7708521be41db5565004258ca76945ad74a8e998c30af8d"},
- {file = "Pillow-10.1.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:9d7bc666bd8c5a4225e7ac71f2f9d12466ec555e89092728ea0f5c0c2422ea80"},
- {file = "Pillow-10.1.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:baada14941c83079bf84c037e2d8b7506ce201e92e3d2fa0d1303507a8538212"},
- {file = "Pillow-10.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:2ef6721c97894a7aa77723740a09547197533146fba8355e86d6d9a4a1056b14"},
- {file = "Pillow-10.1.0-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:0a026c188be3b443916179f5d04548092e253beb0c3e2ee0a4e2cdad72f66099"},
- {file = "Pillow-10.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:04f6f6149f266a100374ca3cc368b67fb27c4af9f1cc8cb6306d849dcdf12616"},
- {file = "Pillow-10.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb40c011447712d2e19cc261c82655f75f32cb724788df315ed992a4d65696bb"},
- {file = "Pillow-10.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1a8413794b4ad9719346cd9306118450b7b00d9a15846451549314a58ac42219"},
- {file = "Pillow-10.1.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:c9aeea7b63edb7884b031a35305629a7593272b54f429a9869a4f63a1bf04c34"},
- {file = "Pillow-10.1.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:b4005fee46ed9be0b8fb42be0c20e79411533d1fd58edabebc0dd24626882cfd"},
- {file = "Pillow-10.1.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:4d0152565c6aa6ebbfb1e5d8624140a440f2b99bf7afaafbdbf6430426497f28"},
- {file = "Pillow-10.1.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:d921bc90b1defa55c9917ca6b6b71430e4286fc9e44c55ead78ca1a9f9eba5f2"},
- {file = "Pillow-10.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:cfe96560c6ce2f4c07d6647af2d0f3c54cc33289894ebd88cfbb3bcd5391e256"},
- {file = "Pillow-10.1.0-pp310-pypy310_pp73-macosx_10_10_x86_64.whl", hash = "sha256:937bdc5a7f5343d1c97dc98149a0be7eb9704e937fe3dc7140e229ae4fc572a7"},
- {file = "Pillow-10.1.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b1c25762197144e211efb5f4e8ad656f36c8d214d390585d1d21281f46d556ba"},
- {file = "Pillow-10.1.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:afc8eef765d948543a4775f00b7b8c079b3321d6b675dde0d02afa2ee23000b4"},
- {file = "Pillow-10.1.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:883f216eac8712b83a63f41b76ddfb7b2afab1b74abbb413c5df6680f071a6b9"},
- {file = "Pillow-10.1.0-pp39-pypy39_pp73-macosx_10_10_x86_64.whl", hash = "sha256:b920e4d028f6442bea9a75b7491c063f0b9a3972520731ed26c83e254302eb1e"},
- {file = "Pillow-10.1.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1c41d960babf951e01a49c9746f92c5a7e0d939d1652d7ba30f6b3090f27e412"},
- {file = "Pillow-10.1.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:1fafabe50a6977ac70dfe829b2d5735fd54e190ab55259ec8aea4aaea412fa0b"},
- {file = "Pillow-10.1.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:3b834f4b16173e5b92ab6566f0473bfb09f939ba14b23b8da1f54fa63e4b623f"},
- {file = "Pillow-10.1.0.tar.gz", hash = "sha256:e6bf8de6c36ed96c86ea3b6e1d5273c53f46ef518a062464cd7ef5dd2cf92e38"},
+ {file = "pillow-10.2.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:7823bdd049099efa16e4246bdf15e5a13dbb18a51b68fa06d6c1d4d8b99a796e"},
+ {file = "pillow-10.2.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:83b2021f2ade7d1ed556bc50a399127d7fb245e725aa0113ebd05cfe88aaf588"},
+ {file = "pillow-10.2.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6fad5ff2f13d69b7e74ce5b4ecd12cc0ec530fcee76356cac6742785ff71c452"},
+ {file = "pillow-10.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:da2b52b37dad6d9ec64e653637a096905b258d2fc2b984c41ae7d08b938a67e4"},
+ {file = "pillow-10.2.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:47c0995fc4e7f79b5cfcab1fc437ff2890b770440f7696a3ba065ee0fd496563"},
+ {file = "pillow-10.2.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:322bdf3c9b556e9ffb18f93462e5f749d3444ce081290352c6070d014c93feb2"},
+ {file = "pillow-10.2.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:51f1a1bffc50e2e9492e87d8e09a17c5eea8409cda8d3f277eb6edc82813c17c"},
+ {file = "pillow-10.2.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:69ffdd6120a4737710a9eee73e1d2e37db89b620f702754b8f6e62594471dee0"},
+ {file = "pillow-10.2.0-cp310-cp310-win32.whl", hash = "sha256:c6dafac9e0f2b3c78df97e79af707cdc5ef8e88208d686a4847bab8266870023"},
+ {file = "pillow-10.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:aebb6044806f2e16ecc07b2a2637ee1ef67a11840a66752751714a0d924adf72"},
+ {file = "pillow-10.2.0-cp310-cp310-win_arm64.whl", hash = "sha256:7049e301399273a0136ff39b84c3678e314f2158f50f517bc50285fb5ec847ad"},
+ {file = "pillow-10.2.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:35bb52c37f256f662abdfa49d2dfa6ce5d93281d323a9af377a120e89a9eafb5"},
+ {file = "pillow-10.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9c23f307202661071d94b5e384e1e1dc7dfb972a28a2310e4ee16103e66ddb67"},
+ {file = "pillow-10.2.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:773efe0603db30c281521a7c0214cad7836c03b8ccff897beae9b47c0b657d61"},
+ {file = "pillow-10.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:11fa2e5984b949b0dd6d7a94d967743d87c577ff0b83392f17cb3990d0d2fd6e"},
+ {file = "pillow-10.2.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:716d30ed977be8b37d3ef185fecb9e5a1d62d110dfbdcd1e2a122ab46fddb03f"},
+ {file = "pillow-10.2.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:a086c2af425c5f62a65e12fbf385f7c9fcb8f107d0849dba5839461a129cf311"},
+ {file = "pillow-10.2.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:c8de2789052ed501dd829e9cae8d3dcce7acb4777ea4a479c14521c942d395b1"},
+ {file = "pillow-10.2.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:609448742444d9290fd687940ac0b57fb35e6fd92bdb65386e08e99af60bf757"},
+ {file = "pillow-10.2.0-cp311-cp311-win32.whl", hash = "sha256:823ef7a27cf86df6597fa0671066c1b596f69eba53efa3d1e1cb8b30f3533068"},
+ {file = "pillow-10.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:1da3b2703afd040cf65ec97efea81cfba59cdbed9c11d8efc5ab09df9509fc56"},
+ {file = "pillow-10.2.0-cp311-cp311-win_arm64.whl", hash = "sha256:edca80cbfb2b68d7b56930b84a0e45ae1694aeba0541f798e908a49d66b837f1"},
+ {file = "pillow-10.2.0-cp312-cp312-macosx_10_10_x86_64.whl", hash = "sha256:1b5e1b74d1bd1b78bc3477528919414874748dd363e6272efd5abf7654e68bef"},
+ {file = "pillow-10.2.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0eae2073305f451d8ecacb5474997c08569fb4eb4ac231ffa4ad7d342fdc25ac"},
+ {file = "pillow-10.2.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b7c2286c23cd350b80d2fc9d424fc797575fb16f854b831d16fd47ceec078f2c"},
+ {file = "pillow-10.2.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1e23412b5c41e58cec602f1135c57dfcf15482013ce6e5f093a86db69646a5aa"},
+ {file = "pillow-10.2.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:52a50aa3fb3acb9cf7213573ef55d31d6eca37f5709c69e6858fe3bc04a5c2a2"},
+ {file = "pillow-10.2.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:127cee571038f252a552760076407f9cff79761c3d436a12af6000cd182a9d04"},
+ {file = "pillow-10.2.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:8d12251f02d69d8310b046e82572ed486685c38f02176bd08baf216746eb947f"},
+ {file = "pillow-10.2.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:54f1852cd531aa981bc0965b7d609f5f6cc8ce8c41b1139f6ed6b3c54ab82bfb"},
+ {file = "pillow-10.2.0-cp312-cp312-win32.whl", hash = "sha256:257d8788df5ca62c980314053197f4d46eefedf4e6175bc9412f14412ec4ea2f"},
+ {file = "pillow-10.2.0-cp312-cp312-win_amd64.whl", hash = "sha256:154e939c5f0053a383de4fd3d3da48d9427a7e985f58af8e94d0b3c9fcfcf4f9"},
+ {file = "pillow-10.2.0-cp312-cp312-win_arm64.whl", hash = "sha256:f379abd2f1e3dddb2b61bc67977a6b5a0a3f7485538bcc6f39ec76163891ee48"},
+ {file = "pillow-10.2.0-cp38-cp38-macosx_10_10_x86_64.whl", hash = "sha256:8373c6c251f7ef8bda6675dd6d2b3a0fcc31edf1201266b5cf608b62a37407f9"},
+ {file = "pillow-10.2.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:870ea1ada0899fd0b79643990809323b389d4d1d46c192f97342eeb6ee0b8483"},
+ {file = "pillow-10.2.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b4b6b1e20608493548b1f32bce8cca185bf0480983890403d3b8753e44077129"},
+ {file = "pillow-10.2.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3031709084b6e7852d00479fd1d310b07d0ba82765f973b543c8af5061cf990e"},
+ {file = "pillow-10.2.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:3ff074fc97dd4e80543a3e91f69d58889baf2002b6be64347ea8cf5533188213"},
+ {file = "pillow-10.2.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:cb4c38abeef13c61d6916f264d4845fab99d7b711be96c326b84df9e3e0ff62d"},
+ {file = "pillow-10.2.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:b1b3020d90c2d8e1dae29cf3ce54f8094f7938460fb5ce8bc5c01450b01fbaf6"},
+ {file = "pillow-10.2.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:170aeb00224ab3dc54230c797f8404507240dd868cf52066f66a41b33169bdbe"},
+ {file = "pillow-10.2.0-cp38-cp38-win32.whl", hash = "sha256:c4225f5220f46b2fde568c74fca27ae9771536c2e29d7c04f4fb62c83275ac4e"},
+ {file = "pillow-10.2.0-cp38-cp38-win_amd64.whl", hash = "sha256:0689b5a8c5288bc0504d9fcee48f61a6a586b9b98514d7d29b840143d6734f39"},
+ {file = "pillow-10.2.0-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:b792a349405fbc0163190fde0dc7b3fef3c9268292586cf5645598b48e63dc67"},
+ {file = "pillow-10.2.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c570f24be1e468e3f0ce7ef56a89a60f0e05b30a3669a459e419c6eac2c35364"},
+ {file = "pillow-10.2.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8ecd059fdaf60c1963c58ceb8997b32e9dc1b911f5da5307aab614f1ce5c2fb"},
+ {file = "pillow-10.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c365fd1703040de1ec284b176d6af5abe21b427cb3a5ff68e0759e1e313a5e7e"},
+ {file = "pillow-10.2.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:70c61d4c475835a19b3a5aa42492409878bbca7438554a1f89d20d58a7c75c01"},
+ {file = "pillow-10.2.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:b6f491cdf80ae540738859d9766783e3b3c8e5bd37f5dfa0b76abdecc5081f13"},
+ {file = "pillow-10.2.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:9d189550615b4948f45252d7f005e53c2040cea1af5b60d6f79491a6e147eef7"},
+ {file = "pillow-10.2.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:49d9ba1ed0ef3e061088cd1e7538a0759aab559e2e0a80a36f9fd9d8c0c21591"},
+ {file = "pillow-10.2.0-cp39-cp39-win32.whl", hash = "sha256:babf5acfede515f176833ed6028754cbcd0d206f7f614ea3447d67c33be12516"},
+ {file = "pillow-10.2.0-cp39-cp39-win_amd64.whl", hash = "sha256:0304004f8067386b477d20a518b50f3fa658a28d44e4116970abfcd94fac34a8"},
+ {file = "pillow-10.2.0-cp39-cp39-win_arm64.whl", hash = "sha256:0fb3e7fc88a14eacd303e90481ad983fd5b69c761e9e6ef94c983f91025da869"},
+ {file = "pillow-10.2.0-pp310-pypy310_pp73-macosx_10_10_x86_64.whl", hash = "sha256:322209c642aabdd6207517e9739c704dc9f9db943015535783239022002f054a"},
+ {file = "pillow-10.2.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3eedd52442c0a5ff4f887fab0c1c0bb164d8635b32c894bc1faf4c618dd89df2"},
+ {file = "pillow-10.2.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cb28c753fd5eb3dd859b4ee95de66cc62af91bcff5db5f2571d32a520baf1f04"},
+ {file = "pillow-10.2.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:33870dc4653c5017bf4c8873e5488d8f8d5f8935e2f1fb9a2208c47cdd66efd2"},
+ {file = "pillow-10.2.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:3c31822339516fb3c82d03f30e22b1d038da87ef27b6a78c9549888f8ceda39a"},
+ {file = "pillow-10.2.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:a2b56ba36e05f973d450582fb015594aaa78834fefe8dfb8fcd79b93e64ba4c6"},
+ {file = "pillow-10.2.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:d8e6aeb9201e655354b3ad049cb77d19813ad4ece0df1249d3c793de3774f8c7"},
+ {file = "pillow-10.2.0-pp39-pypy39_pp73-macosx_10_10_x86_64.whl", hash = "sha256:2247178effb34a77c11c0e8ac355c7a741ceca0a732b27bf11e747bbc950722f"},
+ {file = "pillow-10.2.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:15587643b9e5eb26c48e49a7b33659790d28f190fc514a322d55da2fb5c2950e"},
+ {file = "pillow-10.2.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:753cd8f2086b2b80180d9b3010dd4ed147efc167c90d3bf593fe2af21265e5a5"},
+ {file = "pillow-10.2.0-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:7c8f97e8e7a9009bcacbe3766a36175056c12f9a44e6e6f2d5caad06dcfbf03b"},
+ {file = "pillow-10.2.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:d1b35bcd6c5543b9cb547dee3150c93008f8dd0f1fef78fc0cd2b141c5baf58a"},
+ {file = "pillow-10.2.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:fe4c15f6c9285dc54ce6553a3ce908ed37c8f3825b5a51a15c91442bb955b868"},
+ {file = "pillow-10.2.0.tar.gz", hash = "sha256:e87f0b2c78157e12d7686b27d63c070fd65d994e8ddae6f328e0dcf4a0cd007e"},
]
[package.extras]
docs = ["furo", "olefile", "sphinx (>=2.4)", "sphinx-copybutton", "sphinx-inline-tabs", "sphinx-removed-in", "sphinxext-opengraph"]
+fpx = ["olefile"]
+mic = ["olefile"]
tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout"]
+typing = ["typing-extensions"]
+xmp = ["defusedxml"]
[[package]]
name = "platformdirs"
@@ -2151,22 +2187,22 @@ tests = ["pytest (>=5.4.1)", "pytest-cov (>=2.8.1)", "pytest-mypy (>=0.8.0)", "p
[[package]]
name = "protobuf"
-version = "4.25.1"
+version = "4.25.2"
description = ""
optional = false
python-versions = ">=3.8"
files = [
- {file = "protobuf-4.25.1-cp310-abi3-win32.whl", hash = "sha256:193f50a6ab78a970c9b4f148e7c750cfde64f59815e86f686c22e26b4fe01ce7"},
- {file = "protobuf-4.25.1-cp310-abi3-win_amd64.whl", hash = "sha256:3497c1af9f2526962f09329fd61a36566305e6c72da2590ae0d7d1322818843b"},
- {file = "protobuf-4.25.1-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:0bf384e75b92c42830c0a679b0cd4d6e2b36ae0cf3dbb1e1dfdda48a244f4bcd"},
- {file = "protobuf-4.25.1-cp37-abi3-manylinux2014_aarch64.whl", hash = "sha256:0f881b589ff449bf0b931a711926e9ddaad3b35089cc039ce1af50b21a4ae8cb"},
- {file = "protobuf-4.25.1-cp37-abi3-manylinux2014_x86_64.whl", hash = "sha256:ca37bf6a6d0046272c152eea90d2e4ef34593aaa32e8873fc14c16440f22d4b7"},
- {file = "protobuf-4.25.1-cp38-cp38-win32.whl", hash = "sha256:abc0525ae2689a8000837729eef7883b9391cd6aa7950249dcf5a4ede230d5dd"},
- {file = "protobuf-4.25.1-cp38-cp38-win_amd64.whl", hash = "sha256:1484f9e692091450e7edf418c939e15bfc8fc68856e36ce399aed6889dae8bb0"},
- {file = "protobuf-4.25.1-cp39-cp39-win32.whl", hash = "sha256:8bdbeaddaac52d15c6dce38c71b03038ef7772b977847eb6d374fc86636fa510"},
- {file = "protobuf-4.25.1-cp39-cp39-win_amd64.whl", hash = "sha256:becc576b7e6b553d22cbdf418686ee4daa443d7217999125c045ad56322dda10"},
- {file = "protobuf-4.25.1-py3-none-any.whl", hash = "sha256:a19731d5e83ae4737bb2a089605e636077ac001d18781b3cf489b9546c7c80d6"},
- {file = "protobuf-4.25.1.tar.gz", hash = "sha256:57d65074b4f5baa4ab5da1605c02be90ac20c8b40fb137d6a8df9f416b0d0ce2"},
+ {file = "protobuf-4.25.2-cp310-abi3-win32.whl", hash = "sha256:b50c949608682b12efb0b2717f53256f03636af5f60ac0c1d900df6213910fd6"},
+ {file = "protobuf-4.25.2-cp310-abi3-win_amd64.whl", hash = "sha256:8f62574857ee1de9f770baf04dde4165e30b15ad97ba03ceac65f760ff018ac9"},
+ {file = "protobuf-4.25.2-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:2db9f8fa64fbdcdc93767d3cf81e0f2aef176284071507e3ede160811502fd3d"},
+ {file = "protobuf-4.25.2-cp37-abi3-manylinux2014_aarch64.whl", hash = "sha256:10894a2885b7175d3984f2be8d9850712c57d5e7587a2410720af8be56cdaf62"},
+ {file = "protobuf-4.25.2-cp37-abi3-manylinux2014_x86_64.whl", hash = "sha256:fc381d1dd0516343f1440019cedf08a7405f791cd49eef4ae1ea06520bc1c020"},
+ {file = "protobuf-4.25.2-cp38-cp38-win32.whl", hash = "sha256:33a1aeef4b1927431d1be780e87b641e322b88d654203a9e9d93f218ee359e61"},
+ {file = "protobuf-4.25.2-cp38-cp38-win_amd64.whl", hash = "sha256:47f3de503fe7c1245f6f03bea7e8d3ec11c6c4a2ea9ef910e3221c8a15516d62"},
+ {file = "protobuf-4.25.2-cp39-cp39-win32.whl", hash = "sha256:5e5c933b4c30a988b52e0b7c02641760a5ba046edc5e43d3b94a74c9fc57c1b3"},
+ {file = "protobuf-4.25.2-cp39-cp39-win_amd64.whl", hash = "sha256:d66a769b8d687df9024f2985d5137a337f957a0916cf5464d1513eee96a63ff0"},
+ {file = "protobuf-4.25.2-py3-none-any.whl", hash = "sha256:a8b7a98d4ce823303145bf3c1a8bdb0f2f4642a414b196f04ad9853ed0c8f830"},
+ {file = "protobuf-4.25.2.tar.gz", hash = "sha256:fe599e175cb347efc8ee524bcd4b902d11f7262c0e569ececcb89995c15f0a5e"},
]
[[package]]
@@ -3051,13 +3087,13 @@ files = [
[[package]]
name = "sentry-sdk"
-version = "1.39.1"
+version = "1.39.2"
description = "Python client for Sentry (https://sentry.io)"
optional = false
python-versions = "*"
files = [
- {file = "sentry-sdk-1.39.1.tar.gz", hash = "sha256:320a55cdf9da9097a0bead239c35b7e61f53660ef9878861824fd6d9b2eaf3b5"},
- {file = "sentry_sdk-1.39.1-py2.py3-none-any.whl", hash = "sha256:81b5b9ffdd1a374e9eb0c053b5d2012155db9cbe76393a8585677b753bd5fdc1"},
+ {file = "sentry-sdk-1.39.2.tar.gz", hash = "sha256:24c83b0b41c887d33328a9166f5950dc37ad58f01c9f2fbff6b87a6f1094170c"},
+ {file = "sentry_sdk-1.39.2-py2.py3-none-any.whl", hash = "sha256:acaf597b30258fc7663063b291aa99e58f3096e91fe1e6634f4b79f9c1943e8e"},
]
[package.dependencies]
@@ -3083,7 +3119,7 @@ huey = ["huey (>=2)"]
loguru = ["loguru (>=0.5)"]
opentelemetry = ["opentelemetry-distro (>=0.35b0)"]
opentelemetry-experimental = ["opentelemetry-distro (>=0.40b0,<1.0)", "opentelemetry-instrumentation-aiohttp-client (>=0.40b0,<1.0)", "opentelemetry-instrumentation-django (>=0.40b0,<1.0)", "opentelemetry-instrumentation-fastapi (>=0.40b0,<1.0)", "opentelemetry-instrumentation-flask (>=0.40b0,<1.0)", "opentelemetry-instrumentation-requests (>=0.40b0,<1.0)", "opentelemetry-instrumentation-sqlite3 (>=0.40b0,<1.0)", "opentelemetry-instrumentation-urllib (>=0.40b0,<1.0)"]
-pure-eval = ["asttokens", "executing", "pure-eval"]
+pure-eval = ["asttokens", "executing", "pure_eval"]
pymongo = ["pymongo (>=3.1)"]
pyspark = ["pyspark (>=2.4.4)"]
quart = ["blinker (>=1.1)", "quart (>=0.16.1)"]
@@ -3145,65 +3181,65 @@ files = [
[[package]]
name = "sqlalchemy"
-version = "2.0.24"
+version = "2.0.25"
description = "Database Abstraction Library"
optional = false
python-versions = ">=3.7"
files = [
- {file = "SQLAlchemy-2.0.24-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5f801d85ba4753d4ed97181d003e5d3fa330ac7c4587d131f61d7f968f416862"},
- {file = "SQLAlchemy-2.0.24-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b35c35e3923ade1e7ac44e150dec29f5863513246c8bf85e2d7d313e3832bcfb"},
- {file = "SQLAlchemy-2.0.24-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1d9b3fd5eca3c0b137a5e0e468e24ca544ed8ca4783e0e55341b7ed2807518ee"},
- {file = "SQLAlchemy-2.0.24-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7a6209e689d0ff206c40032b6418e3cfcfc5af044b3f66e381d7f1ae301544b4"},
- {file = "SQLAlchemy-2.0.24-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:37e89d965b52e8b20571b5d44f26e2124b26ab63758bf1b7598a0e38fb2c4005"},
- {file = "SQLAlchemy-2.0.24-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c6910eb4ea90c0889f363965cd3c8c45a620ad27b526a7899f0054f6c1b9219e"},
- {file = "SQLAlchemy-2.0.24-cp310-cp310-win32.whl", hash = "sha256:d8e7e8a150e7b548e7ecd6ebb9211c37265991bf2504297d9454e01b58530fc6"},
- {file = "SQLAlchemy-2.0.24-cp310-cp310-win_amd64.whl", hash = "sha256:396f05c552f7fa30a129497c41bef5b4d1423f9af8fe4df0c3dcd38f3e3b9a14"},
- {file = "SQLAlchemy-2.0.24-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:adbd67dac4ebf54587198b63cd30c29fd7eafa8c0cab58893d9419414f8efe4b"},
- {file = "SQLAlchemy-2.0.24-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a0f611b431b84f55779cbb7157257d87b4a2876b067c77c4f36b15e44ced65e2"},
- {file = "SQLAlchemy-2.0.24-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:56a0e90a959e18ac5f18c80d0cad9e90cb09322764f536e8a637426afb1cae2f"},
- {file = "SQLAlchemy-2.0.24-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6db686a1d9f183c639f7e06a2656af25d4ed438eda581de135d15569f16ace33"},
- {file = "SQLAlchemy-2.0.24-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:f0cc0b486a56dff72dddae6b6bfa7ff201b0eeac29d4bc6f0e9725dc3c360d71"},
- {file = "SQLAlchemy-2.0.24-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:4a1d4856861ba9e73bac05030cec5852eabfa9ef4af8e56c19d92de80d46fc34"},
- {file = "SQLAlchemy-2.0.24-cp311-cp311-win32.whl", hash = "sha256:a3c2753bf4f48b7a6024e5e8a394af49b1b12c817d75d06942cae03d14ff87b3"},
- {file = "SQLAlchemy-2.0.24-cp311-cp311-win_amd64.whl", hash = "sha256:38732884eabc64982a09a846bacf085596ff2371e4e41d20c0734f7e50525d01"},
- {file = "SQLAlchemy-2.0.24-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:9f992e0f916201731993eab8502912878f02287d9f765ef843677ff118d0e0b1"},
- {file = "SQLAlchemy-2.0.24-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2587e108463cc2e5b45a896b2e7cc8659a517038026922a758bde009271aed11"},
- {file = "SQLAlchemy-2.0.24-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0bb7cedcddffca98c40bb0becd3423e293d1fef442b869da40843d751785beb3"},
- {file = "SQLAlchemy-2.0.24-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:83fa6df0e035689df89ff77a46bf8738696785d3156c2c61494acdcddc75c69d"},
- {file = "SQLAlchemy-2.0.24-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:cc889fda484d54d0b31feec409406267616536d048a450fc46943e152700bb79"},
- {file = "SQLAlchemy-2.0.24-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:57ef6f2cb8b09a042d0dbeaa46a30f2df5dd1e1eb889ba258b0d5d7d6011b81c"},
- {file = "SQLAlchemy-2.0.24-cp312-cp312-win32.whl", hash = "sha256:ea490564435b5b204d8154f0e18387b499ea3cedc1e6af3b3a2ab18291d85aa7"},
- {file = "SQLAlchemy-2.0.24-cp312-cp312-win_amd64.whl", hash = "sha256:ccfd336f96d4c9bbab0309f2a565bf15c468c2d8b2d277a32f89c5940f71fcf9"},
- {file = "SQLAlchemy-2.0.24-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:9aaaaa846b10dfbe1bda71079d0e31a7e2cebedda9409fa7dba3dfed1ae803e8"},
- {file = "SQLAlchemy-2.0.24-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:95bae3d38f8808d79072da25d5e5a6095f36fe1f9d6c614dd72c59ca8397c7c0"},
- {file = "SQLAlchemy-2.0.24-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a04191a7c8d77e63f6fc1e8336d6c6e93176c0c010833e74410e647f0284f5a1"},
- {file = "SQLAlchemy-2.0.24-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:acc58b7c2e40235712d857fdfc8f2bda9608f4a850d8d9ac0dd1fc80939ca6ac"},
- {file = "SQLAlchemy-2.0.24-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:00d76fe5d7cdb5d84d625ce002ce29fefba0bfd98e212ae66793fed30af73931"},
- {file = "SQLAlchemy-2.0.24-cp37-cp37m-win32.whl", hash = "sha256:29e51f848f843bbd75d74ae64ab1ab06302cb1dccd4549d1f5afe6b4a946edb2"},
- {file = "SQLAlchemy-2.0.24-cp37-cp37m-win_amd64.whl", hash = "sha256:e9d036e343a604db3f5a6c33354018a84a1d3f6dcae3673358b404286204798c"},
- {file = "SQLAlchemy-2.0.24-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9bafaa05b19dc07fa191c1966c5e852af516840b0d7b46b7c3303faf1a349bc9"},
- {file = "SQLAlchemy-2.0.24-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:e69290b921b7833c04206f233d6814c60bee1d135b09f5ae5d39229de9b46cd4"},
- {file = "SQLAlchemy-2.0.24-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e8398593ccc4440ce6dffcc4f47d9b2d72b9fe7112ac12ea4a44e7d4de364db1"},
- {file = "SQLAlchemy-2.0.24-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f073321a79c81e1a009218a21089f61d87ee5fa3c9563f6be94f8b41ff181812"},
- {file = "SQLAlchemy-2.0.24-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:9036ebfd934813990c5b9f71f297e77ed4963720db7d7ceec5a3fdb7cd2ef6ce"},
- {file = "SQLAlchemy-2.0.24-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:fcf84fe93397a0f67733aa2a38ed4eab9fc6348189fc950e656e1ea198f45668"},
- {file = "SQLAlchemy-2.0.24-cp38-cp38-win32.whl", hash = "sha256:6f5e75de91c754365c098ac08c13fdb267577ce954fa239dd49228b573ca88d7"},
- {file = "SQLAlchemy-2.0.24-cp38-cp38-win_amd64.whl", hash = "sha256:9f29c7f0f4b42337ec5a779e166946a9f86d7d56d827e771b69ecbdf426124ac"},
- {file = "SQLAlchemy-2.0.24-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:07cc423892f2ceda9ae1daa28c0355757f362ecc7505b1ab1a3d5d8dc1c44ac6"},
- {file = "SQLAlchemy-2.0.24-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:2a479aa1ab199178ff1956b09ca8a0693e70f9c762875d69292d37049ffd0d8f"},
- {file = "SQLAlchemy-2.0.24-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9b8d0e8578e7f853f45f4512b5c920f6a546cd4bed44137460b2a56534644205"},
- {file = "SQLAlchemy-2.0.24-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e17e7e27af178d31b436dda6a596703b02a89ba74a15e2980c35ecd9909eea3a"},
- {file = "SQLAlchemy-2.0.24-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:1ca7903d5e7db791a355b579c690684fac6304478b68efdc7f2ebdcfe770d8d7"},
- {file = "SQLAlchemy-2.0.24-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:db09e424d7bb89b6215a184ca93b4f29d7f00ea261b787918a1af74143b98c06"},
- {file = "SQLAlchemy-2.0.24-cp39-cp39-win32.whl", hash = "sha256:a5cd7d30e47f87b21362beeb3e86f1b5886e7d9b0294b230dde3d3f4a1591375"},
- {file = "SQLAlchemy-2.0.24-cp39-cp39-win_amd64.whl", hash = "sha256:7ae5d44517fe81079ce75cf10f96978284a6db2642c5932a69c82dbae09f009a"},
- {file = "SQLAlchemy-2.0.24-py3-none-any.whl", hash = "sha256:8f358f5cfce04417b6ff738748ca4806fe3d3ae8040fb4e6a0c9a6973ccf9b6e"},
- {file = "SQLAlchemy-2.0.24.tar.gz", hash = "sha256:6db97656fd3fe3f7e5b077f12fa6adb5feb6e0b567a3e99f47ecf5f7ea0a09e3"},
+ {file = "SQLAlchemy-2.0.25-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:4344d059265cc8b1b1be351bfb88749294b87a8b2bbe21dfbe066c4199541ebd"},
+ {file = "SQLAlchemy-2.0.25-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6f9e2e59cbcc6ba1488404aad43de005d05ca56e069477b33ff74e91b6319735"},
+ {file = "SQLAlchemy-2.0.25-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:84daa0a2055df9ca0f148a64fdde12ac635e30edbca80e87df9b3aaf419e144a"},
+ {file = "SQLAlchemy-2.0.25-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc8b7dabe8e67c4832891a5d322cec6d44ef02f432b4588390017f5cec186a84"},
+ {file = "SQLAlchemy-2.0.25-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:f5693145220517b5f42393e07a6898acdfe820e136c98663b971906120549da5"},
+ {file = "SQLAlchemy-2.0.25-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:db854730a25db7c956423bb9fb4bdd1216c839a689bf9cc15fada0a7fb2f4570"},
+ {file = "SQLAlchemy-2.0.25-cp310-cp310-win32.whl", hash = "sha256:14a6f68e8fc96e5e8f5647ef6cda6250c780612a573d99e4d881581432ef1669"},
+ {file = "SQLAlchemy-2.0.25-cp310-cp310-win_amd64.whl", hash = "sha256:87f6e732bccd7dcf1741c00f1ecf33797383128bd1c90144ac8adc02cbb98643"},
+ {file = "SQLAlchemy-2.0.25-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:342d365988ba88ada8af320d43df4e0b13a694dbd75951f537b2d5e4cb5cd002"},
+ {file = "SQLAlchemy-2.0.25-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f37c0caf14b9e9b9e8f6dbc81bc56db06acb4363eba5a633167781a48ef036ed"},
+ {file = "SQLAlchemy-2.0.25-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aa9373708763ef46782d10e950b49d0235bfe58facebd76917d3f5cbf5971aed"},
+ {file = "SQLAlchemy-2.0.25-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d24f571990c05f6b36a396218f251f3e0dda916e0c687ef6fdca5072743208f5"},
+ {file = "SQLAlchemy-2.0.25-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:75432b5b14dc2fff43c50435e248b45c7cdadef73388e5610852b95280ffd0e9"},
+ {file = "SQLAlchemy-2.0.25-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:884272dcd3ad97f47702965a0e902b540541890f468d24bd1d98bcfe41c3f018"},
+ {file = "SQLAlchemy-2.0.25-cp311-cp311-win32.whl", hash = "sha256:e607cdd99cbf9bb80391f54446b86e16eea6ad309361942bf88318bcd452363c"},
+ {file = "SQLAlchemy-2.0.25-cp311-cp311-win_amd64.whl", hash = "sha256:7d505815ac340568fd03f719446a589162d55c52f08abd77ba8964fbb7eb5b5f"},
+ {file = "SQLAlchemy-2.0.25-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:0dacf67aee53b16f365c589ce72e766efaabd2b145f9de7c917777b575e3659d"},
+ {file = "SQLAlchemy-2.0.25-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b801154027107461ee992ff4b5c09aa7cc6ec91ddfe50d02bca344918c3265c6"},
+ {file = "SQLAlchemy-2.0.25-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:59a21853f5daeb50412d459cfb13cb82c089ad4c04ec208cd14dddd99fc23b39"},
+ {file = "SQLAlchemy-2.0.25-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:29049e2c299b5ace92cbed0c1610a7a236f3baf4c6b66eb9547c01179f638ec5"},
+ {file = "SQLAlchemy-2.0.25-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:b64b183d610b424a160b0d4d880995e935208fc043d0302dd29fee32d1ee3f95"},
+ {file = "SQLAlchemy-2.0.25-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4f7a7d7fcc675d3d85fbf3b3828ecd5990b8d61bd6de3f1b260080b3beccf215"},
+ {file = "SQLAlchemy-2.0.25-cp312-cp312-win32.whl", hash = "sha256:cf18ff7fc9941b8fc23437cc3e68ed4ebeff3599eec6ef5eebf305f3d2e9a7c2"},
+ {file = "SQLAlchemy-2.0.25-cp312-cp312-win_amd64.whl", hash = "sha256:91f7d9d1c4dd1f4f6e092874c128c11165eafcf7c963128f79e28f8445de82d5"},
+ {file = "SQLAlchemy-2.0.25-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:bb209a73b8307f8fe4fe46f6ad5979649be01607f11af1eb94aa9e8a3aaf77f0"},
+ {file = "SQLAlchemy-2.0.25-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:798f717ae7c806d67145f6ae94dc7c342d3222d3b9a311a784f371a4333212c7"},
+ {file = "SQLAlchemy-2.0.25-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5fdd402169aa00df3142149940b3bf9ce7dde075928c1886d9a1df63d4b8de62"},
+ {file = "SQLAlchemy-2.0.25-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:0d3cab3076af2e4aa5693f89622bef7fa770c6fec967143e4da7508b3dceb9b9"},
+ {file = "SQLAlchemy-2.0.25-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:74b080c897563f81062b74e44f5a72fa44c2b373741a9ade701d5f789a10ba23"},
+ {file = "SQLAlchemy-2.0.25-cp37-cp37m-win32.whl", hash = "sha256:87d91043ea0dc65ee583026cb18e1b458d8ec5fc0a93637126b5fc0bc3ea68c4"},
+ {file = "SQLAlchemy-2.0.25-cp37-cp37m-win_amd64.whl", hash = "sha256:75f99202324383d613ddd1f7455ac908dca9c2dd729ec8584c9541dd41822a2c"},
+ {file = "SQLAlchemy-2.0.25-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:420362338681eec03f53467804541a854617faed7272fe71a1bfdb07336a381e"},
+ {file = "SQLAlchemy-2.0.25-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7c88f0c7dcc5f99bdb34b4fd9b69b93c89f893f454f40219fe923a3a2fd11625"},
+ {file = "SQLAlchemy-2.0.25-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a3be4987e3ee9d9a380b66393b77a4cd6d742480c951a1c56a23c335caca4ce3"},
+ {file = "SQLAlchemy-2.0.25-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f2a159111a0f58fb034c93eeba211b4141137ec4b0a6e75789ab7a3ef3c7e7e3"},
+ {file = "SQLAlchemy-2.0.25-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:8b8cb63d3ea63b29074dcd29da4dc6a97ad1349151f2d2949495418fd6e48db9"},
+ {file = "SQLAlchemy-2.0.25-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:736ea78cd06de6c21ecba7416499e7236a22374561493b456a1f7ffbe3f6cdb4"},
+ {file = "SQLAlchemy-2.0.25-cp38-cp38-win32.whl", hash = "sha256:10331f129982a19df4284ceac6fe87353ca3ca6b4ca77ff7d697209ae0a5915e"},
+ {file = "SQLAlchemy-2.0.25-cp38-cp38-win_amd64.whl", hash = "sha256:c55731c116806836a5d678a70c84cb13f2cedba920212ba7dcad53260997666d"},
+ {file = "SQLAlchemy-2.0.25-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:605b6b059f4b57b277f75ace81cc5bc6335efcbcc4ccb9066695e515dbdb3900"},
+ {file = "SQLAlchemy-2.0.25-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:665f0a3954635b5b777a55111ababf44b4fc12b1f3ba0a435b602b6387ffd7cf"},
+ {file = "SQLAlchemy-2.0.25-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ecf6d4cda1f9f6cb0b45803a01ea7f034e2f1aed9475e883410812d9f9e3cfcf"},
+ {file = "SQLAlchemy-2.0.25-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c51db269513917394faec5e5c00d6f83829742ba62e2ac4fa5c98d58be91662f"},
+ {file = "SQLAlchemy-2.0.25-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:790f533fa5c8901a62b6fef5811d48980adeb2f51f1290ade8b5e7ba990ba3de"},
+ {file = "SQLAlchemy-2.0.25-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:1b1180cda6df7af84fe72e4530f192231b1f29a7496951db4ff38dac1687202d"},
+ {file = "SQLAlchemy-2.0.25-cp39-cp39-win32.whl", hash = "sha256:555651adbb503ac7f4cb35834c5e4ae0819aab2cd24857a123370764dc7d7e24"},
+ {file = "SQLAlchemy-2.0.25-cp39-cp39-win_amd64.whl", hash = "sha256:dc55990143cbd853a5d038c05e79284baedf3e299661389654551bd02a6a68d7"},
+ {file = "SQLAlchemy-2.0.25-py3-none-any.whl", hash = "sha256:a86b4240e67d4753dc3092d9511886795b3c2852abe599cffe108952f7af7ac3"},
+ {file = "SQLAlchemy-2.0.25.tar.gz", hash = "sha256:a2c69a7664fb2d54b8682dd774c3b54f67f84fa123cf84dda2a5f40dcaa04e08"},
]
[package.dependencies]
greenlet = {version = "!=0.4.17", optional = true, markers = "platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\" or extra == \"asyncio\""}
-typing-extensions = ">=4.2.0"
+typing-extensions = ">=4.6.0"
[package.extras]
aiomysql = ["aiomysql (>=0.2.0)", "greenlet (!=0.4.17)"]
@@ -4055,4 +4091,4 @@ multidict = ">=4.0"
[metadata]
lock-version = "2.0"
python-versions = ">=3.10,<3.12"
-content-hash = "2b0939b9cce5fa623a932eb89645a6ef3cc7384addd232339faded4a4c35cab7"
+content-hash = "1acc3ce5c08e0d3af483964b46a783bab32b37eb09f762b55d46826514210252"
diff --git a/pyproject.toml b/pyproject.toml
index df3ada338..fbd2d4718 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -37,6 +37,7 @@ black = "*"
portalocker = "*"
openai = "*"
langchain = "*"
+langchain_openai = "*"
protobuf = "*"
typer = "*"
setuptools = "*"
diff --git a/setup.py b/setup.py
index ab5d4584d..135a26e56 100644
--- a/setup.py
+++ b/setup.py
@@ -26,6 +26,7 @@
"pytest-xdist",
"portalocker",
"langchain",
+ "langchain_openai",
"rouge_score==0.1.2",
"nltk==3.8.1",
"ragas",
diff --git a/tests/test_faithfulness.py b/tests/test_faithfulness.py
index d3595be69..7b3a5c27c 100644
--- a/tests/test_faithfulness.py
+++ b/tests/test_faithfulness.py
@@ -38,7 +38,7 @@
"""
-# @pytest.mark.skip(reason="openai is expensive")
+@pytest.mark.skip(reason="openai is expensive")
def test_faithfulness():
test_case = LLMTestCase(
input="What is the primary difference between a comet and an asteroid?",
diff --git a/tests/test_ragas.py b/tests/test_ragas.py
index a2e38d306..caa93a868 100644
--- a/tests/test_ragas.py
+++ b/tests/test_ragas.py
@@ -2,17 +2,17 @@
from deepeval.test_case import LLMTestCase
from deepeval.metrics import (
RagasMetric,
- ContextualPrecisionMetric,
- ContextualRelevancyMetric,
+ RAGASContextualPrecisionMetric,
+ RAGASContextualRelevancyMetric,
RAGASFaithfulnessMetric,
- ContextualRecallMetric,
+ RAGASContextualRecallMetric,
ConcisenessMetric,
CorrectnessMetric,
CoherenceMetric,
MaliciousnessMetric,
+ RAGASAnswerRelevancyMetric,
)
-from deepeval.metrics.ragas_metric import RAGASAnswerRelevancyMetric
-from deepeval import assert_test, evaluate
+from deepeval import assert_test
query = "Who won the FIFA World Cup in 2018 and what was the score?"
output = "Winners of the FIFA world cup were the French national football team"
@@ -51,28 +51,28 @@ def test_everything():
retrieval_context=context,
context=context,
)
- # metric1 = ContextualRelevancyMetric(model="gpt-4")
- # metric2 = RAGASFaithfulnessMetric(model="gpt-4")
- # metric3 = ContextualRecallMetric(model="gpt-4")
- # metric4 = ConcisenessMetric(model="gpt-4")
- # metric5 = CorrectnessMetric(model="gpt-4")
- # metric6 = CoherenceMetric(model="gpt-4")
- # metric7 = MaliciousnessMetric(model="gpt-4")
- # metric8 = RAGASAnswerRelevancyMetric(model="gpt-4")
- metric9 = ContextualPrecisionMetric()
- # metric10 = RagasMetric()
+ metric1 = RAGASContextualRelevancyMetric(model="gpt-4")
+ metric2 = RAGASFaithfulnessMetric(model="gpt-4")
+ metric3 = RAGASContextualRecallMetric(model="gpt-4")
+ metric4 = ConcisenessMetric(model="gpt-4")
+ metric5 = CorrectnessMetric(model="gpt-4")
+ metric6 = CoherenceMetric(model="gpt-4")
+ metric7 = MaliciousnessMetric(model="gpt-4")
+ metric8 = RAGASAnswerRelevancyMetric(model="gpt-4")
+ metric9 = RAGASContextualPrecisionMetric()
+ metric10 = RagasMetric()
assert_test(
test_case,
[
- # metric1,
- # metric2,
- # metric3,
- # metric4,
- # metric5,
- # metric6,
- # metric7,
- # metric8,
+ metric1,
+ metric2,
+ metric3,
+ metric4,
+ metric5,
+ metric6,
+ metric7,
+ metric8,
metric9,
- # metric10,
+ metric10,
],
)
From 43675ae5b50b68fa47d2f24108409dbd2aa4b8ce Mon Sep 17 00:00:00 2001
From: Jeffrey Ip
Date: Thu, 11 Jan 2024 18:36:17 -0800
Subject: [PATCH 37/46] Added threshold
---
deepeval/test_run/api.py | 1 +
deepeval/test_run/test_run.py | 2 +-
2 files changed, 2 insertions(+), 1 deletion(-)
diff --git a/deepeval/test_run/api.py b/deepeval/test_run/api.py
index 3a3f37b5b..a80e55cce 100644
--- a/deepeval/test_run/api.py
+++ b/deepeval/test_run/api.py
@@ -6,6 +6,7 @@ class MetricsMetadata(BaseModel):
metric: str
score: float
threshold: float
+ success: bool
reason: Optional[str] = None
diff --git a/deepeval/test_run/test_run.py b/deepeval/test_run/test_run.py
index 3a1c41ffa..e382895c7 100644
--- a/deepeval/test_run/test_run.py
+++ b/deepeval/test_run/test_run.py
@@ -93,7 +93,7 @@ def add_llm_test_case(
score=metric.score,
threshold=metric.threshold,
reason=metric.reason,
- # success=metric.is_successful()
+ success=metric.is_successful(),
)
if existing_test_case:
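For reference, here is a minimal, self-contained sketch of the model these two hunks produce; the field names come straight from the diff above, while the example values are illustrative:

```python
from typing import Optional
from pydantic import BaseModel

class MetricsMetadata(BaseModel):
    metric: str
    score: float
    threshold: float
    success: bool  # newly persisted: whether the metric passed
    reason: Optional[str] = None

# Persisting success per metric lets display logic report pass/fail
# directly instead of re-deriving it from score and threshold.
metadata = MetricsMetadata(
    metric="Latency", score=1.0, threshold=10.0, success=True
)
```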
From a4d80237bc3970f7a0c7d544a23ce3dab1168c33 Mon Sep 17 00:00:00 2001
From: Jeffrey Ip
Date: Thu, 11 Jan 2024 18:41:12 -0800
Subject: [PATCH 38/46] new release
---
deepeval/_version.py | 2 +-
pyproject.toml | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/deepeval/_version.py b/deepeval/_version.py
index ee489183b..3180b565a 100644
--- a/deepeval/_version.py
+++ b/deepeval/_version.py
@@ -1 +1 @@
-__version__: str = "0.20.44"
+__version__: str = "0.20.45"
diff --git a/pyproject.toml b/pyproject.toml
index fbd2d4718..daad81439 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "deepeval"
-version = "0.20.44"
+version = "0.20.45"
description = "The Evaluation Framework for LLMs"
authors = ["Jeffrey Ip "]
license = "Apache-2.0"
From c6a7a5513536802f99b2007fe8bb2bae2c4bc534 Mon Sep 17 00:00:00 2001
From: Jeffrey Ip
Date: Thu, 11 Jan 2024 18:54:05 -0800
Subject: [PATCH 39/46] Updated docs
---
docs/docs/metrics-custom.mdx | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/docs/docs/metrics-custom.mdx b/docs/docs/metrics-custom.mdx
index 2cadb8f3e..afe4bb9cb 100644
--- a/docs/docs/metrics-custom.mdx
+++ b/docs/docs/metrics-custom.mdx
@@ -24,14 +24,14 @@ from deepeval.metrics import BaseMetric
from deepeval.test_case import LLMTestCase
# Inherit BaseMetric
-class LengthMetric(BaseMetric):
- # This metric checks if the output length is greater than 10 characters
- def __init__(self, max_length: int=10):
- self.threshold = max_length
+class LatencyMetric(BaseMetric):
+ # This metric by default checks if the latency is greater than 10 seconds
+ def __init__(self, max_seconds: int=10):
+ self.threshold = max_seconds
def measure(self, test_case: LLMTestCase):
# Set self.success and self.score in the "measure" method
- self.success = len(test_case.actual_output) > self.threshold
+ self.success = len(test_case.execution_time) <= self.threshold
if self.success:
self.score = 1
else:
@@ -47,7 +47,7 @@ class LengthMetric(BaseMetric):
@property
def name(self):
- return "Length"
+ return "Latency"
```
Notice that a few things have happened:
From 02559e27da04a360bead18bdddef2d9d4e8d69ba Mon Sep 17 00:00:00 2001
From: Jeffrey Ip
Date: Thu, 11 Jan 2024 19:09:47 -0800
Subject: [PATCH 40/46] Fix display logic
---
deepeval/test_run/test_run.py | 10 +++++-----
tests/test_custom_execution_time.py | 29 ---------------------------
tests/test_custom_metric.py | 31 +++++++++++++++++------------
3 files changed, 23 insertions(+), 47 deletions(-)
delete mode 100644 tests/test_custom_execution_time.py
diff --git a/deepeval/test_run/test_run.py b/deepeval/test_run/test_run.py
index e382895c7..dd82c9523 100644
--- a/deepeval/test_run/test_run.py
+++ b/deepeval/test_run/test_run.py
@@ -88,7 +88,7 @@ def add_llm_test_case(
test_case_id, None
)
- metrics_metadata = MetricsMetadata(
+ metric_metadata = MetricsMetadata(
metric=metric.__name__,
score=metric.score,
threshold=metric.threshold,
@@ -98,7 +98,7 @@ def add_llm_test_case(
if existing_test_case:
# If it exists, append the metrics to the existing test case
- existing_test_case.metrics_metadata.append(metrics_metadata)
+ existing_test_case.metrics_metadata.append(metric_metadata)
if metric.is_successful() and existing_test_case.success == True:
success = True
else:
@@ -112,7 +112,7 @@ def add_llm_test_case(
actualOutput=test_case.actual_output,
expectedOutput=test_case.expected_output,
success=metric.is_successful(),
- metricsMetadata=[metrics_metadata],
+ metricsMetadata=[metric_metadata],
runDuration=run_duration,
context=test_case.context,
retrievalContext=test_case.retrieval_context,
@@ -214,7 +214,7 @@ def display_results_table(self, test_run: TestRun):
test_case_name += f" ({test_case.id})"
for metric_metadata in test_case.metrics_metadata:
- if metric_metadata.score >= metric_metadata.threshold:
+ if metric_metadata.success:
pass_count += 1
else:
fail_count += 1
@@ -228,7 +228,7 @@ def display_results_table(self, test_run: TestRun):
)
for metric_metadata in test_case.metrics_metadata:
- if metric_metadata.score >= metric_metadata.threshold:
+ if metric_metadata.success:
status = "[green]PASSED[/green]"
else:
status = "[red]FAILED[/red]"
diff --git a/tests/test_custom_execution_time.py b/tests/test_custom_execution_time.py
deleted file mode 100644
index 488432cad..000000000
--- a/tests/test_custom_execution_time.py
+++ /dev/null
@@ -1,29 +0,0 @@
-# from deepeval.metrics import BaseMetric
-# from deepeval.test_case import LLMTestCase
-# from deepeval import assert_test
-
-# class ExecutionTimeMetric(BaseMetric):
-# def __init__(self, max_execution_time: float):
-# self.max_execution_time = max_execution_time
-
-# def measure(self, test_case: LLMTestCase):
-# self.success = test_case.execution_time <= self.max_execution_time
-# if self.success:
-# self.score = 1
-# else:
-# self.score = 0
-
-# return self.score
-
-# def is_successful(self):
-# return self.success
-
-# @property
-# def name(self):
-# return "Execution Time"
-
-
-# def test_execution_time():
-# test_case = LLMTestCase(input="...", actual_output="...", execution_time=4.57)
-# execution_time_metric = ExecutionTimeMetric(max_execution_time=5)
-# assert_test(test_case, [execution_time_metric])
diff --git a/tests/test_custom_metric.py b/tests/test_custom_metric.py
index 407b378e4..9ec77aa9b 100644
--- a/tests/test_custom_metric.py
+++ b/tests/test_custom_metric.py
@@ -6,32 +6,37 @@
from deepeval import assert_test
-class LengthMetric(BaseMetric):
- """This metric checks if the output is more than 3 letters"""
-
- def __init__(self, threshold: int = 3):
- self.threshold = threshold
+class LatencyMetric(BaseMetric):
+ # By default, this metric fails if the latency is greater than 10 seconds
+ def __init__(self, max_seconds: float = 10):
+ self.threshold = max_seconds
def measure(self, test_case: LLMTestCase):
- # sends to server
- text = test_case.actual_output
- score = len(text)
- self.success = score > self.threshold
- # Optional: Logs it to the server
- return score
+ # Set self.success and self.score in the "measure" method
+ self.success = test_case.execution_time <= self.threshold
+ if self.success:
+ self.score = 1
+ else:
+ self.score = 0
+
+ # You can also set a reason for the score returned.
+ # This is particularly useful for a score computed using LLMs
+ self.reason = None
+ return self.score
def is_successful(self):
return self.success
@property
def __name__(self):
- return "Length"
+ return "Latency"
def test_length_metric():
- metric = LengthMetric()
+ metric = LatencyMetric()
test_case = LLMTestCase(
input="placeholder",
actual_output="This is a long sentence that is more than 3 letters",
+ execution_time=8.3,
)
assert_test(test_case, [metric])
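The display fix above matters for max-threshold metrics like this `LatencyMetric`, whose score is binary (0 or 1) while its threshold is a number of seconds. A small illustrative check with hypothetical values (not part of the patch):

```python
# A passing LatencyMetric run: binary score, threshold in seconds.
score, threshold, success = 1, 10.0, True

# The old display check compared incomparable quantities: 1 >= 10.0 is
# False, so a passing run would have been shown as FAILED.
assert not (score >= threshold)

# The new display check trusts the metric's own success flag.
assert success
```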
From 877842ffc49e3a69dfbe98bf0022f2e93e06f0cf Mon Sep 17 00:00:00 2001
From: Jeffrey Ip
Date: Thu, 11 Jan 2024 19:12:08 -0800
Subject: [PATCH 41/46] .
---
docs/docs/metrics-custom.mdx | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/docs/docs/metrics-custom.mdx b/docs/docs/metrics-custom.mdx
index afe4bb9cb..229913ed0 100644
--- a/docs/docs/metrics-custom.mdx
+++ b/docs/docs/metrics-custom.mdx
@@ -39,14 +39,14 @@ class LatencyMetric(BaseMetric):
# You can also set a reason for the score returned.
# This is particularly useful for a score computed using LLMs
- self.reason = "..."
+ # self.reason = "Too slow!"
return self.score
def is_successful(self):
return self.success
@property
- def name(self):
+ def __name__(self):
return "Latency"
```
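The switch from `name` to `__name__` matches how the test run code reads the metric name (`metric=metric.__name__` in test_run.py, shown in an earlier patch). A minimal sketch of why a `__name__` property works on instances:

```python
class LatencyMetric:
    @property
    def __name__(self):
        return "Latency"

# Instances of ordinary classes have no __name__ attribute of their
# own; defining it as a property supplies the metric's display name.
print(LatencyMetric().__name__)  # Latency
```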
From b1115a1c54ff946fd04aff472c7be21f14684ac5 Mon Sep 17 00:00:00 2001
From: Jeffrey Ip
Date: Thu, 11 Jan 2024 19:17:54 -0800
Subject: [PATCH 42/46] .
---
docs/docs/metrics-custom.mdx | 15 +++++++++++++--
1 file changed, 13 insertions(+), 2 deletions(-)
diff --git a/docs/docs/metrics-custom.mdx b/docs/docs/metrics-custom.mdx
index 229913ed0..8609954fa 100644
--- a/docs/docs/metrics-custom.mdx
+++ b/docs/docs/metrics-custom.mdx
@@ -58,6 +58,17 @@ Notice that a few things have happened:
- `self.is_successful()` simply returns the success status
- `name()` simply returns a string representing the metric name
-To create a custom metric without unexpected errors, we recommend you set the appropriate class variables in the appropriate methods as outlined above.
+To create a custom metric without unexpected errors, we recommend you set the appropriate class variables in the appropriate methods as outlined above. You should also note that `self.reason` is **optional**: it should be a string representing the rationale behind an LLM-computed score, and is only applicable if you're using an LLM as an evaluator in the `measure()` method and have implemented a way to generate a score reasoning.
-You should also note that `self.reason` is **optional**. `self.reason` should be a string representing the rationale behind an LLM computed score. This is only applicable if you're using LLMs as an evaluator in the `measure()` method, and has implemented a way to generate a score reasoning.
+After creating a custom metric, you can use it in the same way as all of `deepeval`'s metrics:
+
+```python
+from deepeval import evaluate
+from deepeval.test_case import LLMTestCase
+...
+
+# Note that we pass in execution time since the measure method requires it for evaluation
+latency_metric = LatencyMetric(max_seconds=10.0)
+test_case = LLMTestCase(input="...", actual_output="...", execution_time=8.3)
+evaluate([test_case], [latency_metric])
+```
From dcdc07e16f0fa066d170069b66641d5568df92b7 Mon Sep 17 00:00:00 2001
From: Jeffrey Ip
Date: Thu, 11 Jan 2024 19:49:48 -0800
Subject: [PATCH 43/46] .
---
docs/docs/evaluation-test-cases.mdx | 49 ++++++++++++++++++++++++++---
docs/docs/metrics-custom.mdx | 14 +++++----
2 files changed, 53 insertions(+), 10 deletions(-)
diff --git a/docs/docs/evaluation-test-cases.mdx b/docs/docs/evaluation-test-cases.mdx
index 270f3be2d..4561404d3 100644
--- a/docs/docs/evaluation-test-cases.mdx
+++ b/docs/docs/evaluation-test-cases.mdx
@@ -13,16 +13,19 @@ A test case is a blueprint provided by `deepeval` to unit test LLM outputs based
- [Optional] `expected_output`
- [Optional] `context`
- [Optional] `retrieval_context`
+- [Optional] `execution_time` (`float`)
+- [Optional] `cost` (`float`)
Except for `actual_output`, all parameters should originate from your evaluation dataset (if you have one). Here's an example implementation of a test case:
```python
test_case = LLMTestCase(
input="What if these shoes don't fit?",
- expected_output = "You're eligible for a 30 day refund at no extra cost.",
- actual_output = "We offer a 30-day full refund at no extra cost.",
- context = ["All customers are eligible for a 30 day full refund at no extra cost."]
- retrieval_context = ["Only shoes can be refunded."]
+ expected_output="You're eligible for a 30 day refund at no extra cost.",
+ actual_output="We offer a 30-day full refund at no extra cost.",
+ context=["All customers are eligible for a 30 day full refund at no extra cost."],
+ retrieval_context=["Only shoes can be refunded."],
+ execution_time=10.0
)
```
@@ -174,6 +177,44 @@ test_case = LLMTestCase(
Remember, `context` represents the ideal retrieval results for a given input and typically comes from your evaluation dataset, whereas `retrieval_context` is your LLM application's actual retrieval results.
:::
+## Execution Time
+
+The `execution_time` is an **optional** parameter that represents how long it took your LLM application to finish executing. However, if you're trying to measure something else, like the execution time for only the retrieval part of your RAG pipeline, feel free to supply that number instead of the overall execution time.
+
+```python
+...
+
+test_case = LLMTestCase(
+ input="...",
+ actual_output="...",
+ # Replace this with the actual execution time of your LLM application
+ execution_time=10.4
+)
+```
+
+:::note
+`deepeval` does not offer metrics that evaluate latency and cost, so feel free to supply the `execution_time` in either seconds, milliseconds, or even nanoseconds. That being said, [here is a full working example](metrics-custom#implementation) of how you can build your own `LatencyMetric` using the `execution_time` parameter.
+:::
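+
+One way to obtain a value for `execution_time` is to simply time your LLM application yourself. Below is a minimal sketch using Python's standard `time` module — `chatbot.run` is a hypothetical stand-in for your actual LLM application:
+
+```python
+import time
+
+from deepeval.test_case import LLMTestCase
+
+start = time.perf_counter()
+# Replace this with your actual LLM application
+actual_output = chatbot.run("What if these shoes don't fit?")
+execution_time = time.perf_counter() - start
+
+test_case = LLMTestCase(
+    input="What if these shoes don't fit?",
+    actual_output=actual_output,
+    execution_time=execution_time
+)
+```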
+
+## Cost
+
+The `cost` is an **optional** parameter that represents the token cost for a given execution of your LLM application. However, similar to `execution_time`, the `cost` parameter does not strictly have to be the total completion cost (e.g., it could be the embedding cost), nor does it have to be in any set currency.
+
+```python
+...
+
+test_case = LLMTestCase(
+ input="...",
+ actual_output="...",
+ # Replace this with the actual cost of your LLM application
+ cost=0.78
+)
+```
+
+:::info
+`deepeval` does not offer cost and latency metrics because it is difficult to account for all the different units and currencies available. We highly encourage you to look at the [custom metrics section](metrics-custom#implementation) for a full example on how to create your own metric if you are looking to evaluate cost and latency, or see the sketch below.
+:::
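+
+As a rough sketch of what this might look like for cost, here is a hypothetical `CostMetric` that mirrors the `LatencyMetric` pattern from that section — the `max_cost` parameter is illustrative, and the `BaseMetric` import is assumed to work the same way as in that example:
+
+```python
+from deepeval.metrics import BaseMetric
+from deepeval.test_case import LLMTestCase
+
+class CostMetric(BaseMetric):
+    def __init__(self, max_cost: float):
+        # max_cost is a maximum threshold, in whatever unit you supplied to `cost`
+        self.threshold = max_cost
+
+    def measure(self, test_case: LLMTestCase):
+        # Passes if the supplied cost is within budget
+        self.success = test_case.cost <= self.threshold
+        self.score = 1 if self.success else 0
+        return self.score
+
+    def is_successful(self):
+        return self.success
+
+    @property
+    def __name__(self):
+        return "Cost"
+```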
+
## Run A Test Case
`deepeval` offers an option to quickly run a test case without going through the CLI.
diff --git a/docs/docs/metrics-custom.mdx b/docs/docs/metrics-custom.mdx
index 8609954fa..1621deed6 100644
--- a/docs/docs/metrics-custom.mdx
+++ b/docs/docs/metrics-custom.mdx
@@ -31,15 +31,15 @@ class LatencyMetric(BaseMetric):
def measure(self, test_case: LLMTestCase):
# Set self.success and self.score in the "measure" method
- self.success = len(test_case.execution_time) <= self.threshold
+ self.success = test_case.execution_time <= self.threshold
if self.success:
self.score = 1
else:
self.score = 0
- # You can also set a reason for the score returned.
+ # You can also optionally set a reason for the score returned.
# This is particularly useful for a score computed using LLMs
- # self.reason = "Too slow!"
+ self.reason = "Too slow!"
return self.score
def is_successful(self):
@@ -52,11 +52,11 @@ class LatencyMetric(BaseMetric):
Notice that a few things have happened:
-- `self.threshold` was set in `__init__()`
+- `self.threshold` was set in `__init__()`, and this can be either a minimum or maximum threshold
- `self.success`, `self.score`, and `self.reason` were set in `measure()`
- `measure()` takes in an `LLMTestCase`
-- `self.is_successful()` simply returns the success status
-- `name()` simply returns a string representing the metric name
+- `is_successful()` simply returns the success status
+- `__name__` simply returns a string representing the metric name
To create a custom metric without unexpected errors, we recommend you set the appropriate class variables in the appropriate methods as outlined above. You should also note that `self.reason` is **optional**. `self.reason` should be a string representing the rationale behind an LLM-computed score. This is only applicable if you're using LLMs as an evaluator in the `measure()` method and have implemented a way to generate a reason for the score.
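+
+As a minimal, hypothetical sketch of what an LLM-generated reason might look like — `llm_judge()` stands in for whichever evaluator LLM call you implement, and is not part of `deepeval`:
+
+```python
+from deepeval.metrics import BaseMetric
+from deepeval.test_case import LLMTestCase
+
+class AnswerQualityMetric(BaseMetric):
+    def __init__(self, minimum_score: float = 0.5):
+        # Here, the threshold is a minimum score the metric must reach
+        self.threshold = minimum_score
+
+    def measure(self, test_case: LLMTestCase):
+        # llm_judge() is a hypothetical helper that asks your evaluator
+        # LLM for a score along with a short rationale for that score
+        score, reason = llm_judge(test_case.input, test_case.actual_output)
+        self.score = score
+        self.success = score >= self.threshold
+        # Surface the LLM's rationale alongside the score
+        self.reason = reason
+        return self.score
+
+    def is_successful(self):
+        return self.success
+
+    @property
+    def __name__(self):
+        return "Answer Quality"
+```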
@@ -65,8 +65,10 @@ After creating a custom metric, you can use it in the same way as all of `deepev
```python
from deepeval import evaluate
from deepeval.test_case import LLMTestCase
+
...
+
# Note that we pass in execution time since the measure method requires it for evaluation
latency_metric = LatencyMetric(max_seconds=10.0)
test_case = LLMTestCase(input="...", actual_output="...", execution_time=8.3)
From 6e63b989c6efb418097f5b9c162e84bd3690b79e Mon Sep 17 00:00:00 2001
From: Jeffrey Ip
Date: Thu, 11 Jan 2024 19:54:14 -0800
Subject: [PATCH 44/46] Updated docs
---
docs/docs/evaluation-test-cases.mdx | 7 -------
1 file changed, 7 deletions(-)
diff --git a/docs/docs/evaluation-test-cases.mdx b/docs/docs/evaluation-test-cases.mdx
index 4561404d3..753690541 100644
--- a/docs/docs/evaluation-test-cases.mdx
+++ b/docs/docs/evaluation-test-cases.mdx
@@ -158,8 +158,6 @@ prompt_template = """
prompt = prompt_template.format(text="Who's a good boy?")
-context = ["Rocky is a good boy."]
-
# Replace this with the actual retrieved context from your RAG pipeline
retrieval_context = ["Rocky is a good cat."]
@@ -168,7 +166,6 @@ test_case = LLMTestCase(
# Replace this with your actual LLM application
actual_output=chatbot.run(prompt),
expected_output="Me, ruff!",
- context=context,
retrieval_context=retrieval_context
)
```
@@ -182,8 +179,6 @@ Remember, `context` is the ideal retrieval results for a given input and typical
The `execution_time` is an **optional** parameter that represents how long it took your LLM application to finish executing. However, if you're trying to measure something else, like the execution time for only the retrieval part of your RAG pipeline, feel free to supply that number instead of the overall execution time.
```python
-...
-
test_case = LLMTestCase(
input="...",
actual_output="...",
@@ -201,8 +196,6 @@ test_case = LLMTestCase(
The `cost` is an **optional** parameter that represents the token cost for a given execution of your LLM application. However, similar to `execution_time`, the `cost` parameter does not strictly have to be the total completion cost (e.g., it could be the embedding cost), nor does it have to be in any set currency.
```python
-...
-
test_case = LLMTestCase(
input="...",
actual_output="...",
From 7b8e1b4143621e5feef90d8136a9c1f67ae8508d Mon Sep 17 00:00:00 2001
From: Jeffrey Ip
Date: Thu, 11 Jan 2024 19:59:52 -0800
Subject: [PATCH 45/46] new release
---
deepeval/_version.py | 2 +-
pyproject.toml | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/deepeval/_version.py b/deepeval/_version.py
index 3180b565a..2c7d1d32f 100644
--- a/deepeval/_version.py
+++ b/deepeval/_version.py
@@ -1 +1 @@
-__version__: str = "0.20.45"
+__version__: str = "0.20.46"
diff --git a/pyproject.toml b/pyproject.toml
index daad81439..0dc7e540d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "deepeval"
-version = "0.20.45"
+version = "0.20.46"
description = "The Evaluation Framework for LLMs"
authors = ["Jeffrey Ip "]
license = "Apache-2.0"
From 6b4e6e9ea7cfb192f3cae9f97ac5528e128560a7 Mon Sep 17 00:00:00 2001
From: Jeffrey Ip <143328635+penguine-ip@users.noreply.github.com>
Date: Fri, 12 Jan 2024 09:55:01 -0800
Subject: [PATCH 46/46] Update README.md
---
README.md | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/README.md b/README.md
index 1e4afff12..62fcfc68a 100644
--- a/README.md
+++ b/README.md
@@ -120,7 +120,7 @@ deepeval test run test_chatbot.py
Alternatively, you can evaluate without Pytest, which is more suited for a notebook environment.
```python
-from deepeval import evalate
+from deepeval import evaluate
from deepeval.metrics import HallucinationMetric
from deepeval.test_case import LLMTestCase
@@ -135,7 +135,7 @@ test_case = LLMTestCase(
actual_output=actual_output,
context=context
)
-evalate([test_case], [hallucination_metric])
+evaluate([test_case], [hallucination_metric])
```
## Evaluating a Dataset / Test Cases in Bulk