
Adding experiment_result class for backup
Anindyadeep committed Mar 5, 2024
1 parent fc26b5d commit 9fef28a
Showing 1 changed file with 0 additions and 100 deletions: deepeval/experimental/harness/experiment.py
@@ -99,103 +99,3 @@ def run(self, tasks: Union[str, List[str]]):
        print(config_to_pass)
        results = self.evaluator(tasks=tasks, config=config_to_pass)
        return results


# class EvaluationResult(BaseModel):
# task_name: str = Field(..., description="The name of the task")
# saved_path: Union[str, PosixPath] = Field(
# ..., description="The evaluation csv path."
# )
# accuracy: float = Field(
# ...,
# description="The final value of the number of test cases passed out of total number of test cases.",
# )
# std_dev: float = Field(..., description="Standard deviation of the results")


# class EvaluationResult(BaseEvaluationExperiment):
# def __init__(self, task_name: str, task_limit: int) -> None:
# self.task_name, self.task_limit = task_name, task_limit

# def visualize_result(
# self, sub_task: Optional[Union[str, List[str]]] = None
# ) -> None:
# raise NotImplementedError

# def collect_results_and_push(self, push_to_hub: Optional[bool] = False):
# all_task_data = HarnessTasks.get_dataset_from_task(
# task_names=self.task_name, task_limit=self.task_limit
# )
# all_task_results = {}
# all_jsonl = os.listdir(self.experiment_folder)

# for task_name, task_data in all_task_data.items():
# test_cases = []
# # todo: this is not applicable, need to find the string which matches

# try:
# closest_match = difflib.get_close_matches(
# task_name, all_jsonl, n=1
# )
# task_jsonl = closest_match[0]

# task_evaluation_dict = json.load(
# open(self.experiment_folder / task_jsonl, "r")
# )
# all_responses, is_correct_overall = [], []
# # also collect the accuracy
# scores = []
# all_prompts, all_targets = [], []

# for eval_dict in task_evaluation_dict:
# responses = eval_dict["filtered_resps"]
# filtered_responses, is_response_correct = zip(*responses)
# all_responses.extend(list(filtered_responses))
# is_correct_overall.extend(list(is_response_correct))
# scores.append(eval_dict["acc"])

# for _, prompt, target, response in zip(
# task_data["doc_id"],
# task_data["prompt"],
# task_data["target"],
# all_responses,
# ):
# test_case = LLMTestCase(
# input=prompt,
# actual_output=str(response),
# expected_output=str(target),
# )
# test_cases.append(test_case)
# all_prompts.append(prompt)
# all_targets.append(target)

# dataset = EvaluationDataset(test_cases=test_cases)
# # Provide an alias when pushing a dataset
# dataset.evaluate(
# metrics=[ExactMatchAccuracyMetric(minimum_score=0.5)]
# )

# if push_to_hub:
# # this is very unstable, for each task it opens a new window in confident-ai.
# dataset.push(alias=task_name)

# # do not save in the memory
# pd.DataFrame(
# {
# "id": list(range(1, len(all_prompts) + 1)),
# "prompt": all_prompts,
# "target": all_targets,
# "response": all_responses,
# "is_correct": is_correct_overall,
# }
# ).to_csv(self.evaluation_csvs_folder / f"{task_name}.csv"),

# all_task_results[task_name] = EvaluationResult(
# task_name=task_name,
# saved_path=self.evaluation_csvs_folder / f"{task_name}.csv",
# accuracy=sum(scores) / len(scores),
# )
# except Exception as e:
# print(f"Task {task_name} not found or not run.\nError: {e}")
# continue
# return all_task_results
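
For reference, a minimal runnable sketch of the result model being backed up, reconstructed from the commented-out snippet deleted above. The class and field names mirror the removed code; the default for std_dev, the use of Path instead of PosixPath, and the usage values are assumptions for illustration only, not part of the commit.

# Sketch reconstructed from the removed snippet; not part of this commit.
from pathlib import Path
from typing import Union

from pydantic import BaseModel, Field


class EvaluationResult(BaseModel):
    task_name: str = Field(..., description="The name of the task")
    # Assumption: Path instead of PosixPath so the sketch runs on any OS.
    saved_path: Union[str, Path] = Field(
        ..., description="The evaluation csv path."
    )
    accuracy: float = Field(
        ...,
        description="Number of test cases passed out of the total number of test cases.",
    )
    # Assumption: default to 0.0 because the removed collection code never set std_dev.
    std_dev: float = Field(0.0, description="Standard deviation of the results")


if __name__ == "__main__":
    # Hypothetical values, for illustration only.
    result = EvaluationResult(
        task_name="hellaswag",
        saved_path="evaluation_csvs/hellaswag.csv",
        accuracy=0.73,
    )
    print(result)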
