autorag -> external_rag
Signed-off-by: lilacheden <[email protected]>
lilacheden committed Jan 19, 2025
1 parent 3924705 commit 7b3fae7
Showing 84 changed files with 98 additions and 98 deletions.
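For catalog consumers the change is purely mechanical: every artifact that lived under metrics.rag.autorag now lives under metrics.rag.external_rag. A minimal migration sketch in Python (the helper below is illustrative, not part of unitxt):

OLD = "metrics.rag.autorag.answer_correctness.token_recall"
NEW = "metrics.rag.external_rag.answer_correctness.token_recall"

def migrate_metric_name(name: str) -> str:
    # Rewrite a pre-rename catalog path to its post-rename equivalent.
    return name.replace("metrics.rag.autorag", "metrics.rag.external_rag")

assert migrate_metric_name(OLD) == NEW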
4 changes: 2 additions & 2 deletions prepare/metrics/llm_as_judge/rag_judge.py
@@ -110,7 +110,7 @@ def get_prediction_field(metric_type):
realization_sufffix = metric_type_to_realization[metric_type]
template = f"templates.rag_eval.{metric_type}.{template_name}{realization_sufffix}"
for inf_label, inference_model in inference_models_v2.items():
- for rag_unitxt_task in ["autorag", "response_generation", "end_to_end"]:
+ for rag_unitxt_task in ["external_rag", "response_generation", "end_to_end"]:
if (
rag_unitxt_task == "response_generation"
and metric_type == "context_relevance"
@@ -119,7 +119,7 @@ def get_prediction_field(metric_type):

judge_to_generator_fields_mapping = (
{}
- if rag_unitxt_task == "autorag"
+ if rag_unitxt_task == "external_rag"
else {"ground_truths": "reference_answers"}
)

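The hunk above passes the external_rag judge inputs through unchanged (empty mapping) and, for the other tasks, maps the judge-side ground_truths field to the task-side reference_answers field. A sketch of that logic, assuming dict-shaped instances (apply_mapping is hypothetical, for illustration only):

def judge_to_generator_fields_mapping(rag_unitxt_task: str) -> dict:
    # external_rag instances already carry the judge's field names: no mapping.
    if rag_unitxt_task == "external_rag":
        return {}
    return {"ground_truths": "reference_answers"}

def apply_mapping(instance: dict, mapping: dict) -> dict:
    # Hypothetical helper: rename judge-side keys to generator-side keys.
    return {mapping.get(key, key): value for key, value in instance.items()}

print(apply_mapping({"ground_truths": ["Paris"]},
                    judge_to_generator_fields_mapping("response_generation")))
# -> {'reference_answers': ['Paris']}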
8 changes: 4 additions & 4 deletions prepare/metrics/rag_answer_correctness.py
@@ -3,7 +3,7 @@
from unitxt.operators import Copy, Rename
from unitxt.test_utils.metrics import test_metric

- task_names = ["autorag", "response_generation", "end_to_end"]
+ task_names = ["external_rag", "response_generation", "end_to_end"]
base = "metrics.rag"
default = "token_recall"
dimension = "answer_correctness"
@@ -29,7 +29,7 @@ def add_scores_prefix_to_target(target, metric_catalog_name, dim_name):


def get_test_pipeline_task_preprocess_steps(task):
- if task == "autorag":
+ if task == "external_rag":
return [
Rename(field_to_field={"task_data/ground_truths": "ground_truths"}),
Rename(field_to_field={"task_data/answer": "answer"}),
@@ -83,7 +83,7 @@ def test_answer_correctness(


def get_preprocess_steps(task):
- if task == "autorag":
+ if task == "external_rag":
return [
Copy(
field_to_field={
@@ -137,7 +137,7 @@ def get_preprocess_steps(task):
overwrite=True,
)

- if new_catalog_name == default and task == "autorag":
+ if new_catalog_name == default and task == "external_rag":
metric = MetricPipeline(
main_score=main_score,
preprocess_steps=preprocess_steps.copy(),
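Registration pattern in the hunks above: each variant lands at metrics.rag.<task>.answer_correctness.<variant>, and when the variant is the default (token_recall) and the task is external_rag, an extra MetricPipeline is built as well (its catalog name is truncated in this view). A name-generation sketch with an illustrative variant subset:

base, dimension, default = "metrics.rag", "answer_correctness", "token_recall"

for task in ["external_rag", "response_generation", "end_to_end"]:
    for variant in [default, "bert_score_recall"]:  # illustrative subset
        print(f"{base}.{task}.{dimension}.{variant}")
        if variant == default and task == "external_rag":
            print("-> additionally builds the default MetricPipeline here")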
6 changes: 3 additions & 3 deletions prepare/metrics/rag_answer_relevance.py
@@ -4,7 +4,7 @@
)
from unitxt.operators import Copy, ListFieldValues

- task_names = ["autorag", "response_generation", "end_to_end"]
+ task_names = ["external_rag", "response_generation", "end_to_end"]
base = "metrics.rag"


@@ -13,7 +13,7 @@ def get_preprocess_steps(task):
# We have to wrap the question by a list (otherwise it will be a string),
# because references are expected to be lists
last_step = ListFieldValues(fields=["references"], to_field="references")
- if task == "autorag":
+ if task == "external_rag":
return [
Copy(
field_to_field={
@@ -55,7 +55,7 @@ def get_preprocess_steps(task):
add_to_catalog(
answer_reward, f"{base}.{task}.answer_relevance.answer_reward", overwrite=True
)
- if task == "autorag":
+ if task == "external_rag":
add_to_catalog(answer_reward, f"{base}.{task}.answer_reward", overwrite=True)

answer_token_overlap = MetricPipeline(
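The comment in the first hunk explains the ListFieldValues step: reference fields must be lists, so the lone question string is wrapped. A data-shape sketch (instance contents are illustrative):

instance = {"references": "What is the capital of France?"}  # bare string

# Effect of ListFieldValues(fields=["references"], to_field="references"):
instance["references"] = [instance["references"]]

assert instance["references"] == ["What is the capital of France?"]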
8 changes: 4 additions & 4 deletions prepare/metrics/rag_context_correctness.py
@@ -6,7 +6,7 @@

default = "mrr"
base = "metrics.rag"
- tasks = ["autorag", "end_to_end"]
+ tasks = ["external_rag", "end_to_end"]
dimension = "context_correctness"


@@ -28,7 +28,7 @@ def add_scores_prefix_to_target(target, metric_catalog_name, dim_name):


def get_preprocess_steps(task):
- if task == "autorag":
+ if task == "external_rag":
return [
Copy(field="context_ids", to_field="prediction"),
Wrap(
@@ -48,7 +48,7 @@


def get_test_pipeline_task_preprocess_steps(task):
- if task == "autorag":
+ if task == "external_rag":
return [
Rename(field_to_field={"task_data/context_ids": "context_ids"}),
Rename(
@@ -85,7 +85,7 @@ def get_test_pipeline_task_preprocess_steps(task):
metric, f"{base}.{task}.{dimension}.{new_catalog_name}", overwrite=True
)

- if new_catalog_name == default and task == "autorag":
+ if new_catalog_name == default and task == "external_rag":
add_to_catalog(metric, f"{base}.{task}.{dimension}", overwrite=True)


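For external_rag, context correctness scores the retrieved context_ids (as the prediction) against the gold ids with MRR/MAP. The Wrap(...) arguments are truncated above, so the reference field in this sketch is an assumption:

instance = {
    "context_ids": ["doc3", "doc1"],         # retrieval order matters for MRR
    "ground_truths_context_ids": ["doc1"],   # assumed gold field, not visible in the diff
}

prediction = instance["context_ids"]                  # Copy(field="context_ids", to_field="prediction")
references = [instance["ground_truths_context_ids"]]  # assumed effect of the Wrap step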
6 changes: 3 additions & 3 deletions prepare/metrics/rag_context_relevance.py
@@ -5,13 +5,13 @@
from unitxt.operators import Copy

base = "metrics.rag"
- tasks = ["autorag", "end_to_end"]
+ tasks = ["external_rag", "end_to_end"]
default = "perplexity_flan_t5_small"
dimension = "context_relevance"


def get_preprocess_steps(task):
- if task == "autorag":
+ if task == "external_rag":
return [
Copy(field="contexts", to_field="references"),
Copy(field="question", to_field="prediction"),
@@ -45,7 +45,7 @@ def get_preprocess_steps(task):
metric, f"{base}.{task}.{dimension}.{new_catalog_name}", overwrite=True
)

- if new_catalog_name == default and task == "autorag":
+ if new_catalog_name == default and task == "external_rag":
metric = MetricPipeline(
main_score=main_score,
preprocess_steps=get_preprocess_steps(task).copy(),
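Context relevance runs in the opposite direction from the answer metrics: the question becomes the prediction and the contexts become the references, so the perplexity judge measures how well each context accounts for the question. Shape sketch (contents illustrative):

instance = {
    "question": "What is the capital of France?",
    "contexts": ["Paris is the capital and largest city of France."],
}

references = instance["contexts"]   # Copy(field="contexts", to_field="references")
prediction = instance["question"]   # Copy(field="question", to_field="prediction")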
8 changes: 4 additions & 4 deletions prepare/metrics/rag_faithfulness.py
@@ -8,7 +8,7 @@
base = "metrics.rag"
default = "token_k_precision"
dimension = "faithfulness"
- task_names = ["autorag", "response_generation", "end_to_end"]
+ task_names = ["external_rag", "response_generation", "end_to_end"]


def get_scores_prefix(metric_catalog_name, dim_name):
@@ -31,7 +31,7 @@ def add_scores_prefix_to_target(target, metric_catalog_name, dim_name):


def get_preprocess_steps(task):
- if task == "autorag":
+ if task == "external_rag":
return [
Copy(
field_to_field={
@@ -61,7 +61,7 @@


def get_test_pipeline_task_preprocess_steps(task):
- if task == "autorag":
+ if task == "external_rag":
return [
Rename(field_to_field={"task_data/contexts": "contexts"}),
Rename(field_to_field={"task_data/answer": "answer"}),
@@ -105,7 +105,7 @@ def get_test_pipeline_task_preprocess_steps(task):
metric, f"{base}.{task}.{dimension}.{new_catalog_name}", overwrite=True
)

- if new_catalog_name == default and task == "autorag":
+ if new_catalog_name == default and task == "external_rag":
metric = MetricPipeline(
main_score=main_score,
preprocess_steps=get_preprocess_steps(task),
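The faithfulness default, token_k_precision, asks how much of the generated answer is supported by the retrieved contexts. The Copy(field_to_field={...}) body is truncated above; the direction in this sketch (answer as prediction, contexts as references) is inferred from the Rename steps in the test-pipeline hunk:

instance = {
    "answer": "Paris is the capital of France.",
    "contexts": ["Paris is the capital and largest city of France."],
}

prediction = instance["answer"]     # token precision is computed over these tokens...
references = instance["contexts"]   # ...against the supporting contexts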
4 changes: 2 additions & 2 deletions prepare/metrics/rag_metrics_deprecated.py
@@ -4,7 +4,7 @@
from unitxt.operators import Copy, ListFieldValues

base = "metrics.rag"
- new_base = "metrics.rag.autorag"
+ new_base = "metrics.rag.external_rag"


def add_metric_pipeline_to_catalog(
@@ -187,7 +187,7 @@ def get_replacing_metric(depr_metric):
postprocess_steps=[
Copy(field="score/instance/reference_scores", to_field="score/instance/score")
],
- __deprecated_msg__="This metric is deprecated. Use metrics.rag.autorag.context_relevance instead.",
+ __deprecated_msg__="This metric is deprecated. Use metrics.rag.external_rag.context_relevance instead.",
)
add_to_catalog(context_perplexity, "metrics.rag.context_perplexity", overwrite=True)

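The deprecated entries stay loadable but carry a __deprecated_msg__ naming their external_rag replacement; unitxt surfaces such messages when the old artifact is used (the exact mechanism is not shown here). A resolution sketch with the two mappings that appear in this diff (the resolve helper is hypothetical):

DEPRECATIONS = {
    "metrics.rag.context_perplexity": "metrics.rag.external_rag.context_relevance",
    "metrics.rag.answer_reward": "metrics.rag.external_rag.answer_reward",
}

def resolve(name: str) -> str:
    # Hypothetical helper mirroring the deprecation messages above.
    if name in DEPRECATIONS:
        print(f"warning: {name} is deprecated; use {DEPRECATIONS[name]} instead")
        return DEPRECATIONS[name]
    return name

resolve("metrics.rag.context_perplexity")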
2 changes: 1 addition & 1 deletion prepare/metrics/rag_recommended_metric_lists.py
@@ -63,7 +63,7 @@ def get_recommended_metrics(resources_string, rag_unitxt_task):

def register_recommended_metric_lists():
for resource_str in recommended_metrics.keys():
- for rag_unitxt_task in ["response_generation", "end_to_end", "autorag"]:
+ for rag_unitxt_task in ["response_generation", "end_to_end", "external_rag"]:
metrics = MetricsList(
get_recommended_metrics(resource_str, rag_unitxt_task)
)
2 changes: 1 addition & 1 deletion src/unitxt/catalog/metrics/rag/answer_correctness.json
@@ -19,5 +19,5 @@
}
],
"metric": "metrics.token_overlap",
"__deprecated_msg__": "This metric should be replaced with metrics.rag.autorag.answer_correctness"
"__deprecated_msg__": "This metric should be replaced with metrics.rag.external_rag.answer_correctness"
}
@@ -19,5 +19,5 @@
}
],
"metric": "metrics.bert_score.deberta_large_mnli",
"__deprecated_msg__": "This metric should be replaced with metrics.rag.autorag.answer_correctness.bert_score_recall"
"__deprecated_msg__": "This metric should be replaced with metrics.rag.external_rag.answer_correctness.bert_score_recall"
}
@@ -19,5 +19,5 @@
}
],
"metric": "metrics.bert_score.deberta_v3_base_mnli_xnli_ml",
"__deprecated_msg__": "This metric should be replaced with metrics.rag.autorag.answer_correctness.bert_score_recall_ml"
"__deprecated_msg__": "This metric should be replaced with metrics.rag.external_rag.answer_correctness.bert_score_recall_ml"
}
@@ -19,5 +19,5 @@
}
],
"metric": "metrics.sentence_bert.bge_large_en_1_5",
"__deprecated_msg__": "This metric should be replaced with metrics.rag.autorag.answer_correctness.sentence_bert_bge"
"__deprecated_msg__": "This metric should be replaced with metrics.rag.external_rag.answer_correctness.sentence_bert_bge"
}
@@ -19,5 +19,5 @@
}
],
"metric": "metrics.sentence_bert.minilm_l12_v2",
"__deprecated_msg__": "This metric should be replaced with metrics.rag.autorag.answer_correctness.sentence_bert_mini_lm"
"__deprecated_msg__": "This metric should be replaced with metrics.rag.external_rag.answer_correctness.sentence_bert_mini_lm"
}
@@ -19,5 +19,5 @@
}
],
"metric": "metrics.token_overlap",
"__deprecated_msg__": "This metric should be replaced with metrics.rag.autorag.answer_correctness.token_recall"
"__deprecated_msg__": "This metric should be replaced with metrics.rag.external_rag.answer_correctness.token_recall"
}
@@ -26,5 +26,5 @@
}
],
"metric": "metrics.token_overlap",
"__deprecated_msg__": "This metric should be replaced with metrics.rag.autorag.answer_relevance.token_recall"
"__deprecated_msg__": "This metric should be replaced with metrics.rag.external_rag.answer_relevance.token_recall"
}
2 changes: 1 addition & 1 deletion src/unitxt/catalog/metrics/rag/answer_reward.json
@@ -26,5 +26,5 @@
}
],
"metric": "metrics.reward.deberta_v3_large_v2",
"__deprecated_msg__": "This metric should be replaced with metrics.rag.autorag.answer_reward"
"__deprecated_msg__": "This metric should be replaced with metrics.rag.external_rag.answer_reward"
}

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

2 changes: 1 addition & 1 deletion src/unitxt/catalog/metrics/rag/context_correctness.json
@@ -15,5 +15,5 @@
}
],
"metric": "metrics.mrr",
"__deprecated_msg__": "This metric should be replaced with metrics.rag.autorag.context_correctness"
"__deprecated_msg__": "This metric should be replaced with metrics.rag.external_rag.context_correctness"
}
@@ -15,5 +15,5 @@
}
],
"metric": "metrics.map",
"__deprecated_msg__": "This metric should be replaced with metrics.rag.autorag.context_correctness.map"
"__deprecated_msg__": "This metric should be replaced with metrics.rag.external_rag.context_correctness.map"
}
@@ -15,5 +15,5 @@
}
],
"metric": "metrics.mrr",
"__deprecated_msg__": "This metric should be replaced with metrics.rag.autorag.context_correctness.mrr"
"__deprecated_msg__": "This metric should be replaced with metrics.rag.external_rag.context_correctness.mrr"
}
@@ -15,5 +15,5 @@
}
],
"metric": "metrics.retrieval_at_k",
"__deprecated_msg__": "This metric should be replaced with metrics.rag.autorag.context_correctness.retrieval_at_k"
"__deprecated_msg__": "This metric should be replaced with metrics.rag.external_rag.context_correctness.retrieval_at_k"
}
2 changes: 1 addition & 1 deletion src/unitxt/catalog/metrics/rag/context_perplexity.json
@@ -26,5 +26,5 @@
"to_field": "score/instance/score"
}
],
"__deprecated_msg__": "This metric is deprecated. Use metrics.rag.autorag.context_relevance instead."
"__deprecated_msg__": "This metric is deprecated. Use metrics.rag.external_rag.context_relevance instead."
}
2 changes: 1 addition & 1 deletion src/unitxt/catalog/metrics/rag/context_relevance.json
@@ -19,5 +19,5 @@
}
],
"metric": "metrics.perplexity_q.flan_t5_small",
"__deprecated_msg__": "This metric should be replaced with metrics.rag.autorag.context_relevance"
"__deprecated_msg__": "This metric should be replaced with metrics.rag.external_rag.context_relevance"
}
@@ -19,5 +19,5 @@
}
],
"metric": "metrics.perplexity_q.flan_t5_small",
"__deprecated_msg__": "This metric should be replaced with metrics.rag.autorag.context_relevance.perplexity_flan_t5_small"
"__deprecated_msg__": "This metric should be replaced with metrics.rag.external_rag.context_relevance.perplexity_flan_t5_small"
}
@@ -19,5 +19,5 @@
}
],
"metric": "metrics.sentence_bert.bge_large_en_1_5",
"__deprecated_msg__": "This metric should be replaced with metrics.rag.autorag.context_relevance.sentence_bert_bge"
"__deprecated_msg__": "This metric should be replaced with metrics.rag.external_rag.context_relevance.sentence_bert_bge"
}
@@ -19,5 +19,5 @@
}
],
"metric": "metrics.sentence_bert.minilm_l12_v2",
"__deprecated_msg__": "This metric should be replaced with metrics.rag.autorag.context_relevance.sentence_bert_mini_lm"
"__deprecated_msg__": "This metric should be replaced with metrics.rag.external_rag.context_relevance.sentence_bert_mini_lm"
}
@@ -19,5 +19,5 @@
}
],
"metric": "metrics.token_overlap",
"__deprecated_msg__": "This metric should be replaced with metrics.rag.autorag.context_relevance.token_precision"
"__deprecated_msg__": "This metric should be replaced with metrics.rag.external_rag.context_relevance.token_precision"
}
@@ -0,0 +1,10 @@
+ {
+   "__type__": "metrics_list",
+   "items": [
+     "metrics.rag.external_rag.answer_correctness.token_recall",
+     "metrics.rag.external_rag.faithfulness.token_k_precision",
+     "metrics.rag.external_rag.answer_relevance.token_recall",
+     "metrics.rag.external_rag.context_relevance.token_precision",
+     "metrics.rag.external_rag.context_correctness.mrr"
+   ]
+ }
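This new catalog file bundles the default external_rag metric for each dimension into a single metrics_list, matching the register_recommended_metric_lists loop earlier in the diff. A sketch of how such an artifact could be produced (MetricsList and add_to_catalog are the unitxt APIs used in this diff; the catalog name below is a placeholder, since the file's real path is not visible in this view):

from unitxt import add_to_catalog
from unitxt.metrics import MetricsList

metrics = MetricsList(
    items=[
        "metrics.rag.external_rag.answer_correctness.token_recall",
        "metrics.rag.external_rag.faithfulness.token_k_precision",
        "metrics.rag.external_rag.answer_relevance.token_recall",
        "metrics.rag.external_rag.context_relevance.token_precision",
        "metrics.rag.external_rag.context_correctness.mrr",
    ]
)
# Placeholder catalog name -- the real one is not shown in this view.
add_to_catalog(metrics, "metrics.rag.external_rag.recommended.example", overwrite=True)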