autorag -> external_rag
Signed-off-by: lilacheden <[email protected]>
lilacheden committed Jan 19, 2025
1 parent 3924705 commit 7b3fae7
Showing 84 changed files with 98 additions and 98 deletions.
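For catalog consumers the change is purely mechanical: every artifact that lived under metrics.rag.autorag now lives under metrics.rag.external_rag. A minimal migration sketch in Python (the helper below is illustrative, not part of unitxt):

OLD = "metrics.rag.autorag.answer_correctness.token_recall"
NEW = "metrics.rag.external_rag.answer_correctness.token_recall"

def migrate_metric_name(name: str) -> str:
    # Rewrite a pre-rename catalog path to its post-rename equivalent.
    return name.replace("metrics.rag.autorag", "metrics.rag.external_rag")

assert migrate_metric_name(OLD) == NEW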
4 changes: 2 additions & 2 deletions prepare/metrics/llm_as_judge/rag_judge.py
@@ -110,7 +110,7 @@ def get_prediction_field(metric_type):
realization_sufffix = metric_type_to_realization[metric_type]
template = f"templates.rag_eval.{metric_type}.{template_name}{realization_sufffix}"
for inf_label, inference_model in inference_models_v2.items():
- for rag_unitxt_task in ["autorag", "response_generation", "end_to_end"]:
+ for rag_unitxt_task in ["external_rag", "response_generation", "end_to_end"]:
if (
rag_unitxt_task == "response_generation"
and metric_type == "context_relevance"
@@ -119,7 +119,7 @@ def get_prediction_field(metric_type):

judge_to_generator_fields_mapping = (
{}
- if rag_unitxt_task == "autorag"
+ if rag_unitxt_task == "external_rag"
else {"ground_truths": "reference_answers"}
)

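The hunk above passes the external_rag judge inputs through unchanged (empty mapping) and, for the other tasks, maps the judge-side ground_truths field to the task-side reference_answers field. A sketch of that logic, assuming dict-shaped instances (apply_mapping is hypothetical, for illustration only):

def judge_to_generator_fields_mapping(rag_unitxt_task: str) -> dict:
    # external_rag instances already carry the judge's field names: no mapping.
    if rag_unitxt_task == "external_rag":
        return {}
    return {"ground_truths": "reference_answers"}

def apply_mapping(instance: dict, mapping: dict) -> dict:
    # Hypothetical helper: rename judge-side keys to generator-side keys.
    return {mapping.get(key, key): value for key, value in instance.items()}

print(apply_mapping({"ground_truths": ["Paris"]},
                    judge_to_generator_fields_mapping("response_generation")))
# -> {'reference_answers': ['Paris']}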
8 changes: 4 additions & 4 deletions prepare/metrics/rag_answer_correctness.py
@@ -3,7 +3,7 @@
from unitxt.operators import Copy, Rename
from unitxt.test_utils.metrics import test_metric

- task_names = ["autorag", "response_generation", "end_to_end"]
+ task_names = ["external_rag", "response_generation", "end_to_end"]
base = "metrics.rag"
default = "token_recall"
dimension = "answer_correctness"
@@ -29,7 +29,7 @@ def add_scores_prefix_to_target(target, metric_catalog_name, dim_name):


def get_test_pipeline_task_preprocess_steps(task):
- if task == "autorag":
+ if task == "external_rag":
return [
Rename(field_to_field={"task_data/ground_truths": "ground_truths"}),
Rename(field_to_field={"task_data/answer": "answer"}),
@@ -83,7 +83,7 @@ def test_answer_correctness(


def get_preprocess_steps(task):
- if task == "autorag":
+ if task == "external_rag":
return [
Copy(
field_to_field={
@@ -137,7 +137,7 @@ def get_preprocess_steps(task):
overwrite=True,
)

- if new_catalog_name == default and task == "autorag":
+ if new_catalog_name == default and task == "external_rag":
metric = MetricPipeline(
main_score=main_score,
preprocess_steps=preprocess_steps.copy(),
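Registration pattern in the hunks above: each variant lands at metrics.rag.<task>.answer_correctness.<variant>, and when the variant is the default (token_recall) and the task is external_rag, an extra MetricPipeline is built as well (its catalog name is truncated in this view). A name-generation sketch with an illustrative variant subset:

base, dimension, default = "metrics.rag", "answer_correctness", "token_recall"

for task in ["external_rag", "response_generation", "end_to_end"]:
    for variant in [default, "bert_score_recall"]:  # illustrative subset
        print(f"{base}.{task}.{dimension}.{variant}")
        if variant == default and task == "external_rag":
            print("-> additionally builds the default MetricPipeline here")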
6 changes: 3 additions & 3 deletions prepare/metrics/rag_answer_relevance.py
@@ -4,7 +4,7 @@
)
from unitxt.operators import Copy, ListFieldValues

- task_names = ["autorag", "response_generation", "end_to_end"]
+ task_names = ["external_rag", "response_generation", "end_to_end"]
base = "metrics.rag"


@@ -13,7 +13,7 @@ def get_preprocess_steps(task):
# We have to wrap the question by a list (otherwise it will be a string),
# because references are expected to be lists
last_step = ListFieldValues(fields=["references"], to_field="references")
- if task == "autorag":
+ if task == "external_rag":
return [
Copy(
field_to_field={
@@ -55,7 +55,7 @@ def get_preprocess_steps(task):
add_to_catalog(
answer_reward, f"{base}.{task}.answer_relevance.answer_reward", overwrite=True
)
- if task == "autorag":
+ if task == "external_rag":
add_to_catalog(answer_reward, f"{base}.{task}.answer_reward", overwrite=True)

answer_token_overlap = MetricPipeline(
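The comment in the first hunk explains the ListFieldValues step: reference fields must be lists, so the lone question string is wrapped. A data-shape sketch (instance contents are illustrative):

instance = {"references": "What is the capital of France?"}  # bare string

# Effect of ListFieldValues(fields=["references"], to_field="references"):
instance["references"] = [instance["references"]]

assert instance["references"] == ["What is the capital of France?"]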
8 changes: 4 additions & 4 deletions prepare/metrics/rag_context_correctness.py
@@ -6,7 +6,7 @@

default = "mrr"
base = "metrics.rag"
- tasks = ["autorag", "end_to_end"]
+ tasks = ["external_rag", "end_to_end"]
dimension = "context_correctness"


@@ -28,7 +28,7 @@ def add_scores_prefix_to_target(target, metric_catalog_name, dim_name):


def get_preprocess_steps(task):
- if task == "autorag":
+ if task == "external_rag":
return [
Copy(field="context_ids", to_field="prediction"),
Wrap(
@@ -48,7 +48,7 @@


def get_test_pipeline_task_preprocess_steps(task):
- if task == "autorag":
+ if task == "external_rag":
return [
Rename(field_to_field={"task_data/context_ids": "context_ids"}),
Rename(
@@ -85,7 +85,7 @@ def get_test_pipeline_task_preprocess_steps(task):
metric, f"{base}.{task}.{dimension}.{new_catalog_name}", overwrite=True
)

- if new_catalog_name == default and task == "autorag":
+ if new_catalog_name == default and task == "external_rag":
add_to_catalog(metric, f"{base}.{task}.{dimension}", overwrite=True)


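For external_rag, context correctness scores the retrieved context_ids (as the prediction) against the gold ids with MRR/MAP. The Wrap(...) arguments are truncated above, so the reference field in this sketch is an assumption:

instance = {
    "context_ids": ["doc3", "doc1"],         # retrieval order matters for MRR
    "ground_truths_context_ids": ["doc1"],   # assumed gold field, not visible in the diff
}

prediction = instance["context_ids"]                  # Copy(field="context_ids", to_field="prediction")
references = [instance["ground_truths_context_ids"]]  # assumed effect of the Wrap step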
6 changes: 3 additions & 3 deletions prepare/metrics/rag_context_relevance.py
@@ -5,13 +5,13 @@
from unitxt.operators import Copy

base = "metrics.rag"
- tasks = ["autorag", "end_to_end"]
+ tasks = ["external_rag", "end_to_end"]
default = "perplexity_flan_t5_small"
dimension = "context_relevance"


def get_preprocess_steps(task):
- if task == "autorag":
+ if task == "external_rag":
return [
Copy(field="contexts", to_field="references"),
Copy(field="question", to_field="prediction"),
@@ -45,7 +45,7 @@ def get_preprocess_steps(task):
metric, f"{base}.{task}.{dimension}.{new_catalog_name}", overwrite=True
)

- if new_catalog_name == default and task == "autorag":
+ if new_catalog_name == default and task == "external_rag":
metric = MetricPipeline(
main_score=main_score,
preprocess_steps=get_preprocess_steps(task).copy(),
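Context relevance runs in the opposite direction from the answer metrics: the question becomes the prediction and the contexts become the references, so the perplexity judge measures how well each context accounts for the question. Shape sketch (contents illustrative):

instance = {
    "question": "What is the capital of France?",
    "contexts": ["Paris is the capital and largest city of France."],
}

references = instance["contexts"]   # Copy(field="contexts", to_field="references")
prediction = instance["question"]   # Copy(field="question", to_field="prediction")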
8 changes: 4 additions & 4 deletions prepare/metrics/rag_faithfulness.py
@@ -8,7 +8,7 @@
base = "metrics.rag"
default = "token_k_precision"
dimension = "faithfulness"
- task_names = ["autorag", "response_generation", "end_to_end"]
+ task_names = ["external_rag", "response_generation", "end_to_end"]


def get_scores_prefix(metric_catalog_name, dim_name):
@@ -31,7 +31,7 @@ def add_scores_prefix_to_target(target, metric_catalog_name, dim_name):


def get_preprocess_steps(task):
- if task == "autorag":
+ if task == "external_rag":
return [
Copy(
field_to_field={
@@ -61,7 +61,7 @@


def get_test_pipeline_task_preprocess_steps(task):
- if task == "autorag":
+ if task == "external_rag":
return [
Rename(field_to_field={"task_data/contexts": "contexts"}),
Rename(field_to_field={"task_data/answer": "answer"}),
@@ -105,7 +105,7 @@ def get_test_pipeline_task_preprocess_steps(task):
metric, f"{base}.{task}.{dimension}.{new_catalog_name}", overwrite=True
)

- if new_catalog_name == default and task == "autorag":
+ if new_catalog_name == default and task == "external_rag":
metric = MetricPipeline(
main_score=main_score,
preprocess_steps=get_preprocess_steps(task),
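The faithfulness default, token_k_precision, asks how much of the generated answer is supported by the retrieved contexts. The Copy(field_to_field={...}) body is truncated above; the direction in this sketch (answer as prediction, contexts as references) is inferred from the Rename steps in the test-pipeline hunk:

instance = {
    "answer": "Paris is the capital of France.",
    "contexts": ["Paris is the capital and largest city of France."],
}

prediction = instance["answer"]     # token precision is computed over these tokens...
references = instance["contexts"]   # ...against the supporting contexts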
4 changes: 2 additions & 2 deletions prepare/metrics/rag_metrics_deprecated.py
@@ -4,7 +4,7 @@
from unitxt.operators import Copy, ListFieldValues

base = "metrics.rag"
- new_base = "metrics.rag.autorag"
+ new_base = "metrics.rag.external_rag"


def add_metric_pipeline_to_catalog(
@@ -187,7 +187,7 @@ def get_replacing_metric(depr_metric):
postprocess_steps=[
Copy(field="score/instance/reference_scores", to_field="score/instance/score")
],
- __deprecated_msg__="This metric is deprecated. Use metrics.rag.autorag.context_relevance instead.",
+ __deprecated_msg__="This metric is deprecated. Use metrics.rag.external_rag.context_relevance instead.",
)
add_to_catalog(context_perplexity, "metrics.rag.context_perplexity", overwrite=True)

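The deprecated entries stay loadable but carry a __deprecated_msg__ naming their external_rag replacement; unitxt surfaces such messages when the old artifact is used (the exact mechanism is not shown here). A resolution sketch with the two mappings that appear in this diff (the resolve helper is hypothetical):

DEPRECATIONS = {
    "metrics.rag.context_perplexity": "metrics.rag.external_rag.context_relevance",
    "metrics.rag.answer_reward": "metrics.rag.external_rag.answer_reward",
}

def resolve(name: str) -> str:
    # Hypothetical helper mirroring the deprecation messages above.
    if name in DEPRECATIONS:
        print(f"warning: {name} is deprecated; use {DEPRECATIONS[name]} instead")
        return DEPRECATIONS[name]
    return name

resolve("metrics.rag.context_perplexity")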
2 changes: 1 addition & 1 deletion prepare/metrics/rag_recommended_metric_lists.py
@@ -63,7 +63,7 @@ def get_recommended_metrics(resources_string, rag_unitxt_task):

def register_recommended_metric_lists():
for resource_str in recommended_metrics.keys():
- for rag_unitxt_task in ["response_generation", "end_to_end", "autorag"]:
+ for rag_unitxt_task in ["response_generation", "end_to_end", "external_rag"]:
metrics = MetricsList(
get_recommended_metrics(resource_str, rag_unitxt_task)
)
2 changes: 1 addition & 1 deletion src/unitxt/catalog/metrics/rag/answer_correctness.json
@@ -19,5 +19,5 @@
}
],
"metric": "metrics.token_overlap",
"__deprecated_msg__": "This metric should be replaced with metrics.rag.autorag.answer_correctness"
"__deprecated_msg__": "This metric should be replaced with metrics.rag.external_rag.answer_correctness"
}
@@ -19,5 +19,5 @@
}
],
"metric": "metrics.bert_score.deberta_large_mnli",
"__deprecated_msg__": "This metric should be replaced with metrics.rag.autorag.answer_correctness.bert_score_recall"
"__deprecated_msg__": "This metric should be replaced with metrics.rag.external_rag.answer_correctness.bert_score_recall"
}
@@ -19,5 +19,5 @@
}
],
"metric": "metrics.bert_score.deberta_v3_base_mnli_xnli_ml",
"__deprecated_msg__": "This metric should be replaced with metrics.rag.autorag.answer_correctness.bert_score_recall_ml"
"__deprecated_msg__": "This metric should be replaced with metrics.rag.external_rag.answer_correctness.bert_score_recall_ml"
}
@@ -19,5 +19,5 @@
}
],
"metric": "metrics.sentence_bert.bge_large_en_1_5",
"__deprecated_msg__": "This metric should be replaced with metrics.rag.autorag.answer_correctness.sentence_bert_bge"
"__deprecated_msg__": "This metric should be replaced with metrics.rag.external_rag.answer_correctness.sentence_bert_bge"
}
@@ -19,5 +19,5 @@
}
],
"metric": "metrics.sentence_bert.minilm_l12_v2",
"__deprecated_msg__": "This metric should be replaced with metrics.rag.autorag.answer_correctness.sentence_bert_mini_lm"
"__deprecated_msg__": "This metric should be replaced with metrics.rag.external_rag.answer_correctness.sentence_bert_mini_lm"
}
@@ -19,5 +19,5 @@
}
],
"metric": "metrics.token_overlap",
"__deprecated_msg__": "This metric should be replaced with metrics.rag.autorag.answer_correctness.token_recall"
"__deprecated_msg__": "This metric should be replaced with metrics.rag.external_rag.answer_correctness.token_recall"
}
@@ -26,5 +26,5 @@
}
],
"metric": "metrics.token_overlap",
"__deprecated_msg__": "This metric should be replaced with metrics.rag.autorag.answer_relevance.token_recall"
"__deprecated_msg__": "This metric should be replaced with metrics.rag.external_rag.answer_relevance.token_recall"
}
2 changes: 1 addition & 1 deletion src/unitxt/catalog/metrics/rag/answer_reward.json
@@ -26,5 +26,5 @@
}
],
"metric": "metrics.reward.deberta_v3_large_v2",
"__deprecated_msg__": "This metric should be replaced with metrics.rag.autorag.answer_reward"
"__deprecated_msg__": "This metric should be replaced with metrics.rag.external_rag.answer_reward"
}

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

2 changes: 1 addition & 1 deletion src/unitxt/catalog/metrics/rag/context_correctness.json
@@ -15,5 +15,5 @@
}
],
"metric": "metrics.mrr",
"__deprecated_msg__": "This metric should be replaced with metrics.rag.autorag.context_correctness"
"__deprecated_msg__": "This metric should be replaced with metrics.rag.external_rag.context_correctness"
}
@@ -15,5 +15,5 @@
}
],
"metric": "metrics.map",
"__deprecated_msg__": "This metric should be replaced with metrics.rag.autorag.context_correctness.map"
"__deprecated_msg__": "This metric should be replaced with metrics.rag.external_rag.context_correctness.map"
}
@@ -15,5 +15,5 @@
}
],
"metric": "metrics.mrr",
"__deprecated_msg__": "This metric should be replaced with metrics.rag.autorag.context_correctness.mrr"
"__deprecated_msg__": "This metric should be replaced with metrics.rag.external_rag.context_correctness.mrr"
}
@@ -15,5 +15,5 @@
}
],
"metric": "metrics.retrieval_at_k",
"__deprecated_msg__": "This metric should be replaced with metrics.rag.autorag.context_correctness.retrieval_at_k"
"__deprecated_msg__": "This metric should be replaced with metrics.rag.external_rag.context_correctness.retrieval_at_k"
}
2 changes: 1 addition & 1 deletion src/unitxt/catalog/metrics/rag/context_perplexity.json
@@ -26,5 +26,5 @@
"to_field": "score/instance/score"
}
],
"__deprecated_msg__": "This metric is deprecated. Use metrics.rag.autorag.context_relevance instead."
"__deprecated_msg__": "This metric is deprecated. Use metrics.rag.external_rag.context_relevance instead."
}
2 changes: 1 addition & 1 deletion src/unitxt/catalog/metrics/rag/context_relevance.json
@@ -19,5 +19,5 @@
}
],
"metric": "metrics.perplexity_q.flan_t5_small",
"__deprecated_msg__": "This metric should be replaced with metrics.rag.autorag.context_relevance"
"__deprecated_msg__": "This metric should be replaced with metrics.rag.external_rag.context_relevance"
}
@@ -19,5 +19,5 @@
}
],
"metric": "metrics.perplexity_q.flan_t5_small",
"__deprecated_msg__": "This metric should be replaced with metrics.rag.autorag.context_relevance.perplexity_flan_t5_small"
"__deprecated_msg__": "This metric should be replaced with metrics.rag.external_rag.context_relevance.perplexity_flan_t5_small"
}
@@ -19,5 +19,5 @@
}
],
"metric": "metrics.sentence_bert.bge_large_en_1_5",
"__deprecated_msg__": "This metric should be replaced with metrics.rag.autorag.context_relevance.sentence_bert_bge"
"__deprecated_msg__": "This metric should be replaced with metrics.rag.external_rag.context_relevance.sentence_bert_bge"
}
@@ -19,5 +19,5 @@
}
],
"metric": "metrics.sentence_bert.minilm_l12_v2",
"__deprecated_msg__": "This metric should be replaced with metrics.rag.autorag.context_relevance.sentence_bert_mini_lm"
"__deprecated_msg__": "This metric should be replaced with metrics.rag.external_rag.context_relevance.sentence_bert_mini_lm"
}
@@ -19,5 +19,5 @@
}
],
"metric": "metrics.token_overlap",
"__deprecated_msg__": "This metric should be replaced with metrics.rag.autorag.context_relevance.token_precision"
"__deprecated_msg__": "This metric should be replaced with metrics.rag.external_rag.context_relevance.token_precision"
}
@@ -0,0 +1,10 @@
+ {
+   "__type__": "metrics_list",
+   "items": [
+     "metrics.rag.external_rag.answer_correctness.token_recall",
+     "metrics.rag.external_rag.faithfulness.token_k_precision",
+     "metrics.rag.external_rag.answer_relevance.token_recall",
+     "metrics.rag.external_rag.context_relevance.token_precision",
+     "metrics.rag.external_rag.context_correctness.mrr"
+   ]
+ }
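This new catalog file bundles the default external_rag metric for each dimension into a single metrics_list, matching the register_recommended_metric_lists loop earlier in the diff. A sketch of how such an artifact could be produced (MetricsList and add_to_catalog are the unitxt APIs used in this diff; the catalog name below is a placeholder, since the file's real path is not visible in this view):

from unitxt import add_to_catalog
from unitxt.metrics import MetricsList

metrics = MetricsList(
    items=[
        "metrics.rag.external_rag.answer_correctness.token_recall",
        "metrics.rag.external_rag.faithfulness.token_k_precision",
        "metrics.rag.external_rag.answer_relevance.token_recall",
        "metrics.rag.external_rag.context_relevance.token_precision",
        "metrics.rag.external_rag.context_correctness.mrr",
    ]
)
# Placeholder catalog name -- the real one is not shown in this view.
add_to_catalog(metrics, "metrics.rag.external_rag.recommended.example", overwrite=True)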