diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/ArProBinary_GPT4_FewShot.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/ArProBinary_GPT4_FewShot.py new file mode 100644 index 00000000..ab41d36f --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/ArProBinary_GPT4_FewShot.py @@ -0,0 +1,88 @@ +import re + +from llmebench.datasets import ArProBinaryDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import ArProTask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "gpt-4-32k (version 0314)", + "description": "GPT4 32k tokens model hosted on Azure, using the ChatCompletion API. API version '2023-03-15-preview'. 3 samples where chosen per test sample based on MaxMarginalRelevance for few shot learning.", + "scores": {"Micro-F1": "0.592"}, + } + + +def config(): + return { + "dataset": ArProBinaryDataset, + "task": ArProTask, + "model": OpenAIModel, + "model_args": { + "max_tries": 3, + }, + } + + +def prompt(input_sample, examples): + prompt_text = ( + f"Your task is to analyze the text and determine if it contains elements of propaganda.\n\n" + f"Below you will find a few examples that can help you to understand:\n\n" + ) + + fs_prompt = few_shot_prompt(input_sample, prompt_text, examples) + return [ + { + "role": "system", + "content": "You are an expert annotator.", + }, + { + "role": "user", + "content": fs_prompt, + }, + ] + + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + for index, example in enumerate(examples): + sent = example["input"] + label = example["label"] + out_prompt = ( + out_prompt + + "Example " + + str(index) + + ":" + + "\n" + + "text: " + + sent + + "\nlabel: " + + label + + "\n\n" + ) + + out_prompt = ( + out_prompt + + "Based on the instructions and examples above analyze the following 'text' and predict whether it contains the use of any propaganda technique. Answer only by true or false. Return only predicted label.\n\n" + ) + out_prompt = out_prompt + "text: " + input_sample + "\nlabel: \n" + + return out_prompt + + +def post_process(response): + input_label = response["choices"][0]["message"]["content"] + pred_label = input_label.replace(".", "").strip().lower() + + pred_label = pred_label.replace("label:", "").strip() + + if "true" == pred_label: + pred_label = "true" + + elif "false" == pred_label: + pred_label = "false" + else: + pred_label = None + + return pred_label diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/ArProBinary_GPT4_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/ArProBinary_GPT4_ZeroShot.py new file mode 100644 index 00000000..459055b5 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/ArProBinary_GPT4_ZeroShot.py @@ -0,0 +1,86 @@ +import re + +from llmebench.datasets import ArProBinaryDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import ArProTask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "gpt-4-32k (version 0314)", + "description": "GPT4 32k tokens model hosted on Azure, using the ChatCompletion API. 
API version '2023-03-15-preview'.", + "scores": {"Micro-F1": "0.526"}, + } + + +def config(): + return { + "dataset": ArProBinaryDataset, + "task": ArProTask, + "model": OpenAIModel, + "model_args": { + "max_tries": 3, + }, + } + + +def prompt(input_sample): + prompt_text = ( + f"Your task is to analyze the text and determine if it contains elements of propaganda. Based on the instructions, analyze the following 'text' and predict whether it contains the use of any propaganda technique. Answer only by true or false. Return only predicted label.\n\n" + f"text: {input_sample}\n" + f"label: \n" + ) + + return [ + { + "role": "system", + "content": "You are an expert fact checker.", + }, + { + "role": "user", + "content": prompt_text, + }, + ] + + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + for example in examples: + sent = example["input"] + label = example["label"] + + out_prompt = ( + out_prompt + "Sentence: " + sent + "\n" + "label: " + label + "\n\n" + ) + + # Append the sentence we want the model to predict for but leave the Label blank + out_prompt = out_prompt + "Sentence: " + input_sample + "\nlabel: \n" + + # print("=========== FS Prompt =============\n") + # print(out_prompt) + + return out_prompt + + +def post_process(response): + input_label = response["choices"][0]["message"]["content"] + input_label = input_label.replace(".", "").strip().lower() + + if ( + "true" in input_label + or "label: 1" in input_label + or "label: yes" in input_label + ): + pred_label = "true" + elif ( + "false" in input_label + or "label: 0" in input_label + or "label: no" in input_label + ): + pred_label = "false" + else: + print("label problem!! " + input_label) + pred_label = None + + return pred_label diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/ArProCoarse_GPT4_FewShot.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/ArProCoarse_GPT4_FewShot.py new file mode 100644 index 00000000..3319a4c3 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/ArProCoarse_GPT4_FewShot.py @@ -0,0 +1,119 @@ +import re + +from llmebench.datasets import ArProCoarseDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import MultilabelPropagandaTask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "gpt-4-32k (version 0314)", + "description": "GPT4 32k tokens model hosted on Azure, using the ChatCompletion API. API version '2023-03-15-preview'. 3 samples where chosen per test sample based on MaxMarginalRelevance for few shot learning.", + "scores": {"Micro-F1": "0.587"}, + } + + +def config(): + return { + "dataset": ArProCoarseDataset, + "task": MultilabelPropagandaTask, + "model": OpenAIModel, + "model_args": { + "max_tries": 3, + }, + } + + +def prompt(input_sample, examples): + prompt_text = ( + f"Your task is to analyze the text and determine if it contains elements of propaganda.\n\n" + f"The following coarse-grained propaganda techniques is defined based on their appearance of any of the fine-grained propaganda techniques. 
The left side of the equal sign indicate coarse-grained techniques and right side indicate fine-grained techniques.\n\n" + f"no_technique = ['no propaganda']\n" + f"Manipulative Wording = ['Exaggeration/Minimisation' , 'Loaded Language' , 'Obfuscation, Vagueness, Confusion' , 'Repetition']\n" + f"Reputation = ['Appeal to Hypocrisy' , 'Doubt' , 'Guilt by Association' , 'Name Calling/Labeling' , 'Questioning the Reputation']\n" + f"Justification = ['Appeal to Authority' , 'Appeal to Fear/Prejudice' , 'Appeal to Popularity' , 'Appeal to Values' , 'Flag Waving']\n" + f"Simplification = ['Causal Oversimplification' , 'Consequential Oversimplification' , 'False Dilemma/No Choice']\n" + f"Distraction = ['Red Herring' , 'Straw Man' , 'Whataboutism']\n" + f"Call = ['Appeal to Time' , 'Conversation Killer' , 'Slogans']\n" + f"Below you will find a few examples of text with coarse-grained propaganda techniques:\n\n" + ) + + fs_prompt = few_shot_prompt(input_sample, prompt_text, examples) + return [ + { + "role": "system", + "content": "You are an expert annotator.", + }, + { + "role": "user", + "content": fs_prompt, + }, + ] + + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + for index, example in enumerate(examples): + sent = example["input"] + tech_str = "" + for t in example["label"]: + tech_str += "'" + t + "', " + + out_prompt = ( + out_prompt + + "Example " + + str(index) + + ":" + + "\n" + + "text: " + + sent + + "\nlabel: " + + tech_str + + "\n\n" + ) + + out_prompt = out_prompt + ( + f"Based on the instructions and examples above analyze the following text and provide only labels as a list of string.\n\n" + ) + out_prompt = out_prompt + "text: " + input_sample + "\nlabel: \n" + + return out_prompt + + +def post_process(response): + label = response["choices"][0]["message"]["content"] # .lower() + # pred_label = eval(label) + + labels = [] + + response = [ + s.strip().replace("'", "").replace("[", "").replace("]", "") + for s in label.split(",") + if len(s) > 1 + ] + + # print(response) + for label in response: + label = label.lower() + if "manipulative" in label: + labels.append("Manipulative_Wording") + if "call" in label: + labels.append("Call") + if "reputation" in label: + labels.append("Reputation") + if "technique" in label or "propaganda" in label: + labels.append("no_technique") + if "justification" in label: + labels.append("Justification") + if "simplification" in label: + labels.append("Simplification") + if "distraction" in label: + labels.append("Distraction") + + if len(labels) == 0: + labels.append("no_technique") + if len(labels) > 1 and "no_technique" in labels: + labels.remove("no_technique") + + return labels diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/ArProCoarse_GPT4_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/ArProCoarse_GPT4_ZeroShot.py new file mode 100644 index 00000000..47c538fa --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/ArProCoarse_GPT4_ZeroShot.py @@ -0,0 +1,90 @@ +from llmebench.datasets import ArProCoarseDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import MultilabelPropagandaTask + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "gpt-4-32k (version 0314)", + "description": "GPT4 32k tokens model hosted on Azure, using the ChatCompletion API. 
API version '2023-03-15-preview'.", + "scores": {"Micro-F1": "0.540"}, + } + + +def config(): + return { + "dataset": ArProCoarseDataset, + "task": MultilabelPropagandaTask, + "model": OpenAIModel, + "model_args": { + "max_tries": 3, + }, + } + + +def prompt(input_sample): + prompt_text = ( + f"Your task is to analyze the text and determine if it contains elements of propaganda.\n\n" + f"The following coarse-grained propaganda techniques are defined based on the appearance of any of the fine-grained propaganda techniques. The left side of the equal sign indicates coarse-grained techniques and right side indicates fine-grained techniques.\n\n" + f"no_technique = ['no propaganda']\n" + f"Manipulative Wording = ['Exaggeration/Minimisation' , 'Loaded Language' , 'Obfuscation, Vagueness, Confusion' , 'Repetition']\n" + f"Reputation = ['Appeal to Hypocrisy' , 'Doubt' , 'Guilt by Association' , 'Name Calling/Labeling' , 'Questioning the Reputation']\n" + f"Justification = ['Appeal to Authority' , 'Appeal to Fear/Prejudice' , 'Appeal to Popularity' , 'Appeal to Values' , 'Flag Waving']\n" + f"Simplification = ['Causal Oversimplification' , 'Consequential Oversimplification' , 'False Dilemma/No Choice']\n" + f"Distraction = ['Red Herring' , 'Straw Man' , 'Whataboutism']\n" + f"Call = ['Appeal to Time' , 'Conversation Killer' , 'Slogans']\n" + ) + out_prompt = prompt_text + ( + f"Based on the instructions above analyze the following text and provide only coarse-grained propaganda techniques as a list of strings.\n\n" + ) + out_prompt = out_prompt + "text: " + input_sample + "\nlabel: \n" + + return [ + { + "role": "system", + "content": "You are an expert annotator.", + }, + { + "role": "user", + "content": out_prompt, + }, + ] + + +def post_process(response): + label = response["choices"][0]["message"]["content"] # .lower() + # pred_label = eval(label) + + labels = [] + + response = [ + s.strip().replace("'", "").replace("[", "").replace("]", "") + for s in label.split(",") + if len(s) > 1 + ] + + # print(response) + for label in response: + label = label.lower() + if "manipulative" in label: + labels.append("Manipulative_Wording") + if "call" in label: + labels.append("Call") + if "reputation" in label: + labels.append("Reputation") + if "technique" in label or "propaganda" in label: + labels.append("no_technique") + if "justification" in label: + labels.append("Justification") + if "simplification" in label: + labels.append("Simplification") + if "distraction" in label: + labels.append("Distraction") + + if len(labels) == 0: + labels.append("no_technique") + if len(labels) > 1 and "no_technique" in labels: + labels.remove("no_technique") + + return labels diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/ArProMultilabel_GPT4_FewShot.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/ArProMultilabel_GPT4_FewShot.py new file mode 100644 index 00000000..25a420cb --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/ArProMultilabel_GPT4_FewShot.py @@ -0,0 +1,207 @@ +import ast +import re +import codecs + +from llmebench.datasets import ArProMultilabelDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import MultilabelPropagandaTask + +ESCAPE_SEQUENCE_RE = re.compile(r''' + ( \\U........ # 8-digit hex escapes + | \\u.... # 4-digit hex escapes + | \\x.. 
# 2-digit hex escapes + | \\[0-7]{1,3} # Octal escapes + | \\N\{[^}]+\} # Unicode characters by name + | \\[\\'"abfnrtv] # Single-character escapes + )''', re.UNICODE | re.VERBOSE) + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "gpt-4-32k (version 0314)", + "description": "GPT4 32k tokens model hosted on Azure, using the ChatCompletion API. API version '2023-03-15-preview'. 3 samples where chosen per test sample based on MaxMarginalRelevance for few shot learning.", + "scores": {"Micro-F1": "0.467"}, + } + + +def config(): + return { + "dataset": ArProMultilabelDataset, + "task": MultilabelPropagandaTask, + "model": OpenAIModel, + "model_args": { + "max_tries": 10, + }, + } + + +def prompt(input_sample, examples): + prompt_text = ( + f"Your task is to analyze the text and determine if it contains the following propaganda techniques.\n\n" + f"'Appeal to Time' , 'Conversation Killer' , 'Slogans' , 'Red Herring' , 'Straw Man' , 'Whataboutism' , 'Appeal to Authority' , 'Appeal to Fear/Prejudice' , 'Appeal to Popularity' , 'Appeal to Values' , 'Flag Waving' , 'Exaggeration/Minimisation' , 'Loaded Language' , 'Obfuscation/Vagueness/Confusion' , 'Repetition' , 'Appeal to Hypocrisy' , 'Doubt' , 'Guilt by Association' , 'Name Calling/Labeling' , 'Questioning the Reputation' , 'Causal Oversimplification' , 'Consequential Oversimplification' , 'False Dilemma/No Choice' , 'no technique'" + f"Below you will find a few examples of text with propaganda techniques:\n\n" + ) + + fs_prompt = few_shot_prompt(input_sample, prompt_text, examples) + return [ + { + "role": "system", + "content": "You are an expert annotator.", + }, + { + "role": "user", + "content": fs_prompt, + }, + ] + + + + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + for index, example in enumerate(examples): + sent = example["input"] + tech_str = "" + for t in example["label"]: + tech_str += "'" + t + "', " + + out_prompt = ( + out_prompt + + "Example " + + str(index) + + ":" + + "\n" + + "text: " + + sent + + "\nlabel: " + + tech_str + + "\n\n" + ) + + out_prompt = out_prompt + ( + f"Based on the instructions and examples above analyze the following text and provide only labels as a list of string.\n\n" + ) + out_prompt = out_prompt + "text: " + input_sample + "\nlabel: \n" + + return out_prompt + + +def decode_escapes(s): + def decode_match(match): + return codecs.decode(match.group(0), 'unicode-escape') + + return ESCAPE_SEQUENCE_RE.sub(decode_match, s) + + +def fix_single_label(label): + label_fixed = "" + if "slogan" in label: + label_fixed = "Slogans" + if "loaded" in label: + label_fixed = "Loaded_Language" + if "prejudice" in label or "fear" in label or "mongering" in label: + label_fixed = "Appeal_to_Fear-Prejudice" + if "terminating" in label or "thought" in label or "conversation" in label or "killer" in label: + label_fixed = "Conversation_Killer" + if "calling" in label or label == "name c" or "labeling" in label: + label_fixed = "Name_Calling-Labeling" + if "minimisation" in label or label == "exaggeration minim" or "exaggeration" in label: + label_fixed = "Exaggeration-Minimisation" + if "values" in label: + label_fixed = "Appeal_to_Values" + if "flag" in label or "wav" in label: + label_fixed = "Flag_Waving" + if "obfusc" in label or "vague" in label or "confusion" in label: + label_fixed = "Obfuscation-Vagueness-Confusion" + if "causal" in label: + label_fixed = "Causal_Oversimplification" + if "conseq" in label: + label_fixed = 
"Consequential_Oversimplification" + if "authority" in label: + label_fixed = "Appeal_to_Authority" + if "choice" in label or "dilemma" in label or "false" in label: + label_fixed = "False_Dilemma-No_Choice" + if "herring" in label or "irrelevant" in label: + label_fixed = "Red_Herring" + if "straw" in label or "misrepresentation" in label: + label_fixed = "Straw_Man" + if "guilt" in label or "association" in label: + label_fixed = "Guilt_by_Association" + if "questioning" in label or "reputation" in label: + label_fixed = "Questioning_the_Reputation" + if "whataboutism" in label: + label_fixed = "Whataboutism" + if "doubt" in label: + label_fixed = "Doubt" + if "doubt" in label: + label_fixed = "Doubt" + if "time" in label: + label_fixed = "Appeal_to_Time" + if "popularity" in label: + label_fixed = "Appeal_to_Popularity" + if "repetition" in label: + label_fixed = "Repetition" + if "hypocrisy" in label: + label_fixed = "Appeal_to_Hypocrisy" + + if ("no propaganda" in label or "no technique" in label + or label == "" + or label == "no" + or label == "appeal to history" + or label == "appeal to emotion" + or label == "appeal to" + or label == "appeal" + or label == "appeal to author" + or label == "emotional appeal" + or "no techn" in label + or "hashtag" in label + or "theory" in label + or "specific mention" in label + or "sarcasm" in label + or "frustration" in label + or "analogy" in label + or "metaphor" in label + or "religious" in label + or "gratitude" in label + or 'no_technique' in label + or "technique" in label): + label_fixed = "no_technique" + + #print(label_fixed) + + return label_fixed + +def fix_multilabel(pred_label): + if "used in this text" in pred_label or "no technique" in pred_label: + return ["no_technique"] + + labels_fixed = [] + pred_label = pred_label.replace("'label: ","").replace("'label': ","").replace("\"\"","\"").replace("\'\'","\'") + + + pred_label = decode_escapes(pred_label).replace("\'", "\"") + if not pred_label.startswith("["): + pred_label = "[" + pred_label + "]" + pred_label = ast.literal_eval(pred_label) + + for label in pred_label: + label = label.strip().lower() + label_fixed = fix_single_label(label) + labels_fixed.append(label_fixed) + + out_put_labels = [] + # Remove no technique label when we have other techniques for the same text + if len(labels_fixed) > 1: + for flabel in labels_fixed: + if flabel != "no_technique": + out_put_labels.append(flabel) + return out_put_labels + + return labels_fixed + +def post_process(response): + label = response["choices"][0]["message"]["content"] # .lower() + labels = fix_multilabel(label) + + return labels diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/ArProMultilabel_GPT4_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/ArProMultilabel_GPT4_ZeroShot.py new file mode 100644 index 00000000..384a035f --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/ArProMultilabel_GPT4_ZeroShot.py @@ -0,0 +1,179 @@ +import ast +import re +import codecs + +from llmebench.datasets import ArProMultilabelDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import MultilabelPropagandaTask + +ESCAPE_SEQUENCE_RE = re.compile(r''' + ( \\U........ # 8-digit hex escapes + | \\u.... # 4-digit hex escapes + | \\x.. 
# 2-digit hex escapes + | \\[0-7]{1,3} # Octal escapes + | \\N\{[^}]+\} # Unicode characters by name + | \\[\\'"abfnrtv] # Single-character escapes + )''', re.UNICODE | re.VERBOSE) + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "gpt-4-32k (version 0314)", + "description": "GPT4 32k tokens model hosted on Azure, using the ChatCompletion API. API version '2023-03-15-preview'.", + "scores": {"Micro-F1": "0.372"}, + } + + +def config(): + return { + "dataset": ArProMultilabelDataset, + "task": MultilabelPropagandaTask, + "model": OpenAIModel, + "model_args": { + "max_tries": 10, + }, + } + + +def prompt(input_sample): + prompt_text = ( + f"Your task is to analyze the text and determine if it contains the following propaganda techniques.\n\n" + f"'Appeal to Time' , 'Conversation Killer' , 'Slogans' , 'Red Herring' , 'Straw Man' , 'Whataboutism' , " + f"'Appeal to Authority' , 'Appeal to Fear/Prejudice' , 'Appeal to Popularity' , 'Appeal to Values' , 'Flag Waving' , " + f"'Exaggeration/Minimisation' , 'Loaded Language' , 'Obfuscation/Vagueness/Confusion' , 'Repetition' , 'Appeal to Hypocrisy' , " + f"'Doubt' , 'Guilt by Association' , 'Name Calling/Labeling' , 'Questioning the Reputation' , 'Causal Oversimplification' , " + f"'Consequential Oversimplification' , 'False Dilemma/No Choice' , 'no technique'. \nProvide only labels as a list of strings.\n\n" + ) + out_prompt = prompt_text + "text: " + input_sample + "\nlabel: \n" + + return [ + { + "role": "system", + "content": "You are an expert annotator.", + }, + { + "role": "user", + "content": out_prompt, + }, + ] + + +def decode_escapes(s): + def decode_match(match): + return codecs.decode(match.group(0), 'unicode-escape') + + return ESCAPE_SEQUENCE_RE.sub(decode_match, s) + + +def fix_single_label(label): + label_fixed = "" + if "slogan" in label: + label_fixed = "Slogans" + if "loaded" in label: + label_fixed = "Loaded_Language" + if "prejudice" in label or "fear" in label or "mongering" in label: + label_fixed = "Appeal_to_Fear-Prejudice" + if "terminating" in label or "thought" in label or "conversation" in label or "killer" in label: + label_fixed = "Conversation_Killer" + if "calling" in label or label == "name c" or "labeling" in label: + label_fixed = "Name_Calling-Labeling" + if "minimisation" in label or label == "exaggeration minim" or "exaggeration" in label: + label_fixed = "Exaggeration-Minimisation" + if "values" in label: + label_fixed = "Appeal_to_Values" + if "flag" in label or "wav" in label: + label_fixed = "Flag_Waving" + if "obfusc" in label or "vague" in label or "confusion" in label: + label_fixed = "Obfuscation-Vagueness-Confusion" + if "causal" in label: + label_fixed = "Causal_Oversimplification" + if "conseq" in label: + label_fixed = "Consequential_Oversimplification" + if "authority" in label: + label_fixed = "Appeal_to_Authority" + if "choice" in label or "dilemma" in label or "false" in label: + label_fixed = "False_Dilemma-No_Choice" + if "herring" in label or "irrelevant" in label: + label_fixed = "Red_Herring" + if "straw" in label or "misrepresentation" in label: + label_fixed = "Straw_Man" + if "guilt" in label or "association" in label: + label_fixed = "Guilt_by_Association" + if "questioning" in label or "reputation" in label: + label_fixed = "Questioning_the_Reputation" + if "whataboutism" in label: + label_fixed = "Whataboutism" + if "doubt" in label: + label_fixed = "Doubt" + if "doubt" in label: + label_fixed = "Doubt" + if "time" in label: + 
label_fixed = "Appeal_to_Time" + if "popularity" in label: + label_fixed = "Appeal_to_Popularity" + if "repetition" in label: + label_fixed = "Repetition" + if "hypocrisy" in label: + label_fixed = "Appeal_to_Hypocrisy" + + if ("no propaganda" in label or "no technique" in label + or label == "" + or label == "no" + or label == "appeal to history" + or label == "appeal to emotion" + or label == "appeal to" + or label == "appeal" + or label == "appeal to author" + or label == "emotional appeal" + or "no techn" in label + or "hashtag" in label + or "theory" in label + or "specific mention" in label + or "sarcasm" in label + or "frustration" in label + or "analogy" in label + or "metaphor" in label + or "religious" in label + or "gratitude" in label + or 'no_technique' in label + or "technique" in label): + label_fixed = "no_technique" + + #print(label_fixed) + + return label_fixed + +def fix_multilabel(pred_label): + if "used in this text" in pred_label or "no technique" in pred_label: + return ["no_technique"] + + labels_fixed = [] + pred_label = pred_label.replace("'label: ","").replace("'label': ","").replace("\"\"","\"").replace("\'\'","\'") + + + pred_label = decode_escapes(pred_label).replace("\'", "\"") + if not pred_label.startswith("["): + pred_label = "[" + pred_label + "]" + pred_label = ast.literal_eval(pred_label) + + for label in pred_label: + label = label.strip().lower() + label_fixed = fix_single_label(label) + labels_fixed.append(label_fixed) + + out_put_labels = [] + # Remove no technique label when we have other techniques for the same text + if len(labels_fixed) > 1: + for flabel in labels_fixed: + if flabel != "no_technique": + out_put_labels.append(flabel) + return out_put_labels + + return labels_fixed + +def post_process(response): + label = response["choices"][0]["message"]["content"] # .lower() + labels = fix_multilabel(label) + + return labels diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/ArProSpan_GPT4_FewShot.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/ArProSpan_GPT4_FewShot.py new file mode 100644 index 00000000..d96c408c --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/ArProSpan_GPT4_FewShot.py @@ -0,0 +1,238 @@ +import ast +import json +import re +import codecs + +from llmebench.datasets import ArProSpanDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import ArProSpanTask + +ESCAPE_SEQUENCE_RE = re.compile(r''' + ( \\U........ # 8-digit hex escapes + | \\u.... # 4-digit hex escapes + | \\x.. # 2-digit hex escapes + | \\[0-7]{1,3} # Octal escapes + | \\N\{[^}]+\} # Unicode characters by name + | \\[\\'"abfnrtv] # Single-character escapes + )''', re.UNICODE | re.VERBOSE) + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "gpt-4-32k (version 0314)", + "description": "GPT4 32k tokens model hosted on Azure, using the ChatCompletion API. API version '2023-03-15-preview'. We implemented correcting predicted span by GPT. 
3 samples where chosen per test sample based on MaxMarginalRelevance for few shot learning", + "scores": {"Micro-F1": "0.267"}, + } + + +def config(): + return { + "dataset": ArProSpanDataset, + "task": ArProSpanTask, + "task_args": {"correct_span": True}, + "model": OpenAIModel, + "model_args": { + "max_tries": 3, + }, + } + + +def prompt(input_sample, examples): + prompt_text = ( + f"Your task is to analyze the Paragraph and determine if it contains the following propaganda techniques.\n\n" + f"'Appeal to Time' , 'Conversation Killer' , 'Slogans' , 'Red Herring' , 'Straw Man' , 'Whataboutism' , " + f"'Appeal to Authority' , 'Appeal to Fear/Prejudice' , 'Appeal to Popularity' , 'Appeal to Values' , 'Flag Waving' , " + f"'Exaggeration/Minimisation' , 'Loaded Language' , 'Obfuscation/Vagueness/Confusion' , 'Repetition' , 'Appeal to Hypocrisy' , " + f"'Doubt' , 'Guilt by Association' , 'Name Calling/Labeling' , 'Questioning the Reputation' , 'Causal Oversimplification' , " + f"'Consequential Oversimplification' , 'False Dilemma/No Choice' , 'no technique'" + ) + fs_prompt = few_shot_prompt(input_sample, prompt_text, examples) + + + return [ + { + "role": "system", + "content": "You are an expert annotator.", + }, + { + "role": "user", + "content": fs_prompt, + }, + ] + + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + "\n" + + for index, example in enumerate(examples): + sent = example["input"] + ex_labels = [] + + for l in example["label"]: + #print(l) + l.pop('par_txt',None) + ex_labels.append(l) + + ex_labels = str(ex_labels) + + out_prompt = ( + out_prompt + + "Example " + + str(index+1) + + ":\n" + + "Paragraph: " + + sent + + "\nlabel: " + + ex_labels + + "\n\n" + ) + + out_prompt = out_prompt + ( + f"Based on the instructions and examples above analyze the following Paragraph and answer exactly and only by returning a list of the matching labels from the aforementioned techniques and specify the start position and end position of the text span matching each technique." 
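+        # The span text and its character offsets are requested as JSON so that
+        # post_process/fix_span can parse and validate each predicted technique.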
+ f'Use the following template and return the results as a list of json strings [{{"technique": ,"text": ,"start": ,"end": }}]\n\n' + ) + out_prompt = out_prompt + "Paragraph: " + input_sample + "\nlabel: \n" + + return out_prompt + + +def decode_escapes(s): + def decode_match(match): + return codecs.decode(match.group(0), 'unicode-escape') + + return ESCAPE_SEQUENCE_RE.sub(decode_match, s) + + +def fix_single_label(label): + if "slogan" in label: + label_fixed = "Slogans" + if "loaded" in label: + label_fixed = "Loaded_Language" + if "prejudice" in label or "fear" in label or "mongering" in label: + label_fixed = "Appeal_to_Fear-Prejudice" + if ( + "terminating" in label + or "thought" in label + or "conversation" in label + or "killer" in label + ): + label_fixed = "Conversation_Killer" + if "calling" in label or label == "name c" or "labeling" in label: + label_fixed = "Name_Calling-Labeling" + if ( + "minimisation" in label + or label == "exaggeration minim" + or "exaggeration" in label + ): + label_fixed = "Exaggeration-Minimisation" + if "values" in label: + label_fixed = "Appeal_to_Values" + if "flag" in label or "wav" in label: + label_fixed = "Flag_Waving" + if "obfusc" in label or "vague" in label or "confusion" in label: + label_fixed = "Obfuscation-Vagueness-Confusion" + if "causal" in label: + label_fixed = "Causal_Oversimplification" + if "conseq" in label: + label_fixed = "Consequential_Oversimplification" + if "authority" in label: + label_fixed = "Appeal_to_Authority" + if "choice" in label or "dilemma" in label or "false" in label: + label_fixed = "False_Dilemma-No_Choice" + if "herring" in label or "irrelevant" in label: + label_fixed = "Red_Herring" + if "straw" in label or "misrepresentation" in label: + label_fixed = "Straw_Man" + if "guilt" in label or "association" in label: + label_fixed = "Guilt_by_Association" + if "questioning" in label or "reputation" in label: + label_fixed = "Questioning_the_Reputation" + if "whataboutism" in label: + label_fixed = "Whataboutism" + if "doubt" in label: + label_fixed = "Doubt" + if "doubt" in label: + label_fixed = "Doubt" + if "time" in label: + label_fixed = "Appeal_to_Time" + if "popularity" in label: + label_fixed = "Appeal_to_Popularity" + if "repetition" in label: + label_fixed = "Repetition" + if "hypocrisy" in label: + label_fixed = "Appeal_to_Hypocrisy" + + if ( + "no propaganda" in label + or "no technique" in label + or label == "" + or label == "no" + or label == "appeal to history" + or label == "appeal to emotion" + or label == "appeal to" + or label == "appeal" + or label == "appeal to author" + or label == "emotional appeal" + or "no techn" in label + or "hashtag" in label + or "theory" in label + or "specific mention" in label + or "sarcasm" in label + or "frustration" in label + or "analogy" in label + or "metaphor" in label + or "religious" in label + or "gratitude" in label + or 'no_technique' in label + or "technique" in label): + label_fixed = "no_technique" + + return label_fixed + + +def fix_span(prediction): + # print(prediction) + prediction = prediction.replace("},\n{", "}, {").replace("\\n", " ").replace("\n", " ").replace( + '[ ', '[').replace('[ ', '[').replace(" {", "{").replace(" ]", "]").replace(' ]', ']').strip() + + # print(prediction) + + if "provide the paragraph" in prediction: return [] + + try: + pred_labels = ast.literal_eval(prediction) + except: + # print("ERRORRR!") + pred_labels = json.loads(prediction) + + # print(pred_labels) + + # print(prediction) + format_pred_label = [] + for 
i, label in enumerate(pred_labels): + if 'technique' not in label or 'start' not in label or 'end' not in label \ + or "text" not in label or len(label["text"]) < 2: + continue + + label['technique'] = label['technique'].strip().lower() + label['technique'] = fix_single_label(label['technique']) + + format_pred_label.append(label) + + if len(format_pred_label) == 0: + return [] + + final_labels = [] + for pred_label in format_pred_label: + if pred_label['technique'] != "no_technique": + final_labels.append(pred_label) + + return final_labels + + +def post_process(response): + labels = response["choices"][0]["message"]["content"].lower().replace("label: ", "").strip() + labels = fix_span(labels) + + return labels diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/ArProSpan_GPT4_FewShot_Explain.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/ArProSpan_GPT4_FewShot_Explain.py new file mode 100644 index 00000000..712c95d6 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/ArProSpan_GPT4_FewShot_Explain.py @@ -0,0 +1,257 @@ +import ast +import json +import re +import codecs + +from llmebench.datasets import ArProSpanDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import ArProSpanTask + +ESCAPE_SEQUENCE_RE = re.compile(r''' + ( \\U........ # 8-digit hex escapes + | \\u.... # 4-digit hex escapes + | \\x.. # 2-digit hex escapes + | \\[0-7]{1,3} # Octal escapes + | \\N\{[^}]+\} # Unicode characters by name + | \\[\\'"abfnrtv] # Single-character escapes + )''', re.UNICODE | re.VERBOSE) + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "gpt-4-32k (version 0314)", + "description": "GPT4 32k tokens model hosted on Azure, using the ChatCompletion API. API version '2023-03-15-preview'. We implemented correcting predicted span by GPT. 
3 samples where chosen per test sample based on MaxMarginalRelevance for few shot learning", + "scores": {"Micro-F1": "0.267"}, + } + + +def config(): + return { + "dataset": ArProSpanDataset, + "task": ArProSpanTask, + "task_args": {"correct_span": True}, + "model": OpenAIModel, + "model_args": { + "max_tries": 3, + }, + } + + +def prompt(input_sample, examples): + prompt_text = ( + f"Your task is to analyze the Paragraph and determine if it contains the following propaganda techniques and then generate an Explanation/rationale for your predictions.\n" + f"'Appeal to Time' , 'Conversation Killer' , 'Slogans' , 'Red Herring' , 'Straw Man' , 'Whataboutism' , " + f"'Appeal to Authority' , 'Appeal to Fear/Prejudice' , 'Appeal to Popularity' , 'Appeal to Values' , 'Flag Waving' , " + f"'Exaggeration/Minimisation' , 'Loaded Language' , 'Obfuscation/Vagueness/Confusion' , 'Repetition' , 'Appeal to Hypocrisy' , " + f"'Doubt' , 'Guilt by Association' , 'Name Calling/Labeling' , 'Questioning the Reputation' , 'Causal Oversimplification' , " + f"'Consequential Oversimplification' , 'False Dilemma/No Choice' , 'no technique'" + ) + + fs_prompt = few_shot_prompt(input_sample, prompt_text, examples) + + + return [ + { + "role": "system", + "content": "You are an expert annotator.", + }, + { + "role": "user", + "content": fs_prompt, + }, + ] + + +def few_shot_prompt(input_sample, base_prompt, examples): + out_prompt = base_prompt + "\n" + + for index, example in enumerate(examples): + sent = example["input"] + ex_labels = [] + + for l in example["label"]: + #print(l) + l.pop('par_txt',None) + ex_labels.append(l) + + ex_labels = str(ex_labels) + + out_prompt = ( + out_prompt + + "Example " + + str(index+1) + + ":\n" + + "Paragraph: " + + sent + + "\nLabels: " + + ex_labels + + "\n\n" + ) + + out_prompt = out_prompt + ( + f"\nBased on the instructions and examples above analyze the following Paragraph and answer exactly and only by returning a list of " + f"the matching labels from the aforementioned techniques, and specify the start position and end position of the text span matching each technique, " + f"and for each predicted technique, return a 1-sentence long Explanation for your label." + f' Use the following template and return the results as a Labels list of json strings [{{"technique": ,"text": ,"start": ,"end": ,"explanation": }}]\n\n' + ) + out_prompt = out_prompt + "Paragraph: " + input_sample + "\n\nLabels: \n\n" + + out_prompt = out_prompt + ( + f"Given your predictions in Labels list, read your explanation per prediction and revise your prediction. 
" + f'Analyze the Paragraph AGAIN and answer exactly and only by returning a list of Final Labels as json strings [{{"technique": ,"text": ,"start": ,"end": }}]\n\n' + ) + + out_prompt = out_prompt + "Final Labels: \n\n" + + + return out_prompt + + +def decode_escapes(s): + def decode_match(match): + return codecs.decode(match.group(0), 'unicode-escape') + + return ESCAPE_SEQUENCE_RE.sub(decode_match, s) + + +def fix_single_label(label): + if "slogan" in label: + label_fixed = "Slogans" + if "loaded" in label: + label_fixed = "Loaded_Language" + if "prejudice" in label or "fear" in label or "mongering" in label: + label_fixed = "Appeal_to_Fear-Prejudice" + if ( + "terminating" in label + or "thought" in label + or "conversation" in label + or "killer" in label + ): + label_fixed = "Conversation_Killer" + if "calling" in label or label == "name c" or "labeling" in label: + label_fixed = "Name_Calling-Labeling" + if ( + "minimisation" in label + or label == "exaggeration minim" + or "exaggeration" in label + ): + label_fixed = "Exaggeration-Minimisation" + if "values" in label: + label_fixed = "Appeal_to_Values" + if "flag" in label or "wav" in label: + label_fixed = "Flag_Waving" + if "obfusc" in label or "vague" in label or "confusion" in label: + label_fixed = "Obfuscation-Vagueness-Confusion" + if "causal" in label: + label_fixed = "Causal_Oversimplification" + if "conseq" in label: + label_fixed = "Consequential_Oversimplification" + if "authority" in label: + label_fixed = "Appeal_to_Authority" + if "choice" in label or "dilemma" in label or "false" in label: + label_fixed = "False_Dilemma-No_Choice" + if "herring" in label or "irrelevant" in label: + label_fixed = "Red_Herring" + if "straw" in label or "misrepresentation" in label: + label_fixed = "Straw_Man" + if "guilt" in label or "association" in label: + label_fixed = "Guilt_by_Association" + if "questioning" in label or "reputation" in label: + label_fixed = "Questioning_the_Reputation" + if "whataboutism" in label: + label_fixed = "Whataboutism" + if "doubt" in label: + label_fixed = "Doubt" + if "doubt" in label: + label_fixed = "Doubt" + if "time" in label: + label_fixed = "Appeal_to_Time" + if "popularity" in label: + label_fixed = "Appeal_to_Popularity" + if "repetition" in label: + label_fixed = "Repetition" + if "hypocrisy" in label: + label_fixed = "Appeal_to_Hypocrisy" + + if ( + "no propaganda" in label + or "no technique" in label + or label == "" + or label == "no" + or label == "appeal to history" + or label == "appeal to emotion" + or label == "appeal to" + or label == "appeal" + or label == "appeal to author" + or label == "emotional appeal" + or "no techn" in label + or "hashtag" in label + or "theory" in label + or "specific mention" in label + or "sarcasm" in label + or "frustration" in label + or "analogy" in label + or "metaphor" in label + or "religious" in label + or "gratitude" in label + or 'no_technique' in label + or "technique" in label): + label_fixed = "no_technique" + + return label_fixed + + +def fix_span(prediction): + # print(prediction) + if prediction.endswith(","): + prediction = prediction[0:-1] + "}]" + + prediction = prediction.replace("},\n{", "}, {").replace("\\n", " ").replace("\n", " ").replace( + '[ ', '[').replace('[ ', '[').replace(" {", "{").replace(" ]", "]").replace(' ]', ']').strip() + + # print(prediction) + + if "provide the paragraph" in prediction: return [] + + try: + pred_labels = ast.literal_eval(prediction) + except: + # print("ERRORRR!") + pred_labels = 
json.loads(prediction) + + # print(pred_labels) + + # print(prediction) + format_pred_label = [] + for i, label in enumerate(pred_labels): + if 'technique' not in label or 'start' not in label or 'end' not in label \ + or "text" not in label or len(label["text"]) < 2: + continue + + label['technique'] = label['technique'].strip().lower() + label['technique'] = fix_single_label(label['technique']) + + format_pred_label.append(label) + + if len(format_pred_label) == 0: + return [] + + final_labels = [] + for pred_label in format_pred_label: + if pred_label['technique'] != "no_technique": + final_labels.append(pred_label) + + return final_labels + + +def post_process(response): + labels = response["choices"][0]["message"]["content"].lower() + labels1,labels2 = labels.split("final labels:") + #labels1 = labels1.replace('labels:','').split("\n")[0].strip() + #labels1 = fix_span(labels1) + labels = fix_span(labels2) + + #labels = fix_span(labels) + + return labels diff --git a/assets/ar/factuality_disinformation_harmful_content/propaganda/ArProSpan_GPT4_ZeroShot.py b/assets/ar/factuality_disinformation_harmful_content/propaganda/ArProSpan_GPT4_ZeroShot.py new file mode 100644 index 00000000..ae1c4060 --- /dev/null +++ b/assets/ar/factuality_disinformation_harmful_content/propaganda/ArProSpan_GPT4_ZeroShot.py @@ -0,0 +1,249 @@ +import ast +import json +import re +import codecs + +from llmebench.datasets import ArProSpanDataset +from llmebench.models import OpenAIModel +from llmebench.tasks import ArProSpanTask + +ESCAPE_SEQUENCE_RE = re.compile(r''' + ( \\U........ # 8-digit hex escapes + | \\u.... # 4-digit hex escapes + | \\x.. # 2-digit hex escapes + | \\[0-7]{1,3} # Octal escapes + | \\N\{[^}]+\} # Unicode characters by name + | \\[\\'"abfnrtv] # Single-character escapes + )''', re.UNICODE | re.VERBOSE) + + +def metadata(): + return { + "author": "Arabic Language Technologies, QCRI, HBKU", + "model": "gpt-4-32k (version 0314)", + "description": "GPT4 32k tokens model hosted on Azure, using the ChatCompletion API. API version '2023-03-15-preview'. 
We implemented correcting predicted span by GPT.", + "scores": {"Micro-F1": "0.117"}, + } + + +def config(): + return { + "dataset": ArProSpanDataset, + "task": ArProSpanTask, + "task_args": {"correct_span": True}, + "model": OpenAIModel, + "model_args": { + "max_tries": 3, + }, + } + + +def prompt(input_sample): + prompt_text = ( + f"Your task is to analyze the Paragraph and determine if it contains the following propaganda techniques and then generate an Explanation/rationale for your predictions.\n" + f"'Appeal to Time' , 'Conversation Killer' , 'Slogans' , 'Red Herring' , 'Straw Man' , 'Whataboutism' , " + f"'Appeal to Authority' , 'Appeal to Fear/Prejudice' , 'Appeal to Popularity' , 'Appeal to Values' , 'Flag Waving' , " + f"'Exaggeration/Minimisation' , 'Loaded Language' , 'Obfuscation/Vagueness/Confusion' , 'Repetition' , 'Appeal to Hypocrisy' , " + f"'Doubt' , 'Guilt by Association' , 'Name Calling/Labeling' , 'Questioning the Reputation' , 'Causal Oversimplification' , " + f"'Consequential Oversimplification' , 'False Dilemma/No Choice' , 'no technique'" + ) + + out_prompt = prompt_text + ( + f"\nBased on the instructions above analyze the following Paragraph and answer exactly and only by returning a list of " + f"the matching labels from the aforementioned techniques, and specify the start position and end position of the text span matching each technique, " + f"and for each predicted technique, return a 1-sentence long Explanation for your label." + f' Use the following template and return the results as a Labels list of json strings [{{"technique": ,"text": ,"start": ,"end": ,"explanation": }}]\n\n' + ) + # out_prompt = out_prompt + "Paragraph: " + input_sample + "\n\nInitial Labels List: \n\n" + + out_prompt = out_prompt + "Paragraph: " + input_sample + "\n\nLabels: \n\n" + + + # out_prompt = out_prompt + ( + # f"Based on the instructions above, and your predictions in Initial Labels List, " + # f"analyze the Paragraph again and answer exactly and only by returning a list of the matching " + # f"labels from the aforementioned techniques and specify the start position and end position of the " + # f"text span matching each technique. Use the following template and return the results as a Final " + # f'Labels List of json strings [{{"technique": ,"text": ,"start": ,"end": }}]\n\n' + # ) + + # out_prompt = out_prompt + ( + # f"Given your predictions in Initial Labels List and the associated explanations, analyze the Paragraph again " + # f"and revise your decision and make any " + # f"needed corrections/updates on the predicted labels. " + # f"Use the following template and return the predictions after revision as a Final " + # f'Labels List of json strings [{{"technique": ,"text": ,"start": ,"end": , "explanation": }}]\n\n' + # ) + + # out_prompt = out_prompt + ( + #f"Given your predictions in Labels list, read your explanation per prediction and revise your prediction. 
" + #f'Analyze the Paragraph AGAIN and answer exactly and only by returning a list of Final Labels as json strings [{{"technique": ,"text": ,"start": ,"end": }}]\n\n' + #) + # + #out_prompt = out_prompt + "Final Labels: \n\n" + + return [ + { + "role": "system", + "content": "You are an expert annotator.", + }, + { + "role": "user", + "content": out_prompt, + }, + ] + + +def decode_escapes(s): + def decode_match(match): + return codecs.decode(match.group(0), 'unicode-escape') + + return ESCAPE_SEQUENCE_RE.sub(decode_match, s) + + +def fix_single_label(label): + if "slogan" in label: + label_fixed = "Slogans" + if "loaded" in label: + label_fixed = "Loaded_Language" + if "prejudice" in label or "fear" in label or "mongering" in label: + label_fixed = "Appeal_to_Fear-Prejudice" + if ( + "terminating" in label + or "thought" in label + or "conversation" in label + or "killer" in label + ): + label_fixed = "Conversation_Killer" + if "calling" in label or label == "name c" or "labeling" in label: + label_fixed = "Name_Calling-Labeling" + if ( + "minimisation" in label + or label == "exaggeration minim" + or "exaggeration" in label + ): + label_fixed = "Exaggeration-Minimisation" + if "values" in label: + label_fixed = "Appeal_to_Values" + if "flag" in label or "wav" in label: + label_fixed = "Flag_Waving" + if "obfusc" in label or "vague" in label or "confusion" in label: + label_fixed = "Obfuscation-Vagueness-Confusion" + if "causal" in label: + label_fixed = "Causal_Oversimplification" + if "conseq" in label: + label_fixed = "Consequential_Oversimplification" + if "authority" in label: + label_fixed = "Appeal_to_Authority" + if "choice" in label or "dilemma" in label or "false" in label: + label_fixed = "False_Dilemma-No_Choice" + if "herring" in label or "irrelevant" in label: + label_fixed = "Red_Herring" + if "straw" in label or "misrepresentation" in label: + label_fixed = "Straw_Man" + if "guilt" in label or "association" in label: + label_fixed = "Guilt_by_Association" + if "questioning" in label or "reputation" in label: + label_fixed = "Questioning_the_Reputation" + if "whataboutism" in label: + label_fixed = "Whataboutism" + if "doubt" in label: + label_fixed = "Doubt" + if "doubt" in label: + label_fixed = "Doubt" + if "time" in label: + label_fixed = "Appeal_to_Time" + if "popularity" in label: + label_fixed = "Appeal_to_Popularity" + if "repetition" in label: + label_fixed = "Repetition" + if "hypocrisy" in label: + label_fixed = "Appeal_to_Hypocrisy" + + if ( + "no propaganda" in label + or "no technique" in label + or label == "" + or label == "no" + or label == "appeal to history" + or label == "appeal to emotion" + or label == "appeal to" + or label == "appeal" + or label == "appeal to author" + or label == "emotional appeal" + or "no techn" in label + or "hashtag" in label + or "theory" in label + or "specific mention" in label + or "sarcasm" in label + or "frustration" in label + or "analogy" in label + or "metaphor" in label + or "religious" in label + or "gratitude" in label + or 'no_technique' in label + or "technique" in label + or 'rhetorical' in label): + label_fixed = "no_technique" + + return label_fixed + + +def fix_span(prediction): + # print(prediction) + prediction = prediction.replace("},\n{", "}, {").replace("\\n", " ").replace("\n", " ").replace( + '[ ', '[').replace('[ ', '[').replace(" {", "{").replace(" ]", "]").replace(' ]', ']').strip() + + # print(prediction) + + if "provide the paragraph" in prediction: return [] + + try: + pred_labels = 
ast.literal_eval(prediction) + except: + # print("ERRORRR!") + pred_labels = json.loads(prediction) + + # print(pred_labels) + + # print(prediction) + format_pred_label = [] + for i, label in enumerate(pred_labels): + if 'technique' not in label or 'start' not in label or 'end' not in label \ + or "text" not in label or len(label["text"]) < 2: + continue + + label['technique'] = label['technique'].strip().lower() + label['technique'] = fix_single_label(label['technique']) + + format_pred_label.append(label) + + if len(format_pred_label) == 0: + return [] + + final_labels = [] + for pred_label in format_pred_label: + if pred_label['technique'] != "no_technique": + final_labels.append(pred_label) + + return final_labels + + +def post_process(response): + labels = response["choices"][0]["message"]["content"].lower() + #labels1,labels2 = labels.split("final labels:") + #labels1 = labels1.replace('labels:','').split("\n")[0].strip() + #labels1 = fix_span(labels1) + #labels = fix_span(labels2) + + labels = labels.replace("labels:","") + labels = fix_span(labels) + + # if labels1 != labels: + # print(labels1) + # print('=' * 35) + # print(labels) + # else: + # print("=================LABELS BEFORE MATCH AFTER===================") + + return labels diff --git a/llmebench/datasets/ArProBinary.py b/llmebench/datasets/ArProBinary.py new file mode 100644 index 00000000..71eaa4d9 --- /dev/null +++ b/llmebench/datasets/ArProBinary.py @@ -0,0 +1,44 @@ +import json + +from llmebench.datasets.dataset_base import DatasetBase +from llmebench.tasks import TaskType + + +class ArProBinaryDataset(DatasetBase): + def __init__(self, **kwargs): + super(ArProBinaryDataset, self).__init__(**kwargs) + + @staticmethod + def metadata(): + return { + "language": "ar", + "citation": """ + to add + """, + "link": "", + "license": "", + "splits": { + "test": ":data_dir:ArMPro/binary/ArMPro_binary_test.jsonl", + "train": ":data_dir:ArMPro/binary/ArMPro_binary_train.jsonl", + }, + "task_type": TaskType.Classification, + "class_labels": ["true", "false"], + } + + @staticmethod + def get_data_sample(): + return {"id": "001", "input": "paragraph", "label": "true", "type": "paragraph"} + + def load_data(self, data_path): + data_path = self.resolve_path(data_path) + + data = [] + with open(data_path, "r") as fp: + for line_idx, line in enumerate(fp): + line_data = json.loads(line) + id = line_data.get("paragraph_id", None) + text = line_data.get("paragraph", "") + label = line_data.get("label", "").lower() + data.append({"input": text, "label": label, "line_number": id}) + + return data diff --git a/llmebench/datasets/ArProCoarse.py b/llmebench/datasets/ArProCoarse.py new file mode 100644 index 00000000..d8f2832c --- /dev/null +++ b/llmebench/datasets/ArProCoarse.py @@ -0,0 +1,81 @@ +import json +from pathlib import Path + +from llmebench.datasets.dataset_base import DatasetBase +from llmebench.tasks import TaskType + + +class ArProCoarseDataset(DatasetBase): + def __init__(self, techniques_path=None, **kwargs): + # Get the path to the file listing the target techniques + self.techniques_path = Path(techniques_path) if techniques_path else None + super(ArProCoarseDataset, self).__init__(**kwargs) + + @staticmethod + def metadata(): + return { + "language": "ar", + "citation": """ + to add + """, + "link": "", + "license": "", + "splits": { + "test": ":data_dir:ArMPro/coarse/ArMPro_coarse_test.jsonl", + "train": ":data_dir:ArMPro/coarse/ArMPro_coarse_train.jsonl", + }, + "task_type": TaskType.MultiLabelClassification, + 
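+            # Coarse-grained label set: each class groups several fine-grained
+            # propaganda techniques (the mapping is spelled out in the asset prompts).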
"class_labels": [ + "Manipulative_Wording", + "no_technique", + "Reputation", + "Justification", + "Simplification", + "Call", + "Distraction", + ], + } + + @staticmethod + def get_data_sample(): + return { + "id": "001", + "input": "paragraph", + "label": ["no_technique"], + "type": "paragraph", + } + + def get_predefined_techniques(self): + # Load a pre-defined list of propaganda techniques, if available + if self.techniques_path and self.techniques_path.exists(): + self.techniques_path = self.resolve_path(self.techniques_path) + with open(self.techniques_path, "r", encoding="utf-8") as f: + techniques = [label.strip() for label in f.readlines()] + else: + techniques = [ + "Manipulative_Wording", + "no_technique", + "Reputation", + "Justification", + "Simplification", + "Call", + "Distraction", + ] + + return techniques + + def load_data(self, data_path): + data_path = self.resolve_path(data_path) + + data = [] + with open(data_path, "r") as fp: + for line_idx, line in enumerate(fp): + line_data = json.loads(line) + id = line_data.get("paragraph_id", None) + text = line_data.get("paragraph", "") + label = line_data.get("labels", "") + if len(label) == 0: + label = ["no_technique"] + data.append({"input": text, "label": label, "line_number": id}) + + return data diff --git a/llmebench/datasets/ArProMultilabel.py b/llmebench/datasets/ArProMultilabel.py new file mode 100644 index 00000000..eef099c3 --- /dev/null +++ b/llmebench/datasets/ArProMultilabel.py @@ -0,0 +1,115 @@ +import json +from pathlib import Path + +from llmebench.datasets.dataset_base import DatasetBase +from llmebench.tasks import TaskType + + +class ArProMultilabelDataset(DatasetBase): + def __init__(self, techniques_path=None, **kwargs): + # Get the path to the file listing the target techniques + self.techniques_path = Path(techniques_path) if techniques_path else None + super(ArProMultilabelDataset, self).__init__(**kwargs) + + @staticmethod + def metadata(): + return { + "language": "ar", + "citation": """ + to add + """, + "link": "", + "license": "", + "splits": { + "test": ":data_dir:ArMPro/multilabel/ArMPro_multilabel_test.jsonl", + "train": ":data_dir:ArMPro_multilabel_train.jsonl" + }, + "task_type": TaskType.MultiLabelClassification, + "class_labels": [ + "Appeal_to_Authority", + "Appeal_to_Fear-Prejudice", + "Appeal_to_Hypocrisy", + "Appeal_to_Popularity", + "Appeal_to_Time", + "Appeal_to_Values", + "Causal_Oversimplification", + "Consequential_Oversimplification", + "Conversation_Killer", + "Doubt", + "Exaggeration-Minimisation", + "False_Dilemma-No_Choice", + "Flag_Waving", + "Guilt_by_Association", + "Loaded_Language", + "Name_Calling-Labeling", + "Obfuscation-Vagueness-Confusion", + "Questioning_the_Reputation", + "Red_Herring", + "Repetition", + "Slogans", + "Straw_Man", + "Whataboutism", + "no_technique", + ], + } + + @staticmethod + def get_data_sample(): + return { + "id": "001", + "input": "paragraph", + "label": ["no_technique"], + "type": "paragraph", + } + + def get_predefined_techniques(self): + # Load a pre-defined list of propaganda techniques, if available + if self.techniques_path and self.techniques_path.exists(): + self.techniques_path = self.resolve_path(self.techniques_path) + with open(self.techniques_path, "r", encoding="utf-8") as f: + techniques = [label.strip() for label in f.readlines()] + else: + techniques = [ + "Appeal_to_Authority", + "Appeal_to_Fear-Prejudice", + "Appeal_to_Hypocrisy", + "Appeal_to_Popularity", + "Appeal_to_Time", + "Appeal_to_Values", + 
"Causal_Oversimplification", + "Consequential_Oversimplification", + "Conversation_Killer", + "Doubt", + "Exaggeration-Minimisation", + "False_Dilemma-No_Choice", + "Flag_Waving", + "Guilt_by_Association", + "Loaded_Language", + "Name_Calling-Labeling", + "Obfuscation-Vagueness-Confusion", + "Questioning_the_Reputation", + "Red_Herring", + "Repetition", + "Slogans", + "Straw_Man", + "Whataboutism", + "no_technique", + ] + + return techniques + + def load_data(self, data_path): + data_path = self.resolve_path(data_path) + + data = [] + with open(data_path, "r") as fp: + for line_idx, line in enumerate(fp): + line_data = json.loads(line) + id = line_data.get("paragraph_id", None) + text = line_data.get("paragraph", "") + label = line_data.get("labels", "") + if len(label) == 0: + label = ["no_technique"] + data.append({"input": text, "label": label, "line_number": id}) + + return data diff --git a/llmebench/datasets/ArProSpan.py b/llmebench/datasets/ArProSpan.py new file mode 100644 index 00000000..d0605332 --- /dev/null +++ b/llmebench/datasets/ArProSpan.py @@ -0,0 +1,119 @@ +import json +from pathlib import Path + +from llmebench.datasets.dataset_base import DatasetBase +from llmebench.tasks import TaskType + + +class ArProSpanDataset(DatasetBase): + def __init__(self, techniques_path=None, **kwargs): + self.techniques_path = Path(techniques_path) if techniques_path else None + super(ArProSpanDataset, self).__init__(**kwargs) + + @staticmethod + def metadata(): + return { + "language": "ar", + "citation": """ + to add + """, + "link": "", + "license": "", + "splits": { + "test": ":data_dir:ArMPro/span/ArMPro_span_test.jsonl", + "train": ":data_dir:ArMPro/span/ArMPro_span_train.jsonl", + }, + "task_type": TaskType.SequenceLabeling, + "class_labels": [ + "Appeal_to_Authority", + "Appeal_to_Fear-Prejudice", + "Appeal_to_Hypocrisy", + "Appeal_to_Popularity", + "Appeal_to_Time", + "Appeal_to_Values", + "Causal_Oversimplification", + "Consequential_Oversimplification", + "Conversation_Killer", + "Doubt", + "Exaggeration-Minimisation", + "False_Dilemma-No_Choice", + "Flag_Waving", + "Guilt_by_Association", + "Loaded_Language", + "Name_Calling-Labeling", + "Obfuscation-Vagueness-Confusion", + "Questioning_the_Reputation", + "Red_Herring", + "Repetition", + "Slogans", + "Straw_Man", + "Whataboutism", + "no_technique", + ], + } + + @staticmethod + def get_data_sample(): + return { + "input_id": "001", + "input": "paragraph", + "label": [{"technique": "Guilt_by_Association", "start": 13, "end": 52}], + "line_number": 1, + } + + def get_predefined_techniques(self): + # Load a pre-defined list of propaganda techniques, if available + if self.techniques_path and self.techniques_path.exists(): + self.techniques_path = self.resolve_path(self.techniques_path) + with open(self.techniques_path, "r", encoding="utf-8") as f: + techniques = [label.strip() for label in f.readlines()] + else: + techniques = [ + "Appeal_to_Authority", + "Appeal_to_Fear-Prejudice", + "Appeal_to_Hypocrisy", + "Appeal_to_Popularity", + "Appeal_to_Time", + "Appeal_to_Values", + "Causal_Oversimplification", + "Consequential_Oversimplification", + "Conversation_Killer", + "Doubt", + "Exaggeration-Minimisation", + "False_Dilemma-No_Choice", + "Flag_Waving", + "Guilt_by_Association", + "Loaded_Language", + "Name_Calling-Labeling", + "Obfuscation-Vagueness-Confusion", + "Questioning_the_Reputation", + "Red_Herring", + "Repetition", + "Slogans", + "Straw_Man", + "Whataboutism", + "no_technique", + ] + + return techniques + + def 
load_data(self, data_path): + data_path = self.resolve_path(data_path) + + data = [] + with open(data_path, "r") as fp: + for line_idx, line in enumerate(fp): + line_data = json.loads(line) + id = line_data.get("paragraph_id", "") + text = line_data.get("paragraph", "") + label = line_data.get("labels", []) + + # we need to par text at evaluation to do some matching against predicted spans + for l in label: + l['par_txt'] = text + + data.append({"input": text, "input_id": id, "label": label, "line_number": line_idx}) + + print('loaded %d docs from file...'%len(data)) + + return data diff --git a/llmebench/datasets/__init__.py b/llmebench/datasets/__init__.py index ac3bcfad..10d329c6 100644 --- a/llmebench/datasets/__init__.py +++ b/llmebench/datasets/__init__.py @@ -9,6 +9,10 @@ from .ArapTweet import ArapTweetDataset from .ARCD import ARCDDataset from .ArSarcasm import ArSarcasmDataset +from .ArProBinary import ArProBinaryDataset +from .ArProCoarse import ArProCoarseDataset +from .ArProMultilabel import ArProMultilabelDataset +from .ArProSpan import ArProSpanDataset from .ArSarcasm2 import ArSarcasm2Dataset from .ArSAS import ArSASDataset from .ASND import ASNDDataset diff --git a/llmebench/tasks/ArPro.py b/llmebench/tasks/ArPro.py new file mode 100644 index 00000000..1f2f7e5f --- /dev/null +++ b/llmebench/tasks/ArPro.py @@ -0,0 +1,26 @@ +from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score + +from llmebench.tasks.task_base import TaskBase + + +class ArProTask(TaskBase): + def __init__(self, **kwargs): + super(ArProTask, self).__init__(**kwargs) + + def evaluate(self, true_labels, predicted_labels): + predicted_labels = [ + p if p else self.get_random_prediction(set(true_labels)) + for p in predicted_labels + ] + return { + "Macro F1": f1_score(true_labels, predicted_labels, average="macro"), + "Micro F1": f1_score(true_labels, predicted_labels, average="micro"), + "Acc": accuracy_score(true_labels, predicted_labels), + "Weighted Precision": precision_score( + true_labels, predicted_labels, average="weighted" + ), + "Weighted Recall": recall_score( + true_labels, predicted_labels, average="weighted" + ), + "Weighted F1": f1_score(true_labels, predicted_labels, average="weighted"), + } diff --git a/llmebench/tasks/ArProSpan.py b/llmebench/tasks/ArProSpan.py new file mode 100644 index 00000000..c3fbea74 --- /dev/null +++ b/llmebench/tasks/ArProSpan.py @@ -0,0 +1,189 @@ +from llmebench.tasks.task_base import TaskBase +import regex as re + +class ArProSpanTask(TaskBase): + def __init__(self, correct_span=False, **kwargs): + # Decide whether to correct start and end of gpt predictions + self.correct_span = correct_span + super(ArProSpanTask, self).__init__(**kwargs) + + def sort_spans(self, spans): + """ + sort the list of annotations with respect to the starting offset + """ + spans = sorted(spans, key=lambda span: span[1][0]) + + return spans + + def sort_labels(self, all_labels): + # print(all_labels) + sorted_labels = [] + for labels in all_labels: + per_par_labels = [] + for label in labels: + start = label['start'] + end = label['end'] + if 'par_txt' in label: + par_txt = label['par_txt'] + per_par_labels.append((label['technique'], [start, end], label['text'], par_txt)) + else: + per_par_labels.append((label['technique'], [start, end], label['text'])) + + per_par_labels = self.sort_spans(per_par_labels) + sorted_labels.append(per_par_labels) + + # print(sorted_labels) + # print(40*"=") + + return sorted_labels + + def reformatLabels(self, true_labels, 
predicted_labels): + #filtered_true_labels = [] + #filtered_predicted_labels = [] + + # if we apply this, we are like ignoring no technique case at all + # to match the original scorer from wanlp22 task 2 we have to comment this line out + #true_labels, predicted_labels = zip(*filter(all, zip(true_labels, predicted_labels))) + + filtered_true_labels = self.sort_labels(list(true_labels)) + filtered_predicted_labels = self.sort_labels(list(predicted_labels)) + + return filtered_true_labels, filtered_predicted_labels + + def compute_prec_rec_f1(self, prec_numerator, prec_denominator, rec_numerator, rec_denominator): + p, r, f1 = (0, 0, 0) + if prec_denominator > 0: + p = prec_numerator / prec_denominator + if rec_denominator > 0: + r = rec_numerator / rec_denominator + if prec_denominator == 0 and rec_denominator == 0: + f1 = 1.0 + if p > 0 and r > 0: + f1 = 2 * (p * r / (p + r)) + + return p, r, f1 + + def span_intersection(self, gold_span, pred_span): + x = range(gold_span[0], gold_span[1]) + y = range(pred_span[0], pred_span[1]) + inter = set(x).intersection(y) + return len(inter) + + def compute_technique_frequency(self, annotations, technique_name): + all_annotations = [] + for annot in annotations: + for x in annot: + all_annotations.append(x[0]) + + techn_freq = sum([1 for a in all_annotations if a == technique_name]) + + # print(technique_name,techn_freq) + + return techn_freq + + def ammend_span(self, span, span_txt, par): + start = span[0] + end = span[1] + + try: + # get the first matching span + for match in re.finditer(span_txt, par): + start = match.start() + end = match.end() + break + except: + print("Error start end correction") + + return [start,end] + + def compute_span_score(self, gold_annots, pred_annots): + # count total no of annotations + rec_denominator = sum([len(x) for x in gold_annots]) + prec_denominator = sum([len(x) for x in pred_annots]) + + techniques = self.dataset.get_predefined_techniques() + techniques.remove("no_technique") + + technique_Spr_prec = {propaganda_technique: 0 for propaganda_technique in techniques} + technique_Spr_rec = {propaganda_technique: 0 for propaganda_technique in techniques} + cumulative_Spr_prec, cumulative_Spr_rec = (0, 0) + f1_articles = [] + + for i, pred_annot_obj in enumerate(pred_annots): + gold_annot_obj = gold_annots[i] + # print("%s\t%d\t%d" % (example_id, len(gold_annot_obj), len(pred_annot_obj))) + + document_cumulative_Spr_prec, document_cumulative_Spr_rec = (0, 0) + for j, pred_ann in enumerate(pred_annot_obj): + s = "" + ann_length = pred_ann[1][1] - pred_ann[1][0] + + for i, gold_ann in enumerate(gold_annot_obj): + if pred_ann[0] == gold_ann[0]: + if self.correct_span: + pred_ann = list(pred_ann) + # We get the paragraph from the gold par (gold_ann[3]) and the + # predicted span text from pred_ann[2] + pred_ann[1] = self.ammend_span(pred_ann[1], pred_ann[2], gold_ann[3]) + pred_ann = tuple(pred_ann) + + # s += "\tmatch %s %s-%s - %s %s-%s"%(sd[0],sd[1], sd[2], gd[0], gd[1], gd[2]) + intersection = self.span_intersection(gold_ann[1], pred_ann[1]) + # print(intersection) + # print(intersection) + s_ann_length = gold_ann[1][1] - gold_ann[1][0] + Spr_prec = intersection / ann_length + document_cumulative_Spr_prec += Spr_prec + cumulative_Spr_prec += Spr_prec + s += "\tmatch %s %s-%s - %s %s-%s: S(p,r)=|intersect(r, p)|/|p| = %d/%d = %f (cumulative S(p,r)=%f)\n" \ + % (pred_ann[0], pred_ann[1][0], pred_ann[1][1], gold_ann[0], + gold_ann[1][0], gold_ann[1][1], intersection, ann_length, Spr_prec, + cumulative_Spr_prec) + 
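# precision-side credit above: the character overlap is divided by the predicted span length + # recall-side credit below: the same overlap is divided by the gold span length; both sums are also accumulated per technique + 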
technique_Spr_prec[gold_ann[0]] += Spr_prec + + Spr_rec = intersection / s_ann_length + document_cumulative_Spr_rec += Spr_rec + cumulative_Spr_rec += Spr_rec + s += "\tmatch %s %s-%s - %s %s-%s: S(p,r)=|intersect(r, p)|/|r| = %d/%d = %f (cumulative S(p,r)=%f)\n" \ + % (pred_ann[0], pred_ann[1][0], pred_ann[1][1], gold_ann[0], + gold_ann[1][0], gold_ann[1][1], intersection, s_ann_length, Spr_rec, + cumulative_Spr_rec) + technique_Spr_rec[gold_ann[0]] += Spr_rec + + p_article, r_article, f1_article = self.compute_prec_rec_f1(document_cumulative_Spr_prec, + len(pred_annot_obj), + document_cumulative_Spr_rec, + len(gold_annot_obj)) + f1_articles.append(f1_article) + + p, r, f1 = self.compute_prec_rec_f1(cumulative_Spr_prec, prec_denominator, cumulative_Spr_rec, rec_denominator) + + f1_per_technique = [] + + for technique_name in technique_Spr_prec.keys(): + prec_tech, rec_tech, f1_tech = self.compute_prec_rec_f1(technique_Spr_prec[technique_name], + self.compute_technique_frequency(pred_annots, + technique_name), + technique_Spr_prec[technique_name], + self.compute_technique_frequency(gold_annots, + technique_name)) + f1_per_technique.append(f1_tech) + + return p, r, f1, f1_per_technique + + def evaluate(self, true_labels, predicted_labels): + # fix none labels by empty lists instead of randomized predictions + predicted_labels = [p if p else [] for p in predicted_labels] + + # gold_labels_set = set(itertools.chain.from_iterable(true_labels)) + + true_labels, predicted_labels = self.reformatLabels(true_labels, predicted_labels) + + # for p in predicted_labels: + # if p == None or len(p) == 0: + # p = [self.get_random_prediction(gold_labels_set) for _ in range(len(t))] + + precision, recall, micro_f1, f1_per_class = self.compute_span_score(true_labels, predicted_labels) + macro_f1 = sum(f1_per_class) / len(f1_per_class) + + return {"Micro F1": micro_f1, "Macro F1": macro_f1, "Precision": precision, "Recall": recall} \ No newline at end of file diff --git a/llmebench/tasks/__init__.py b/llmebench/tasks/__init__.py index 41d0f748..e223bd90 100644 --- a/llmebench/tasks/__init__.py +++ b/llmebench/tasks/__init__.py @@ -6,6 +6,8 @@ from .ArabicPOS import ArabicPOSTask from .ArabicSegmentation import ArabicSegmentationTask from .Attentionworthy import AttentionworthyTask +from .ArPro import ArProTask +from .ArProSpan import ArProSpanTask from .Checkworthiness import CheckworthinessTask from .ClaimDetection import ClaimDetectionTask from .Classification import ClassificationTask
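For reference, the span scoring implemented in ArProSpanTask.compute_span_score reduces to an overlap measure: every predicted (technique, span) pair earns, for each gold span of the same technique, precision credit equal to the character overlap divided by the predicted span length, and recall credit equal to the same overlap divided by the gold span length; micro precision and recall then normalize these sums by the number of predicted and gold spans, respectively. Below is a minimal standalone sketch of that idea, with illustrative helper names that are not part of llmebench:

def overlap(gold_span, pred_span):
    # character-level overlap between two [start, end) spans
    return max(0, min(gold_span[1], pred_span[1]) - max(gold_span[0], pred_span[0]))

def span_scores(gold_spans, pred_spans):
    # gold_spans / pred_spans: lists of (technique, (start, end)) tuples
    prec_sum = sum(
        overlap(g[1], p[1]) / (p[1][1] - p[1][0])
        for p in pred_spans
        for g in gold_spans
        if g[0] == p[0]
    )
    rec_sum = sum(
        overlap(g[1], p[1]) / (g[1][1] - g[1][0])
        for p in pred_spans
        for g in gold_spans
        if g[0] == p[0]
    )
    precision = prec_sum / len(pred_spans) if pred_spans else 0.0
    recall = rec_sum / len(gold_spans) if gold_spans else 0.0
    f1 = 2 * precision * recall / (precision + recall) if precision + recall else 0.0
    return precision, recall, f1

gold = [("Loaded_Language", (0, 20))]
pred = [("Loaded_Language", (5, 25))]
print(span_scores(gold, pred))  # (0.75, 0.75, 0.75): a 15-character overlap over 20-character spans

With one gold span and one same-technique prediction as above, the 0.75 F1 from this sketch should match what the task scorer reports for that single pair.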