EleutherAI · haileyschoelkopf · Oct 8, 2024 · Oct 8, 2024
@@ -57,7 +57,6 @@ class TaskConfig(dict):
     task: Optional[str] = None
     task_alias: Optional[str] = None
     tag: Optional[Union[str, list]] = None
-    group: Optional[Union[str, list]] = None
     # HF dataset options.
     # which dataset to use,
     # and what splits for what purpose
@@ -98,18 +97,6 @@ class TaskConfig(dict):
     )
 
     def __post_init__(self) -> None:
-        if self.group is not None:
-            eval_logger.warning(
-                "A task YAML file was found to contain a `group` key. Groups which provide aggregate scores over several subtasks now require a separate config file--if not aggregating, you may want to use the `tag` config option instead within your config. Setting `group` within a TaskConfig will be deprecated in v0.4.4. Please see https://github.com/EleutherAI/lm-evaluation-harness/blob/main/docs/task_guide.md for more information."
-            )
-
-            if self.tag is None:
-                self.tag = self.group
-            else:
-                raise ValueError(
-                    "Got both a `group` and `tag` entry within a TaskConfig. Please use one or the other--`group` values will be deprecated in v0.4.4."
-                )
-
         if self.generation_kwargs is not None:
             if self.output_type != "generate_until":
                 eval_logger.warning(

@@ -13,6 +13,7 @@
 from lm_eval.models.huggingface import HFLM
 from lm_eval.models.utils import (
     Collator,
+    flatten_image_list,
     pad_and_concat,
     replace_placeholders,
     stop_sequences_criteria,
@@ -293,6 +294,10 @@ def tok_batch_multimodal_encode(
         if self.rgb:
             images = [[img.convert("RGB") for img in sublist] for sublist in images]
 
+        # Certain models (e.g. llava) expect a single flat list of images, even when batch size > 1 and examples carry multiple images. TODO: port this over to loglikelihoods
+        if getattr(self.config, "model_type", "") == "llava":
+            images = flatten_image_list(images)
+
         encoding = self.processor(
             images=images,
             text=strings,

@@ -698,3 +698,14 @@ def replace_placeholders(
     # Add the last part of the string
     result.append(parts[-1])
     return "".join(result)
+
+
+def flatten_image_list(images: List[List]):
+    """
+    Flattens a list of lists of images into one new list holding every image, in the original order.
+    Used for some multimodal models like Llava-1.5 which expect this flattened-list format for their image processor.
+
+    :param images: A list of lists of PIL images. Empty sub-lists contribute nothing; elements are not inspected.
+    :return: a new list of PIL images, built by concatenating all the sub-lists in order.
+    """
+    return [image for image_list in images for image in image_list]
@@ -443,39 +443,26 @@ def _get_task_and_group(self, task_dir: str):
 
         def _populate_tags_and_groups(config, task, tasks_and_groups, print_info):
             # TODO: remove group in next release
-            for attr in ["tag", "group"]:
-                if attr in config:
-                    if attr == "group" and print_info:
+            if "tag" in config:
+                attr_list = config["tag"]
+                if isinstance(attr_list, str):
+                    attr_list = [attr_list]
+
+                for tag in attr_list:
+                    if tag not in tasks_and_groups:
+                        tasks_and_groups[tag] = {
+                            "type": "tag",
+                            "task": [task],
+                            "yaml_path": -1,
+                        }
+                    elif tasks_and_groups[tag]["type"] != "tag":
                         self.logger.info(
-                            "`group` and `group_alias` keys in TaskConfigs are deprecated and will be removed in v0.4.5 of lm_eval. "
-                            "The new `tag` field will be used to allow for a shortcut to a group of tasks one does not wish to aggregate metrics across. "
-                            "`group`s which aggregate across subtasks must be only defined in a separate group config file, "
-                            "which will be the official way to create groups that support cross-task aggregation as in `mmlu`. "
-                            "Please see the v0.4.4 patch notes and our documentation: https://github.com/EleutherAI/lm-evaluation-harness/blob/main/docs/new_task_guide.md#advanced-group-configs "
-                            "for more information."
+                            f"The tag '{tag}' is already registered as a group, this tag will not be registered. "
+                            "This may affect tasks you want to call."
                         )
-                        print_info = False
-                        # attr = "tag"
-
-                    attr_list = config[attr]
-                    if isinstance(attr_list, str):
-                        attr_list = [attr_list]
-
-                    for tag in attr_list:
-                        if tag not in tasks_and_groups:
-                            tasks_and_groups[tag] = {
-                                "type": "tag",
-                                "task": [task],
-                                "yaml_path": -1,
-                            }
-                        elif tasks_and_groups[tag]["type"] != "tag":
-                            self.logger.info(
-                                f"The tag {tag} is already registered as a group, this tag will not be registered. "
-                                "This may affect tasks you want to call."
-                            )
-                            break
-                        else:
-                            tasks_and_groups[tag]["task"].append(task)
+                        break
+                    else:
+                        tasks_and_groups[tag]["task"].append(task)
 
         # TODO: remove group in next release
         print_info = True