Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Llava-1.5-hf ; Update to version 0.4.5 #2388

Merged
merged 1 commit into from
Oct 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 0 additions & 13 deletions lm_eval/api/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,6 @@ class TaskConfig(dict):
task: Optional[str] = None
task_alias: Optional[str] = None
tag: Optional[Union[str, list]] = None
group: Optional[Union[str, list]] = None
# HF dataset options.
# which dataset to use,
# and what splits for what purpose
Expand Down Expand Up @@ -98,18 +97,6 @@ class TaskConfig(dict):
)

def __post_init__(self) -> None:
if self.group is not None:
eval_logger.warning(
"A task YAML file was found to contain a `group` key. Groups which provide aggregate scores over several subtasks now require a separate config file--if not aggregating, you may want to use the `tag` config option instead within your config. Setting `group` within a TaskConfig will be deprecated in v0.4.4. Please see https://github.com/EleutherAI/lm-evaluation-harness/blob/main/docs/task_guide.md for more information."
)

if self.tag is None:
self.tag = self.group
else:
raise ValueError(
"Got both a `group` and `tag` entry within a TaskConfig. Please use one or the other--`group` values will be deprecated in v0.4.4."
)

if self.generation_kwargs is not None:
if self.output_type != "generate_until":
eval_logger.warning(
Expand Down
5 changes: 5 additions & 0 deletions lm_eval/models/hf_vlms.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from lm_eval.models.huggingface import HFLM
from lm_eval.models.utils import (
Collator,
flatten_image_list,
pad_and_concat,
replace_placeholders,
stop_sequences_criteria,
Expand Down Expand Up @@ -293,6 +294,10 @@ def tok_batch_multimodal_encode(
if self.rgb:
images = [[img.convert("RGB") for img in sublist] for sublist in images]

# certain models like llava expect a single-level image list even for bs>1, multi-image. TODO: port this over to loglikelihoods
if getattr(self.config, "model_type", "") == "llava":
images = flatten_image_list(images)

encoding = self.processor(
images=images,
text=strings,
Expand Down
11 changes: 11 additions & 0 deletions lm_eval/models/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -698,3 +698,14 @@ def replace_placeholders(
# Add the last part of the string
result.append(parts[-1])
return "".join(result)


def flatten_image_list(images: List[List]) -> List:
    """
    Flatten a list of image lists into a single list, preserving order.

    Used for some multimodal models like Llava-1.5 which expects this
    flattened-list format for its image processor.

    :param images: A list of lists of PIL images.
    :return: a new list of PIL images, via concatenating all the sub-lists in order.
        Empty sub-lists contribute nothing; an empty outer list yields ``[]``.
    """
    # A nested comprehension builds a fresh list, so the caller's nested
    # structure is never mutated or aliased by the result.
    return [image for image_list in images for image in image_list]
49 changes: 18 additions & 31 deletions lm_eval/tasks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -443,39 +443,26 @@ def _get_task_and_group(self, task_dir: str):

def _populate_tags_and_groups(config, task, tasks_and_groups, print_info):
# TODO: remove group in next release
for attr in ["tag", "group"]:
if attr in config:
if attr == "group" and print_info:
if "tag" in config:
attr_list = config["tag"]
if isinstance(attr_list, str):
attr_list = [attr_list]

for tag in attr_list:
if tag not in tasks_and_groups:
tasks_and_groups[tag] = {
"type": "tag",
"task": [task],
"yaml_path": -1,
}
elif tasks_and_groups[tag]["type"] != "tag":
self.logger.info(
"`group` and `group_alias` keys in TaskConfigs are deprecated and will be removed in v0.4.5 of lm_eval. "
"The new `tag` field will be used to allow for a shortcut to a group of tasks one does not wish to aggregate metrics across. "
"`group`s which aggregate across subtasks must be only defined in a separate group config file, "
"which will be the official way to create groups that support cross-task aggregation as in `mmlu`. "
"Please see the v0.4.4 patch notes and our documentation: https://github.com/EleutherAI/lm-evaluation-harness/blob/main/docs/new_task_guide.md#advanced-group-configs "
"for more information."
f"The tag '{tag}' is already registered as a group, this tag will not be registered. "
"This may affect tasks you want to call."
)
print_info = False
# attr = "tag"

attr_list = config[attr]
if isinstance(attr_list, str):
attr_list = [attr_list]

for tag in attr_list:
if tag not in tasks_and_groups:
tasks_and_groups[tag] = {
"type": "tag",
"task": [task],
"yaml_path": -1,
}
elif tasks_and_groups[tag]["type"] != "tag":
self.logger.info(
f"The tag {tag} is already registered as a group, this tag will not be registered. "
"This may affect tasks you want to call."
)
break
else:
tasks_and_groups[tag]["task"].append(task)
break
else:
tasks_and_groups[tag]["task"].append(task)

# TODO: remove group in next release
print_info = True
Expand Down
Loading