diff --git a/open_flamingo/train/data.py b/open_flamingo/train/data.py index 828f75bb..9bbb27be 100644 --- a/open_flamingo/train/data.py +++ b/open_flamingo/train/data.py @@ -84,8 +84,6 @@ def preprocess_laion_text(sample, tokenizer, max_tokens=128): (f"{re.sub(CAPTION_BAN_PATTERN, '', s.split('<|synthetic caption|>')[-1].strip())}{tokenizer.eos_token}") for s in sample ] - print(sample[0]) - text = tokenizer( sample, max_length=max_tokens, @@ -528,4 +526,4 @@ def get_data(args, image_processor, tokenizer, dataset_type, epoch=0): args, image_processor=image_processor, epoch=epoch, tokenizer=tokenizer ) else: - raise ValueError(f"Unsupported dataset: {dataset_type}") \ No newline at end of file + raise ValueError(f"Unsupported dataset: {dataset_type}")