From 1e4c7869998f04df5edc57c78c2782dc04140a66 Mon Sep 17 00:00:00 2001 From: robin Date: Wed, 29 Jan 2025 13:57:53 -0500 Subject: [PATCH] modifying method to create scenariolist of images from a pdf --- edsl/scenarios/ScenarioListPdfMixin.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/edsl/scenarios/ScenarioListPdfMixin.py b/edsl/scenarios/ScenarioListPdfMixin.py index d1451f06b..0d68bbb0f 100644 --- a/edsl/scenarios/ScenarioListPdfMixin.py +++ b/edsl/scenarios/ScenarioListPdfMixin.py @@ -155,6 +155,8 @@ def _from_pdf_to_image(cls, pdf_path, image_format="jpeg"): :param pdf_path: Path to the PDF file. :param image_format: Format of the output images (default is 'jpeg'). :return: ScenarioList instance containing the Scenario instances. + + The scenario list has keys "filepath", "page", "content". """ import tempfile from pdf2image import convert_from_path @@ -171,7 +173,13 @@ def _from_pdf_to_image(cls, pdf_path, image_format="jpeg"): image_path = os.path.join(output_folder, f"page_{i+1}.{image_format}") image.save(image_path, image_format.upper()) - scenario = Scenario._from_filepath_image(image_path) + # scenario = Scenario._from_filepath_image(image_path) + from edsl import FileStore + scenario = Scenario({ + "filepath":image_path, + "page":i, + "content":FileStore(image_path) + }) scenarios.append(scenario) # print(f"Saved {len(images)} pages as images in {output_folder}")