updating docs

expectedparrot · Feb 5, 2025 · 5c94407 · 5c94407
2 parents 9541d96 + 9db1bc9
commit 5c94407
Show file tree

Hide file tree

Showing 9 changed files with 698 additions and 8 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,8 +2,11 @@
 
 ## [0.1.43] - TBD
 ### Added
+- Method `ScenarioList.from_pdf_to_image(<filename>)` generates a scenario for each page of a PDF, converted into a JPEG image (for use as an image instead of converting the page to text).
+
 ### Changed
 ### Fixed
+- A bug preventing iterations on remote inference.
 
 
 ## [0.1.42] - 2025-01-24

diff --git a/docs/requirements.txt b/docs/requirements.txt
diff --git a/edsl/agents/Invigilator.py b/edsl/agents/Invigilator.py
@@ -156,7 +156,7 @@ def _extract_edsl_result_entry_and_validate(
                         self.question.question_options = new_question_options
 
                 question_with_validators = self.question.render(
-                    self.scenario | prior_answers_dict
+                    self.scenario | prior_answers_dict | {'agent':self.agent.traits}
                 )
                 question_with_validators.use_code = self.question.use_code
             else:

diff --git a/edsl/coop/coop.py b/edsl/coop/coop.py
@@ -549,6 +549,7 @@ def remote_cache_create_many(
     def remote_cache_get(
         self,
         exclude_keys: Optional[list[str]] = None,
+        select_keys: Optional[list[str]] = None,
     ) -> list[CacheEntry]:
         """
         Get all remote cache entries.
@@ -560,10 +561,12 @@ def remote_cache_get(
         """
         if exclude_keys is None:
             exclude_keys = []
+        if select_keys is None:
+            select_keys = []
         response = self._send_server_request(
             uri="api/v0/remote-cache/get-many",
             method="POST",
-            payload={"keys": exclude_keys},
+            payload={"keys": exclude_keys, "selected_keys": select_keys},
             timeout=40,
         )
         self._resolve_server_response(response)

diff --git a/edsl/questions/question_base_gen_mixin.py b/edsl/questions/question_base_gen_mixin.py
@@ -114,6 +114,7 @@ def render_string(value: str) -> str:
                         .render(strings_only_replacement_dict)
                     )
                 except Exception as e:
+                    #breakpoint()
                     import warnings
 
                     warnings.warn("Failed to render string: " + value)

diff --git a/edsl/scenarios/Scenario.py b/edsl/scenarios/Scenario.py
@@ -361,6 +361,39 @@ def from_pdf(cls, pdf_path: str):
         extractor = PdfExtractor(pdf_path)
         return Scenario(extractor.get_pdf_dict())
 
+    @classmethod
+    def from_pdf_to_image(cls, pdf_path, image_format="jpeg"):
+        """
+        Convert each page of a PDF into an image and collect all pages in a single scenario.
+
+        :param pdf_path: Path to the PDF file.
+        :param image_format: Format of the output images (default is 'jpeg').
+        :return: An instance of this class wrapping the single Scenario built below (not a ScenarioList).
+
+        Keys: "filepath" (the PDF path) plus one "page_{i}" FileStore per page, with i starting at 0.
+        """
+        import tempfile
+        from pdf2image import convert_from_path
+        from edsl.scenarios import Scenario
+
+        with tempfile.TemporaryDirectory() as output_folder:
+            # Render each PDF page to an image object via pdf2image
+            images = convert_from_path(pdf_path)
+
+            scenario_dict = {"filepath":pdf_path}
+
+            # Save each page in the temp folder (deleted when the block exits) and wrap it in a FileStore
+            for i, image in enumerate(images):
+                image_path = os.path.join(output_folder, f"page_{i}.{image_format}")
+                image.save(image_path, image_format.upper())
+
+                from edsl import FileStore
+                scenario_dict[f"page_{i}"] = FileStore(image_path)
+
+            scenario = Scenario(scenario_dict)
+
+            return cls(scenario)
+
     @classmethod
     def from_docx(cls, docx_path: str) -> "Scenario":
         """Creates a scenario from the text of a docx file.

diff --git a/edsl/scenarios/ScenarioList.py b/edsl/scenarios/ScenarioList.py
@@ -1135,7 +1135,7 @@ def from_excel(
         return cls(observations)
 
     @classmethod
-    def from_google_sheet(cls, url: str, sheet_name: str = None) -> ScenarioList:
+    def from_google_sheet(cls, url: str, sheet_name: str = None, column_names: Optional[List[str]]= None) -> ScenarioList:
         """Create a ScenarioList from a Google Sheet.
 
         This method downloads the Google Sheet as an Excel file, saves it to a temporary file,
@@ -1145,6 +1145,8 @@ def from_google_sheet(cls, url: str, sheet_name: str = None) -> ScenarioList:
             url (str): The URL to the Google Sheet.
             sheet_name (str, optional): The name of the sheet to load. If None, the method will behave
                                         the same as from_excel regarding multiple sheets.
+            column_names (List[str], optional): If provided, use these names for the columns instead
+                                              of the default column names from the sheet.
 
         Returns:
             ScenarioList: An instance of the ScenarioList class.
@@ -1172,8 +1174,25 @@ def from_google_sheet(cls, url: str, sheet_name: str = None) -> ScenarioList:
             temp_file.write(response.content)
             temp_filename = temp_file.name
 
-        # Call the from_excel class method with the temporary file
-        return cls.from_excel(temp_filename, sheet_name=sheet_name)
+        # First create the ScenarioList with default column names
+        scenario_list = cls.from_excel(temp_filename, sheet_name=sheet_name)
+
+        # If column_names is provided, create a new ScenarioList with the specified names
+        if column_names is not None:
+            if len(column_names) != len(scenario_list[0].keys()):
+                raise ValueError(
+                    f"Number of provided column names ({len(column_names)}) "
+                    f"does not match number of columns in sheet ({len(scenario_list[0].keys())})"
+                )
+
+            # Create a codebook mapping original keys to new names
+            original_keys = list(scenario_list[0].keys())
+            codebook = dict(zip(original_keys, column_names))
+
+            # Return new ScenarioList with renamed columns
+            return scenario_list.rename(codebook)
+        else:
+            return scenario_list
 
     @classmethod
     def from_delimited_file(

diff --git a/edsl/scenarios/ScenarioListPdfMixin.py b/edsl/scenarios/ScenarioListPdfMixin.py
@@ -148,7 +148,7 @@ def is_url(string):
             return False
 
     @classmethod
-    def _from_pdf_to_image(cls, pdf_path, image_format="jpeg"):
+    def from_pdf_to_image(cls, pdf_path, image_format="jpeg"):
         """
         Convert each page of a PDF into an image and create Scenario instances.
 
@@ -173,7 +173,6 @@ def _from_pdf_to_image(cls, pdf_path, image_format="jpeg"):
                 image_path = os.path.join(output_folder, f"page_{i+1}.{image_format}")
                 image.save(image_path, image_format.upper())
 
-                # scenario = Scenario._from_filepath_image(image_path)
                 from edsl import FileStore
                 scenario = Scenario({
                     "filepath":image_path,
@@ -182,7 +181,6 @@ def _from_pdf_to_image(cls, pdf_path, image_format="jpeg"):
                     })
                 scenarios.append(scenario)
 
-            # print(f"Saved {len(images)} pages as images in {output_folder}")
             return cls(scenarios)
 
     @staticmethod

diff --git a/test_question_base_gen_mixin.py b/test_question_base_gen_mixin.py
@@ -0,0 +1,71 @@
+import pytest
+from unittest.mock import patch
+from edsl import QuestionFreeText, QuestionMultipleChoice, ScenarioList, Agent, Model
+
+
+class TestQuestionBaseGenMixin:
+    def test_copy(self):
+        """Test that copy returns a distinct object with the same name and text."""
+        q = QuestionFreeText(question_name="color", question_text="What is your favorite color?")
+        q_copy = q.copy()
+
+        assert q_copy is not q  # Identity differs, so copy() produced a new object
+        assert q_copy.question_name == q.question_name
+        assert q_copy.question_text == q.question_text
+
+    def test_option_permutations(self):
+        """Test that option_permutations returns one question per ordering of the options."""
+        q = QuestionMultipleChoice(question_name="fruit", question_text="Pick a fruit", question_options=["Apple", "Banana", "Cherry"])
+        permutations = q.option_permutations()
+
+        assert len(permutations) == 6  # 3 options -> 3! = 6 orderings
+        assert all(len(p.question_options) == len(q.question_options) for p in permutations)
+
+    def test_draw(self):
+        """Test that draw returns a new question whose options follow the patched random.sample order."""
+        q = QuestionMultipleChoice(question_name="drink", question_text="Pick a drink", question_options=["Tea", "Coffee", "Juice"])
+
+        with patch("random.sample", return_value=["Juice", "Tea", "Coffee"]):
+            drawn = q.draw()
+
+        assert drawn is not q
+        assert set(drawn.question_options) == set(q.question_options)
+        assert drawn.question_options == ["Juice", "Tea", "Coffee"]
+
+    def test_loop(self):
+        """Test that loop creates one question per scenario, with the name rendered from the template."""
+        q = QuestionFreeText(question_name="base_{{subject}}", question_text="What are your thoughts on: {{subject}}?")
+        scenarios = ScenarioList.from_list("subject", ["Math", "Economics", "Chemistry"])
+        looped_questions = q.loop(scenarios)
+
+        assert len(looped_questions) == 3
+        assert looped_questions[0].question_name == "base_Math"
+        assert looped_questions[1].question_name == "base_Economics"
+        assert looped_questions[2].question_name == "base_Chemistry"
+
+    def test_render(self):
+        """Test that render substitutes template variables and that the rendered question runs with remote inference disabled."""
+        m = Model("test")  # NOTE(review): m is not referenced again in this test
+        a = Agent(traits = {"hair_color":"red"})  # NOTE(review): a is not referenced again; the value is passed to render() directly
+        q = QuestionFreeText(question_name = "test", question_text = "How do you say '{{ agent.hair_color }}' in German?")
+        rendered_q = q.render({"agent.hair_color": "red"})
+
+        assert rendered_q.question_text == "How do you say 'red' in German?"
+
+        rendered_q.run(disable_remote_inference=True, stop_on_exception=True)
+
+
+    def test_apply_function(self):
+        """Test that apply_function upper-cases the question text while leaving the question name as is."""
+        q = QuestionFreeText(question_name="color", question_text="What is your favorite color?")
+        upper_case_func = lambda x: x.upper()
+
+        transformed_q = q.apply_function(upper_case_func)
+
+        assert transformed_q.question_text == "WHAT IS YOUR FAVORITE COLOR?"
+        assert transformed_q.question_name == "color"  # The name must not be transformed
+
+
+if __name__ == "__main__":
+    import doctest
+    doctest.testmod()  # NOTE(review): no doctest examples are visible in this module; the tests above are written for pytest