ome · JulianHn · Mar 28, 2022 · Mar 28, 2022 · Jun 2, 2022 · Jun 2, 2022
diff --git a/omero/import_scripts/Populate_Metadata.py b/omero/import_scripts/Populate_Metadata.py
@@ -107,7 +107,7 @@ def populate_metadata(client, conn, script_params):
     object_ids = script_params["IDs"]
     object_id = object_ids[0]
     data_type = script_params["Data_Type"]
-
+    encoding = script_params["CSV Encoding"]
     if data_type == "Image":
         try:
             from omero_metadata.populate import ImageWrapper    # noqa: F401
@@ -120,7 +120,7 @@ def populate_metadata(client, conn, script_params):
     original_file = get_original_file(
         conn, data_type, object_id, file_ann_id)
     provider = DownloadingOriginalFileProvider(conn)
-    data_for_preprocessing = provider.get_original_file_data(original_file)
+    data_for_preprocessing = provider.get_original_file_data(original_file, encoding=encoding)
     temp_name = data_for_preprocessing.name
     # 5.9.1 returns NamedTempFile where name is a string.
     if isinstance(temp_name, int):
@@ -172,6 +172,13 @@ def run_script():
             "File_Annotation", grouping="3",
             description="File Annotation ID containing metadata to populate. "
             "Note this is not the same as the File ID."),
+
+        scripts.String(
+            "CSV Encoding", grouping="4",
+            description="""Encoding of the CSV File provided. Can depend on your system locale 
+            as well as the program used to generate the CSV File. E.g. Excel defaults to machine specific
+            ANSI encoding during export to CSV (i.e. cp1252 on US machines, iso-8859-1 on german machines ...).""",
+            default="utf-8"),
 
         authors=["Emil Rozbicki", "OME Team"],
         institutions=["Glencoe Software Inc."],

diff --git a/test/integration/test_import_scripts.py b/test/integration/test_import_scripts.py
@@ -124,3 +124,107 @@ def test_populate_metadata_for_screen(self):
         assert message is not None
         assert message.getValue().startswith('Table data populated')
         conn.close()
+
+    def test_populate_metadata_for_cp1252(self):
+        sid = super(TestImportScripts, self).get_script(populate_metadata)
+        assert sid > 0
+        # link one imported plate (3 cols x 1 row requested) to a new screen
+        client, user = self.new_client_and_user()
+        update_service = client.getSession().getUpdateService()
+        plates = self.import_plates(client, plate_cols=3, plate_rows=1)
+        plate = plates[0]
+        name = plate.name.val  # reused as the "Plate" column value
+        screen = omero.model.ScreenI()
+        screen.name = omero.rtypes.rstring("test_for_cp1252")
+        spl = omero.model.ScreenPlateLinkI()
+        spl.setParent(screen)
+        spl.setChild(plate)
+        spl = update_service.saveAndReturnObject(spl)
+        screen_id = spl.getParent().id.val
+        assert screen_id > 0
+        assert spl.getChild().id.val == plate.id.val
+
+        cvs_file = create_path("test_cp1252", ".csv")
+        # write the CSV as cp1252 bytes; attached to the screen below.
+        # NOTE(review): ASCII-only literals; consider a non-ASCII cell
+        with open(cvs_file.abspath(), 'wb+') as f:
+            f.write("Well,Plate, Well Type, Facility-Salt-Batch-ID\n".encode("cp1252"))
+            f.write(("A01,%s,Treatment,FOOL10041-101-2\n" % name).encode("cp1252"))
+            f.write(("A02,%s,Control,\n" % name).encode("cp1252"))
+            f.write(("A03,%s,Treatment,FOOL10041-101-2\n" % name).encode("cp1252"))
+
+        conn = BlitzGateway(client_obj=client)
+        fa = conn.createFileAnnfromLocalFile(cvs_file, mimetype="text/csv")
+        assert fa is not None
+        assert fa.id > 0
+        link = omero.model.ScreenAnnotationLinkI()
+        link.setParent(omero.model.ScreenI(screen_id, False))
+        link.setChild(omero.model.FileAnnotationI(fa.id, False))
+        link = update_service.saveAndReturnObject(link)
+        assert link.id.val > 0
+        # run the script on the screen, passing the CSV's encoding
+        screen_ids = []
+        screen_ids.append(spl.getParent().id)
+
+        args = {
+            "Data_Type": omero.rtypes.rstring("Screen"),
+            "IDs": omero.rtypes.rlist(screen_ids),
+            "File_Annotation": omero.rtypes.rstring(str(fa.id)),
+            "CSV Encoding": omero.rtypes.rstring(str("cp1252"))
+        }
+        message = run_script(client, sid, args, "Message")
+        assert message is not None
+        assert message.getValue().startswith('Table data populated')
+        conn.close()
+
+    def test_populate_metadata_for_iso8859(self):
+        sid = super(TestImportScripts, self).get_script(populate_metadata)
+        assert sid > 0
+        # link one imported plate (3 cols x 1 row requested) to a new screen
+        client, user = self.new_client_and_user()
+        update_service = client.getSession().getUpdateService()
+        plates = self.import_plates(client, plate_cols=3, plate_rows=1)
+        plate = plates[0]
+        name = plate.name.val  # reused as the "Plate" column value
+        screen = omero.model.ScreenI()
+        screen.name = omero.rtypes.rstring("test_for_iso8859")
+        spl = omero.model.ScreenPlateLinkI()
+        spl.setParent(screen)
+        spl.setChild(plate)
+        spl = update_service.saveAndReturnObject(spl)
+        screen_id = spl.getParent().id.val
+        assert screen_id > 0
+        assert spl.getChild().id.val == plate.id.val
+
+        cvs_file = create_path("test_iso8859", ".csv")
+        # write the CSV as iso-8859-1 bytes; attached to the screen below.
+        # NOTE(review): ASCII-only literals; consider a non-ASCII cell
+        with open(cvs_file.abspath(), 'wb+') as f:
+            f.write("Well,Plate, Well Type, Facility-Salt-Batch-ID\n".encode("iso-8859-1"))
+            f.write(("A01,%s,Treatment,FOOL10041-101-2\n" % name).encode("iso-8859-1"))
+            f.write(("A02,%s,Control,\n" % name).encode("iso-8859-1"))
+            f.write(("A03,%s,Treatment,FOOL10041-101-2\n" % name).encode("iso-8859-1"))
+
+        conn = BlitzGateway(client_obj=client)
+        fa = conn.createFileAnnfromLocalFile(cvs_file, mimetype="text/csv")
+        assert fa is not None
+        assert fa.id > 0
+        link = omero.model.ScreenAnnotationLinkI()
+        link.setParent(omero.model.ScreenI(screen_id, False))
+        link.setChild(omero.model.FileAnnotationI(fa.id, False))
+        link = update_service.saveAndReturnObject(link)
+        assert link.id.val > 0
+        # run the script on the screen, passing the CSV's encoding
+        screen_ids = []
+        screen_ids.append(spl.getParent().id)
+
+        args = {
+            "Data_Type": omero.rtypes.rstring("Screen"),
+            "IDs": omero.rtypes.rlist(screen_ids),
+            "File_Annotation": omero.rtypes.rstring(str(fa.id)),
+            "CSV Encoding": omero.rtypes.rstring(str("iso-8859-1"))
+        }
+        message = run_script(client, sid, args, "Message")
+        assert message is not None
+        assert message.getValue().startswith('Table data populated')
+        conn.close()