ome · JulianHn · Mar 28, 2022 · Mar 28, 2022 · Jun 2, 2022 · Jun 2, 2022
diff --git a/omero/import_scripts/Populate_Metadata.py b/omero/import_scripts/Populate_Metadata.py
@@ -58,6 +58,33 @@
     for additional features: https://pypi.org/project/omero-metadata/
     """
 
+# Detect whether the installed omero-py lets
+# DownloadingOriginalFileProvider.get_original_file_data() accept an
+# "encoding" argument, i.e. whether CSV encodings other than utf-8 can be
+# requested.
+# If yes, set EncSup and collect every text encoding this Python offers.
+# If no, fall back to utf-8 and append a warning for the user to DEPRECATED.
+
+# Only look at the declared parameters: co_varnames also lists plain local
+# variables, which must not be mistaken for an "encoding" argument.
+_reader_code = DownloadingOriginalFileProvider.get_original_file_data.__code__
+_reader_params = _reader_code.co_varnames[
+    :_reader_code.co_argcount + _reader_code.co_kwonlyargcount]
+
+if "encoding" in _reader_params:
+    import encodings
+    import pkgutil
+    EncSup = True
+    _usable_codecs = set()
+    for _module_info in pkgutil.iter_modules(encodings.__path__):
+        _codec_name = _module_info[1]
+        try:
+            # Probe the codec: modules that are not text encodings (or are
+            # unavailable on this platform) raise here and are left out.
+            "".encode(_codec_name)
+        except Exception:
+            continue
+        _usable_codecs.add(_codec_name.replace("_", "-"))
+    # Sorted so the list offered to the user has a stable, readable order.
+    AvailEncodings = sorted(_usable_codecs)
+else:
+    encoding = 'utf-8'
+    EncSup = False
+    DEPRECATED += """
+    Warning: This script is using an omero-py version without support
+    for different CSV encodings. All CSV files will be assumed to be
+    utf-8 encoded. If you need support for different encodings,
+    ask your administrator to update the installation.
+    """
+
 
 def link_file_ann(conn, object_type, object_id, file_ann_id):
     """Link File Annotation to the Object, if not already linked."""
@@ -108,6 +135,9 @@ def populate_metadata(client, conn, script_params):
     object_id = object_ids[0]
     data_type = script_params["Data_Type"]
 
+    if EncSup:  # Only get from user if support for encoding is there
+        encoding = script_params["CSV Encoding"]
+
     if data_type == "Image":
         try:
             from omero_metadata.populate import ImageWrapper    # noqa: F401
@@ -120,7 +150,13 @@ def populate_metadata(client, conn, script_params):
     original_file = get_original_file(
         conn, data_type, object_id, file_ann_id)
     provider = DownloadingOriginalFileProvider(conn)
-    data_for_preprocessing = provider.get_original_file_data(original_file)
+    try:
+        data_for_preprocessing = provider.get_original_file_data(original_file, encoding=encoding)
+    except ValueError as e:
+        raise ValueError("The CSV file provided could not be decoded using "
+                         "the specified encoding. Please check the encoding "
+                         "and contents of the file!") from e
+
     temp_name = data_for_preprocessing.name
     # 5.9.1 returns NamedTempFile where name is a string.
     if isinstance(temp_name, int):
@@ -150,16 +186,8 @@ def populate_metadata(client, conn, script_params):
 def run_script():
 
     data_types = [rstring(otype) for otype in OBJECT_TYPES]
-    client = scripts.client(
-        'Populate_Metadata.py',
-        """
-    This script processes a CSV file, using it to
-    'populate' an OMERO.table, with one row per Image, Well or ROI.
-    The table data can then be displayed in the OMERO clients.
-    For full details of the supported CSV format, see
-    https://github.com/ome/omero-metadata/#populate
-        """ + DEPRECATED,
-        scripts.String(
+
+    fields = [scripts.String(
             "Data_Type", optional=False, grouping="1",
             description="Choose source of images",
             values=data_types, default=OBJECT_TYPES[0]),
@@ -171,8 +199,29 @@ def run_script():
         scripts.String(
             "File_Annotation", grouping="3",
             description="File Annotation ID containing metadata to populate. "
-            "Note this is not the same as the File ID."),
+            "Note this is not the same as the File ID.")]
+
+    # Add Encoding field if support for encodings
+    if EncSup:
+        fields.append(scripts.String(
+            "CSV Encoding", grouping="4",
+            description="""Encoding of the CSV File provided. Can depend on
+            your system locale as well as the program used to generate the
+            CSV File. E.g. Excel defaults to machine specific ANSI encoding
+            during export to CSV (i.e. cp1252 on US machines,
+            iso-8859-1 on german machines ...).""",
+            values=AvailEncodings, default="utf-8"))
 
+    client = scripts.client(
+        'Populate_Metadata.py',
+        """
+    This script processes a CSV file, using it to
+    'populate' an OMERO.table, with one row per Image, Well or ROI.
+    The table data can then be displayed in the OMERO clients.
+    For full details of the supported CSV format, see
+    https://github.com/ome/omero-metadata/#populate
+        """ + DEPRECATED,
+        *fields,
         authors=["Emil Rozbicki", "OME Team"],
         institutions=["Glencoe Software Inc."],
         contact="[email protected]",

diff --git a/test/integration/test_import_scripts.py b/test/integration/test_import_scripts.py
@@ -124,3 +124,79 @@ def test_populate_metadata_for_screen(self):
         assert message is not None
         assert message.getValue().startswith('Table data populated')
         conn.close()
+
+    def test_populate_metadata_for_encodings(self):
+        """Run Populate_Metadata on a CSV written in each text encoding.
+
+        For every text encoding this Python offers, a Screen holding one
+        imported Plate is created, a CSV with non-ASCII characters is
+        written in that encoding and attached to the Screen, and the script
+        is run with "CSV Encoding" set to it. The run must either populate
+        the table or fail with the script's decoding error message.
+        Encodings that cannot represent the test strings are skipped, and
+        the whole test returns early when omero-py has no encoding support.
+        """
+        sid = super(TestImportScripts, self).get_script(populate_metadata)
+        assert sid > 0
+        import encodings
+        import inspect
+        import pkgutil
+        from omero.util.populate_roi import DownloadingOriginalFileProvider
+
+        # Skip test if the omero-py version does not support encodings
+        reader = DownloadingOriginalFileProvider.get_original_file_data
+        if "encoding" not in inspect.signature(reader).parameters:
+            print("Skipping test of populate_metadata.py for encodings "
+                  "as omero-py version does not support it!")
+            return
+
+        # Collect every codec module that works as a text encoding here.
+        avail_encodings = set()
+        for module_info in pkgutil.iter_modules(encodings.__path__):
+            codec_name = module_info[1]
+            try:
+                "".encode(codec_name)
+            except Exception:
+                # Not a text encoding, or unavailable on this platform.
+                continue
+            avail_encodings.add(codec_name.replace("_", "-"))
+
+        header = "Well, Plate, Well Type, Facility-Salt-Batch-ID, Comment,\n"
+        rows = (
+            "A01, %s, Treatment, FOOL10041-101-2, "
+            "TestString containing greek µ\n"
+            "A02, %s, Control, FOOL10041-101-2, "
+            "TestString containing symbol ±\n"
+            "A03, %s, Treatment, FOOL10041-101-2,"
+            "TestString containing special character §\n"
+        )
+
+        client, user = self.new_client_and_user()
+        conn = BlitzGateway(client_obj=client)
+        try:
+            update_service = client.getSession().getUpdateService()
+
+            for enc in sorted(avail_encodings):
+                plates = self.import_plates(
+                    client, plate_cols=3, plate_rows=1)
+                plate = plates[0]
+                name = plate.name.val
+
+                # Encode the whole file in one go so encodings with a BOM
+                # or shift state emit it once instead of once per line.
+                try:
+                    payload = (header + rows % (name, name, name)).encode(enc)
+                except UnicodeError:
+                    # Skip if test strings are not supported
+                    continue
+
+                screen = omero.model.ScreenI()
+                screen.name = omero.rtypes.rstring("test_for_%s" % (enc))
+                spl = omero.model.ScreenPlateLinkI()
+                spl.setParent(screen)
+                spl.setChild(plate)
+                spl = update_service.saveAndReturnObject(spl)
+                screen_id = spl.getParent().id.val
+                assert screen_id > 0
+                assert spl.getChild().id.val == plate.id.val
+
+                # create a file annotation.
+                csv_file = create_path("test_%s" % enc, ".csv")
+                with open(csv_file.abspath(), 'wb') as f:
+                    f.write(payload)
+                fa = conn.createFileAnnfromLocalFile(
+                    csv_file, mimetype="text/csv")
+                assert fa is not None
+                assert fa.id > 0
+                link = omero.model.ScreenAnnotationLinkI()
+                link.setParent(omero.model.ScreenI(screen_id, False))
+                link.setChild(omero.model.FileAnnotationI(fa.id, False))
+                link = update_service.saveAndReturnObject(link)
+                assert link.id.val > 0
+
+                # run the script
+                screen_ids = [spl.getParent().id]
+                args = {
+                    "Data_Type": omero.rtypes.rstring("Screen"),
+                    "IDs": omero.rtypes.rlist(screen_ids),
+                    "File_Annotation": omero.rtypes.rstring(str(fa.id)),
+                    "CSV Encoding": omero.rtypes.rstring(str(enc))
+                }
+                try:
+                    message = run_script(client, sid, args, "Message")
+                except ValueError as e:
+                    assert str(e).startswith('The CSV file provided could')
+                else:
+                    assert message is not None
+                    assert message.getValue().startswith(
+                        'Table data populated')
+        finally:
+            # Release the gateway even when an assertion above fails.
+            conn.close()