Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding Support for different CSV Encodings in Import_Scripts/Populate_Metadata.py #198

Open
wants to merge 16 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
16 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 61 additions & 12 deletions omero/import_scripts/Populate_Metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,33 @@
for additional features: https://pypi.org/project/omero-metadata/
"""

# Check whether DownloadingOriginalFileProvider.get_original_file_data
# accepts an "encoding" argument, i.e. whether encodings other than utf-8
# are supported by the installed version.
# If yes, query all available encodings and set a flag.
# If no, add information for the user.
import encodings
import inspect
import os

# Look at the real parameter list: __code__.co_varnames also contains the
# function's local variables, so a local called "encoding" would wrongly be
# reported as support for the keyword argument.
EncSup = "encoding" in inspect.signature(
    DownloadingOriginalFileProvider.get_original_file_data).parameters

if EncSup:
    # Probe every file in the stdlib "encodings" package: a name is offered
    # to the user only if str.encode() accepts it as a text encoding.
    _usable = set()
    for _entry in os.listdir(os.path.dirname(encodings.__file__)):
        _codec = os.path.splitext(_entry)[0]
        try:
            "".encode(_codec)
        except (LookupError, ValueError):
            # LookupError: not a codec / not a text encoding / unavailable
            # on this platform. ValueError covers UnicodeError, which some
            # codecs raise unconditionally.
            continue
        _usable.add(_codec.replace("_", "-"))
    # Sorted so the choices appear in a stable order, independent of the
    # directory listing order.
    AvailEncodings = sorted(_usable)
else:
    # Fallback used by populate_metadata when the user cannot choose.
    encoding = 'utf-8'
    DEPRECATED += """
Warning: This script is using an omero-py version without support
for different CSV encodings. All CSV files will be assumed to be
utf-8 encoded. If you need support for different encodings,
ask your administrator to update the installation.
"""


def link_file_ann(conn, object_type, object_id, file_ann_id):
"""Link File Annotation to the Object, if not already linked."""
Expand Down Expand Up @@ -108,6 +135,9 @@ def populate_metadata(client, conn, script_params):
object_id = object_ids[0]
data_type = script_params["Data_Type"]

if EncSup: # Only get from user if support for encoding is there
encoding = script_params["CSV Encoding"]

if data_type == "Image":
try:
from omero_metadata.populate import ImageWrapper # noqa: F401
Expand All @@ -120,7 +150,13 @@ def populate_metadata(client, conn, script_params):
original_file = get_original_file(
conn, data_type, object_id, file_ann_id)
provider = DownloadingOriginalFileProvider(conn)
data_for_preprocessing = provider.get_original_file_data(original_file)
try:
data_for_preprocessing = provider.get_original_file_data(original_file, encoding=encoding)
except ValueError as e:
raise ValueError("The CSV file provided could not be decoded using "
"the specified encoding. Please check the encoding "
"and contents of the file!") from e

temp_name = data_for_preprocessing.name
# 5.9.1 returns NamedTempFile where name is a string.
if isinstance(temp_name, int):
Expand Down Expand Up @@ -150,16 +186,8 @@ def populate_metadata(client, conn, script_params):
def run_script():

data_types = [rstring(otype) for otype in OBJECT_TYPES]
client = scripts.client(
'Populate_Metadata.py',
"""
This script processes a CSV file, using it to
'populate' an OMERO.table, with one row per Image, Well or ROI.
The table data can then be displayed in the OMERO clients.
For full details of the supported CSV format, see
https://github.com/ome/omero-metadata/#populate
""" + DEPRECATED,
scripts.String(

fields = [scripts.String(
"Data_Type", optional=False, grouping="1",
description="Choose source of images",
values=data_types, default=OBJECT_TYPES[0]),
Expand All @@ -171,8 +199,29 @@ def run_script():
scripts.String(
"File_Annotation", grouping="3",
description="File Annotation ID containing metadata to populate. "
"Note this is not the same as the File ID."),
"Note this is not the same as the File ID.")]

# Add Encoding field if support for encodings
if EncSup:
fields.append(scripts.String(
"CSV Encoding", grouping="4",
description="""Encoding of the CSV File provided. Can depend on
your system locale as well as the program used to generate the
CSV File. E.g. Excel defaults to machine specific ANSI encoding
during export to CSV (i.e. cp1252 on US machines,
iso-8859-1 on german machines ...).""",
values=AvailEncodings, default="utf-8"))

client = scripts.client(
'Populate_Metadata.py',
"""
This script processes a CSV file, using it to
'populate' an OMERO.table, with one row per Image, Well or ROI.
The table data can then be displayed in the OMERO clients.
For full details of the supported CSV format, see
https://github.com/ome/omero-metadata/#populate
""" + DEPRECATED,
*fields,
authors=["Emil Rozbicki", "OME Team"],
institutions=["Glencoe Software Inc."],
contact="[email protected]",
Expand Down
76 changes: 76 additions & 0 deletions test/integration/test_import_scripts.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,3 +124,79 @@ def test_populate_metadata_for_screen(self):
assert message is not None
assert message.getValue().startswith('Table data populated')
conn.close()

def test_populate_metadata_for_encodings(self):
    """Run Populate_Metadata once per text encoding known to Python.

    For every encoding that can represent the test strings, a Screen with
    one Plate is created, a CSV file written in that encoding is attached
    to the Screen, and the script is run with the matching "CSV Encoding"
    parameter. The test is skipped (returns early) when
    DownloadingOriginalFileProvider.get_original_file_data has no
    "encoding" parameter.
    """
    sid = super(TestImportScripts, self).get_script(populate_metadata)
    assert sid > 0
    import inspect
    import os
    from omero.util.populate_roi import DownloadingOriginalFileProvider

    # Skip test if the omero-py version does not support encodings.
    # The real parameters are inspected because __code__.co_varnames also
    # lists local variables.
    provider_params = inspect.signature(
        DownloadingOriginalFileProvider.get_original_file_data).parameters
    if "encoding" not in provider_params:
        print("Skipping test of populate_metadata.py for encodings "
              "as omero-py version does not support it!")
        return

    # Collect every module of the stdlib "encodings" package that
    # str.encode() accepts as a text encoding.
    usable = set()
    encodings_dir = os.path.dirname(__import__("encodings").__file__)
    for entry in os.listdir(encodings_dir):
        codec = os.path.splitext(entry)[0]
        try:
            "".encode(codec)
        except (LookupError, ValueError):
            # Not a (text) codec, unavailable here, or always failing.
            continue
        usable.add(codec.replace("_", "-"))

    # One "%s" per data row, filled with the plate name below.
    csv_template = (
        "Well, Plate, Well Type, Facility-Salt-Batch-ID, Comment,\n"
        "A01, %s, Treatment, FOOL10041-101-2, "
        "TestString containing greek µ\n"
        "A02, %s, Control, FOOL10041-101-2, "
        "TestString containing symbol ±\n"
        "A03, %s, Treatment, FOOL10041-101-2,"
        "TestString containing special character §\n"
    )

    client, user = self.new_client_and_user()
    conn = BlitzGateway(client_obj=client)
    try:
        update_service = client.getSession().getUpdateService()

        for enc in sorted(usable):
            plates = self.import_plates(client, plate_cols=3, plate_rows=1)
            plate = plates[0]
            name = plate.name.val

            # Encode the whole file in one go so that BOM-emitting codecs
            # write a single BOM instead of one per line.
            try:
                csv_bytes = (csv_template % (name, name, name)).encode(enc)
            except UnicodeError:
                # Skip if test strings are not supported by this encoding
                continue

            screen = omero.model.ScreenI()
            screen.name = omero.rtypes.rstring("test_for_%s" % (enc))
            spl = omero.model.ScreenPlateLinkI()
            spl.setParent(screen)
            spl.setChild(plate)
            spl = update_service.saveAndReturnObject(spl)
            screen_id = spl.getParent().id.val
            assert screen_id > 0
            assert spl.getChild().id.val == plate.id.val

            # create a file annotation.
            cvs_file = create_path("test_cp1252", ".csv")
            with open(cvs_file.abspath(), 'wb') as f:
                f.write(csv_bytes)
            fa = conn.createFileAnnfromLocalFile(
                cvs_file, mimetype="text/csv")
            assert fa is not None
            assert fa.id > 0
            link = omero.model.ScreenAnnotationLinkI()
            link.setParent(omero.model.ScreenI(screen_id, False))
            link.setChild(omero.model.FileAnnotationI(fa.id, False))
            link = update_service.saveAndReturnObject(link)
            assert link.id.val > 0

            # run the script
            screen_ids = [spl.getParent().id]
            args = {
                "Data_Type": omero.rtypes.rstring("Screen"),
                "IDs": omero.rtypes.rlist(screen_ids),
                "File_Annotation": omero.rtypes.rstring(str(fa.id)),
                "CSV Encoding": omero.rtypes.rstring(str(enc))
            }
            message = None
            try:
                message = run_script(client, sid, args, "Message")
                assert message is not None
                assert message.getValue().startswith(
                    'Table data populated')
            except ValueError as e:
                # A decoding failure is accepted only when it is the
                # script's own error message.
                assert str(e).startswith('The CSV file provided could')
    finally:
        # Close the connection even when an assertion fails.
        conn.close()