-
Notifications
You must be signed in to change notification settings - Fork 1
/
streamlit_app.py
257 lines (204 loc) · 10.8 KB
/
streamlit_app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
import geopandas
import pandas
import numpy
import tifffile
import shapely
import streamlit as st
from lmd.lib import SegmentationLoader
from lmd.lib import Collection, Shape
from lmd import tools
from PIL import Image
from pathlib import Path
import ast
import string
from loguru import logger
import sys
logger.remove()
logger.add(sys.stdout, format="<green>{time:HH:mm:ss.SS}</green> | <level>{level}</level> | {message}")
st.title("Convert a GeoJSON polygons to xml for Laser Microdissection, 26.04.2024")
st.subheader("From Jose Nimo, PhD at AG Coscia in the Max Delbrueck Center for Molecular Medicine in Berlin")
st.divider()
uploaded_file = st.file_uploader("Choose a file", accept_multiple_files=False)
def load_and_QC_geojson_file(geojson_path: str, list_of_calibpoint_names: list = ['calib1','calib2','calib3']):
"""
This function loads a geojson file and checks for common issues that might arise when converting it to xml for LMD
Parameters:
geojson_path (str): path to the geojson file
list_of_calibpoint_names (list): list of calibration point names in the geojson file
Returns:
None
Latest update: 26.04.2024 by Jose Nimo
"""
#load geojson file
df = geopandas.read_file(geojson_path)
logger.info(f"Geojson file loaded with shape {df.shape}")
try:
df['annotation_name'] = df['name']
except:
logger.warning('No name column found, meaning no annotation in Qupath was named, at least calibration points should be named')
st.stop()
geometry_counts = df.geometry.geom_type.value_counts()
log_message = ", ".join(f"{count} {geom_type}s" for geom_type, count in geometry_counts.items())
logger.info(f"Geometries in DataFrame: {log_message}")
st.write(f"Geometries in DataFrame: {log_message}")
#save calib points in a list
caliblist = []
for point_name in list_of_calibpoint_names:
if point_name in df['annotation_name'].unique():
caliblist.append(df.loc[df['annotation_name'] == point_name, 'geometry'].values[0])
else:
logger.error(f'Your given annotation_name {point_name} is not present in the file')
logger.error(f'These are the calib points you passed: {list_of_calibpoint_names}')
logger.error(f"These are the calib points found in the geojson you gave me: {df[df['geometry'].geom_type == 'Point']['annotation_name']}")
st.write('Your given annotation_name is not present in the file \n',
f'These are the calib points you passed: {list_of_calibpoint_names} \n',
f"These are the calib points found in the geojson you gave me: ")
st.table(df[df['geometry'].geom_type == 'Point']['annotation_name'])
#remove points
df = df[df['geometry'].apply(lambda geom: not isinstance(geom, shapely.geometry.Point))]
logger.info(f"Point geometries have been removed")
#check and remove empty classifications
if df['classification'].isna().sum() !=0 :
st.write(f"you have {df['classification'].isna().sum()} NaNs in your classification column",
"these are unclassified objects from Qupath, they will be ignored")
df = df[df['classification'].notna()]
#get classification name from inside geometry properties
df['classification_name'] = df['classification'].apply(lambda x: x.get('name'))
#create coordenate list
listarray = []
for point in caliblist:
listarray.append([point.x, point.y])
calib_np_array = numpy.array(listarray)
#now that calibration points are saved, remove them from the dataframe
df = df[df['annotation_name'].isin(list_of_calibpoint_names) == False]
#check for MultiPolygon objects
if 'MultiPolygon' in df.geometry.geom_type.value_counts().keys():
st.write('MultiPolygon objects present: \n')
st.table(df[df.geometry.geom_type == 'MultiPolygon'][['annotation_name','classification_name']])
st.write('these are not supported, please convert them to polygons in Qupath \n',
'the script will continue but these objects will be ignored')
df = df[df.geometry.geom_type != 'MultiPolygon']
def extract_coordinates(geometry):
if geometry.geom_type == 'Polygon':
return [list(coord) for coord in geometry.exterior.coords]
elif geometry.geom_type == 'LineString':
return [list(coord) for coord in geometry.coords]
else:
st.write(f'Geometry type {geometry.geom_type} not supported, please convert to Polygon or LineString in Qupath')
st.stop()
df['coords'] = df.geometry.simplify(1).apply(extract_coordinates)
st.success('The file QC is complete')
calibration_point_1 = st.text_input("Enter the name of the first calibration point: ", placeholder ="calib1")
calibration_point_2 = st.text_input("Enter the name of the second calibration point: ", placeholder ="calib2")
calibration_point_3 = st.text_input("Enter the name of the third calibration point: ", placeholder ="calib3")
list_of_calibpoint_names = [calibration_point_1, calibration_point_2, calibration_point_3]
if st.button("Load and check the geojson file"):
if uploaded_file is not None:
load_and_QC_geojson_file(geojson_path=uploaded_file, list_of_calibpoint_names=list_of_calibpoint_names)
else:
st.warning("Please upload a file first.")
samples_and_wells_input = st.text_area("Enter the desired samples and wells scheme")
def load_and_QC_SamplesandWells(samples_and_wells_input, geojson_path, list_of_calibpoint_names):
df = geopandas.read_file(geojson_path)
df = df[df['name'].isin(list_of_calibpoint_names) == False]
df = df[df['classification'].notna()]
df = df[df.geometry.geom_type != 'MultiPolygon']
df['Name'] = numpy.nan
for i in df.index:
tmp = df.classification[i].get('name')
df.at[i,'Name'] = tmp
# parse common human copy paste formats
# remove newlines
samples_and_wells_processed = samples_and_wells_input.replace("\n", "")
# remove spaces
samples_and_wells_processed = samples_and_wells_processed.replace(" ", "")
# parse into python dictionary
samples_and_wells = ast.literal_eval(samples_and_wells_processed)
#create list of acceptable wells, default is using a space in between columns
list_of_acceptable_wells =[]
#Allowing columns 2->22, rows B->N
for row in list(string.ascii_uppercase[1:14]):
for column in range(2,23):
list_of_acceptable_wells.append(str(row) + str(column))
#check for improper wells
for well in samples_and_wells.values():
if well not in list_of_acceptable_wells:
st.write(f'Your well {well} is not in the list of acceptable wells for 384 well plate, please correct it',
'ask an expert if unsure, the script will continue')
# st.stop(), let users know, but don't stop the script
df['Name'] = numpy.nan
for i in df.index:
tmp = df.classification[i].get('name')
df.at[i,'Name'] = tmp
#check that names in df are all present in the samples and wells
for name in df.Name.unique():
if name not in samples_and_wells.keys():
st.write(f'Your name {name} is not in the list of samples_and_wells, please correct either',
'please change the class name in Qupath or add it to the samples_and_wells dictionary',
'and then rerun the web app')
st.stop()
st.success('The samples and wells scheme QC is done!')
if st.button("Check the samples and wells"):
samples_and_wells = load_and_QC_SamplesandWells(samples_and_wells_input=samples_and_wells_input,
geojson_path=uploaded_file,
list_of_calibpoint_names=list_of_calibpoint_names)
def create_collection(geojson_path, list_of_calibpoint_names, samples_and_wells_input ):
###### dataframe processing ######
df = geopandas.read_file(geojson_path)
caliblist = []
for point_name in list_of_calibpoint_names:
if point_name in df['name'].unique():
caliblist.append(df.loc[df['name'] == point_name, 'geometry'].values[0])
else:
st.stop('Your given name is not present in the file')
listarray = []
for point in caliblist:
listarray.append([point.x, point.y])
calib_np_array = numpy.array(listarray)
df = df[df['name'].isin(list_of_calibpoint_names) == False]
df = df[df['classification'].notna()]
df = df[df.geometry.geom_type != 'MultiPolygon']
def extract_coordinates(geometry):
if geometry.geom_type == 'Polygon':
return [list(coord) for coord in geometry.exterior.coords]
elif geometry.geom_type == 'LineString':
return [list(coord) for coord in geometry.coords]
df['coords'] = df.geometry.simplify(1).apply(extract_coordinates)
df['Name'] = numpy.nan
for i in df.index:
tmp = df.classification[i].get('name')
df.at[i,'Name'] = tmp
###### samples and wells processing ######
samples_and_wells_processed = samples_and_wells_input.replace("\n", "")
samples_and_wells_processed = samples_and_wells_processed.replace(" ", "")
samples_and_wells = ast.literal_eval(samples_and_wells_processed)
###### create the collection ######
the_collection = Collection(calibration_points = calib_np_array)
the_collection.orientation_transform = numpy.array([[1,0 ], [0,-1]])
for i in df.index:
the_collection.new_shape(df.at[i,'coords'], well = samples_and_wells[df.at[i, "Name"]])
the_collection.plot(save_name= "./TheCollection.png")
st.image("./TheCollection.png", caption='Your Contours', use_column_width=True)
st.write(the_collection.stats())
the_collection.save(f'./{uploaded_file.name.replace("geojson", "xml")}')
#create and export dataframe with sample placement in 384 well plate
rows_A_P= [i for i in string.ascii_uppercase[:16]]
columns_1_24 = [str(i) for i in range(1,25)]
df_wp384 = pandas.DataFrame('',columns=columns_1_24, index=rows_A_P)
#fill in the dataframe with samples and wells
for i in samples_and_wells:
location = samples_and_wells[i]
df_wp384.at[location[0],location[1:]] = i
#save dataframe as csv
df_wp384.to_csv(f'./{uploaded_file.name.replace("geojson", "_384_wellplate.csv")}', index=True)
if st.button("Process geojson and create the contours"):
create_collection(geojson_path=uploaded_file,
list_of_calibpoint_names=list_of_calibpoint_names,
samples_and_wells_input=samples_and_wells_input)
st.success("Contours created successfully!")
st.download_button("Download contours file",
Path(f'./{uploaded_file.name.replace("geojson", "xml")}').read_text(),
f'./{uploaded_file.name.replace("geojson", "xml")}')
st.download_button("Download 384 plate scheme",
Path(f'./{uploaded_file.name.replace("geojson", "_384_wellplate.csv")}').read_text(),
f'./{uploaded_file.name.replace("geojson", "_384_wellplate.csv")}')