This repository has been archived by the owner on Aug 27, 2018. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathoutputAdapterODMax.py
164 lines (101 loc) · 4.85 KB
/
outputAdapterODMax.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
import csv
import numpy
import pandas
import os
import matplotlib
matplotlib.use('PDF')
from matplotlib.colors import ListedColormap
import matplotlib.pyplot as plt
# Module-level accumulator: outputPlateData() appends one row per plate entry
# and finish() turns the collected rows into a single pandas DataFrame.
aggregatedData = []
def createOutputFile(folderName):
    """Return the output location for this adapter.

    Nothing is created here; the function hands back *folderName* unchanged.
    The files themselves are written later by ``finish``.

    Args:
        folderName: Folder the result files will be written to.

    Returns:
        The *folderName* argument, unchanged.
    """
    return folderName
def outputPlateData(plate, folderName):
    """Collect the rows of one plate into the module-level ``aggregatedData``.

    For every entry ``t`` of ``plate.DataArray`` a row ``[plate.Name] +
    t.ToTable()`` is appended. Nothing is written to disk at this point.

    Args:
        plate: Object exposing ``Name`` and an iterable ``DataArray`` whose
            items provide ``ToTable()``.
            NOTE(review): ``ToTable()`` presumably returns
            [coordinate, medium, strain, 48 readings] to match the column
            list used in ``finish`` - confirm against the plate class.
        folderName: Unused here; kept for the adapter interface.

    Returns:
        0
    """
    # Build the full list first so a failing ToTable() adds no partial plate.
    aggregatedData.extend([[plate.Name] + t.ToTable() for t in plate.DataArray])
    return 0
def finish(folderName):
    """Turn the collected plate rows into result tables and plots.

    Builds one data frame from the module-level ``aggregatedData`` list,
    splits it into blank and non-blank rows (``Strain == 'BLANK'``), applies
    ``correctForBlank`` and then ``correctForMedium`` to every non-blank row
    and writes these tab-separated files into *folderName*:

    * ``output_blanks.csv`` - mean/std of the blank readings per medium
    * ``output_table.csv`` - every non-blank row with its correction columns
    * ``output_table_corr_by_strain_medium.csv`` - mean of the numeric
      columns per medium and strain
    * ``output_count.csv`` - row counts per medium and strain
    * ``output_conclusion_by_medium.csv``,
      ``output_conclusion_by_strain_medium.csv``,
      ``output_conclusion_by_strain.csv`` - mean/std of the per-row maximum
      of the ``Cor_*`` and ``PosTest_*`` columns

    Finally ``plotData`` is called with the per medium/strain means.

    Args:
        folderName: Folder the files are written to.

    Returns:
        0
    """
    timePoints = list(range(1, 49))
    correctedColumns = ["Cor_" + str(x) for x in timePoints]
    posTestColumns = ["PosTest_" + str(x) for x in timePoints]

    # read data
    dataFrame = pandas.DataFrame(
        aggregatedData,
        columns=['Plate', 'Coordinate', 'Medium', 'Strain'] + timePoints)

    # separate by blank / nonblank
    # (the former set_index() calls discarded their result and were no-ops)
    dataFrameNonBlanks = dataFrame.loc[dataFrame['Strain'] != 'BLANK']
    dataFrameBlanks = dataFrame.loc[dataFrame['Strain'] == 'BLANK']
    blankStats = (dataFrameBlanks[["Medium"] + timePoints]
                  .groupby(['Medium'])
                  .aggregate(['mean', 'std']))
    blankStats.to_csv(os.path.join(folderName, "output_blanks.csv"), sep='\t')

    # correct for blank by medium
    corrected = dataFrameNonBlanks.apply(
        correctForBlank, axis=1, args=(dataFrameBlanks,))

    # select only the MCPSM rows as reference for the next step
    dataFrameMedia = corrected.loc[corrected['Medium'] == 'MCPSM']

    # correct for medium (positive test)
    corrected = corrected.apply(
        correctForMedium, axis=1, args=(dataFrameMedia,))
    corrected.to_csv(os.path.join(folderName, "output_table.csv"), sep='\t')

    # generate aggregated by experiment file
    # Only numeric columns can be averaged. Older pandas silently dropped the
    # text columns (Plate, Coordinate); pandas >= 2.0 raises TypeError
    # instead, so restrict the frame explicitly.
    groupKeys = ['Medium', 'Strain']
    numericColumns = [
        column
        for column in corrected.select_dtypes(include='number').columns
        if column not in groupKeys
    ]
    aggregated = (corrected[groupKeys + numericColumns]
                  .groupby(groupKeys)
                  .aggregate(['mean']))
    aggregated.to_csv(
        os.path.join(folderName, "output_table_corr_by_strain_medium.csv"),
        sep='\t')

    # generate count file
    count = (corrected[['Medium', 'Strain', 'Plate', 'Coordinate']]
             .groupby(['Medium', 'Strain'])
             .aggregate(['count']))
    count.to_csv(os.path.join(folderName, "output_count.csv"), sep='\t')

    # generate global result files
    corrected["MaxCorOD"] = corrected[correctedColumns].max(axis=1)
    corrected["MaxPosTestOD"] = corrected[posTestColumns].max(axis=1)
    maxColumns = ["MaxCorOD", "MaxPosTestOD"]
    conclusions = [
        (["Medium"], ["Medium"], "output_conclusion_by_medium.csv"),
        (["Medium", "Strain"], ["Strain", "Medium"],
         "output_conclusion_by_strain_medium.csv"),
        (["Strain"], ["Strain"], "output_conclusion_by_strain.csv"),
    ]
    for keyColumns, groupOrder, fileName in conclusions:
        summary = (corrected[keyColumns + maxColumns]
                   .groupby(groupOrder)
                   .aggregate(['mean', 'std']))
        summary.to_csv(os.path.join(folderName, fileName), sep='\t')

    plotData(aggregated, folderName)
    return 0
def correctForBlank(row, dataFrameBlanks):
    """Subtract the mean blank reading of the row's medium from each time point.

    The blanks are the rows of *dataFrameBlanks* whose ``Medium`` equals the
    row's ``Medium``. For every time-point column ``1`` .. ``48`` the mean
    over those blanks is stored as ``Blank_<n>`` and the difference
    ``row[n] - mean`` as ``Cor_<n>``. ``NumberBlanks`` holds the number of
    matching blank rows. When there are none, a message is printed and the
    ``Blank_*`` / ``Cor_*`` values come out as NaN.

    Args:
        row: Series with at least ``Medium`` and the columns ``1`` .. ``48``.
        dataFrameBlanks: Frame with a ``Medium`` column and the columns
            ``1`` .. ``48``.

    Returns:
        A copy of *row* extended with ``NumberBlanks``, ``Blank_1..48`` and
        ``Cor_1..48``; the row passed in is left untouched.
    """
    timePoints = list(range(1, 49))
    medium = row['Medium']

    # get corresponding blanks
    blanks = dataFrameBlanks.loc[dataFrameBlanks['Medium'] == medium]

    # Select the readings by their column labels. The former
    # list(range(0, 52)) only worked while pandas fell back to positional
    # selection; labels 0 and 49..51 do not exist and raise KeyError today.
    blankMean = blanks[timePoints].mean()

    numBlanks = len(blanks.index)
    if numBlanks == 0:
        print("ERROR NO BLANKS FOUND FOR " + str(medium))

    # attach corrected data to a copy so the caller's row is not modified
    row = row.copy()
    row['NumberBlanks'] = numBlanks
    for x in timePoints:
        row['Blank_' + str(x)] = blankMean.loc[x]
    for x in timePoints:
        row['Cor_' + str(x)] = row.loc[x] - blankMean.loc[x]
    return row
def correctForMedium(row, dataFrameMedia):
    """Attach the mean corrected reading of the row's strain in MCPSM medium.

    The reference rows are those of *dataFrameMedia* with the same ``Strain``
    as *row* and ``Medium == "MCPSM"``. Their column-wise mean of
    ``Cor_1`` .. ``Cor_48`` is stored as ``PosTest_1`` .. ``PosTest_48`` and
    their number as ``NumberPosTests``. When no reference row exists a
    message is printed and every ``PosTest_*`` value is set to 0.

    NOTE(review): the reference rows are matched by strain only, although the
    message also names the plate - confirm that per-plate matching is not
    intended.

    Args:
        row: Series with at least ``Strain`` and ``Plate``.
        dataFrameMedia: Frame with ``Strain``, ``Medium`` and the columns
            ``Cor_1`` .. ``Cor_48``.

    Returns:
        A copy of *row* extended with ``NumberPosTests`` and
        ``PosTest_1..48``; the row passed in is left untouched.
    """
    strain = row['Strain']
    plate = row['Plate']
    medium = "MCPSM"
    timePoints = range(1, 49)

    posTests = dataFrameMedia.loc[
        (dataFrameMedia['Strain'] == strain)
        & (dataFrameMedia['Medium'] == medium)
    ]
    numPosTests = len(posTests.index)

    row = row.copy()
    row['NumberPosTests'] = numPosTests

    if numPosTests == 0:
        print("ERROR NO POSMEDIA FOUND FOR " + str(plate) + "/" + str(strain))
        for x in timePoints:
            row['PosTest_' + str(x)] = 0
        return row

    # Average the corrected readings only. The text columns Plate and Strain
    # used to be part of this selection and make mean() raise TypeError on
    # pandas >= 2.0.
    correctedColumns = ["Cor_" + str(x) for x in timePoints]
    mediaMean = posTests[correctedColumns].mean()
    for x in timePoints:
        row['PosTest_' + str(x)] = mediaMean['Cor_' + str(x)]
    return row
def plotData(df, folderName):
    """Write one line plot per medium with the corrected OD of every strain.

    After ``reset_index()`` the frame must provide the columns ``Medium``,
    ``Strain`` and ``Cor_1`` .. ``Cor_48``. For each distinct medium the
    ``Cor_*`` columns of its rows are transposed so that every strain becomes
    one line, and the figure is saved as ``<medium>.pdf`` in *folderName*.

    Args:
        df: Frame of corrected readings; ``finish`` passes the per
            medium/strain means.
        folderName: Folder the PDF files are written to.

    Returns:
        0
    """
    df = df.reset_index()
    correctedColumns = ["Cor_" + str(x) for x in range(1, 49)]
    for medium in df["Medium"].unique():
        # one dataFrame per medium with only the corrected time series
        dfMedium = df[correctedColumns].loc[df['Medium'] == medium]
        # after transposing, the columns are row labels; name them by strain
        dfMediumT = dfMedium.T.rename(columns=lambda x: df["Strain"][x])

        ax = dfMediumT.plot(legend=True, title="OD in " + medium,
                            colormap="gist_ncar")
        ax.set_xlabel("Time 0-48h")
        ax.set_ylabel("OD")

        # Save through the figure that owns these axes and close it again;
        # otherwise every medium leaves an open figure behind.
        figure = ax.get_figure()
        figure.savefig(os.path.join(folderName, medium + ".pdf"))
        plt.close(figure)
    return 0