-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathevaluate.py
executable file
·103 lines (85 loc) · 3.9 KB
/
evaluate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import os
import numpy as np
import pandas as pd
import seaborn as sn
import matplotlib.pyplot as plt
from tqdm import tqdm
import torch
from torch import nn
from torch.utils.data import DataLoader
import pytorch_lightning as pl
import argparse
from architecture import MicrocolonyNet
from datatools import McolonyTestData
# Input image dimensions (match these with your dataset); presumably in pixels,
# going by the `_px` suffix.
# NOTE(review): neither constant is referenced by main() or the CLI entry point
# in this file — confirm whether another module reads them before changing or
# removing them.
max_px = 640
min_px = 401
def main(args):
    """Evaluate a trained MicrocolonyNet checkpoint on a test dataset.

    Runs inference over every batch of the test set and then, in the current
    working directory:

    * writes ``microcolony_<group>.csv`` with one row per sample (filename,
      ground-truth strain, predicted strain),
    * prints the overall accuracy,
    * saves a row-normalised confusion matrix as
      ``confusion_matrix_<group>.png``.

    ``<group>`` is the name of the last directory component of ``args.root``.
    The ground-truth strain is the first two characters of each sample's file
    name; the predicted strain is the first two characters of the predicted
    class name, where class names are the sorted entries of ``args.root_train``.

    Args:
        args: Parsed command-line namespace providing ``root_train``, ``root``,
            ``ckpt``, ``workers`` and ``batch``.
    """
    # Arguments from command line
    root_train = args.root_train
    root = args.root
    workers = args.workers
    batch = args.batch
    ckpt = args.ckpt

    # Set multiprocessing strategy to 'file_system' for pytorch
    torch.multiprocessing.set_sharing_strategy('file_system')

    # Class names are the sorted entries of the training dataset directory; the
    # model's output index is used to look up a name in this list.
    # NOTE(review): stray non-class entries in root_train (e.g. hidden files)
    # would shift the indices — confirm the folder holds class folders only.
    ds_classes = sorted(os.listdir(root_train))

    # Experimental group name = last component of the test dataset path.
    # normpath drops any trailing separator, so both 'data/groupA' and
    # 'data/groupA/' give 'groupA' (splitting on '/' and taking [-2] only
    # worked when the path ended with a slash).
    ds_group = os.path.basename(os.path.normpath(root))

    # Initialize dataset and dataloader
    ds = McolonyTestData(root=root)
    test_dl = DataLoader(ds, batch_size=batch, shuffle=False, num_workers=workers, pin_memory=False)

    # Use the GPU when one is present, otherwise fall back to the CPU.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # load_from_checkpoint is a classmethod: call it on the class instead of
    # building a throwaway instance first.
    model = MicrocolonyNet.load_from_checkpoint(ckpt, map_location=device)
    model.to(device)
    model.eval()  # Set model to evaluation mode
    print('Model loaded')

    name_list = []
    pred_indices = []

    # Loop over data batches; each batch is indexed as (sample dict, file names).
    for i, d in enumerate(tqdm(test_dl)):
        images = d[0]['image'].to(device)

        # Perform inference
        with torch.no_grad():
            scores = model(images)

        # Index of the highest-scoring class for every sample, as plain ints
        pred_indices.extend(torch.argmax(scores, dim=1).cpu().tolist())
        name_list.extend(d[1])

        # Clear GPU memory every 25 batches
        if device.type == 'cuda' and i % 25 == 0:
            torch.cuda.empty_cache()

    # Strain labels are the first two characters of the file / class name
    gt_list = [name[:2] for name in name_list]
    pred_list = [ds_classes[idx][:2] for idx in pred_indices]

    # Save model inference results as a csv file
    test_df = pd.DataFrame({'filename': name_list, 'gt strain': gt_list, 'pred strain': pred_list})
    test_df.to_csv(f'microcolony_{ds_group}.csv', index=False, header=True)

    if test_df.empty:
        # Nothing to score or plot; the heatmap would fail on an empty matrix.
        print('No samples found in the test dataset; skipping accuracy and confusion matrix')
        return

    # Compute the accuracy
    accuracy = (test_df['gt strain'] == test_df['pred strain']).mean()
    print(f'Accuracy: {accuracy * 100:.2f}%')

    # Generate and save confusion matrix. The in-memory frame is used directly:
    # re-reading the CSV would re-infer dtypes and turn labels such as '01'
    # into the integer 1.
    counts = pd.crosstab(test_df['gt strain'], test_df['pred strain'],
                         rownames=['Ground Truth'], colnames=['Predicted'])
    # Divide every row by its own total (axis=0 aligns the totals with the row
    # index). A plain `counts / counts.sum(axis=1)` aligns them with the
    # columns instead and normalises by the wrong class.
    row_fractions = counts.div(counts.sum(axis=1), axis=0)
    sn.heatmap(row_fractions, annot=True, fmt='.2f', cmap='Blues', annot_kws={"size": 20})
    plt.savefig(f'confusion_matrix_{ds_group}.png', dpi=400)
    plt.close()  # release the figure once it is on disk
if __name__ == '__main__':
    # Command-line interface: both dataset roots and the checkpoint path are
    # mandatory; the dataloader options default to 0 workers and batch size 1.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        '-rt', '--root_train',
        required=True,
        type=str,
        help='Root folder of the training dataset',
    )
    cli.add_argument(
        '-r', '--root',
        required=True,
        type=str,
        help='Root folder of the test dataset',
    )
    cli.add_argument(
        '-c', '--ckpt',
        required=True,
        type=str,
        help='Path to checkpoint file',
    )
    cli.add_argument(
        '-w', '--workers',
        default=0,
        type=int,
        help='Number of dataloader workers per GPU',
    )
    cli.add_argument(
        '-b', '--batch',
        default=1,
        type=int,
        help='Batch size per GPU',
    )
    main(cli.parse_args())