-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathclassifier.py
246 lines (201 loc) · 8.04 KB
/
classifier.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
# Import necessary libraries and modules
from PIL import Image, ImageDraw, ImageFont
import numpy as np
import cv2
import torch
import torch.nn as nn
from torch.autograd import Variable
from torchvision import transforms
from efficientnet_pytorch import EfficientNet
import time
import glob
import argparse
# Path of the fine-tuned checkpoint file (a file, despite the name); it is the
# default `model_dir` of get_pretrained_model, which hands it to torch.load.
MODEL_DIRECTORY = './EfficientNet-Models/B0/9350/trained_model.pth'
# Font file and point size passed to assign_image_label by the script entry point.
FONT = "arial.ttf"
FONT_SIZE = 25
# Architecture name passed to EfficientNet.from_pretrained by default.
PRETRAIN_MODEL = 'efficientnet-b0'
# True when a CUDA device is available; read by get_prediction and the script entry point.
use_gpu = torch.cuda.is_available()
def show_image(header, image):
    """
    Open a window titled ``header`` that shows ``image`` and wait for a key press.

    Args:
        header (str): Title of the display window.
        image (numpy.ndarray): Image array handed to ``cv2.imshow``.
    """
    print("[Console] Showing image")
    cv2.imshow(header, image)
    # Called without a delay, waitKey blocks until the user presses a key.
    cv2.waitKey()
def write_image(directory, image):
    """
    Save an image to disk, creating the destination folder when it is missing.

    Args:
        directory (str): Path of the output file, including file name and
            extension (the parameter name is historical; it is not a folder).
        image (numpy.ndarray): Image array to be saved.
    """
    import os  # local import keeps this block self-contained

    print("[Console] Saving image")
    # cv2.imwrite does not create missing folders, so make sure the parent exists.
    parent = os.path.dirname(directory)
    if parent:
        os.makedirs(parent, exist_ok=True)
    # imwrite reports failure through its return value rather than an exception;
    # surface it instead of silently producing no file.
    if not cv2.imwrite(directory, image):
        print(f"[Console] Failed to save image to {directory}")
def get_images(directory):
    """
    Load every image file matched by a glob pattern.

    Args:
        directory (str): Glob pattern selecting the image files
            (e.g. "./data/test/random/*.jpg"); it is passed to ``glob.glob``.

    Returns:
        list of numpy.ndarray: The decoded images as returned by ``cv2.imread``,
        in sorted path order.

    Raises:
        Exception: If the pattern matches no files, or none of the matched
            files can be decoded as an image.
    """
    print("[Console] Accessing folder")
    # glob returns matches in arbitrary order; sorting keeps the numbering of
    # the output files reproducible between runs.
    image_paths = sorted(glob.glob(directory))
    print(image_paths)
    if not image_paths:
        raise Exception("[INFO] Invalid directory")

    # Add images to memory
    print("[Console] Loading Images")
    images = []
    for image_path in image_paths:
        image = cv2.imread(image_path)
        # imread returns None (no exception) when a file cannot be decoded;
        # keeping it would crash the preprocessing step later on.
        if image is None:
            print(f"[INFO] Skipping unreadable image: {image_path}")
            continue
        images.append(image)

    if not images:
        raise Exception("[INFO] No readable images found")
    print(f"[INFO] Loaded {len(images)} image(s)")
    return images
def assign_image_label(images, labels, confs, font="arial.ttf", font_size=25):
    """
    Draw each image's label in its top-left corner.

    Args:
        images (List[numpy.ndarray]): Image arrays to annotate; each one is
            converted with ``Image.fromarray`` for drawing.
        labels (List[str]): Label for each image, in the same order.
        confs (List[float]): Confidence of each prediction. Accepted for
            interface compatibility; the value is currently not drawn.
        font (str, optional): TrueType font file for the text. Defaults to "arial.ttf".
        font_size (int, optional): Font size for the text. Defaults to 25.

    Returns:
        List[numpy.ndarray]: New image arrays with the label drawn at (10, 10).

    Raises:
        IndexError: If there are fewer labels than images.
    """
    # Fail before drawing anything rather than part-way through the list.
    if len(labels) < len(images):
        raise IndexError("fewer labels than images")
    # Nothing to draw: skip loading the font altogether.
    if len(images) == 0:
        return []

    try:
        font_setting = ImageFont.truetype(font, font_size)
    except OSError:
        # The requested font file is not installed on every system; fall back
        # to Pillow's built-in font instead of aborting the whole run.
        print(f"[INFO] Font '{font}' not found, using default font")
        font_setting = ImageFont.load_default()

    image_w_label = []
    for image, label in zip(images, labels):
        canvas = Image.fromarray(image)
        drawer = ImageDraw.Draw(canvas)
        # Pure green has the same value in RGB and BGR channel order.
        drawer.text((10, 10), f"{label}", fill=(0, 255, 0), font=font_setting)
        image_w_label.append(np.array(canvas))
    return image_w_label
def get_data(np_images):
    """
    Turn a list of numpy images into model-ready tensors.

    Each image goes through ToPILImage -> Resize(254) -> CenterCrop(224) ->
    ToTensor and then gets a leading batch dimension.

    Args:
        np_images (List[numpy.ndarray]): List of numpy array images (RGB format).

    Returns:
        List[torch.Tensor]: One preprocessed tensor per input image, each with
        a batch dimension of size 1 in front.
    """
    preprocess = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize(254),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
    ])
    # unsqueeze(0) prepends the batch axis expected by the model.
    return [preprocess(np_image).unsqueeze(0) for np_image in np_images]
def get_pretrained_model(model_dir=MODEL_DIRECTORY, weights=PRETRAIN_MODEL):
    """
    Build an EfficientNet with a 2-class head and load the fine-tuned weights.

    Args:
        model_dir (str, optional): Path of the checkpoint file holding the
            fine-tuned state dict. Defaults to MODEL_DIRECTORY.
        weights (str, optional): EfficientNet architecture name passed to
            ``EfficientNet.from_pretrained``. Defaults to PRETRAIN_MODEL.

    Returns:
        The EfficientNet model in eval mode, on the CPU, with its ``_fc`` layer
        replaced by a 2-output linear layer and the checkpoint loaded.
    """
    print("[INFO] Getting EfficientNet pre-trained model...")
    # Load pretrained backbone
    model = EfficientNet.from_pretrained(weights)
    # Freeze all backbone layers; the model is only used for inference
    for param in model.parameters():
        param.requires_grad = False
    # Swap the classification head for one with 2 output classes so its shape
    # matches the fine-tuned checkpoint
    model._fc = nn.Linear(model._fc.in_features, 2)
    # map_location lets a checkpoint saved from a CUDA device load on a
    # CPU-only machine; the caller moves the model to the GPU afterwards.
    state_dict = torch.load(model_dir, map_location="cpu")
    model.load_state_dict(state_dict)
    model.eval()
    print("[INFO] Loaded EfficientNet fine-tuned model\n", model, "\n")
    return model
def get_prediction(model, images):
    """
    Classify preprocessed images as 'mask' or 'no-mask'.

    Args:
        model (torch.nn.Module): The fine-tuned classifier; output index 0 is
            reported as 'mask' and index 1 as 'no-mask'.
        images (List[torch.Tensor]): Preprocessed image tensors, each with a
            leading batch dimension.

    Returns:
        Tuple[List[str], List[float], float]: The predicted labels, the softmax
        confidence of each prediction rounded to 4 decimals, and the elapsed
        time in seconds.
    """
    class_names = ('mask', 'no-mask')
    since = time.time()
    labels = []
    confs = []
    model.eval()

    # Run inputs on whatever device the model actually lives on instead of
    # relying on a global flag that may disagree with the model's placement.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    with torch.no_grad():
        for image in images:
            outputs = model(image.to(device))
            probs = torch.nn.functional.softmax(outputs, dim=1)
            batch_conf, batch_pred = torch.max(probs, 1)
            # Walk the batch dimension so tensors holding several images work too.
            for conf, pred in zip(batch_conf.tolist(), batch_pred.tolist()):
                if pred < len(class_names):
                    labels.append(class_names[pred])
                else:
                    # Keep labels/confs aligned with the inputs even when the
                    # model emits an unexpected class index.
                    print('[INFO] Labeling went wrong')
                    labels.append('unknown')
                confs.append(round(conf, 4))

    elapsed_time = time.time() - since
    return labels, confs, elapsed_time
if __name__ == "__main__":
    # Script entry point: load images, classify them, draw the labels and
    # show/save the annotated results.
    import os  # only needed by this entry point

    # Args Parser
    parser = argparse.ArgumentParser(prog='detect-mask',
                                     epilog='Text help'
                                     )
    parser.add_argument('-d', '--dir',
                        type=str,
                        default="./data/test/random/*.jpg",
                        help='Glob pattern of input images')
    parser.add_argument('-o', '--out',
                        type=str,
                        default="./output",
                        help='Path of output directory')
    args = parser.parse_args()
    input_dir = args.dir
    output_dir = args.out
    # cv2.imwrite does not create folders, so make sure the target exists.
    os.makedirs(output_dir, exist_ok=True)

    images = get_images(input_dir)

    # Preparing data and loading the model.
    # cv2.imread yields BGR arrays while get_data documents RGB input, so
    # convert a copy for the classifier and keep the BGR originals for output.
    rgb_images = [cv2.cvtColor(image, cv2.COLOR_BGR2RGB) for image in images]
    data = get_data(rgb_images)
    model = get_pretrained_model()

    # Use GPU if available
    print('[INFO] Classification in progress')
    if use_gpu:
        print("[INFO] Using CUDA")
        torch.cuda.empty_cache()
        model.cuda()
    else:
        print("[INFO] Using CPU")

    labels, confs, elapsed_time = get_prediction(model, data)
    print(f"[INFO] Label : {labels} with confidence {confs} in time {(elapsed_time // 60):.0f}m {(elapsed_time % 60):.0f}s")

    # Add the label to the top left corner of each input image
    print('[INFO] Writing labels onto output images')
    image_w_label = assign_image_label(images, labels, confs, font=FONT, font_size=FONT_SIZE)

    # Output API
    print('[INFO] Returning output')
    response_text = f'Label : {labels} with confidence {confs} in time {(elapsed_time // 60):.0f}m {(elapsed_time % 60):.0f}s'
    print(response_text)
    for i, image in enumerate(image_w_label):
        out = os.path.join(output_dir, f'im_{i}.jpg')
        show_image("image", image)
        write_image(out, image)