train.py
import torch
import torch.nn as nn
from custom_loss import CustomLoss, CLoss  # project-local losses; used only by the commented-out custom-loss option below
import matplotlib.pyplot as plt
import pandas as pd


class Train:
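    """Wrap the training loop, loss/optimizer setup, checkpointing, and
    loss/memory bookkeeping for a PyTorch model."""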
def __init__(self, model, number_of_epochs: int, path_saving: str, device='cpu'):
self.losses = None
self.optimizer = None
self.loss_f_1 = None
self.loss_f = None
self.memory_stats = None
self.path = path_saving
self.model = model # model to train
self.number_of_epochs = number_of_epochs # number of training epochs
self.device = device # device to train on
def initialize(self, loss_f="BCE_LL", optimizer='Adam',
learning_rate=0.0001):
# compile loss and optimizer for training
# self.loss_f = CLoss()
self.loss_f = nn.CrossEntropyLoss()
if loss_f == "BCE_LL":
self.loss_f_1 = nn.BCEWithLogitsLoss()
# set BinaryCrossEntropy loss function for abnormal output as
# operator's preference
elif loss_f == "BCE":
self.loss_f_1 = nn.BCELoss()
# set BinaryCrossEntropy loss function as operator's preference for binary classification
elif loss_f == "custom_2":
self.loss_f_1 = nn.CrossEntropyLoss()
# set CrossEntropy loss function as operator's preference for multiclass classification
elif loss_f == "MSE":
self.loss_f_1 = nn.MSELoss()
# set Mean Squared Error loss function as operator's preference for regression problems
if optimizer == "Adam":
self.optimizer = torch.optim.Adam(self.model.parameters(),
lr=learning_rate)
# set Adam optimizer as operator's preference

    def train(self, train_loader, val_loader, verbose=True):
        # the main training loop
        self.memory_stats = {"Allocated": [], "Max Allocated": [], "Reserved": []}
        self.losses = {"Training Loss": [], "Validation Loss": [],
                       "Training Loss Angles": [], "Validation Loss Angles": [],
                       "Training Loss Positions": [], "Validation Loss Positions": []}
        n_batches = len(train_loader)
        for epoch in range(self.number_of_epochs):
            self.model.train()  # make sure layers such as dropout are in training mode
            total_loss, loss_angle, loss_position = 0.0, 0, 0
            # reset the accumulated losses at the beginning of each epoch
            batch = 0
            for images, labels in train_loader:  # iterate through the training batches
                batch += 1
                images = images.to(self.device).to(torch.float32)
                labels = labels.to(self.device).to(torch.float32)
                # move the batch of images and labels to device memory
                self.optimizer.zero_grad()  # clear the gradients of the previous step
                predicts, _ = self.model(images)
                predicts = predicts.squeeze()
                # pass the images to the model, then drop singleton dimensions
                # loss_p = self.loss_f_1(predicts[:, :4], labels[:, :4])
                # loss_a = self.loss_f(predicts[:, 4:], labels[:, 4:])
                loss = self.loss_f(predicts, labels)
                # loss = loss_a + loss_p
                # compute the loss between the labels and the model outputs
                loss.backward()  # backpropagate the loss
                self.optimizer.step()  # update the parameters with the optimizer
                total_loss += loss.item()
                # loss_angle += loss_a.item()
                # loss_position += loss_p.item()
                # accumulate the loss of each batch into the epoch total
                # if batch in [n_batches // 4, n_batches // 2, n_batches * 3 // 4]:
                #     print(f"Loss[batch = {batch}/{n_batches}] = {loss.item()}"
                #           f", total_loss = {total_loss}")
                #     print(f"Loss_angle = {loss_a} and Loss Points = {loss_p}")
            self.losses["Training Loss"].append(total_loss)
            # self.losses["Training Loss Angles"].append(loss_angle)
            # self.losses["Training Loss Positions"].append(loss_position)
            self._save_(dynamic_save=True)
            if torch.cuda.is_available():  # CUDA memory statistics exist only on GPU
                self.memory_stats["Allocated"].append(torch.cuda.memory_allocated())
                self.memory_stats["Max Allocated"].append(torch.cuda.max_memory_allocated())
                self.memory_stats["Reserved"].append(torch.cuda.memory_reserved())
            if verbose:
                # report the mean training loss per batch instead of dividing
                # by the hard-coded dataset size used previously
                print(f'Epoch {epoch + 1}: the average training loss is '
                      f'{total_loss / n_batches}.')
                # print(f"Loss_angle = {loss_angle} and Loss Points = {loss_position}")
            self.model.eval()  # switch to evaluation mode for validation
            with torch.no_grad():
                total_loss = 0.0
                n_val_batches = len(val_loader)
                for images, labels in val_loader:
                    images = images.to(self.device).to(torch.float32)
                    labels = labels.to(self.device).to(torch.float32)
                    predicts, _ = self.model(images)
                    predicts = predicts.squeeze()
                    # pass the images to the model, then drop singleton dimensions
                    # loss_p += self.loss_f_1(predicts[:, :4], labels[:, :4])
                    # loss_a += self.loss_f(predicts[:, 4:], labels[:, 4:])
                    loss = self.loss_f(predicts, labels)
                    # loss = loss_a + loss_p
                    total_loss += loss.item()  # accumulate over all validation batches
                    del images, labels
                self.losses["Validation Loss"].append(total_loss)
                # self.losses["Validation Loss Angles"].append(loss_a.item())
                # self.losses["Validation Loss Positions"].append(loss_p.item())
                print(f'Epoch {epoch + 1}: the average validation loss is '
                      f'{total_loss / n_val_batches}.')
                # print(f"Loss_angle = {loss_a} and Loss Points = {loss_p}")
        self._save_()
        return self.model  # return the trained model

    def n_parameters(self):
        # compute and report the number of trainable parameters
        total_par = sum(par.numel() for par in self.model.parameters()
                        if par.requires_grad)  # count only the trainable parameters
        print(f"The total number of trainable parameters for this model is {total_par}.")
        return total_par

    def plot_loss(self, x, title: str, name_parameter: str):
        # plot a per-epoch metric and save the figure under the saving path
        plt.figure()  # start a fresh figure so successive plots do not overlap
        plt.plot(x)
        plt.title(title)
        plt.xlabel('Epoch')
        plt.ylabel(name_parameter)
        plt.savefig(self.path + rf"/{title}.png")
        plt.close()

    def _save_(self, dynamic_save=False):
        # save the model parameters and the recorded statistics
        if dynamic_save:
            torch.save(self.model.state_dict(), self.path + "/model_dynamic.pt")
        else:
            torch.save(self.model.state_dict(), self.path + "/model.pt")
        # build the DataFrame from Series so that unused or shorter loss lists
        # (e.g. the commented-out angle/position losses) do not raise a
        # length-mismatch error; write the reports next to the checkpoints
        df_losses = pd.DataFrame({k: pd.Series(v) for k, v in self.losses.items() if v})
        df_losses.to_html(self.path + "/losses.html", index=False)
        df_memory = pd.DataFrame(self.memory_stats)
        df_memory.to_html(self.path + "/memory.html", index=False)

    def load_model(self, path):
        # load previously saved model parameters onto the training device
        self.model.load_state_dict(torch.load(path, map_location=self.device))
        return self.model
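

# ---------------------------------------------------------------------------
# Usage sketch (an addition, not part of the original file): a minimal,
# self-contained example of driving the Train class with a toy model and
# random data. TinyModel and the random tensors are illustrative assumptions;
# the only real requirements are that the model's forward pass returns a
# (predictions, extra) pair and that the loaders yield (images, labels).
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    from torch.utils.data import DataLoader, TensorDataset

    class TinyModel(nn.Module):  # hypothetical stand-in for the real model
        def __init__(self):
            super().__init__()
            self.net = nn.Sequential(nn.Flatten(), nn.Linear(3 * 32 * 32, 10))

        def forward(self, x):
            logits = self.net(x)
            return logits, None  # Train expects a (predictions, extra) pair

    images = torch.randn(32, 3, 32, 32)             # toy image batch
    labels = torch.softmax(torch.randn(32, 10), 1)  # soft targets for CrossEntropyLoss
    loader = DataLoader(TensorDataset(images, labels), batch_size=8)

    trainer = Train(TinyModel(), number_of_epochs=2, path_saving=".", device='cpu')
    trainer.initialize(loss_f="BCE_LL", optimizer='Adam', learning_rate=1e-4)
    trainer.n_parameters()
    trained_model = trainer.train(loader, loader, verbose=True)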