-
Notifications
You must be signed in to change notification settings - Fork 1
/
dataloader.py
92 lines (81 loc) · 2.97 KB
/
dataloader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
"""
Created on Thu Oct 10 16:51:19 2019
@author: Kartik
"""
from glob import glob
import numpy as np
import os
import cv2
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
import openface
from keras_vggface.vggface import VGGFace
from openface.align_dlib import AlignDlib #align_dlib.py was explicitly copied into the site-packages directory of openface; we import the class "AlignDlib" from that .py file
import dlib
import random
#from inception_resnet_v1 import * #inception_resnet_v1.py was also copied in the current directory
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import Normalizer
from sklearn.metrics import confusion_matrix
from sklearn.utils.multiclass import unique_labels
from keras.models import load_model
from matplotlib import pyplot as plt
def get_dataset():
    """Load all images of the ``modified_dataset_158`` directory, grouped by person.

    The dataset directory is looked up under the current working directory
    and is expected to contain one sub-folder per person, each holding that
    person's image files.

    Returns:
        dict: ``{'Person_name': [image, ...]}`` where the key is the
        sub-folder name and each image is the value returned by
        ``cv2.imread`` for a file in that folder. Folders and files are
        visited in sorted order so the result is reproducible across runs.
        Files that ``cv2.imread`` cannot decode are skipped with a warning.
    """
    import warnings

    # Build paths with os.path so the loader is not tied to Windows "\\"
    # separators (the hard-coded form matches nothing on other platforms).
    dataset_dir = os.path.join(os.getcwd(), "modified_dataset_158")

    images = {}
    for folder in sorted(glob(os.path.join(dataset_dir, "*"))):
        # A stray file in the dataset root is not a person folder; without
        # this guard it would become a class with zero images.
        if not os.path.isdir(folder):
            continue
        key = os.path.basename(folder)
        loaded = []
        for image_path in sorted(glob(os.path.join(folder, "*"))):
            img = cv2.imread(image_path)
            # cv2.imread signals an unreadable file by returning None
            # rather than raising; do not let None leak into the dataset.
            if img is None:
                warnings.warn("Skipping unreadable image: %s" % image_path)
                continue
            loaded.append(img)
        images[key] = loaded
    return images
def shuffle_and_split(x, y):
    """Shuffle samples and labels together, then split them 70/30.

    Args:
        x: Samples.
        y: Labels, aligned index-by-index with ``x``.

    Returns:
        tuple: ``(x_train, x_test, y_train, y_test)``; the test part is 30%
        of the data.

    Note:
        The initial ``shuffle`` call is not given a ``random_state``, while
        the split itself uses ``random_state=0``.
    """
    # Permute x and y in unison so every sample keeps its label.
    shuffled_x, shuffled_y = shuffle(x, y)
    # Hold out 30% of the shuffled data for testing.
    parts = train_test_split(
        shuffled_x,
        shuffled_y,
        test_size=0.3,
        random_state=0,
    )
    x_train, x_test, y_train, y_test = parts
    return x_train, x_test, y_train, y_test
def split_dataset(data, max_per_class=None):
    """Flatten a per-person image dictionary into parallel sample/label lists.

    Each person (dictionary key) is assigned an integer label in iteration
    order, starting at 0. The same integer is used both as the label of
    every one of that person's images in ``y`` and as the key of that person
    in ``label_mapping``, so ``label_mapping[y[i]]`` is always the name of
    the person shown in ``x[i]``.

    Args:
        data: Mapping ``{'Person_name': [image, ...]}``.
        max_per_class: Optional cap on the number of images taken from each
            person (the first ``max_per_class`` are kept), useful for
            imbalanced datasets. ``None`` (default) keeps every image.

    Returns:
        tuple: ``(x, y, label_mapping)`` where ``x`` is the list of all
        images, ``y`` the list of integer labels aligned with ``x``, and
        ``label_mapping`` is ``{label: 'Person_name'}``.

    Raises:
        ValueError: If ``max_per_class`` is negative.
    """
    if max_per_class is not None and max_per_class < 0:
        raise ValueError("max_per_class must be non-negative or None")

    label_mapping = {}
    x, y = [], []
    for label, (name, person_images) in enumerate(data.items()):
        # The label is taken BEFORE moving on to the next person, so the
        # values stored in y are exactly the keys of label_mapping.
        label_mapping[label] = name
        if max_per_class is not None:
            person_images = person_images[:max_per_class]
        before = len(x)
        x += person_images
        # One label per image actually added for this person.
        y.extend([label] * (len(x) - before))
    return x, y, label_mapping