-
Notifications
You must be signed in to change notification settings - Fork 1
/
mnist_loader.py
51 lines (37 loc) · 1.86 KB
/
mnist_loader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import numpy as np
def training_data_loader(images_path='training_data/images.ubyte',
                         labels_path='training_data/labels.ubyte'):
    """Load the MNIST training images and labels from IDX-format files.

    Args:
        images_path: Path of the IDX image file (16-byte header followed by
            one byte per pixel). Defaults to the training-set location.
        labels_path: Path of the IDX label file (8-byte header followed by
            one byte per label). Defaults to the training-set location.

    Returns:
        A tuple ``(images_data, labels_data)`` of two equally long lists.
        Every image is a float64 array of shape ``(rows * cols, 1)``
        (784x1 for MNIST) holding the pixel bytes divided by 256.0; every
        label is a float64 one-hot array of shape ``(10, 1)``.

    Raises:
        ValueError: If a header or payload is shorter than its header
            announces, or the two files disagree on the number of items.
        IndexError: If a label byte is greater than 9.
    """
    with open(images_path, 'rb') as f:  # rb -> read in binary
        header = f.read(16)
        if len(header) != 16:
            raise ValueError('{}: truncated image header'.format(images_path))
        # Bytes 0-3 hold the magic number (not needed here); the next three
        # big-endian 32-bit integers are the image count, rows and columns.
        count = int.from_bytes(header[4:8], 'big')
        rows = int.from_bytes(header[8:12], 'big')
        cols = int.from_bytes(header[12:16], 'big')
        pixels = rows * cols
        raw_images = f.read(count * pixels)
    if len(raw_images) != count * pixels:
        raise ValueError(
            '{}: expected {} pixel bytes, found {}'.format(
                images_path, count * pixels, len(raw_images)))

    with open(labels_path, 'rb') as f:
        header = f.read(8)
        if len(header) != 8:
            raise ValueError('{}: truncated label header'.format(labels_path))
        label_count = int.from_bytes(header[4:8], 'big')
        raw_labels = f.read(label_count)
    # A short read must be an error: an empty byte string would otherwise
    # decode to the perfectly valid-looking label 0.
    if len(raw_labels) != label_count:
        raise ValueError(
            '{}: expected {} label bytes, found {}'.format(
                labels_path, label_count, len(raw_labels)))
    if label_count != count:
        raise ValueError(
            'image count ({}) and label count ({}) differ'.format(
                count, label_count))

    # Convert all images at once: one (pixels, 1) column per image, with the
    # byte values scaled by 1/256 so they land in [0, 1).
    images = np.frombuffer(raw_images, dtype=np.uint8).astype(np.float64)
    images = images.reshape(count, pixels, 1) / 256.0

    # One-hot encode all labels at once as (10, 1) columns.
    labels = np.frombuffer(raw_labels, dtype=np.uint8)
    one_hot = np.zeros((count, 10, 1))
    one_hot[np.arange(count), labels, 0] = 1.0

    return (list(images), list(one_hot))
def test_data_loader():
images_path = 'test_data/images_t.ubyte' # Images Data Location
labels_path = 'test_data/labels_t.ubyte' # Labels Data Location
images_data = []
labels_data = []
with open(images_path,'rb') as f: # rb -> read in binary
f.read(16) #useless magic number and imazge specs
for i in range(10000): # 60,000 images
images_data.append(list(f.read(784))) #read next 784 bytes of single image data
with open(labels_path,'rb') as f:
f.read(8)
for i in range(10000):
labels_data.append(int.from_bytes(f.read(1),"big")) #read 1 byte of label data
images_data = [np.reshape(x,(784,1))/256.0 for x in images_data] # convert image data to numpy arrays of size 784x1
labels_data = [vectorize(x) for x in labels_data] # vectorize labels data as 10x1 array
return (images_data,labels_data)
def vectorize(x):
    """Return a 10x1 one-hot column: 1.0 in row ``x``, 0.0 everywhere else."""
    one_hot = np.zeros(shape=(10, 1))
    # Row x of the column has a single entry, so a scalar fills it.
    one_hot[x] = 1.0
    return one_hot