Skip to content

Commit

Permalink
added week0_03 materials
Browse files Browse the repository at this point in the history
  • Loading branch information
irina-rud committed Mar 1, 2021
1 parent 5e303cb commit 0e30b8d
Show file tree
Hide file tree
Showing 3 changed files with 819 additions and 0 deletions.
2 changes: 2 additions & 0 deletions week0_03_linear_classification/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
PyTorch intro:
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/girafe-ai/ml-mipt/blob/basic_f20/week0_03_linear_classification/week0_03_intro_to_pytorch.ipynb)
43 changes: 43 additions & 0 deletions week0_03_linear_classification/notmnist.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import os
import numpy as np
from matplotlib.pyplot import imread
from sklearn.model_selection import train_test_split
from glob import glob

def _run_shell_or_raise(command, action):
    """Run *command* through the shell; raise RuntimeError on a non-zero status."""
    status = os.system(command)
    if status != 0:
        raise RuntimeError(
            "%s failed (status %s) while running: %s" % (action, status, command)
        )


def load_notmnist(path='./notMNIST_small', letters='ABCDEFGHIJ',
                  img_shape=(28, 28), test_size=0.25, one_hot=False):
    """Load the notMNIST_small images and split them into train and test sets.

    If ``path`` does not exist, the archive ``notMNIST_small.tar.gz`` is
    downloaded into the current directory (unless it is already there) and
    unpacked with ``tar``. If that fails, fetch and unpack the archive manually.

    Args:
        path: Directory holding one sub-directory per class, each with images.
        letters: Class names to keep. A class's integer label is its position
            in this sequence.
        img_shape: Currently unused; kept so existing callers keep working.
        test_size: Forwarded to ``train_test_split``.
        one_hot: If true, labels are returned as a float32 matrix with one
            column per entry of ``letters`` instead of an integer vector.

    Returns:
        ``(X_train, y_train, X_test, y_test)``. The image arrays are float32,
        standardized with the global mean and standard deviation, and have a
        singleton axis inserted at position 1.

    Raises:
        RuntimeError: If the download or extraction command fails.
        ValueError: If no readable image of the requested classes is found.
    """
    archive = './notMNIST_small.tar.gz'

    # Explicit checks instead of `assert`: under `python -O` asserts are
    # stripped, which used to remove the download/extract commands entirely.
    if not os.path.exists(path):
        if not os.path.exists(archive):
            print("Downloading data...")
            try:
                _run_shell_or_raise(
                    'curl http://yaroslavvb.com/upload/notMNIST/notMNIST_small.tar.gz > notMNIST_small.tar.gz',
                    "Download",
                )
            except RuntimeError:
                # The shell redirection creates the file even when curl fails;
                # remove it so the next call retries the download.
                if os.path.exists(archive):
                    os.remove(archive)
                raise
        print("Extracting ...")
        _run_shell_or_raise(
            'tar -zxvf notMNIST_small.tar.gz > untar_notmnist.log',
            "Extraction",
        )

    # Build the mapping first so class filtering is an exact-name lookup
    # (`x in 'ABCDEFGHIJ'` would also accept substrings such as 'AB' or '').
    letter_to_i = {letter: index for index, letter in enumerate(letters)}

    data, labels = [], []
    print("Parsing...")
    # glob returns files in arbitrary filesystem order; sorting makes the
    # seeded train/test split below reproducible across machines.
    for img_path in sorted(glob(os.path.join(path, '*/*'))):
        class_i = img_path.split(os.sep)[-2]
        if class_i not in letter_to_i:
            continue
        try:
            image = imread(img_path)
        except Exception:
            # Best effort: skip unreadable files, but let KeyboardInterrupt
            # and SystemExit propagate.
            print("found broken img: %s [it's ok if <10 images are broken]" % img_path)
        else:
            data.append(image)
            labels.append(letter_to_i[class_i])

    if not data:
        raise ValueError(
            "no readable images for classes %r found under %r" % (letters, path)
        )

    # Stack to one array, add a singleton axis at position 1, then standardize.
    data = np.stack(data)[:, None].astype('float32')
    data = (data - np.mean(data)) / np.std(data)

    labels = np.array(labels)

    if one_hot:
        # Width comes from the number of requested classes, not from the
        # largest label seen, so the shape is stable when a class is missing.
        labels = (np.arange(len(letters))[None, :] == labels[:, None]).astype('float32')

    # Split into train/test with a fixed seed.
    X_train, X_test, y_train, y_test = train_test_split(
        data, labels, test_size=test_size, random_state=42
    )

    print("Done")
    return X_train, y_train, X_test, y_test
Loading

0 comments on commit 0e30b8d

Please sign in to comment.