-
Notifications
You must be signed in to change notification settings - Fork 5
/
dataset.py
102 lines (92 loc) · 3.41 KB
/
dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
from typing import Dict
import ml_collections
import torch
import torchvision
from datasets import MNIST_rot, PCam
def _normalized_tensor_steps(mean, stddev) -> list:
    """Return fresh ``[ToTensor(), Normalize(mean, stddev)]`` transform steps."""
    return [
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(mean, stddev),
    ]


def _make_loader(
    split, batch_size: int, num_workers: int, *, shuffle: bool
) -> torch.utils.data.DataLoader:
    """Wrap one dataset split in a ``DataLoader``."""
    return torch.utils.data.DataLoader(
        split,
        batch_size=batch_size,
        shuffle=shuffle,
        num_workers=num_workers,
    )


def get_dataset(
    config: ml_collections.ConfigDict, num_workers: int = 4, data_root: str = "./data"
) -> Dict[str, torch.utils.data.DataLoader]:
    """
    Create dataloaders for the chosen dataset.

    :param config: configuration object. Reads ``dataset`` (case-insensitive, one of
        "cifar10", "mnist", "rotmnist", "pcam"), ``batch_size`` and, for CIFAR only,
        ``augment``.
    :param num_workers: forwarded to every ``DataLoader``.
    :param data_root: passed as ``root`` to the dataset constructors.
    :return: {'train': training_loader, 'validation': validation_loader, 'test': test_loader}.
        Only PCam gets a separate validation split; for every other dataset the
        'validation' entry is the very same loader object as 'test'.
    :raises KeyError: if ``config.dataset`` is not one of the supported names.
    :raises ValueError: if a registered dataset has no normalisation statistics below.
    """
    registry = {
        "cifar10": torchvision.datasets.CIFAR10,
        "mnist": torchvision.datasets.MNIST,
        "rotmnist": MNIST_rot,
        "pcam": PCam,
    }
    # Normalise the name once; every later check reuses it.
    name = config["dataset"].lower()
    if name not in registry:
        # Same exception type as the plain dict lookup would raise, but with a
        # message that tells the user what went wrong and what is accepted.
        raise KeyError(
            f"Unknown dataset '{config['dataset']}'; supported datasets: {sorted(registry)}"
        )
    dataset = registry[name]

    # Normalisation statistics are chosen by substring so that e.g. "rotmnist"
    # shares the MNIST values.
    if "cifar" in name:
        data_mean = (0.4914, 0.4822, 0.4465)
        data_stddev = (0.2023, 0.1994, 0.2010)
    elif "mnist" in name:
        data_mean = (0.1307,)
        data_stddev = (0.3081,)
    elif "pcam" in name:
        data_mean = (0.701, 0.538, 0.692)
        data_stddev = (0.235, 0.277, 0.213)
    else:
        raise ValueError(f"Unkown preprocessing for datasets '{config.dataset}'")

    # Augmentation (random 32px crop with padding 4 + horizontal flip) is only
    # applied to the CIFAR training split, and only when config.augment is set.
    # config.augment is deliberately not read for the other datasets.
    train_steps = _normalized_tensor_steps(data_mean, data_stddev)
    if "cifar" in name and config.augment:
        train_steps = [
            torchvision.transforms.RandomCrop(32, padding=4),
            torchvision.transforms.RandomHorizontalFlip(),
        ] + train_steps
    transform_train = torchvision.transforms.Compose(train_steps)
    transform_test = torchvision.transforms.Compose(
        _normalized_tensor_steps(data_mean, data_stddev)
    )

    training_set = dataset(root=data_root, train=True, download=True, transform=transform_train)
    test_set = dataset(root=data_root, train=False, download=True, transform=transform_test)

    training_loader = _make_loader(training_set, config.batch_size, num_workers, shuffle=True)
    test_loader = _make_loader(test_set, config.batch_size, num_workers, shuffle=False)
    dataloaders = {"train": training_loader, "test": test_loader}

    if "pcam" in name:
        # PCam accepts a `valid` flag selecting its validation split; no download
        # is requested here because the train/test construction above already did.
        validation_set = dataset(
            root=data_root, train=False, valid=True, download=False, transform=transform_test
        )
        dataloaders["validation"] = _make_loader(
            validation_set, config.batch_size, num_workers, shuffle=False
        )
    else:
        # No dedicated validation split: reuse the test loader.
        dataloaders["validation"] = test_loader

    return dataloaders