# create_dataset.py
# Forked from Project-MONAI/tutorials.
# Copyright (c) MONAI Consortium
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import torch.distributed as dist
from monai.data import CacheDataset, DataLoader, load_decathlon_datalist, load_decathlon_properties, partition_dataset
from task_params import task_name
from transforms import get_task_transforms
def get_data(args, batch_size=1, mode="train"):
    """Load the task properties and build a ``DataLoader`` for one data split.

    Args:
        args: namespace-like object. Attributes read here: ``task_id``,
            ``root_dir``, ``datalist_path``, ``pos_sample_num``,
            ``neg_sample_num``, ``num_samples``, ``multi_gpu``; ``fold`` for
            the train/validation splits; ``cache_rate`` and
            ``train_num_workers`` for training; ``val_num_workers`` for
            validation/test.
        batch_size: batch size passed to the ``DataLoader``.
        mode: ``"train"``, ``"validation"`` or ``"test"``.

    Returns:
        A ``(properties, data_loader)`` tuple: the result of
        ``load_decathlon_properties`` for the task's datalist file and the
        ``DataLoader`` wrapping a ``CacheDataset`` of the selected split.

    Raises:
        ValueError: if ``mode`` is not one of the three supported values.
    """
    # Fail fast: reject an unknown mode before building transforms or reading
    # the datalist, so the caller gets this message rather than whatever error
    # a bogus mode/list key would trigger further down.
    if mode not in ("train", "validation", "test"):
        raise ValueError("mode should be train, validation or test.")

    is_train = mode == "train"
    task_id = args.task_id
    dataset_path = os.path.join(args.root_dir, task_name[task_id])

    transform = get_task_transforms(
        mode,
        task_id,
        args.pos_sample_num,
        args.neg_sample_num,
        args.num_samples,
    )

    # The test split has a single list; train/validation lists are per fold.
    list_key = "test" if mode == "test" else "{}_fold{}".format(mode, args.fold)
    datalist_file = os.path.join(
        args.datalist_path, "dataset_task{}.json".format(task_id)
    )

    property_keys = [
        "name",
        "description",
        "reference",
        "licence",
        "tensorImageSize",
        "modality",
        "labels",
        "numTraining",
        "numTest",
    ]

    datalist = load_decathlon_datalist(datalist_file, True, list_key, dataset_path)
    properties = load_decathlon_properties(datalist_file, property_keys)

    if args.multi_gpu:
        # Each rank keeps only its own partition. Training partitions are
        # shuffled and requested as evenly divisible; validation/test
        # partitions keep their order and are not forced to equal length.
        datalist = partition_dataset(
            data=datalist,
            shuffle=is_train,
            num_partitions=dist.get_world_size(),
            even_divisible=is_train,
        )[dist.get_rank()]

    if is_train:
        train_ds = CacheDataset(
            data=datalist,
            transform=transform,
            num_workers=8,
            cache_rate=args.cache_rate,
        )
        data_loader = DataLoader(
            train_ds,
            batch_size=batch_size,
            shuffle=True,
            num_workers=args.train_num_workers,
            drop_last=True,
        )
    else:
        # Validation and test share the same settings: the dataset is built
        # without a cache_rate argument and the loader does not shuffle.
        val_ds = CacheDataset(
            data=datalist,
            transform=transform,
            num_workers=4,
        )
        data_loader = DataLoader(
            val_ds,
            batch_size=batch_size,
            shuffle=False,
            num_workers=args.val_num_workers,
        )

    return properties, data_loader