forked from snapfinger/pancreas-seg
-
Notifications
You must be signed in to change notification settings - Fork 0
/
data.py
127 lines (96 loc) · 4.06 KB
/
data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
"""
This code is to
1. Create train & test input to Network as numpy arrays
2. Load the train & test numpy arrays
"""
import numpy as np
from utils import *
# NumPy dtype of the image and mask arrays allocated in create_train_data
# (and therefore of the .npy files it saves)
npdtype = np.float32
def create_train_data(current_fold, plane):
    """
    Build and save the training image/mask arrays for one fold and plane.

    Every selected slice is cropped to the bounding box of its ground-truth
    mask (enlarged by `margin` pixels on each side, clamped to the slice),
    passed through `pad_2d` to reach a uniform size, and stored in one
    array. Image intensities are rescaled linearly with
    (value - low_range) / (high_range - low_range); no clipping is applied
    here.

    Reads the module-level settings XMAX, YMAX, ZMAX, margin, organ_ID,
    low_range and high_range, plus `training_set_filename`, `list_training`
    and `pad_2d`, which are expected to come from `utils`.

    Parameters
    ----------
    current_fold : fold identifier; passed to `training_set_filename` and
        used in the output file names.
    plane : key into `list_training`; also forwarded to `pad_2d` and used
        in the output file names.

    Output
    ------
    Writes 'imgs_train_<fold>_<plane>.npy' and
    'masks_train_<fold>_<plane>.npy' to the current working directory.
    Returns None.
    """
    # Image numbers of this fold: the first space-separated token per line.
    with open(training_set_filename(current_fold), 'r') as fold_file:
        training_image_set = {
            int(line.split(' ')[0]) for line in fold_file.read().splitlines()
        }

    with open(list_training[plane], 'r') as list_file:
        slice_list = list_file.read().splitlines()

    # Keep the slices that belong to a training image and whose value in
    # column organ_ID * 5 is at least 100.
    # NOTE(review): that column is presumably the organ's pixel count in
    # the slice - confirm against the code that writes the slice list.
    create_slice_list = []
    create_label_list = []
    for line in slice_list:
        s = line.split(' ')
        image_id = int(s[0])
        pixels = int(s[organ_ID * 5])
        if image_id in training_image_set and pixels >= 100:
            create_slice_list.append(s[2])   # image file name
            create_label_list.append(s[3])   # label file name

    total = len(create_slice_list)
    img_rows = XMAX
    img_cols = YMAX

    # Every entry is overwritten in the loop below, so uninitialised
    # storage is sufficient.
    imgs = np.ndarray((total, img_rows, img_cols), dtype=npdtype)
    imgs_mask = np.ndarray((total, img_rows, img_cols), dtype=npdtype)

    print('-'*30)
    print('  Creating training data...')
    print('-'*30)

    for i, (slice_file, label_file) in enumerate(
            zip(create_slice_list, create_label_list)):
        cur_im = np.load(slice_file)
        cur_mask = np.load(label_file)

        cur_im = (cur_im - low_range) / float(high_range - low_range)

        # Bounding box of the non-zero mask pixels.
        # NOTE(review): an all-zero mask would raise here; the >= 100 filter
        # above presumably rules that out - confirm.
        arr = np.nonzero(cur_mask)
        width = cur_mask.shape[0]
        height = cur_mask.shape[1]

        # Enlarge the box by `margin` and clamp it to the slice extent.
        row_start = max(arr[0].min() - margin, 0)
        row_stop = min(arr[0].max() + margin + 1, width)
        col_start = max(arr[1].min() - margin, 0)
        col_stop = min(arr[1].max() + margin + 1, height)

        cropped_im = cur_im[row_start:row_stop, col_start:col_stop]
        cropped_mask = cur_mask[row_start:row_stop, col_start:col_stop]

        imgs[i] = pad_2d(cropped_im, plane, 0, XMAX, YMAX, ZMAX)
        imgs_mask[i] = pad_2d(cropped_mask, plane, 0, XMAX, YMAX, ZMAX)

        if i % 100 == 0:
            print('Done: {0}/{1} slices'.format(i, total))

    np.save('imgs_train_%s_%s.npy'%(current_fold, plane), imgs)
    np.save('masks_train_%s_%s.npy'%(current_fold, plane), imgs_mask)
    print('Training data created for fold %s, plane %s'%(current_fold, plane))
def load_train_data(current_fold, plane):
    """
    Load the saved training arrays for the given fold and plane.

    Reads 'imgs_train_<fold>_<plane>.npy' and
    'masks_train_<fold>_<plane>.npy' from the current working directory
    and returns them as the tuple (images, masks).
    """
    name_parts = (current_fold, plane)
    return (
        np.load('imgs_train_%s_%s.npy' % name_parts),
        np.load('masks_train_%s_%s.npy' % name_parts),
    )
def load_test_data(current_fold, plane):
    """
    Load the saved test arrays for the given fold and plane.

    Reads 'imgs_test_<fold>_<plane>.npy' and
    'masks_test_<fold>_<plane>.npy' from the current working directory
    and returns them as the tuple (images, masks).
    """
    imgs_test = np.load('imgs_test_%s_%s.npy'%(current_fold, plane))
    # The mask pattern previously contained '$s' instead of '%s', leaving one
    # placeholder for two arguments, so the formatting raised TypeError.
    mask_test = np.load('masks_test_%s_%s.npy'%(current_fold, plane))
    return imgs_test, mask_test
if __name__ == '__main__':
    # Script entry point: read the run configuration from the command line
    # into module-level names (create_train_data reads them as globals),
    # then build the training arrays.

    # Imported explicitly so the script does not depend on `sys` leaking
    # through `from utils import *`.
    import sys

    if len(sys.argv) < 11:
        sys.exit('usage: python data.py data_path current_fold plane '
                 'ZMAX YMAX XMAX margin organ_ID low_range high_range')

    # NOTE(review): data_path is not read anywhere in this file.
    data_path = sys.argv[1]
    current_fold = int(sys.argv[2])
    plane = sys.argv[3]

    # dim of each case (after padding zeros to max gt bounding box)
    ZMAX = int(sys.argv[4])
    YMAX = int(sys.argv[5])
    XMAX = int(sys.argv[6])

    # number of pixels added around the ground-truth bounding box when cropping
    margin = int(sys.argv[7])
    # selects the column (organ_ID * 5) read from each slice-list line
    organ_ID = int(sys.argv[8])
    # bounds used for the linear intensity rescaling
    low_range = int(sys.argv[9])
    high_range = int(sys.argv[10])

    create_train_data(current_fold, plane)