-
Notifications
You must be signed in to change notification settings - Fork 14
/
Copy pathconvert_json_to_csv.py
113 lines (100 loc) · 3.91 KB
/
convert_json_to_csv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import json
import pandas as pd
import random
def get_annotations(metadata):
    """Group COCO annotations by the image they belong to.

    Args:
        metadata: COCO-style dictionary holding an "annotations" list (each
            entry has an "image_id") and an "images" list (each entry has
            an "id").

    Returns:
        A tuple ``(annotations_dict, missing_annotation_count)``.
        ``annotations_dict`` maps each image id to the list of its distinct
        annotations in first-seen order; images that have no annotation at
        all are present with an empty list. ``missing_annotation_count`` is
        the number of such annotation-less images.
    """
    grouped = {}
    for entry in metadata["annotations"]:
        bucket = grouped.setdefault(entry["image_id"], [])
        # Equality test drops annotation records that are exact duplicates.
        if entry not in bucket:
            bucket.append(entry)

    # Register images that never showed up in the annotation list.
    unannotated = 0
    for image in metadata["images"]:
        if image["id"] in grouped:
            continue
        unannotated += 1
        grouped[image["id"]] = []

    return grouped, unannotated
def _annotation_to_row(annotation, image_dir, rev_label_map):
    """Convert one COCO annotation into an (path, xmin, ymin, xmax, ymax, label) row."""
    # str-formatting (instead of "+") also accepts integer image ids.
    img_path = f"{image_dir}/{annotation['image_id']}.jpg"
    label = rev_label_map[annotation["category_id"]]
    # The bbox is unpacked as [x, y, width, height]; the CSV stores corners.
    x, y, w, h = annotation["bbox"]
    return (img_path, x, y, w + x, h + y, label)


def create_csv_files(
    output_folder,
    coco_json="master-coco/coco.json",
    image_dir="master-coco",
    test_size=400,
    seed=6,
):
    """Split a COCO annotation file into train/test bounding-box CSV files.

    Images are shuffled with a fixed seed; the first ``test_size`` shuffled
    image ids form the test split and the remainder the train split. One CSV
    row is written per bounding box, without header or index column, as
    ``image_path, xmin, ymin, xmax, ymax, label``.

    Args:
        output_folder (str): Directory receiving ``train_labels.csv`` and
            ``test_labels.csv``. It is created when it does not exist.
        coco_json (str): Path of the COCO annotation JSON file to read.
        image_dir (str): Directory prefix used to build each image path
            (``<image_dir>/<image_id>.jpg``).
        test_size (int): Number of images assigned to the test split.
        seed (int): Seed passed to ``random.seed`` before shuffling.

    Raises:
        KeyError: If an annotation uses a ``category_id`` that is not in the
            label map built here (0 = background, 1 = cobia).
        OSError: If ``coco_json`` cannot be read or the CSVs cannot be written.
    """
    import os  # local import: the file's top-level imports do not include os

    # NOTE(review): annotations of this image are dropped from the test split
    # (the original code printed its bbox and broke out of the loop). It looks
    # like a debugging leftover or a known-bad sample; kept so the generated
    # CSVs stay unchanged -- confirm whether it is still wanted.
    skipped_test_image = "b72cc30b-9235-47a9-8bd8-8dc098091bfa"

    custom_labels = {"cobia"}
    label_map = {k: v + 1 for v, k in enumerate(custom_labels)}
    label_map["background"] = 0
    rev_label_map = {v: k for k, v in label_map.items()}  # Inverse mapping

    # Context manager closes the file handle; JSON text is UTF-8 by spec.
    with open(coco_json, encoding="utf-8") as handle:
        coco = json.load(handle)
    annotations_dict, _ = get_annotations(coco)

    keys = list(annotations_dict)
    print("Total images::", len(keys))

    random.seed(seed)
    random.shuffle(keys)
    # A set gives O(1) membership tests; every key not in it is a train image.
    test_images = set(keys[:test_size])

    train_data = []
    test_data = []
    # Iterate in the dict's own order so the row order does not depend on the
    # shuffle, only the train/test assignment does.
    for key, annotations in annotations_dict.items():
        is_test = key in test_images
        rows = test_data if is_test else train_data
        for annotation in annotations:
            row = _annotation_to_row(annotation, image_dir, rev_label_map)
            if is_test and key == skipped_test_image:
                print(annotation["bbox"], len(annotations))
                break
            rows.append(row)

    train_df = pd.DataFrame(train_data)
    test_df = pd.DataFrame(test_data)
    print("Train data::", train_df.shape, " Test data::", test_df.shape)

    # to_csv refuses to write into a missing directory, so create it first.
    os.makedirs(output_folder, exist_ok=True)
    train_df.to_csv(
        os.path.join(output_folder, "train_labels.csv"), index=False, header=False
    )
    test_df.to_csv(
        os.path.join(output_folder, "test_labels.csv"), index=False, header=False
    )
# Script entry point: writes train_labels.csv and test_labels.csv into "dataset".
# NOTE(review): this call is not behind an `if __name__ == "__main__":` guard,
# so it also runs whenever this module is imported.
create_csv_files("dataset")