# webcam_detection.py
import cv2
import time
import numpy as np
import core.utils as utils
import tensorflow as tf  # uses the TF 1.x frozen-graph / tf.Session API
from PIL import Image
# The 80 COCO class names, in the index order used by the model.
obj_classes = [
    "person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train",
    "truck", "boat", "traffic light", "fire hydrant", "stop sign",
    "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
    "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag",
    "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite",
    "baseball bat", "baseball glove", "skateboard", "surfboard",
    "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon",
    "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot",
    "hot dog", "pizza", "donut", "cake", "chair", "sofa", "pottedplant",
    "bed", "diningtable", "toilet", "tvmonitor", "laptop", "mouse", "remote",
    "keyboard", "cell phone", "microwave", "oven", "toaster", "sink",
    "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
    "hair drier", "toothbrush",
]
# Input and output tensor names inside the frozen YOLOv3 graph.
return_elements = ["input/input_data:0", "pred_sbbox/concat_2:0",
                   "pred_mbbox/concat_2:0", "pred_lbbox/concat_2:0"]
pb_file = "./yolov3_coco.pb"
# video_path = "./docs/images/road.mp4"
video_path = 0  # 0 = default webcam
num_classes = 80
input_size = 416
graph = tf.Graph()
return_tensors = utils.read_pb_return_tensors(graph, pb_file, return_elements)
vid = cv2.VideoCapture(video_path)
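# Defensive check (an addition, not in the original script): VideoCapture
# does not raise on failure, so verify the source actually opened.
if not vid.isOpened():
    raise RuntimeError("Could not open video source: %r" % (video_path,))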
def detection(vid):
    """Run YOLOv3 on one frame from `vid`; return the annotated BGR frame
    and a per-class detection count."""
    # Note: opening a fresh Session on every call is expensive; for
    # real-time use, create the Session once and reuse it across frames.
    with tf.Session(graph=graph) as sess:
        return_value, frame = vid.read()
        if return_value:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(frame)
        else:
            raise ValueError("No image!")
        frame_size = frame.shape[:2]
        # Letterbox-resize to the network input size ("image_preporcess"
        # is the helper's actual spelling in core/utils.py).
        image_data = utils.image_preporcess(np.copy(frame), [input_size, input_size])
        image_data = image_data[np.newaxis, ...]
        prev_time = time.time()
        # Raw predictions from the small-, medium- and large-scale heads.
        pred_sbbox, pred_mbbox, pred_lbbox = sess.run(
            [return_tensors[1], return_tensors[2], return_tensors[3]],
            feed_dict={return_tensors[0]: image_data})
        pred_bbox = np.concatenate([np.reshape(pred_sbbox, (-1, 5 + num_classes)),
                                    np.reshape(pred_mbbox, (-1, 5 + num_classes)),
                                    np.reshape(pred_lbbox, (-1, 5 + num_classes))], axis=0)
        # Drop boxes below score 0.3, then non-maximum suppression at IoU 0.45.
        bboxes = utils.postprocess_boxes(pred_bbox, frame_size, input_size, 0.3)
        bboxes = utils.nms(bboxes, 0.45, method='nms')
        image, detected = utils.draw_bbox(frame, bboxes)
        detected = np.asarray(detected)
        print("------- frame ---------")  # per-frame debug separator
        # Count detections per class; column 5 of each detection row holds
        # the class index.
        class_count = np.zeros(len(obj_classes))
        for det in detected:
            class_count[int(det[5])] += 1
        exec_time = time.time() - prev_time
        info = "time: %.2f ms" % (1000 * exec_time)  # inference time, if you want to overlay it
        # cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE)
        result = cv2.cvtColor(np.asarray(image), cv2.COLOR_RGB2BGR)
        return result, class_count
if __name__ == "__main__":
    while True:
        result, class_count = detection(vid)  # detection() returns a tuple
        cv2.imshow("result", result)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    vid.release()
    cv2.destroyAllWindows()
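
# A small convenience sketch, not part of the original script: it turns the
# class_count vector returned by detection() into a readable {name: count}
# dict. `summarize_counts` is a hypothetical helper name; it only assumes
# class_count follows the same index order as obj_classes above.
def summarize_counts(class_count):
    return {obj_classes[i]: int(n) for i, n in enumerate(class_count) if n > 0}

# Example use inside the loop above:
#     print(summarize_counts(class_count))  # e.g. {'person': 2, 'dog': 1}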