#coding=utf-8
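"""Multi-stream bird detection on video.

For each frame, this script builds a motion representation from three
consecutive grayscale frames (three-frame differencing plus a decaying
accumulation of past differences), stacks it into a 3-channel motion image,
and feeds both the raw frame and the motion image to a two-input
YOLO-style detector.
"""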
import torch
import cv2
import numpy as np
from pathlib import Path
import argparse
import os
from utils.general import scale_coords, non_max_suppression
from utils.torch_utils import time_synchronized
from utils.datasets import letterbox
from utils.plots import colors, plot_one_box
from models.experimental import attempt_load


def adaptive_threshold(diff_image, max_val=225):
    # Adaptive thresholding follows local brightness, so the contours of
    # slow-moving objects survive where a single global threshold would not.
    adaptive_thresh = cv2.adaptiveThreshold(
        diff_image,
        max_val,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,  # Gaussian-weighted neighborhood mean
        cv2.THRESH_BINARY,
        5,  # blockSize: neighborhood used to compute the local threshold; tune as needed
        5   # constant C subtracted from the weighted mean; controls sensitivity
    )
    return adaptive_thresh


def process_video(opt, video_path, model):
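    """Run frame-differencing and detection on a single video.

    Args:
        opt: parsed command-line options (thresholds, image size, display flags).
        video_path: path to the input video file.
        model: loaded two-input detection model (RGB frame + motion image).
    """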
    # Open the video file (pass 0 instead of a path to use a webcam)
    cap = cv2.VideoCapture(video_path)
    # Read the first frame as the reference frame
    ret, first_frame = cap.read()
    while not ret:
        ret, first_frame = cap.read()
    # Convert the first frame to grayscale
    prev_gray = cv2.cvtColor(first_frame, cv2.COLOR_BGR2GRAY)
    # Read the second frame
    ret, second_frame = cap.read()
    while not ret:
        ret, second_frame = cap.read()
    # Convert it to grayscale
    gray = cv2.cvtColor(second_frame, cv2.COLOR_BGR2GRAY)
    # Difference between the second and first frames
    diff1 = cv2.absdiff(gray, prev_gray)
    # Initialize the accumulation buffer
    accumulated_diff = np.zeros_like(gray, dtype=np.float32)
    # Decay parameter for the running accumulation
    alpha = 0.5  # decay coefficient: how quickly old motion fades from the accumulation
    thresh = 0  # reserved for manual thresholding; adjust according to the noise level
    frame_count = 0
    curr_frame = second_frame
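    # Class names for the single-class detector (index 0 = 'bird')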
    names = ['bird']
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        frame_count += 1
        # Convert the next frame to grayscale
        next_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        # Difference between the next and current frames
        diff2 = cv2.absdiff(next_gray, gray)
        # Bitwise AND of the two differences (three-frame differencing)
        diff = cv2.bitwise_and(diff1, diff2)
        # Threshold and invert to suppress ghosting/trails
        diff = adaptive_threshold(diff)
        diff = cv2.bitwise_not(diff)
        # Update the accumulated image (exponential moving average:
        # acc = alpha * acc + (1 - alpha) * diff1) and min-max normalize it to [0, 255]
        accumulated_diff = cv2.addWeighted(accumulated_diff, alpha, diff1.astype(np.float32), 1 - alpha, 0)
        normalized_accumulated_diff_ = cv2.normalize(accumulated_diff, None, 0, 255, cv2.NORM_MINMAX)
        normalized_accumulated_diff_ = normalized_accumulated_diff_.astype(np.uint8)
        diff_expanded_ = np.expand_dims(normalized_accumulated_diff_, axis=2)
        # _, diff = cv2.threshold(diff, 25, 255, cv2.THRESH_BINARY)
        # accumulated_diff += diff.astype(np.float32)
        # Normalize the accumulated image
        # normalized_accumulated_diff = cv2.normalize(diff, None, 0, 255, cv2.NORM_MINMAX)
        # normalized_accumulated_diff = normalized_accumulated_diff.astype(np.uint8)
        diff = diff.astype(np.uint8)
        diff_expanded = np.expand_dims(diff, axis=2)
        img = letterbox(curr_frame, opt.img_size, stride=32)[0]
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, HWC to CHW
        img = np.ascontiguousarray(img)
        gray = np.expand_dims(gray, axis=2)
        two_channel_image = np.concatenate((gray, diff_expanded_), axis=2)
        three_channel_image = np.concatenate((two_channel_image, diff_expanded), axis=2)
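        # Motion image channel layout: 0 = current grayscale frame,
        # 1 = normalized accumulated difference, 2 = inverted thresholded difference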
        img1 = letterbox(three_channel_image, opt.img_size, stride=32)[0]
        img1 = img1[:, :, ::-1].transpose(2, 0, 1)  # reverse channel order, HWC to CHW
        img1 = np.ascontiguousarray(img1)
        # four_channel_image = np.concatenate((frame_rgb, diff_expanded_), axis=2)
        # five_channel_image = np.concatenate((four_channel_image, diff_expanded), axis=2)
        img = torch.from_numpy(img).cuda()
        img1 = torch.from_numpy(img1).cuda()
        img = img.half()  # uint8 to fp16
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)
        img1 = img1.half()  # uint8 to fp16
        img1 /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img1.ndimension() == 3:
            img1 = img1.unsqueeze(0)
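        # The model takes two aligned inputs: the letterboxed RGB frame (img)
        # and the letterboxed 3-channel motion image (img1)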
        t1 = time_synchronized()
        pred = model(img, img1, augment=False)[0]
        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()
        # Print time (inference + NMS)
        print(f'({t2 - t1:.6f}s, {1 / (t2 - t1):.6f}Hz)')
        for i, det in enumerate(pred):
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], curr_frame.shape).round()
                for *xyxy, conf, cls in reversed(det):
                    c = int(cls)  # integer class
                    label = None if opt.hide_labels else (names[c] if opt.hide_conf else f'{names[c]} {conf:.2f}')
                    plot_one_box(xyxy, curr_frame, label=label, color=colors(c, True), line_thickness=opt.line_thickness)
        cv2.imshow('YOLOv8 Inference', curr_frame)
        # Display the intermediate motion image
        # cv2.imshow("Accumulated Frame Difference", diff)
        video_name = Path(video_path).name
        video_name = Path(video_name).stem
        # cv2.imwrite(f'pictures/{video_name}_{frame_count:06}.jpg', curr_frame)
        # cv2.imwrite(f'/home/jia/bird/images/val/{video_name}_{frame_count:06}.tiff', four_channel_image)
        # tiff.imwrite(f'/home/jia/bird/images/train/{video_name}_{frame_count:06}.tiff', five_channel_image)
        # cv2.imwrite(f'/home/jia/bird/images/train/{video_name}_{frame_count:06}.jpg', curr_frame)
        # cv2.imwrite(f'/home/jia/bird/image/train/{video_name}_{frame_count:06}.jpg', three_channel_image)
        # Advance: the next frame becomes the current frame
        gray = next_gray
        diff1 = diff2
        curr_frame = frame
        # Press 'q' to exit the loop
        if cv2.waitKey(30) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', type=str, default='weights/yolov11_multi_v1/weights/best.pt', help='path to model weights')
    parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3')
    parser.add_argument('--conf-thres', type=float, default=0.4, help='object confidence threshold')
    parser.add_argument('--iou-thres', type=float, default=0.45, help='IOU threshold for NMS')
    parser.add_argument('--img-size', nargs='+', type=int, default=[736, 1280], help='inference size (pixels)')
    parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
    parser.add_argument('--line-thickness', default=2, type=int, help='bounding box thickness (pixels)')
    parser.add_argument('--hide-labels', default=False, action='store_true', help='hide labels')
    parser.add_argument('--hide-conf', default=True, action='store_true', help='hide confidences')
    opt = parser.parse_args()
    model = attempt_load(opt.weights, map_location='cuda')
    model.half()  # FP16 to match the half-precision inputs built in process_video
    video_dir = '/home/jia/anktechDrive/09_dataset/FBD-SV-2024/videos/val'
    for video in os.listdir(video_dir):
        video_path = os.path.join(video_dir, video)
        with torch.no_grad():
            process_video(opt, video_path, model)
    # video_path = '/home/jia/anktechDrive/12_原始数据/我奥赛事/20230912-175555/0.mp4'
    # process_video(opt, video_path, model)
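# Example invocation (paths are illustrative and depend on your setup):
#   python predict.py --weights weights/yolov11_multi_v1/weights/best.pt --conf-thres 0.4 --iou-thres 0.45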