forked from leopd/SFD_pytorch
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdetect_faces.py
executable file
·73 lines (65 loc) · 2.77 KB
/
detect_faces.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import time
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.autograd import Variable
from typing import List, Tuple
torch.backends.cudnn.benchmark = True
import os,sys,cv2,random,datetime,time,math
import argparse
import numpy as np
from bbox import decode, nms
def detect_faces(net:nn.Module, img:np.ndarray, minscale:int=3, ovr_threshhold:float=0.3,
                 score_threshhold:float=0.5) -> List[Tuple]:
    """Return a list of tuples describing face bounding boxes: (x1, y1, x2, y2, score).

    Runs the detector over the image, suppresses overlapping candidates with NMS,
    then drops detections whose confidence falls below ``score_threshhold``.
    Setting minscale to 0 finds the smallest faces, but takes the longest.

    Args:
        net: the trained SFD detection network.
        img: input image as an HxWxC numpy array (as loaded by cv2 -- TODO confirm BGR).
        minscale: smallest feature-map scale to search (0 = smallest faces, slowest).
        ovr_threshhold: IoU overlap threshold passed to bbox.nms.
        score_threshhold: minimum confidence for a detection to be kept.

    Returns:
        List of (int x1, int y1, int x2, int y2, score) tuples.
    """
    bboxlist = detect(net, img, minscale)
    keep_idx = nms(bboxlist, ovr_threshhold)
    bboxlist = bboxlist[keep_idx, :]
    out = []
    for b in bboxlist:
        x1, y1, x2, y2, s = b
        # BUG FIX: previously compared against a hard-coded 0.5, which silently
        # ignored the score_threshhold parameter; now the parameter is honored.
        # (Default of 0.5 preserves the original behavior for existing callers.)
        if s < score_threshhold:
            continue
        out.append((int(x1), int(y1), int(x2), int(y2), s))
    return out
def detect(net:nn.Module, img:np.ndarray, minscale:int=3) -> torch.Tensor:
    """returns an Nx5 tensor describing bounding boxes: [x1,y1,x2,y2,score].
    This will have LOTS of similar/overlapping regions. Need to call bbox.nms to reconcile them.
    Setting minscale to 0 finds the smallest faces, but takes the longest.

    NOTE(review): this function moves the input to the GPU unconditionally
    (``.cuda()``) and so will raise on a CPU-only machine.
    """
    # Preprocess: subtract per-channel means (104,117,123 -- Caffe-style values;
    # presumably BGR order matching cv2 loading, verify against training pipeline),
    # then convert HxWxC -> 1xCxHxW and wrap for the network forward pass.
    img = img - np.array([104,117,123])
    img = img.transpose(2, 0, 1)
    img = img.reshape((1,)+img.shape)
    img = Variable(torch.from_numpy(img).float()).cuda()
    BB,CC,HH,WW = img.size()
    # The network is assumed to return an even-length list alternating
    # [class_scores, box_regressions] per detection scale -- TODO confirm
    # against the SFD model definition.
    olist = net(img)
    bboxlist = []
    # One iteration per scale, skipping the finest scales below minscale.
    for i in range(minscale, len(olist)//2):
        ocls = F.softmax(olist[i*2], dim=1).data   # per-location class probabilities
        oreg = olist[i*2+1].data                   # per-location box regression offsets
        FB,FC,FH,FW = ocls.size() # feature map size
        stride = 2**(i+2) # 4,8,16,32,64,128
        anchor = stride*4  # anchor size for this scale (currently unused below)
        # this workload is small enough that it's faster on CPU than GPU (~55ms vs ~65ms)
        # but most of that time (40ms) is spend moving the data from GPU to CPU.
        all_scores = ocls[0,1,:,:].cpu()   # channel 1 = "face" probability map
        oreg = oreg.cpu()
        # instead of running a sliding window, first find the places where score is big enough to bother
        bigenough = torch.nonzero(all_scores > 0.05)
        for hindex, windex in bigenough:
            score = all_scores[hindex,windex]
            loc = oreg[0,:,hindex,windex].contiguous().view(1,4)
            # Anchor center in input-image pixel coordinates for this feature cell.
            axc,ayc = stride/2+windex*stride,stride/2+hindex*stride
            # Prior box as [cx, cy, w, h]; anchor side length is 4*stride.
            priors = torch.Tensor([[axc/1.0,ayc/1.0,stride*4/1.0,stride*4/1.0]])
            # SSD-style decode variances (center, size) -- see bbox.decode.
            variances = [0.1,0.2]
            box = decode(loc,priors,variances)
            x1,y1,x2,y2 = box[0]*1.0
            # cv2.rectangle(imgshow,(int(x1),int(y1)),(int(x2),int(y2)),(0,0,255),1)
            bboxlist.append([x1,y1,x2,y2,score])
    # Guarantee a non-empty Nx5 tensor so callers (e.g. nms, row slicing) don't
    # have to special-case "no detections"; the all-zero row scores 0 and is
    # filtered out downstream.
    if len(bboxlist) == 0:
        bboxlist=torch.zeros((1, 5))
    bboxlist = torch.Tensor(bboxlist)
    return bboxlist