Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor face extraction and alignment logic; enhance bounding box transformation and facial landmark adjustments #1414

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
152 changes: 107 additions & 45 deletions deepface/modules/detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,7 @@ def extract_face(
mouth_left = facial_area.mouth_left
mouth_right = facial_area.mouth_right

# Expand the facial area if needed
if expand_percentage > 0:
# Expand the facial region height and width by the provided percentage
# ensuring that the expanded region stays within img.shape limits
Expand All @@ -300,48 +301,53 @@ def extract_face(
# extract detected face unaligned
detected_face = img[int(y) : int(y + h), int(x) : int(x + w)]
# align original image, then find projection of detected face area after alignment
if align is True: # and left_eye is not None and right_eye is not None:

if align:
# we were aligning the original image before, but this comes with an extra cost
# instead we now focus on the facial area with a margin
# and align it instead of original image to decrese the cost
# and align it instead of original image to decrease the cost
# 1. Extract sub-image (bigger region) around face
sub_img, relative_x, relative_y = extract_sub_image(img=img, facial_area=(x, y, w, h))

aligned_sub_img, angle = align_img_wrt_eyes(
# 2. Align sub_img wrt eyes
aligned_sub_img, angle, M, new_w, new_h = align_img_wrt_eyes(
img=sub_img, left_eye=left_eye, right_eye=right_eye
)

rotated_x1, rotated_y1, rotated_x2, rotated_y2 = project_facial_area(
facial_area=(
relative_x,
relative_y,
relative_x + w,
relative_y + h,
),
angle=angle,
size=(sub_img.shape[0], sub_img.shape[1]),
# 3. Transform the bounding box (relative_x, relative_y, w, h)
# in sub_img space => bounding box corners => then rotate them
x1_rel = relative_x
y1_rel = relative_y
x2_rel = x1_rel + w
y2_rel = y1_rel + h

(rotated_x1, rotated_y1, rotated_x2, rotated_y2) = transform_bounding_box(
x1_rel, y1_rel, x2_rel, y2_rel, M, new_w, new_h
)
detected_face = aligned_sub_img[
int(rotated_y1) : int(rotated_y2), int(rotated_x1) : int(rotated_x2)
]

# do not spend memory for these temporary variables anymore
del aligned_sub_img, sub_img
# 4. Crop from rotated image
detected_face = aligned_sub_img[rotated_y1:rotated_y2, rotated_x1:rotated_x2]

# restore x, y, le and re before border added
x = x - width_border
y = y - height_border
# w and h will not change
if left_eye is not None:
left_eye = (left_eye[0] - width_border, left_eye[1] - height_border)
if right_eye is not None:
right_eye = (right_eye[0] - width_border, right_eye[1] - height_border)
if nose is not None:
nose = (nose[0] - width_border, nose[1] - height_border)
if mouth_left is not None:
mouth_left = (mouth_left[0] - width_border, mouth_left[1] - height_border)
if mouth_right is not None:
mouth_right = (mouth_right[0] - width_border, mouth_right[1] - height_border)
# 5. Adjust original facial_area's top-left in case we have added borders
x -= width_border
y -= height_border

# 6. Re-position facial landmarks
def shift_landmark(landmark):
if landmark is not None:
return (landmark[0] - width_border, landmark[1] - height_border)
return None

left_eye = shift_landmark(left_eye)
right_eye = shift_landmark(right_eye)
nose = shift_landmark(nose)
mouth_left = shift_landmark(mouth_left)
mouth_right = shift_landmark(mouth_right)

# free memory from big arrays
del aligned_sub_img, sub_img

# Return final DetectedFace
return DetectedFace(
img=detected_face,
facial_area=FacialAreaRegion(
Expand Down Expand Up @@ -412,38 +418,94 @@ def extract_sub_image(
return extracted_face, relative_x, relative_y


def transform_bounding_box(
    x1: float, y1: float, x2: float, y2: float, M: np.ndarray, new_w: int, new_h: int
) -> Tuple[int, int, int, int]:
    """
    Rotate the four corners of an axis-aligned box with the affine matrix M,
    then return the axis-aligned bounds (min_x, min_y, max_x, max_y) of the
    rotated corners, clamped to the rotated image of size (new_w, new_h).
    """
    # four corners, augmented with a homogeneous 1 so M (2x3) can also translate
    pts = np.array([[x1, y1], [x2, y1], [x2, y2], [x1, y2]], dtype=np.float32)
    ones = np.ones((4, 1), dtype=np.float32)
    rotated = np.hstack([pts, ones]) @ M.T  # shape (4, 2)

    low = rotated.min(axis=0)
    high = rotated.max(axis=0)

    # keep the resulting box inside the rotated image boundaries
    min_x = max(0, low[0])
    min_y = max(0, low[1])
    max_x = min(new_w, high[0])
    max_y = min(new_h, high[1])

    return int(min_x), int(min_y), int(max_x), int(max_y)


def align_img_wrt_eyes(
    img: np.ndarray,
    left_eye: Optional[Union[list, tuple]],
    right_eye: Optional[Union[list, tuple]],
) -> Tuple[np.ndarray, float, np.ndarray, int, int]:
    """
    Align a given image horizontally with respect to the left and right eye locations.

    The canvas is enlarged so the rotated image is not cropped, and the rotation
    matrix is returned so callers can project points into the rotated space.

    Args:
        img (np.ndarray): pre-loaded image with detected face
        left_eye (list or tuple): coordinates of left eye with respect to the person itself
        right_eye (list or tuple): coordinates of right eye with respect to the person itself
    Returns:
        rotated_img (np.ndarray): the rotated image
        angle (float): the rotation angle in degrees
        M (np.ndarray): the 2x3 affine rotation matrix (cv2 convention)
        new_w (int): the width of the rotated image
        new_h (int): the height of the rotated image
    """
    # if eyes are missing or image has nil dimensions, do nothing
    if left_eye is None or right_eye is None or img.shape[0] == 0 or img.shape[1] == 0:
        # identity affine is 2x3 to stay consistent with cv2.getRotationMatrix2D output
        return img, 0.0, np.eye(2, 3, dtype=np.float32), img.shape[1], img.shape[0]

    # compute rotation angle from the eye line
    dy = left_eye[1] - right_eye[1]
    dx = left_eye[0] - right_eye[0]
    angle = float(np.degrees(np.arctan2(dy, dx)))

    (h, w) = img.shape[:2]
    center = (w // 2, h // 2)

    # get initial rotation matrix around the image center
    M = cv2.getRotationMatrix2D(center, angle, 1.0)

    # absolute cos/sin of the rotation, read off the matrix itself
    cos_val = np.abs(M[0, 0])
    sin_val = np.abs(M[0, 1])

    # new bounding dimensions so the rotated image fits without cropping
    new_w = int((h * sin_val) + (w * cos_val))
    new_h = int((h * cos_val) + (w * sin_val))

    # adjust the rotation matrix translation for the enlarged canvas
    M[0, 2] += (new_w / 2) - center[0]
    M[1, 2] += (new_h / 2) - center[1]

    # rotate the image onto the resized canvas
    rotated_img = cv2.warpAffine(
        img,
        M,
        (new_w, new_h),
        flags=cv2.INTER_CUBIC,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=(0, 0, 0),
    )

    return rotated_img, angle, M, new_w, new_h


def project_facial_area(
Expand Down
Loading