Phát hiện cử động khuôn mặt bằng Facemesh

Thư viện Mediapipe cung cấp tính năng Facemesh detection rất nhanh & chính xác. Hôm nay mình sẽ giới thiệu về khả năng, cách dùng & source code để detect facemesh.

Theo lý thuyết Mediapipe detect được 468 điểm trên khuôn mặt theo như hình bên dưới. Click vào ảnh để xem full size (4096 x 4096px) và index của các điểm.

Dựa vào các điểm đó các bạn có thể biết được 1 số thứ như:

Mắt nhắm hay mở: dùng cho phát hiện người buồn ngủ
Miệng đóng hay mở: đang cười hoặc nói
Xoay trái hoặc xoay phải
…

Ứng dụng FaceComp sử dụng mediapipe để phát hiện khi khuôn mặt nhìn trực diện thì mới nhận diện

Cài đặt thư viện

Sử dụng Python 3.6 trở lên trên Windows 10 hoặc Ubuntu 18.04 (OS khác chưa test)
pip install mediapipe

Bước 1: tìm landmarks

Landmark là tập hợp các điểm trên khuôn mặt trong tọa độ 3D (x, y, z), tìm landmarks (có s, số nhiều) là tìm nhiều khuôn mặt trong ảnh nếu có.

Thuật toán sẽ map tọa độ theo hệ trục 3D có nghĩa là bạn sẽ có thêm trục Z, dùng để tính được khoảng cách xa gần. Sau khi tìm được landmarks chúng ta vẽ tọa độ bằng function có sẵn của mediapipe.

Bước 2: convert landmark to points để tính toán

Trong bài này mình sẽ làm ví dụ mẫu về tính toán góc xoay trái phải của khuôn mặt. Tương tự các bạn có thể xác định mở mắt hay nhắm mắt, mở miệng hay không.

Để xác định xoay hướng nào cần xác định 2 nửa khuôn mặt trái và phải tính từ mũi (center)

Tra cứu theo coordinate ta có điểm trung tâm (lỗ mũi) là 5, má trái là 234 và má phải là 454.

So sánh giá trị trái phải, lấy giá trị lớn hơn chia cho giá trị nhỏ hơn ta được tỷ lệ. Tùy theo nhu cầu sử dụng các bạn có thể set ngưỡng cho phù hợp từ 2.0 -> 3.0 (hoặc tùy ý)

def DetectDirection(landmark, threshold=2.5):
    """Classify the horizontal turn of a face as "left", "right" or "straight".

    The distance from landmark 5 (the nose/center point in the article) to
    landmark 234 (left cheek) and to landmark 454 (right cheek) is measured
    with CalcDistance, and the longer side is compared against the shorter one.

    Args:
        landmark: indexable collection of face landmarks; indices 5, 234 and
            454 are read.
        threshold: how many times longer one side must be than the other
            before the face counts as turned. Defaults to 2.5; the article
            suggests tuning it between 2.0 and 3.0.

    Returns:
        "left" when the right side is more than ``threshold`` times the left
        side, "right" for the opposite case, otherwise "straight".
    """
    left = CalcDistance(landmark[5], landmark[234])
    right = CalcDistance(landmark[5], landmark[454])

    result = "straight"

    # The ratio test is written as a multiplication so that a zero-length
    # shorter side (fully turned face) is classified instead of raising
    # ZeroDivisionError.
    if left < right:
        if right > threshold * left:
            result = "left"
    elif right < left:
        if left > threshold * right:
            result = "right"

    return result

def DetectDirection(landmark, threshold=2.5):
    """Classify the horizontal turn of a face as "left", "right" or "straight".

    The distance from landmark 5 (the nose/center point in the article) to
    landmark 234 (left cheek) and to landmark 454 (right cheek) is measured
    with CalcDistance, and the longer side is compared against the shorter one.

    Args:
        landmark: indexable collection of face landmarks; indices 5, 234 and
            454 are read.
        threshold: how many times longer one side must be than the other
            before the face counts as turned. Defaults to 2.5; the article
            suggests tuning it between 2.0 and 3.0.

    Returns:
        "left" when the right side is more than ``threshold`` times the left
        side, "right" for the opposite case, otherwise "straight".
    """
    left = CalcDistance(landmark[5], landmark[234])
    right = CalcDistance(landmark[5], landmark[454])

    result = "straight"

    # The ratio test is written as a multiplication so that a zero-length
    # shorter side (fully turned face) is classified instead of raising
    # ZeroDivisionError.
    if left < right:
        if right > threshold * left:
            result = "left"
    elif right < left:
        if left > threshold * right:
            result = "right"

    return result

Một số ví dụ

Full source code

import math
import time
import cv2
import mediapipe as mp


# Short aliases for the MediaPipe solution modules used by the functions below.
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_face_mesh = mp.solutions.face_mesh

# One shared FaceMesh instance, reused for every image / webcam frame.
# NOTE(review): per the MediaPipe docs, static_image_mode=True runs detection
# on every input instead of tracking between frames, and refine_landmarks=True
# adds the iris landmarks that the FACEMESH_IRISES drawing relies on - confirm
# against the installed mediapipe version.
face_mesh = mp_face_mesh.FaceMesh(
    static_image_mode=True,
    max_num_faces=1,
    refine_landmarks=True,
    min_detection_confidence=0.5,
)

####################################################################################################

def ConvertToPoint(landmark):
    """Return the landmark's coordinates as a plain ``[x, y, z]`` list.

    Args:
        landmark: any object exposing ``x``, ``y`` and ``z`` attributes.
    """
    return [getattr(landmark, axis) for axis in ("x", "y", "z")]

####################################################################################################

def CalcDistance(point1, point2):
    """Return the Euclidean distance between two landmarks in the x/y plane.

    Both arguments are converted with ConvertToPoint; the z component is
    read but deliberately left out of the distance.
    """
    ax, ay, _ = ConvertToPoint(point1)
    bx, by, _ = ConvertToPoint(point2)
    dx = ax - bx
    dy = ay - by
    return math.sqrt(dx**2 + dy**2)

####################################################################################################

def DetectLandmarks(frame):
    """Run the module-level ``face_mesh`` on a BGR frame.

    The frame is converted from BGR to RGB before being processed.

    Returns:
        ``results.multi_face_landmarks`` when it is non-empty; otherwise
        prints "no face" and returns None, so callers must handle None.
    """
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    faces = face_mesh.process(rgb_frame).multi_face_landmarks
    if faces:
        return faces

    print("no face")
    return None
    
####################################################################################################

def DetectDirection(landmark, threshold=2.5):
    """Classify the horizontal turn of a face as "left", "right" or "straight".

    The distance from landmark 5 (the nose/center point in the article) to
    landmark 234 (left cheek) and to landmark 454 (right cheek) is measured
    with CalcDistance, and the longer side is compared against the shorter one.

    Args:
        landmark: indexable collection of face landmarks; indices 5, 234 and
            454 are read.
        threshold: how many times longer one side must be than the other
            before the face counts as turned. Defaults to 2.5; the article
            suggests tuning it between 2.0 and 3.0.

    Returns:
        "left" when the right side is more than ``threshold`` times the left
        side, "right" for the opposite case, otherwise "straight".
    """
    left = CalcDistance(landmark[5], landmark[234])
    right = CalcDistance(landmark[5], landmark[454])

    result = "straight"

    # The ratio test is written as a multiplication so that a zero-length
    # shorter side (fully turned face) is classified instead of raising
    # ZeroDivisionError.
    if left < right:
        if right > threshold * left:
            result = "left"
    elif right < left:
        if left > threshold * right:
            result = "right"

    return result

####################################################################################################

def DrawLandmark(frame, landmarks):
    """Draw the face mesh for one face onto ``frame`` in place.

    Three layers are drawn in order: tesselation, contours, irises. Each
    layer uses the matching default connection style from
    ``mp_drawing_styles`` and no per-landmark drawing spec.

    Args:
        frame: image passed to ``draw_landmarks`` as ``image``.
        landmarks: landmark list of a single face, passed as ``landmark_list``.
    """
    # (connection set on mp_face_mesh, style getter on mp_drawing_styles);
    # looked up by name inside the loop so each layer resolves its
    # attributes just before it is drawn.
    layers = (
        ("FACEMESH_TESSELATION", "get_default_face_mesh_tesselation_style"),
        ("FACEMESH_CONTOURS", "get_default_face_mesh_contours_style"),
        ("FACEMESH_IRISES", "get_default_face_mesh_iris_connections_style"),
    )
    for connections_attr, style_getter in layers:
        mp_drawing.draw_landmarks(
            image=frame,
            landmark_list=landmarks,
            connections=getattr(mp_face_mesh, connections_attr),
            landmark_drawing_spec=None,
            connection_drawing_spec=getattr(mp_drawing_styles, style_getter)())

####################################################################################################

# For static images: list the image paths to process here.
# Leave the list empty to read frames from the webcam instead.
IMAGE_FILES = []

startTime = time.time()
if IMAGE_FILES:
    for idx, file in enumerate(IMAGE_FILES):
        frame = cv2.imread(file)
        # cv2.imread returns None (it does not raise) when a file cannot be read.
        if frame is None:
            print("cannot read image: {}".format(file))
            continue

        landmarks = DetectLandmarks(frame)

        # DetectLandmarks returns None when no face is found, so test
        # truthiness instead of calling len() on the result.
        if not landmarks:
            continue

        landmark = landmarks[0].landmark

        direction = DetectDirection(landmark)
        DrawLandmark(frame, landmarks[0])

        cv2.putText(frame, direction, (0, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv2.LINE_AA)
        cv2.imshow("result {}".format(idx), frame)

    print("Elapsed: " + str(time.time() - startTime))
    # Keep the result windows open until a key is pressed, then clean up.
    cv2.waitKey()
    cv2.destroyAllWindows()

else:  # no images were supplied, so read from the webcam
    cap = cv2.VideoCapture(0)
    try:
        while cap.isOpened():
            success, frame = cap.read()
            if not success:
                print("Ignoring empty camera frame.")
                # If loading a video, use 'break' instead of 'continue'.
                continue

            landmarks = DetectLandmarks(frame)

            # Annotate only when a face was found (DetectLandmarks returns
            # None otherwise).
            if landmarks:
                landmark = landmarks[0].landmark

                direction = DetectDirection(landmark)
                DrawLandmark(frame, landmarks[0])

                cv2.putText(frame, direction, (0, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv2.LINE_AA)

            # Always show the frame and poll the keyboard, so the preview keeps
            # updating and ESC (key code 27) still exits while no face is visible.
            cv2.imshow("result", frame)

            if cv2.waitKey(5) & 0xFF == 27:
                break
    finally:
        # Release the camera and close the preview window on any exit path.
        cap.release()
        cv2.destroyAllWindows()

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

import math

import time

import cv2

import mediapipe as mp

# Short aliases for the MediaPipe solution modules used by the functions below.
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_face_mesh = mp.solutions.face_mesh

# One shared FaceMesh instance, reused for every image / webcam frame.
# NOTE(review): per the MediaPipe docs, static_image_mode=True runs detection
# on every input instead of tracking between frames, and refine_landmarks=True
# adds the iris landmarks that the FACEMESH_IRISES drawing relies on - confirm
# against the installed mediapipe version.
face_mesh = mp_face_mesh.FaceMesh(
    static_image_mode=True,
    max_num_faces=1,
    refine_landmarks=True,
    min_detection_confidence=0.5,
)

####################################################################################################

def ConvertToPoint(landmark):
    """Return the landmark's coordinates as a plain ``[x, y, z]`` list.

    Args:
        landmark: any object exposing ``x``, ``y`` and ``z`` attributes.
    """
    return [landmark.x, landmark.y, landmark.z]

####################################################################################################

def CalcDistance(point1, point2):
    """Return the Euclidean distance between two landmarks in the x/y plane.

    Both arguments are converted with ConvertToPoint; the z component is
    read but deliberately left out of the distance.
    """
    x1, y1, _ = ConvertToPoint(point1)
    x2, y2, _ = ConvertToPoint(point2)
    return math.sqrt((x1 - x2)**2 + (y1 - y2)**2)

####################################################################################################

def DetectLandmarks(frame):
    """Run the module-level ``face_mesh`` on a BGR frame.

    The frame is converted from BGR to RGB before being processed.

    Returns:
        ``results.multi_face_landmarks`` when it is non-empty; otherwise
        prints "no face" and returns None, so callers must handle None.
    """
    results = face_mesh.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    if not results.multi_face_landmarks:
        print("no face")
        return None

    return results.multi_face_landmarks

####################################################################################################

def DetectDirection(landmark, threshold=2.5):
    """Classify the horizontal turn of a face as "left", "right" or "straight".

    The distance from landmark 5 (the nose/center point in the article) to
    landmark 234 (left cheek) and to landmark 454 (right cheek) is measured
    with CalcDistance, and the longer side is compared against the shorter one.

    Args:
        landmark: indexable collection of face landmarks; indices 5, 234 and
            454 are read.
        threshold: how many times longer one side must be than the other
            before the face counts as turned. Defaults to 2.5; the article
            suggests tuning it between 2.0 and 3.0.

    Returns:
        "left" when the right side is more than ``threshold`` times the left
        side, "right" for the opposite case, otherwise "straight".
    """
    left = CalcDistance(landmark[5], landmark[234])
    right = CalcDistance(landmark[5], landmark[454])

    result = "straight"

    # The ratio test is written as a multiplication so that a zero-length
    # shorter side (fully turned face) is classified instead of raising
    # ZeroDivisionError.
    if left < right:
        if right > threshold * left:
            result = "left"
    elif right < left:
        if left > threshold * right:
            result = "right"

    return result

####################################################################################################

def DrawLandmark(frame, landmarks):
    """Draw the face mesh for one face onto ``frame`` in place.

    Three layers are drawn in order: tesselation, contours, irises. Each
    layer uses the matching default connection style from
    ``mp_drawing_styles`` and no per-landmark drawing spec.

    Args:
        frame: image passed to ``draw_landmarks`` as ``image``.
        landmarks: landmark list of a single face, passed as ``landmark_list``.
    """
    mp_drawing.draw_landmarks(
        image=frame,
        landmark_list=landmarks,
        connections=mp_face_mesh.FACEMESH_TESSELATION,
        landmark_drawing_spec=None,
        connection_drawing_spec=mp_drawing_styles
        .get_default_face_mesh_tesselation_style())

    mp_drawing.draw_landmarks(
        image=frame,
        landmark_list=landmarks,
        connections=mp_face_mesh.FACEMESH_CONTOURS,
        landmark_drawing_spec=None,
        connection_drawing_spec=mp_drawing_styles
        .get_default_face_mesh_contours_style())

    mp_drawing.draw_landmarks(
        image=frame,
        landmark_list=landmarks,
        connections=mp_face_mesh.FACEMESH_IRISES,
        landmark_drawing_spec=None,
        connection_drawing_spec=mp_drawing_styles
        .get_default_face_mesh_iris_connections_style())

####################################################################################################

# For static images: list the image paths to process here.
# Leave the list empty to read frames from the webcam instead.
IMAGE_FILES = []

startTime = time.time()
if IMAGE_FILES:
    for idx, file in enumerate(IMAGE_FILES):
        frame = cv2.imread(file)
        # cv2.imread returns None (it does not raise) when a file cannot be read.
        if frame is None:
            print("cannot read image: {}".format(file))
            continue

        landmarks = DetectLandmarks(frame)

        # DetectLandmarks returns None when no face is found, so test
        # truthiness instead of calling len() on the result.
        if not landmarks:
            continue

        landmark = landmarks[0].landmark

        direction = DetectDirection(landmark)
        DrawLandmark(frame, landmarks[0])

        cv2.putText(frame, direction, (0, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv2.LINE_AA)
        cv2.imshow("result {}".format(idx), frame)

    print("Elapsed: " + str(time.time() - startTime))
    # Keep the result windows open until a key is pressed, then clean up.
    cv2.waitKey()
    cv2.destroyAllWindows()

else:  # no images were supplied, so read from the webcam
    cap = cv2.VideoCapture(0)
    try:
        while cap.isOpened():
            success, frame = cap.read()
            if not success:
                print("Ignoring empty camera frame.")
                # If loading a video, use 'break' instead of 'continue'.
                continue

            landmarks = DetectLandmarks(frame)

            # Annotate only when a face was found (DetectLandmarks returns
            # None otherwise).
            if landmarks:
                landmark = landmarks[0].landmark

                direction = DetectDirection(landmark)
                DrawLandmark(frame, landmarks[0])

                cv2.putText(frame, direction, (0, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv2.LINE_AA)

            # Always show the frame and poll the keyboard, so the preview keeps
            # updating and ESC (key code 27) still exits while no face is visible.
            cv2.imshow("result", frame)

            if cv2.waitKey(5) & 0xFF == 27:
                break
    finally:
        # Release the camera and close the preview window on any exit path.
        cap.release()
        cv2.destroyAllWindows()

Kết luận

Mediapipe detect facemesh rất nhanh & chính xác, các bạn có thể an tâm sử dụng trong nhiều bài toán. Dựa vào cách làm ở trên các bạn có thể detect mặt nghiêng góc bao nhiêu độ,… Chúc các bạn thành công

Cài đặt thư viện

Bước 1: tìm landmarks

Bước 2: convert landmark to points để tính toán

Full source code

Kết luận

Leave a Reply Cancel reply