Can someone please help me fix this issue, I am trying to use mediapipe to computer differences in facial landmarks in two different faces. The code works at first but after several steps it breaks and I am getting this error
type here
WARNING: All log messages before absl::InitializeLog() is called are written to STDERR
F0000 00:00:1704710377.941893 106652 threadpool_pthread_impl.cc:53] Check failed: res == 0 (11 vs. 0) pthread_create failed
*** Check failure stack trace: ***
@ 0x7d6b03b093e9 absl::lts_20230125::log_internal::LogMessageFatal::~LogMessageFatal()
@ 0x7d6b02ddd170 mediapipe::ThreadPool::WorkerThread::WorkerThread()
@ 0x7d6b035c79e0 mediapipe::ThreadPool::StartWorkers()
@ 0x7d6b035aa0b0 mediapipe::ThreadPoolExecutor::Start()
@ 0x7d6b035aa232 mediapipe::ThreadPoolExecutor::ThreadPoolExecutor()
@ 0x7d6b035aa407 mediapipe::ThreadPoolExecutor::Create()
@ 0x7d6b03594450 mediapipe::CalculatorGraph::CreateDefaultThreadPool()
@ 0x7d6b03594672 mediapipe::CalculatorGraph::InitializeDefaultExecutor()
@ 0x7d6b03595055 mediapipe::CalculatorGraph::InitializeExecutors()
@ 0x7d6b03595c28 mediapipe::CalculatorGraph::Initialize()
@ 0x7d6b03596314 mediapipe::CalculatorGraph::Initialize()
@ 0x7d6b0359650f mediapipe::CalculatorGraph::Initialize()
@ 0x7d6b0351295d mediapipe::python::CalculatorGraphSubmodule()::{lambda()#1}::operator()()
@ 0x7d6b03512cf9 pybind11::cpp_function::initialize<>()::{lambda()#3}::_FUN()
@ 0x7d6b02e2a992 pybind11::cpp_function::dispatcher()
@ 0x560389a37093 PyCFunction_Call
from torch import nn
import mediapipe as mp
import torch.nn.functional as F
import torch
import cv2
import numpy as np
import math
class MouthLandmarkLoss(nn.Module):
def __init__(self):
super(MouthLandmarkLoss, self).__init__()
self.mp_face_mesh = mp.solutions.face_mesh.FaceMesh(static_image_mode=True,
max_num_faces=1,
refine_landmarks=True,
min_detection_confidence=0.5)
def extract_landmarks(self, images):
landmarks_list = []
specific_landmark_indices = [61, 146, 146, 91, 91, 181, 181, 84, 84, 17, 17, 314, 314, 405, 405, 321,
321, 375, 375, 291, 61, 185, 185, 40, 40, 39, 39, 37, 37, 0, 0, 267, 267,
269, 269, 270, 270, 409, 409, 291, 78, 95, 95, 88, 88, 178, 178, 87, 87, 14,
14, 317, 317, 402, 402, 318, 318, 324, 324, 308, 78, 191, 191, 80, 80, 81,
81, 82, 82, 13, 13, 312, 312, 311, 311, 310, 310, 415, 415, 308]
for image in images:
dense_img = image.to_dense()
img = dense_img.detach().permute(1, 2, 0).cpu().numpy()
img_save = (img * 127.5 + 128).clip(0, 255).astype(np.uint8)
# Convert to BGR if the image has 3 channels
if img_save.shape[2] == 3:
img_save = cv2.cvtColor(img_save, cv2.COLOR_RGB2BGR)
face_detection = mp.solutions.face_detection.FaceDetection(min_detection_confidence=0.3)
face_detection_results = face_detection.process(img_save)
# Check if face is detected
if face_detection_results.detections:
bounding_box = face_detection_results.detections[0].location_data.relative_bounding_box
# Process the image with Mediapipe Face Mesh
face_mesh_results = self.mp_face_mesh.process(img_save)
# Extract only mouth landmarks
if face_mesh_results.multi_face_landmarks:
mouth_landmarks = face_mesh_results.multi_face_landmarks[0]
# Extract specific landmarks using the defined indices
specific_landmarks = torch.tensor([[mouth_landmarks.landmark[i].x, mouth_landmarks.landmark[i].y] for i in specific_landmark_indices], dtype=torch.float32)
# Draw smaller circles for landmarks
for landmark in specific_landmarks:
x, y = int(landmark[0] * img_save.shape[1]), int(landmark[1] * img_save.shape[0])
cv2.circle(img_save, (x, y), 1, (0, 255, 0), -1)
# Display the image with smaller circles
h, w, _ = img_save.shape
x, y, width, height = int(bounding_box.xmin * w), int(bounding_box.ymin * h), int(bounding_box.width * w), int(bounding_box.height * h)
cv2.rectangle(img_save, (x, y), (x + width, y + height), (0, 255, 0), 2)
# Append the mouth landmarks to the list
landmarks_list.append(specific_landmarks)
# Save the image with drawn smaller circles (only for debugging)
# cv2.imwrite('/content/img.png', img_save)
if landmarks_list:
return torch.stack(landmarks_list)
else:
# Handle the case when all tensors are empty
return torch.empty(0)
def calculate_distances(self, landmarks):
# Calculate pairwise Euclidean distances between landmarks
num_landmarks = landmarks.shape[1]
distances = torch.norm(landmarks.unsqueeze(1) - landmarks.unsqueeze(0), dim=-1)
return distances.view(-1, num_landmarks * num_landmarks)
def forward(self, y_hat, y):
# Assuming x and y_hat are batches of images in tensor format
# x and y_hat shape: (batch_size, channels, height, width)
# Extract landmarks using MediaPipe Face Mesh for the mouth
landmarks_y = self.extract_landmarks(y)
landmarks_y_hat = self.extract_landmarks(y_hat)
# Create masks indicating the presence of landmarks
landmarks_mask_y = torch.any(landmarks_y != 0, dim=-1)
landmarks_mask_y_hat = torch.any(landmarks_y_hat != 0, dim=-1)
# Ensure that both landmarks_y and landmarks_y_hat have at least one landmark
if not torch.any(landmarks_mask_y) or not torch.any(landmarks_mask_y_hat):
# No landmarks detected, return zero loss
return torch.tensor(0.000000001, dtype=torch.float32, device=y.device)
# Apply the masks to landmarks_y and landmarks_y_hat
landmarks_y_masked = landmarks_y[landmarks_mask_y]
landmarks_y_hat_masked = landmarks_y_hat[landmarks_mask_y_hat]
# Calculate distances between landmarks
distances_y = self.calculate_distances(landmarks_y_masked)
distances_y_hat = self.calculate_distances(landmarks_y_hat_masked)
# Round off distances to two decimal places
distances_y = torch.round(distances_y * 100) / 100
distances_y_hat = torch.round(distances_y_hat * 100) / 100
# Calculate expression loss based on rounded distances
loss = F.mse_loss(distances_y_hat, distances_y)
# Add a regularizer term to the loss
regularizer = 1000
loss *= regularizer
return loss
I tried to calculate landmark loss for in two different images and it is supposed to return a loss calculated through code using mediapipe i am using google colab pro+'s Tesla V100 gpu? here is the code