Facial Recognition in the browser using face-api.js not working


I am trying to build a facial recognition system in the browser with face-api.js in a Svelte project. The problem is that it recognizes saved faces, but only one identity is ever assigned. For example, if two customers' faces are saved and one of them is on the webcam, it recognizes him, but it also recognizes the other person as the same person. It does, however, correctly flag a face as unknown. So the problem is that it only knows one name and gives it to every recognized face. I don't know what causes this issue; any help, or a better approach to facial recognition, would be appreciated.

Here is my code:

<script>
  import { onMount, onDestroy } from "svelte";
  import * as faceapi from "face-api.js";
  // $customers and $baseURL are Svelte stores defined elsewhere in the app

  let video;
  let detections;
  let width = 320;
  let height = 320;
  let canvas, ctx;
  let container;

  const detectionOptions = {
    withLandmarks: true,
    withDescriptors: true,
    minConfidence: 0.5,
    MODEL_URLS: {
      Mobilenetv1Model:
        "https://raw.githubusercontent.com/ml5js/ml5-data-and-models/main/models/faceapi/ssd_mobilenetv1_model-weights_manifest.json",
      FaceLandmarkModel:
        "https://raw.githubusercontent.com/ml5js/ml5-data-and-models/main/models/faceapi/face_landmark_68_model-weights_manifest.json",
      FaceLandmark68TinyNet:
        "https://raw.githubusercontent.com/ml5js/ml5-data-and-models/main/models/faceapi/face_landmark_68_tiny_model-weights_manifest.json",
      FaceRecognitionModel:
        "https://raw.githubusercontent.com/ml5js/ml5-data-and-models/main/models/faceapi/face_recognition_model-weights_manifest.json",
    },
  };

  onDestroy(() => {
    video.pause();
    video.srcObject = null;
    video.src = null;
    video.remove();
    canvas.remove();
  });

  onMount(() => {
    make();
  });

  let descriptions = [];

  function getLabeledFaceDescriptions() {
    return Promise.all(
      $customers.map(async (customer) => {
        if (customer.image_url == null) return;
        for (let i = 1; i <= 2; i++) {
          const img = await faceapi.fetchImage($baseURL + customer.image_url);
          const face_detections = await faceapi
            .detectSingleFace(img)
            .withFaceLandmarks()
            .withFaceDescriptor();
          //   console.log(face_detections);

          //   console.log(face_detections, "face_detections", customer.name);

          try {
            descriptions.push(face_detections.descriptor);
            console.log(descriptions, "pushed", customer.name);
          } catch (error) {
            // console.log(error);
            // console.log("face not found", customer.name);
            return;
          }
        }
        return new faceapi.LabeledFaceDescriptors(customer.name, descriptions);
        // console.log(descriptions);
      })
    );
  }

  async function make() {
    // get the video
    video = await getVideo();

    canvas = createCanvas(width, height);
    ctx = canvas.getContext("2d");

    Promise.all([
      faceapi.nets.ssdMobilenetv1.loadFromUri(
        detectionOptions.MODEL_URLS.Mobilenetv1Model
      ),
      faceapi.nets.faceRecognitionNet.loadFromUri(
        detectionOptions.MODEL_URLS.FaceRecognitionModel
      ),
      faceapi.nets.faceLandmark68Net.loadFromUri(
        detectionOptions.MODEL_URLS.FaceLandmarkModel
      ),
    ]).then(modelReady);
  }

  // Helper Functions
  async function getVideo() {
    // Grab elements, create settings, etc.
    const videoElement = document.createElement("video");
    videoElement.setAttribute("style", "display: none;");
    videoElement.width = width;
    videoElement.height = height;
    container.appendChild(videoElement);

    // Create a webcam capture
    const capture = await navigator.mediaDevices.getUserMedia({
      video: true,
    });
    videoElement.srcObject = capture;
    videoElement.play();

    return videoElement;
  }

  function createCanvas(w, h) {
    const canvas = document.createElement("canvas");
    canvas.setAttribute("style", "border-radius: 1rem");
    canvas.width = w;
    canvas.height = h;
    container.appendChild(canvas);
    return canvas;
  }

  async function modelReady() {
    console.log("ready!");
    const labeledFaceDescriptors = await getLabeledFaceDescriptions();
    // clean labeledFaceDescriptors by removing undefined
    const cleaned = labeledFaceDescriptors.filter((x) => x !== undefined);

    const faceMatcher = new faceapi.FaceMatcher(cleaned);

    const displaySize = {
      width: video.width,
      height: video.height,
    };

    setInterval(async () => {
      detections = await faceapi
        .detectAllFaces(video)
        .withFaceLandmarks()
        .withFaceDescriptors();

      detections = faceapi.resizeResults(detections, displaySize);

      const results = detections.map((d) =>
        faceMatcher.findBestMatch(d.descriptor)
      );
      console.log(results);
      gotResults(results);
    }, 100);
  }

  function gotResults(results) {
    // Clear part of the canvas
    ctx.fillStyle = "#000000";
    ctx.fillRect(0, 0, width, height);

    ctx.drawImage(video, 0, 0, width, height);

    if (detections) {
      if (detections.length > 0) {
        drawBox(detections, results);
      }
    }
  }

  export let view_sales_function;

  function drawBox(detections, results) {
    try {
      for (let i = 0; i < detections.length; i++) {
        const alignedRect = detections[i].alignedRect;
        const x = alignedRect._box._x;
        const y = alignedRect._box._y;
        const boxWidth = alignedRect._box._width;
        const boxHeight = alignedRect._box._height;

        ctx.beginPath();
        ctx.rect(x, y, boxWidth, boxHeight);
        ctx.strokeStyle = "#a15ffb";
        ctx.stroke();
        ctx.closePath();

        // draw name on image
        const text = results[i]._label;
        const textWidth = ctx.measureText(text).width;
        const textHeight = parseInt(ctx.font, 10); // base 10
        ctx.fillStyle = "#a15ffb";
        ctx.fillRect(x, y, textWidth + 4, textHeight + 4);
        ctx.fillStyle = "#000000";
        ctx.fillText(text, x, y + textHeight);

        let view_customer;
        if (results[i]._label != "Unknown") {
          view_customer = $customers.find(
            (customer) => customer.name == results[i]._label
          );
          if (
            view_customer != "" &&
            view_customer != undefined &&
            view_customer != null
          ) {
            view_sales_function(view_customer);
          }
        }
      }
    } catch (error) {
      console.log(error);
    }
  }
</script>

<div bind:this={container} class="container z-0 rounded-2xl" />

3 Answers

Answer by Tania:

The reason your code detects only a single face is that you're using the function detectSingleFace(img).

To detect multiple faces, use detectAllFaces(img) instead.

You can read more about the API in the documentation, under the section called "Detecting Faces".
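
For reference, a minimal sketch of the multi-face call (the method names are the ones already used in the question's own modelReady()):

  // Detect every face in the current frame, with landmarks and descriptors
  const detections = await faceapi
    .detectAllFaces(video)
    .withFaceLandmarks()
    .withFaceDescriptors();

  // detections holds one result object per visible face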

Answer by Afzal K.:

Bringing the faceMatcher in and using it in the updateDisplay() function should solve the issue of recognizing only one face. Additionally, instead of pushing face_detections.descriptor onto one shared descriptions array, create a new LabeledFaceDescriptors for each customer and push that to the array. This way each customer has their own labeled face description, and the faceMatcher can accurately identify each person. The key pattern is sketched below, followed by the full updated component.
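
A minimal sketch of that per-customer labeling (customerDescriptors is an illustrative name, not a face-api.js API):

  // Wrap only this customer's descriptors in one LabeledFaceDescriptors,
  // so the label maps to exactly one person
  const labeled = new faceapi.LabeledFaceDescriptors(
    customer.name,
    customerDescriptors
  );
  descriptions.push(labeled);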

<script>
  import { onMount, onDestroy } from "svelte";
  import * as faceapi from "face-api.js";
  // $customers and $baseURL are Svelte stores defined elsewhere in the app

  export let view_sales_function;

  let video;
  let detections;
  let width = 320;
  let height = 320;
  let canvas, ctx;
  let container;

  const detectionOptions = {
    withLandmarks: true,
    withDescriptors: true,
    minConfidence: 0.5,
    MODEL_URLS: {
      Mobilenetv1Model:
        "https://raw.githubusercontent.com/ml5js/ml5-data-and-models/main/models/faceapi/ssd_mobilenetv1_model-weights_manifest.json",
      FaceLandmarkModel:
        "https://raw.githubusercontent.com/ml5js/ml5-data-and-models/main/models/faceapi/face_landmark_68_model-weights_manifest.json",
      FaceLandmark68TinyNet:
        "https://raw.githubusercontent.com/ml5js/ml5-data-and-models/main/models/faceapi/face_landmark_68_tiny_model-weights_manifest.json",
      FaceRecognitionModel:
        "https://raw.githubusercontent.com/ml5js/ml5-data-and-models/main/models/faceapi/face_recognition_model-weights_manifest.json",
    },
  };

  onDestroy(() => {
    video.pause();
    video.srcObject = null;
    video.src = null;
    video.remove();
    canvas.remove();
  });

  onMount(() => {
    make();
  });

  // One LabeledFaceDescriptors per customer, so each label maps only
  // to that customer's own descriptors
  let descriptions = [];

  function getLabeledFaceDescriptions() {
    return Promise.all(
      $customers.map(async (customer) => {
        if (customer.image_url == null) return;
        const customerDescriptors = [];
        for (let i = 1; i <= 2; i++) {
          const img = await faceapi.fetchImage($baseURL + customer.image_url);
          const face_detections = await faceapi
            .detectSingleFace(img)
            .withFaceLandmarks()
            .withFaceDescriptor();

          // detectSingleFace resolves to undefined when no face is found
          if (!face_detections) return;
          customerDescriptors.push(face_detections.descriptor);
        }
        descriptions.push(
          new faceapi.LabeledFaceDescriptors(customer.name, customerDescriptors)
        );
      })
    );
  }

  async function make() {
    // get the video
    video = await getVideo();

    canvas = createCanvas(width, height);
    ctx = canvas.getContext("2d");

    Promise.all([
      faceapi.nets.ssdMobilenetv1.loadFromUri(
        detectionOptions.MODEL_URLS.Mobilenetv1Model
      ),
      faceapi.nets.faceRecognitionNet.loadFromUri(
        detectionOptions.MODEL_URLS.FaceRecognitionModel
      ),
      faceapi.nets.faceLandmark68Net.loadFromUri(
        detectionOptions.MODEL_URLS.FaceLandmarkModel
      ),
    ]).then(async () => {
      // get labeled face descriptions once models are loaded
      await getLabeledFaceDescriptions();
    });
  }

  // Helper Functions
  async function getVideo() {
    // Grab elements, create settings, etc.
    const videoElement = document.createElement("video");
    videoElement.setAttribute("style", "display: none;");
    videoElement.width = width;
    videoElement.height = height;
    container.appendChild(videoElement);

    // Create a webcam capture
    const capture = await navigator.mediaDevices.getUserMedia({
      video: true
    });
    videoElement.srcObject = capture;
    videoElement.play();

    return videoElement;
  }

  function createCanvas(w, h) {
    const canvas = document.createElement("canvas");
    canvas.setAttribute("style", "border-radius: 1rem");
    canvas.width = w;
    canvas.height = h;
    container.appendChild(canvas);
    return canvas;
  }

  async function getBestMatch() {
    const detection = await faceapi
      .detectSingleFace(video)
      .withFaceLandmarks()
      .withFaceDescriptor();

    if (!detection) return;

    // `descriptions` is a plain array, not a Svelte store, so no `$` prefix.
    // FaceMatcher needs at least one labeled descriptor to match against.
    if (descriptions.length === 0) return;
    const faceMatcher = new faceapi.FaceMatcher(descriptions, 0.4);

    return faceMatcher.findBestMatch(detection.descriptor);
  }

  async function updateDisplay() {
    // The interval below starts before make() finishes, so wait until
    // the canvas and video exist
    if (!ctx || !video) return;

    // Clear part of the canvas
    ctx.fillStyle = "#000000";
    ctx.fillRect(0, 0, width, height);

    ctx.drawImage(video, 0, 0, width, height);

    // get best match and display results
    const bestMatch = await getBestMatch();

    if (bestMatch) {
      const text = bestMatch.label;
      const textWidth = ctx.measureText(text).width;
      const textHeight = parseInt(ctx.font, 10); // base 10
      ctx.fillStyle = "#a15ffb";
      ctx.fillRect(0, 0, textWidth + 4, textHeight + 4);
      ctx.fillStyle = "#000000";
      ctx.fillText(text, 0, textHeight);

      // do something with the matched customer, like displaying their information or making a function call
      const matchedCustomer = $customers.find(
        (customer) => customer.name == bestMatch.label
      );
      handleMatchedCustomer(matchedCustomer);
    }
  }

  async function handleMatchedCustomer(customer) {
    if (!customer) return;
    // do something with the matched customer
    view_sales_function(customer);
  }

  // Poll roughly 10 times per second; updateDisplay() bails out
  // until setup has completed
  setInterval(() => {
    updateDisplay();
  }, 100);
</script>

<div bind:this={container} class="container z-0 rounded-2xl" />

Each call to updateDisplay() inside the setInterval checks the current frame for the best match among all the labeled descriptions in the faceMatcher, so different people are identified correctly as they step in front of the camera. Note that getBestMatch() uses detectSingleFace, so it labels one face per frame; handling several faces in the frame at once would need detectAllFaces, as sketched below.
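
A sketch of that multi-face extension, reusing the detectAllFaces/findBestMatch pattern already present in the question's modelReady():

  // Detect every visible face, then label each with its best match
  const all = await faceapi
    .detectAllFaces(video)
    .withFaceLandmarks()
    .withFaceDescriptors();

  const matches = all.map((d) => faceMatcher.findBestMatch(d.descriptor));
  // matches[i].label is the customer name, or "unknown" if below threshold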

Answer by KANISHK KAUSHIK:

It's an issue with face-api.js: it always reflects the same image's result and cannot differentiate between a known face and an unknown face. My suggestion is to use Python libraries instead of browser APIs. FaceNet has the most accurate results compared to other libraries.