Hi,

I'm trying to implement this gaze process but it's breaking when a face in the background enters frame.

If I'm correct, the pipeline is:
1. Input image -->

2. Face-detection model (I've tried both face-detection-retail-0004 and 0005) -->
3. Cropped face -->

4A. landmarks-regression-retail-0009 &
4B. head-pose-estimation-adas-0001
-->
5. Cropped L+R eyes & head pose angles -->
6. Gaze-estimation-adas-0002

What confuses me is that, apart from 4A (landmarks-regression-retail-0009), this is a very similar pipeline to gen2-face-recognition, which I've tested and which allows me to have multiple faces simultaneously in frame.

  • jakaskerl replied to this.
  • jakaskerl

    Hey Jaka,

    Thanks for the reply, but I was able to solve this issue by implementing logic to only apply gaze estimation to the largest face in frame. As my use case was to estimate the gaze of the driver WITHOUT estimating the gaze of backseat passengers, with an OAK-D placed above the steering wheel, my approach works.

    Hopefully my documentation below can help others encountering similar issues.

    Best,
    Vlad


    For documentation-

    My project is building heavily upon depthai-experiments/gen2-gaze-estimation. In order to record the driver's gaze estimation without having the program break each time the backseat passenger was detected, I made changes to main.py and script.py


    ORIGINAL LOGS (before implementing solution):

    Process Process-1:
    
    Traceback (most recent call last):
    
      File "C:\\Program Files\\WindowsApps\\PythonSoftwareFoundation.Python.3.11_3.11.2544.0_x64__qbz5n2kfra8p0\\Lib\\multiprocessing\\process.py", line 314, in _bootstrap
    
        self.run()
    
      File "C:\\Program Files\\WindowsApps\\PythonSoftwareFoundation.Python.3.11_3.11.2544.0_x64__qbz5n2kfra8p0\\Lib\\multiprocessing\\process.py", line 108, in run
    
        self._target(*self._args, **self._kwargs)
    
      File "C:\\Users\\srd0157\\OneDrive - subarujapan\\Desktop\\CV_test\\oakd_gaze\\gazeModule.py", line 242, in run_gaze_detection
    
        gaze = np.array(msgs["gaze"][i].getFirstLayerFp16())
    
                        ~~~~~~~~~~~~^^^
    
    IndexError: list index out of range

    SOLUTION:


    In the main.py, I changed:

            msgs = sync.get_msgs()
    
            if msgs is not None:
    
                frame = msgs["color"].getCvFrame()
    
                dets = msgs["detection"].detections
    
                for i, detection in enumerate(dets):
    
                    det = BoundingBox(detection)
    
                    tl, br = det.denormalize(frame.shape)
    
                    cv2.rectangle(frame, tl, br, (10, 245, 10), 1)

    TO:

                    msgs = sync.get_msgs()
    
                    if msgs is not None:
    
                        frame = msgs["color"].getCvFrame()
    
                        dets = msgs["detection"].detections
    
                        gaze_data = msgs["gaze"]
    
                        for i, detection in enumerate(dets):
    
                            if i >= len(gaze_data):
    
                                continue
    
                            det = BoundingBox(detection)
    
                            tl, br = det.denormalize(frame.shape)
    
                            cv2.rectangle(frame, tl, br, (10, 245, 10), 1)

    The other major changes can be seen in script.py further below:


    script.py

    THE ORIGINAL face_dets section
        face_dets = node.io['face_det_in'].tryGet()
        if face_dets is not None:
            passthrough = node.io['face_pass'].get()
            seq = passthrough.getSequenceNum()
    
            # No detections, carry on
            if len(face_dets.detections) == 0:
                del sync[str(seq)]
                continue
    
            # node.warn(f"New detection {seq}")
            if len(sync) == 0: continue
            img = find_in_dict(seq, "frame")
            if img is None: continue
    
            add_to_dict(face_dets.detections[0], seq, "detections")
    
            for det in face_dets.detections:
                correct_bb(det)
    
                # To head pose estimation model
                cfg1 = ImageManipConfig()
                cfg1.setCropRect(det.xmin, det.ymin, det.xmax, det.ymax)
                cfg1.setResize(60, 60)
                cfg1.setKeepAspectRatio(False)
                node.io['headpose_cfg'].send(cfg1)
                node.io['headpose_img'].send(img)
    
                # To face landmark detection model
                cfg2 = ImageManipConfig()
                cfg2.setCropRect(det.xmin, det.ymin, det.xmax, det.ymax)
                cfg2.setResize(48, 48)
                cfg2.setKeepAspectRatio(False)
                node.io['landmark_cfg'].send(cfg2)
                node.io['landmark_img'].send(img)
                break # Only 1 face at the time currently supported
    MY MODIFIED face_dets SECTION
    
    
        
        face_dets = node.io['face_det_in'].tryGet()
        if face_dets is not None:
            passthrough = node.io['face_pass'].get()
            seq = passthrough.getSequenceNum()
    
            # No detections, carry on
            if len(face_dets.detections) == 0:
                del sync[str(seq)]
                continue
    
            if len(sync) == 0: continue
            img = find_in_dict(seq, "frame")
            if img is None: continue
    
            # Find largest face
            largest_face = max(face_dets.detections,
                              key=lambda det: (det.xmax - det.xmin) * (det.ymax - det.ymin))
            add_to_dict(largest_face, seq, "detections")
            correct_bb(largest_face)
    
            #  should process only largest face
            cfg1 = ImageManipConfig()
            cfg1.setCropRect(largest_face.xmin, largest_face.ymin, largest_face.xmax, largest_face.ymax)
            cfg1.setResize(60, 60)
            cfg1.setKeepAspectRatio(False)
            node.io['headpose_cfg'].send(cfg1)
            node.io['headpose_img'].send(img)
    
            cfg2 = ImageManipConfig()
            cfg2.setCropRect(largest_face.xmin, largest_face.ymin, largest_face.xmax, largest_face.ymax)
            cfg2.setResize(48, 48)
            cfg2.setKeepAspectRatio(False)
            node.io['landmark_cfg'].send(cfg2)
            node.io['landmark_img'].send(img)

    vedoua
    Ok, what do you mean it is breaking? What are the logs like? Maybe there is a pipeline block..

    Thanks,
    Jaka

      jakaskerl

      Hey Jaka,

      Thanks for the reply, but I was able to solve this issue by implementing logic to only apply gaze estimation to the largest face in frame. As my use case was to estimate the gaze of the driver WITHOUT estimating the gaze of backseat passengers, with an OAK-D placed above the steering wheel, my approach works.

      Hopefully my documentation below can help others encountering similar issues.

      Best,
      Vlad


      For documentation-

      My project is building heavily upon depthai-experiments/gen2-gaze-estimation. In order to record the driver's gaze estimation without having the program break each time the backseat passenger was detected, I made changes to main.py and script.py


      ORIGINAL LOGS (before implementing solution):

      Process Process-1:
      
      Traceback (most recent call last):
      
        File "C:\\Program Files\\WindowsApps\\PythonSoftwareFoundation.Python.3.11_3.11.2544.0_x64__qbz5n2kfra8p0\\Lib\\multiprocessing\\process.py", line 314, in _bootstrap
      
          self.run()
      
        File "C:\\Program Files\\WindowsApps\\PythonSoftwareFoundation.Python.3.11_3.11.2544.0_x64__qbz5n2kfra8p0\\Lib\\multiprocessing\\process.py", line 108, in run
      
          self._target(*self._args, **self._kwargs)
      
        File "C:\\Users\\srd0157\\OneDrive - subarujapan\\Desktop\\CV_test\\oakd_gaze\\gazeModule.py", line 242, in run_gaze_detection
      
          gaze = np.array(msgs["gaze"][i].getFirstLayerFp16())
      
                          ~~~~~~~~~~~~^^^
      
      IndexError: list index out of range

      SOLUTION:


      In the main.py, I changed:

              msgs = sync.get_msgs()
      
              if msgs is not None:
      
                  frame = msgs["color"].getCvFrame()
      
                  dets = msgs["detection"].detections
      
                  for i, detection in enumerate(dets):
      
                      det = BoundingBox(detection)
      
                      tl, br = det.denormalize(frame.shape)
      
                      cv2.rectangle(frame, tl, br, (10, 245, 10), 1)

      TO:

                      msgs = sync.get_msgs()
      
                      if msgs is not None:
      
                          frame = msgs["color"].getCvFrame()
      
                          dets = msgs["detection"].detections
      
                          gaze_data = msgs["gaze"]
      
                          for i, detection in enumerate(dets):
      
                              if i >= len(gaze_data):
      
                                  continue
      
                              det = BoundingBox(detection)
      
                              tl, br = det.denormalize(frame.shape)
      
                              cv2.rectangle(frame, tl, br, (10, 245, 10), 1)

      The other major changes can be seen in script.py further below:


      script.py

      THE ORIGINAL face_dets section
          face_dets = node.io['face_det_in'].tryGet()
          if face_dets is not None:
              passthrough = node.io['face_pass'].get()
              seq = passthrough.getSequenceNum()
      
              # No detections, carry on
              if len(face_dets.detections) == 0:
                  del sync[str(seq)]
                  continue
      
              # node.warn(f"New detection {seq}")
              if len(sync) == 0: continue
              img = find_in_dict(seq, "frame")
              if img is None: continue
      
              add_to_dict(face_dets.detections[0], seq, "detections")
      
              for det in face_dets.detections:
                  correct_bb(det)
      
                  # To head pose estimation model
                  cfg1 = ImageManipConfig()
                  cfg1.setCropRect(det.xmin, det.ymin, det.xmax, det.ymax)
                  cfg1.setResize(60, 60)
                  cfg1.setKeepAspectRatio(False)
                  node.io['headpose_cfg'].send(cfg1)
                  node.io['headpose_img'].send(img)
      
                  # To face landmark detection model
                  cfg2 = ImageManipConfig()
                  cfg2.setCropRect(det.xmin, det.ymin, det.xmax, det.ymax)
                  cfg2.setResize(48, 48)
                  cfg2.setKeepAspectRatio(False)
                  node.io['landmark_cfg'].send(cfg2)
                  node.io['landmark_img'].send(img)
                  break # Only 1 face at the time currently supported
      MY MODIFIED face_dets SECTION
      
      
          
          face_dets = node.io['face_det_in'].tryGet()
          if face_dets is not None:
              passthrough = node.io['face_pass'].get()
              seq = passthrough.getSequenceNum()
      
              # No detections, carry on
              if len(face_dets.detections) == 0:
                  del sync[str(seq)]
                  continue
      
              if len(sync) == 0: continue
              img = find_in_dict(seq, "frame")
              if img is None: continue
      
              # Find largest face
              largest_face = max(face_dets.detections,
                                key=lambda det: (det.xmax - det.xmin) * (det.ymax - det.ymin))
              add_to_dict(largest_face, seq, "detections")
              correct_bb(largest_face)
      
              #  should process only largest face
              cfg1 = ImageManipConfig()
              cfg1.setCropRect(largest_face.xmin, largest_face.ymin, largest_face.xmax, largest_face.ymax)
              cfg1.setResize(60, 60)
              cfg1.setKeepAspectRatio(False)
              node.io['headpose_cfg'].send(cfg1)
              node.io['headpose_img'].send(img)
      
              cfg2 = ImageManipConfig()
              cfg2.setCropRect(largest_face.xmin, largest_face.ymin, largest_face.xmax, largest_face.ymax)
              cfg2.setResize(48, 48)
              cfg2.setKeepAspectRatio(False)
              node.io['landmark_cfg'].send(cfg2)
              node.io['landmark_img'].send(img)