Hi jakaskerl,
I tested it and found that when the camera starts with a face already in the frame, the measured depth and pose information are accurate. But if there is no face in the camera's view at startup, the code hangs, and it stays stuck even after a face later enters the frame.
I don't know if this is caused by the synchronization (see the sketch after the code for what I suspect). I made the modifications below, but they did not help.
import blobconverter
import cv2
import depthai as dai
from tools import *
from datetime import timedelta

def frame_norm(frame, bbox):
    h, w = frame.shape[:2]
    return [
        int(bbox[0] * w),
        int(bbox[1] * h),
        int(bbox[2] * w),
        int(bbox[3] * h)
    ]
def create_pipeline():
    pipeline = dai.Pipeline()

    print("Creating Color Camera...")
    cam = pipeline.create(dai.node.ColorCamera)
    cam.setPreviewSize(300, 300)
    cam.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P)
    cam.setInterleaved(False)
    cam.setBoardSocket(dai.CameraBoardSocket.RGB)
    cam.setPreviewKeepAspectRatio(False)

    # Passthrough manip
    face_det_manip = pipeline.create(dai.node.ImageManip)
    cam.preview.link(face_det_manip.inputImage)

    monoLeft = pipeline.create(dai.node.MonoCamera)
    monoLeft.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
    monoLeft.setBoardSocket(dai.CameraBoardSocket.LEFT)
    monoRight = pipeline.create(dai.node.MonoCamera)
    monoRight.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
    monoRight.setBoardSocket(dai.CameraBoardSocket.RIGHT)

    stereo = pipeline.create(dai.node.StereoDepth)
    stereo.setDefaultProfilePreset(dai.node.StereoDepth.PresetMode.HIGH_DENSITY)
    stereo.setDepthAlign(dai.CameraBoardSocket.RGB)
    monoLeft.out.link(stereo.left)
    monoRight.out.link(stereo.right)

    # Spatial detection network
    face_det_nn = pipeline.create(dai.node.MobileNetSpatialDetectionNetwork)
    face_det_nn.setBoundingBoxScaleFactor(0.8)
    stereo.depth.link(face_det_nn.inputDepth)
    face_det_nn.setConfidenceThreshold(0.5)
    face_det_nn.setBlobPath(blobconverter.from_zoo(name="face-detection-retail-0005", shaves=6))
    cam.preview.link(face_det_nn.input)
    # face_det_manip.out.link(face_det_nn.input)  # Passthrough manip

    # Script node takes the output of the face detection NN as input and sets an
    # ImageManipConfig to crop the initial frame
    image_manip_script = pipeline.create(dai.node.Script)
    face_det_nn.out.link(image_manip_script.inputs['face_det_in'])
    cam.preview.link(image_manip_script.inputs['preview'])
    image_manip_script.setScript("""
import time

def correct_bb(xmin, ymin, xmax, ymax):
    if xmin < 0: xmin = 0.001
    if ymin < 0: ymin = 0.001
    if xmax > 1: xmax = 0.999
    if ymax > 1: ymax = 0.999
    return [xmin, ymin, xmax, ymax]

while True:
    face_dets = node.io['face_det_in'].tryGet()
    if face_dets is not None:
        img = node.io['preview'].get()
        for i, det in enumerate(face_dets.detections):
            cfg = ImageManipConfig()
            bb = correct_bb(det.xmin - 0.03, det.ymin - 0.03, det.xmax + 0.03, det.ymax + 0.03)
            cfg.setCropRect(*bb)
            cfg.setResize(60, 60)
            cfg.setKeepAspectRatio(True)
            node.io['manip_cfg'].send(cfg)
            node.io['manip_img'].send(img)
    time.sleep(0.001)  # Avoid lazy looping
""")
    # ImageManip for cropping and resizing face detections
    recognition_manip = pipeline.create(dai.node.ImageManip)
    recognition_manip.initialConfig.setResize(60, 60)
    recognition_manip.setWaitForConfigInput(True)
    image_manip_script.outputs['manip_cfg'].link(recognition_manip.inputConfig)
    image_manip_script.outputs['manip_img'].link(recognition_manip.inputImage)

    # Second-stage recognition NN
    print("Creating recognition Neural Network...")
    recognition_nn = pipeline.create(dai.node.NeuralNetwork)
    recognition_nn.setBlobPath(blobconverter.from_zoo(name="head-pose-estimation-adas-0001", shaves=6))
    recognition_manip.out.link(recognition_nn.input)

    # Synced output
    sync_xout = pipeline.create(dai.node.XLinkOut)
    sync_xout.setStreamName("sync")

    # Sync all streams
    sync = pipeline.create(dai.node.Sync)
    # cam.video.link(sync.inputs["color"])
    face_det_nn.out.link(sync.inputs["detection"])
    recognition_nn.out.link(sync.inputs["recognition"])
    sync.setSyncThreshold(timedelta(milliseconds=40))
    sync.out.link(sync_xout.input)

    cam_out = pipeline.create(dai.node.XLinkOut)
    cam_out.setStreamName("color")
    cam.video.link(cam_out.input)

    return pipeline
with dai.Device() as device:
    device.startPipeline(create_pipeline())
    inSync = device.getOutputQueue("sync", 1, False)
    incolor = device.getOutputQueue("color", 1, False)

    while True:
        msgGrp = inSync.get()
        frame = incolor.get().getCvFrame()
        detections = msgGrp["detection"].detections
        recognitions = msgGrp["recognition"]

        if frame is not None:
            for i, detection in enumerate(detections):
                bbox = frame_norm(frame, (detection.xmin, detection.ymin, detection.xmax, detection.ymax))
                print(i, bbox, detection.label, detection.spatialCoordinates.z / 1000)

                roiData = detection.boundingBoxMapping
                roi = roiData.roi
                roi = roi.denormalize(frame.shape[1], frame.shape[0])  # Denormalize the depth ROI to pixel coordinates
                topLeft = roi.topLeft()
                bottomRight = roi.bottomRight()
                xmin = int(topLeft.x)
                ymin = int(topLeft.y)
                xmax = int(bottomRight.x)
                ymax = int(bottomRight.y)

                # Decode head-pose recognition results
                rec = recognitions
                yaw = rec.getLayerFp16('angle_y_fc')[0]
                pitch = rec.getLayerFp16('angle_p_fc')[0]
                roll = rec.getLayerFp16('angle_r_fc')[0]
                decode_pose(yaw, pitch, roll, bbox, frame)

                # Spatial BBOX mapping
                cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (0, 0, 0), 2)
                # Detection BBOX
                cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (10, 245, 10), 2)

                y = (bbox[1] + bbox[3]) // 2
                coords = "Z: {:.2f} m".format(detection.spatialCoordinates.z / 1000)
                cv2.putText(frame, coords, (bbox[0], y + 60), cv2.FONT_HERSHEY_TRIPLEX, 1, (0, 0, 0), 8)
                cv2.putText(frame, coords, (bbox[0], y + 60), cv2.FONT_HERSHEY_TRIPLEX, 1, (255, 255, 255), 2)

            cv2.imshow("Camera", frame)

        if cv2.waitKey(1) == ord('q'):
            break
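
What I suspect is happening: when no face is detected, the recognition NN never produces a message, so the Sync node never emits a group and inSync.get() blocks forever on the host. To make clearer what I mean, here is a rough, untested sketch of the kind of non-blocking loop I have in mind, polling the sync queue with tryGet() so the preview keeps running even when no synced group is available (the decoding and drawing would stay the same as above):

    # Untested sketch: poll the sync queue instead of blocking on it, so the
    # host loop keeps running even when no face (and thus no synced group) arrives.
    while True:
        frame = incolor.get().getCvFrame()  # color frames always arrive

        msgGrp = inSync.tryGet()  # non-blocking: returns None if no synced group is ready
        if msgGrp is not None:
            detections = msgGrp["detection"].detections
            recognitions = msgGrp["recognition"]
            # ... same decoding / drawing as above ...

        cv2.imshow("Camera", frame)
        if cv2.waitKey(1) == ord('q'):
            break

Does that sound like the right direction, or is there a better way to keep the Sync node from stalling the whole loop?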
Thanks