Hi walawala111,
LMK if it works:
reqs (save these as requirements.txt and install with pip install -r requirements.txt):
opencv-python==4.5.5.64
blobconverter==1.4.3
depthai==2.27
numpy<2.0.0

code:
import blobconverter
import cv2
import depthai as dai
from tools import *  # provides decode_pose() used below
from datetime import timedelta

def frame_norm(frame, bbox):
    h, w = frame.shape[:2]
    return [
        int(bbox[0] * w),
        int(bbox[1] * h),
        int(bbox[2] * w),
        int(bbox[3] * h)
    ]
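# e.g. on a 300x300 frame, frame_norm(frame, (0.2, 0.2, 0.6, 0.6)) -> [60, 60, 180, 180]
# (the NN outputs bbox coordinates normalized to the 0..1 range)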

def create_pipeline():
    pipeline = dai.Pipeline()

    print("Creating Color Camera...")
    cam = pipeline.create(dai.node.ColorCamera)
    cam.setPreviewSize(300, 300)
    cam.setResolution(dai.ColorCameraProperties.SensorResolution.THE_800_P)
    cam.setInterleaved(False)
    cam.setBoardSocket(dai.CameraBoardSocket.RGB)
    cam.setPreviewKeepAspectRatio(False)

    # Passthrough manip
    face_det_manip = pipeline.create(dai.node.ImageManip)
    cam.preview.link(face_det_manip.inputImage)

    monoLeft = pipeline.create(dai.node.MonoCamera)
    monoLeft.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
    monoLeft.setBoardSocket(dai.CameraBoardSocket.LEFT)
    monoRight = pipeline.create(dai.node.MonoCamera)
    monoRight.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
    monoRight.setBoardSocket(dai.CameraBoardSocket.RIGHT)

    stereo = pipeline.create(dai.node.StereoDepth)
    stereo.setDefaultProfilePreset(dai.node.StereoDepth.PresetMode.HIGH_DENSITY)
    stereo.setDepthAlign(dai.CameraBoardSocket.RGB)
    monoLeft.out.link(stereo.left)
    monoRight.out.link(stereo.right)

    # Spatial detection network
    face_det_nn = pipeline.create(dai.node.MobileNetSpatialDetectionNetwork)
    face_det_nn.setBoundingBoxScaleFactor(0.8)
    stereo.depth.link(face_det_nn.inputDepth)
    face_det_nn.setConfidenceThreshold(0.5)
    face_det_nn.setBlobPath(blobconverter.from_zoo(name="face-detection-retail-0005", shaves=6))
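    # blobconverter.from_zoo() downloads and converts the zoo model on first run (internet required),
    # then reuses the locally cached blob on subsequent runs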
    cam.preview.link(face_det_nn.input)
    # face_det_manip.out.link(face_det_nn.input)  # Passthrough manip

    # Script node will take the output from the face detection NN as an input and set ImageManipConfig
    # to crop the initial frame
    image_manip_script = pipeline.create(dai.node.Script)
    face_det_nn.out.link(image_manip_script.inputs['face_det_in'])
    cam.preview.link(image_manip_script.inputs['preview'])
    image_manip_script.setScript("""
import time

def correct_bb(xmin, ymin, xmax, ymax):
    if xmin < 0: xmin = 0.001
    if ymin < 0: ymin = 0.001
    if xmax > 1: xmax = 0.999
    if ymax > 1: ymax = 0.999
    return [xmin, ymin, xmax, ymax]

while True:
    face_dets = node.io['face_det_in'].tryGet()
    if face_dets is not None:
        img = node.io['preview'].get()
        for i, det in enumerate(face_dets.detections):
            cfg = ImageManipConfig()
            bb = correct_bb(det.xmin - 0.03, det.ymin - 0.03, det.xmax + 0.03, det.ymax + 0.03)
            cfg.setCropRect(*bb)
            cfg.setResize(60, 60)
            cfg.setKeepAspectRatio(True)
            node.io['manip_cfg'].send(cfg)
            node.io['manip_img'].send(img)
    time.sleep(0.001)  # Avoid lazy looping
""")

    # ImageManip for cropping and resizing face detections
    recognition_manip = pipeline.create(dai.node.ImageManip)
    recognition_manip.initialConfig.setResize(60, 60)
    recognition_manip.setWaitForConfigInput(True)
    image_manip_script.outputs['manip_cfg'].link(recognition_manip.inputConfig)
    image_manip_script.outputs['manip_img'].link(recognition_manip.inputImage)

    # Second stage recognition NN
    print("Creating recognition Neural Network...")
    recognition_nn = pipeline.create(dai.node.NeuralNetwork)
    recognition_nn.setBlobPath(blobconverter.from_zoo(name="head-pose-estimation-adas-0001", shaves=6))
    recognition_manip.out.link(recognition_nn.input)

    # Synced output
    sync_xout = pipeline.create(dai.node.XLinkOut)
    sync_xout.setStreamName("sync")

    # Sync all streams
    sync = pipeline.create(dai.node.Sync)
    cam.video.link(sync.inputs["color"])
    face_det_nn.out.link(sync.inputs["detection"])
    recognition_nn.out.link(sync.inputs["recognition"])
    sync.setSyncThreshold(timedelta(milliseconds=40))
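    # messages whose timestamps are within 40 ms of each other get grouped into one synced output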
    sync.out.link(sync_xout.input)

    return pipeline

with dai.Device() as device:
    device.startPipeline(create_pipeline())
    inSync = device.getOutputQueue("sync", 1, False)

    while True:
        msgGrp = inSync.get()
        frame = msgGrp["color"].getCvFrame()
        detections = msgGrp["detection"].detections
        recognitions = msgGrp["recognition"]

        if frame is not None:
            for i, detection in enumerate(detections):
                bbox = frame_norm(frame, (detection.xmin, detection.ymin, detection.xmax, detection.ymax))
                print(i, bbox, detection.label, detection.spatialCoordinates.z / 1000)

                roiData = detection.boundingBoxMapping
                roi = roiData.roi
                roi = roi.denormalize(frame.shape[1], frame.shape[0])  # Denormalize bounding box to pixel coordinates
                topLeft = roi.topLeft()
                bottomRight = roi.bottomRight()
                xmin = int(topLeft.x)
                ymin = int(topLeft.y)
                xmax = int(bottomRight.x)
                ymax = int(bottomRight.y)

                # Decoding of recognition results
                rec = recognitions
                yaw = rec.getLayerFp16('angle_y_fc')[0]
                pitch = rec.getLayerFp16('angle_p_fc')[0]
                roll = rec.getLayerFp16('angle_r_fc')[0]
                decode_pose(yaw, pitch, roll, bbox, frame)

                # Spatial BBOX mapping
                cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (0, 0, 0), 2)
                # Detection BBOX
                cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (10, 245, 10), 2)

                y = (bbox[1] + bbox[3]) // 2
                coords = "Z: {:.2f} m".format(detection.spatialCoordinates.z / 1000)
                cv2.putText(frame, coords, (bbox[0], y + 60), cv2.FONT_HERSHEY_TRIPLEX, 1, (0, 0, 0), 8)
                cv2.putText(frame, coords, (bbox[0], y + 60), cv2.FONT_HERSHEY_TRIPLEX, 1, (255, 255, 255), 2)

            cv2.imshow("Camera", frame)

        if cv2.waitKey(1) == ord('q'):
            break
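
Note: decode_pose() comes from the tools.py helper that ships with the head pose demo. If you don't have that file handy, below is a minimal stand-in sketch with the same name and signature that just overlays the three Euler angles above the detection bbox. It is an assumption for illustration, not the original helper, so swap in the real tools.py if you want the full axis visualization.

# tools.py (minimal stand-in, assumption: only visualizes the pose angles)
import cv2

def decode_pose(yaw, pitch, roll, bbox, frame):
    # Draw the head-pose angles (in degrees) just above the face bounding box
    text = "yaw: {:.0f} pitch: {:.0f} roll: {:.0f}".format(yaw, pitch, roll)
    org = (bbox[0], max(bbox[1] - 10, 20))
    cv2.putText(frame, text, org, cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 4)
    cv2.putText(frame, text, org, cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)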
Thanks,
Jaka