# Hi Erik, thanks for the feedback — is the following alright?
import cv2
import depthai as dai
import numpy as np
import matplotlib.pyplot as plt
from depthai_sdk.fps import FPSHandler
import torch
# %%
class HostSync:
    """Host-side synchronizer pairing messages from multiple streams by sequence number.

    Adapted from
    https://github.com/luxonis/depthai-experiments/blob/master/gen2-depth-driven-focus/main.py
    """

    def __init__(self):
        # Per-stream buffers: stream name -> list of {"msg": msg, "seq": int}.
        self.arrays = {}

    def add_msg(self, name, msg):
        """Buffer *msg* under *name* and try to sync.

        Returns a dict {stream_name: msg} containing one message per stream
        (color + depth) sharing *msg*'s sequence number once both streams
        have one, otherwise False. On a successful sync, messages older
        than the synced sequence number are dropped from every buffer.
        """
        if name not in self.arrays:
            self.arrays[name] = []
        seq = msg.getSequenceNum()
        self.arrays[name].append({"msg": msg, "seq": seq})

        # Look for a message with the current sequence number in every stream.
        synced = {}
        for stream, arr in self.arrays.items():
            for obj in arr:
                if obj["seq"] == seq:
                    synced[stream] = obj["msg"]

        if len(synced) == 2:  # Color, Depth
            # Drop stale messages. The original called arr.remove(obj) while
            # iterating enumerate(arr), which skips elements after each
            # removal; rebuilding the list avoids that bug.
            for stream in self.arrays:
                self.arrays[stream] = [
                    obj for obj in self.arrays[stream] if obj["seq"] >= seq
                ]
            return synced
        return False
# %%
# Open a specific OAK device by its MXID; request USB3 SUPER_PLUS (10 Gbps) link.
device = dai.Device("184430102145421300", maxUsbSpeed=dai.UsbSpeed.SUPER_PLUS)
# %%
# Create pipeline
pipeline = dai.Pipeline()
# Chunk size 0 disables XLink message chunking — presumably to reduce latency
# for the large 12 MP frames; TODO confirm against DepthAI docs.
pipeline.setXLinkChunkSize(0)
# Define sources and outputs
camRgb = pipeline.create(dai.node.ColorCamera)
monoLeft = pipeline.create(dai.node.MonoCamera)
monoRight = pipeline.create(dai.node.MonoCamera)
stereo = pipeline.create(dai.node.StereoDepth)
# XLinkOut nodes stream device results back to the host.
xoutRgb = pipeline.create(dai.node.XLinkOut)
xoutDepth = pipeline.create(dai.node.XLinkOut)
# These names must match the queue names requested via getOutputQueue() below.
xoutRgb.setStreamName("rgb")
xoutDepth.setStreamName("depth")
# mono and rgb properties
set_fps = 30
camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_12_MP)
camRgb.setFps(set_fps)
# camRgb.setInterleaved(False)

# Lock RGB focus to the calibrated lens position so the RGB-depth alignment
# configured below stays valid (autofocus would move the optics).
# NOTE: the original wrapped this in `try: ... except: raise`, a bare-except
# handler that only re-raises — a no-op, so the wrapper is removed here.
calibData = device.readCalibration2()
lensPosition = calibData.getLensPosition(dai.CameraBoardSocket.RGB)
if lensPosition:
    camRgb.initialControl.setManualFocus(lensPosition)

monoLeft.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
monoLeft.setBoardSocket(dai.CameraBoardSocket.LEFT)
monoLeft.setFps(set_fps)
monoRight.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
monoRight.setBoardSocket(dai.CameraBoardSocket.RIGHT)
monoRight.setFps(set_fps)
# stereoConfig
stereo.setDefaultProfilePreset(dai.node.StereoDepth.PresetMode.HIGH_DENSITY)
stereo.initialConfig.setMedianFilter(dai.MedianFilter.KERNEL_7x7)
# Post-processing filters applied to disparity before depth is produced.
stereoConfig = stereo.initialConfig.get()
stereoConfig.postProcessing.speckleFilter.enable = True
stereoConfig.postProcessing.speckleFilter.speckleRange = 50
stereoConfig.postProcessing.spatialFilter.enable = True
stereoConfig.postProcessing.spatialFilter.holeFillingRadius = 2
stereoConfig.postProcessing.spatialFilter.numIterations = 1
# Depth range clamp — presumably millimetres (15 cm .. 1 m); TODO confirm units.
stereoConfig.postProcessing.thresholdFilter.minRange = 150
stereoConfig.postProcessing.thresholdFilter.maxRange = 1000
stereoConfig.postProcessing.decimationFilter.decimationFactor = 2
stereo.initialConfig.set(stereoConfig)
stereo.setLeftRightCheck(True)
stereo.setExtendedDisparity(True)
# Align depth map to rgb
stereo.setDepthAlign(dai.CameraBoardSocket.RGB)
# Matches the 800x599 size the host resizes the RGB frame to for display.
stereo.setOutputSize(800, 599)
# Linking
monoLeft.out.link(stereo.left)
monoRight.out.link(stereo.right)
camRgb.isp.link(xoutRgb.input)
stereo.depth.link(xoutDepth.input)
# Last clicked/hovered pixel in the "12mp" window; sampled from the depth map
# in the main loop.
point = (0, 0)


def show_distance(event, x, y, flags, param):
    """OpenCV mouse callback: remember the most recent cursor position."""
    global point
    point = (x, y)


cv2.namedWindow("12mp")
cv2.setMouseCallback("12mp", show_distance)
# %%
# obj detection
# Loads YOLOv5s via torch.hub — downloads the repo/weights on first run
# (requires network access).
model = torch.hub.load('ultralytics/yolov5', "yolov5s")
model.classes = [0]  # detects `people` only — class 0 is presumably COCO "person"; verify
# %%
# Main host loop: drain the device queues, sync color+depth by sequence
# number, run YOLOv5 on the host, and display the depth at the mouse pointer.
# NOTE(review): leading whitespace was lost in this paste; the nesting below
# is reconstructed — verify it against the original script.
device.startPipeline(pipeline)
calibData = device.readCalibration()
outputs = ['rgb', 'depth']
# Non-blocking output queues, 4 messages deep.
queues = [device.getOutputQueue(name, 4, False) for name in outputs]
sync = HostSync()
fps = FPSHandler()
synced_msgs = None
while True:
    # Feed every available message into the synchronizer; add_msg returns a
    # {"rgb": ..., "depth": ...} dict on a successful sync, else False.
    for q in queues:
        if q.has():
            synced_msgs = sync.add_msg(q.getName(), q.get())
    if synced_msgs:
        fps.nextIter()
        print('FPS', fps.fps())
        frame = synced_msgs["rgb"].getCvFrame()
        if 'depth' in synced_msgs:
            # NOTE(review): getFrame() returns a numpy array, so the
            # dai.ImgFrame annotation looks wrong — confirm.
            depthFrame: dai.ImgFrame = synced_msgs["depth"].getFrame()
            # NOTE(review): `fov` is assigned but never used below.
            fov = synced_msgs['depth'].getInstanceNum()
        frame_resized = cv2.resize(frame,(800,599), interpolation=cv2.INTER_NEAREST)
        # object det yolov5 on host
        # BGR->RGB via ::-1 channel flip; inference at 840 px.
        # NOTE(review): `result` is computed but never drawn or used — WIP?
        result = model(frame[:,:,::-1],size=840)
        result = result.pandas().xyxy[0]
        # Depth sample at the mouse pointer (row = y, col = x).
        distance = depthFrame[point[1]][point[0]]
        cv2.circle(frame_resized, point, 10, (0,0,255))
        # NOTE(review): DepthAI depth frames are presumably in millimetres,
        # but the label says "cm" — confirm the units.
        cv2.putText(frame_resized, "{}cm".format(distance), (point[0],point[1]), cv2.FONT_HERSHEY_PLAIN, 2 , (255,255,255), 2 )
        cv2.imshow("12mp", frame_resized)
    if cv2.waitKey(1) == ord('q'):
        break
device.close()