Hi glitchyordis,
To get perfectly aligned depth and RGB, you'd want to manually set the lens position, as you do above. If you have an object close up (e.g. below 50cm), and especially very close (e.g. below 20cm), using autofocus will cause RGB and depth to become misaligned. Imperfect RGB-depth alignment could lead to incorrect spatial detections, but usually it doesn't, as there's bounding box scaling applied anyway.
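For context, that scaling is what the on-device spatial detection nodes apply before averaging depth inside a detection. A minimal sketch (the 0.5 factor is just an example value):
```python
import depthai as dai

pipeline = dai.Pipeline()
sdn = pipeline.create(dai.node.YoloSpatialDetectionNetwork)
# Average depth only over the central 50% of each detection's bounding box,
# so slight RGB-depth misalignment at the box edges has less effect.
sdn.setBoundingBoxScaleFactor(0.5)
```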
Thanks, Erik
RGB Depth alignment
Hi erik,
just to double check, do you mean:
- get the object-detection bbox
- scale the bbox width/height by a 'bboxscalefactor' -> scaled bbox
- get depth within the scaled bbox instead of the original detection bbox? (rough sketch below)
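Something like this on the host (a rough sketch; `scale_bbox` and the 0.5 factor are my own illustration, not a DepthAI API):
```python
# Shrink a detection bbox around its centre before sampling depth,
# so edge pixels (likely background) are excluded from the average.
def scale_bbox(xmin, ymin, xmax, ymax, factor=0.5):
    cx, cy = (xmin + xmax) / 2, (ymin + ymax) / 2
    half_w = (xmax - xmin) * factor / 2
    half_h = (ymax - ymin) * factor / 2
    return int(cx - half_w), int(cy - half_h), int(cx + half_w), int(cy + half_h)

x1, y1, x2, y2 = scale_bbox(100, 80, 300, 240)  # -> (150, 120, 250, 200)
# depthROI = depthFrame[y1:y2, x1:x2]  # then average/median within the ROI
```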
Please help me take a look at the following code for aligning RGB to depth and syncing; I was only able to get around 12 FPS (printed by depthai_sdk.fps FPSHandler). Any suggestions on how to improve the FPS? It gets even lower when I calculate spatials on the host based on the received depth frame and bbox.
```python
from pathlib import Path
import sys
import cv2
import depthai as dai
import numpy as np
import time
import matplotlib.pyplot as plt
import math
import PySimpleGUI as gui
from depthai_sdk.fps import FPSHandler
class HostSync:
    # from https://github.com/luxonis/depthai-experiments/blob/master/gen2-depth-driven-focus/main.py
    def __init__(self):
        self.arrays = {}

    def add_msg(self, name, msg):
        if name not in self.arrays:
            self.arrays[name] = []
        # Add msg to array
        self.arrays[name].append({"msg": msg, "seq": msg.getSequenceNum()})

        synced = {}
        for name, arr in self.arrays.items():
            for i, obj in enumerate(arr):
                if msg.getSequenceNum() == obj["seq"]:
                    synced[name] = obj["msg"]
                    break
        # If both (all) msgs are synced, remove all old msgs
        # and return the synced msgs
        if len(synced) == 2:  # Color, Depth
            # Remove old msgs
            for name, arr in self.arrays.items():
                for i, obj in enumerate(arr):
                    if obj["seq"] < msg.getSequenceNum():
                        arr.remove(obj)
                    else:
                        break
            return synced
        return False
def printSystemInformation(info):
    # from https://docs.luxonis.com/projects/api/en/latest/samples/SystemLogger/system_information/#system-information
    m = 1024 * 1024  # MiB
    print(f"Ddr used / total - {info.ddrMemoryUsage.used / m:.2f} / {info.ddrMemoryUsage.total / m:.2f} MiB")
    print(f"Cmx used / total - {info.cmxMemoryUsage.used / m:.2f} / {info.cmxMemoryUsage.total / m:.2f} MiB")
    print(f"LeonCss heap used / total - {info.leonCssMemoryUsage.used / m:.2f} / {info.leonCssMemoryUsage.total / m:.2f} MiB")
    print(f"LeonMss heap used / total - {info.leonMssMemoryUsage.used / m:.2f} / {info.leonMssMemoryUsage.total / m:.2f} MiB")
    t = info.chipTemperature
    print(f"Chip temperature - average: {t.average:.2f}, css: {t.css:.2f}, mss: {t.mss:.2f}, upa: {t.upa:.2f}, dss: {t.dss:.2f}")
    print(f"Cpu usage - Leon CSS: {info.leonCssCpuUsage.average * 100:.2f}%, Leon MSS: {info.leonMssCpuUsage.average * 100:.2f} %")
    print("----------------------------------------")
# pipeline ---------------------------------------------------------------------------------------
device = dai.Device("184430102145421300", maxUsbSpeed=dai.UsbSpeed.SUPER_PLUS)
# Create pipeline
pipeline = dai.Pipeline()
pipeline.setXLinkChunkSize(0)
# Define sources and outputs
camRgb = pipeline.create(dai.node.ColorCamera)
monoLeft = pipeline.create(dai.node.MonoCamera)
monoRight = pipeline.create(dai.node.MonoCamera)
stereo = pipeline.create(dai.node.StereoDepth)
xoutRgb = pipeline.create(dai.node.XLinkOut)
xoutDepth = pipeline.create(dai.node.XLinkOut)
xoutRgb.setStreamName("rgb")
xoutDepth.setStreamName("depth")
# Properties
set_fps = 30
camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_12_MP)
camRgb.setFps(set_fps)
# camRgb.setInterleaved(False)
try:
    calibData = device.readCalibration2()
    lensPosition = calibData.getLensPosition(dai.CameraBoardSocket.RGB)
    if lensPosition:
        camRgb.initialControl.setManualFocus(lensPosition)
    else:
        gui.popup("No Calib data for lensposition")
except:
    raise
monoLeft.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
monoLeft.setBoardSocket(dai.CameraBoardSocket.LEFT)
monoLeft.setFps(set_fps)
monoRight.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
monoRight.setBoardSocket(dai.CameraBoardSocket.RIGHT)
monoRight.setFps(set_fps)
# Setting node configs
stereo.setDefaultProfilePreset(dai.node.StereoDepth.PresetMode.HIGH_DENSITY)
# the preset is similar to setting a confidence threshold (int) via stereo.initialConfig.setConfidenceThreshold(threshold)
# Options: MEDIAN_OFF, KERNEL_3x3, KERNEL_5x5, KERNEL_7x7 (default)
stereo.initialConfig.setMedianFilter(dai.MedianFilter.KERNEL_7x7)
# stereoConfig
stereoConfig = stereo.initialConfig.get()
stereoConfig.postProcessing.speckleFilter.enable = True
stereoConfig.postProcessing.speckleFilter.speckleRange = 50
# stereoConfig.postProcessing.temporalFilter.enable = True
stereoConfig.postProcessing.spatialFilter.enable = True
stereoConfig.postProcessing.spatialFilter.holeFillingRadius = 2
stereoConfig.postProcessing.spatialFilter.numIterations = 1
stereoConfig.postProcessing.thresholdFilter.minRange = 150
stereoConfig.postProcessing.thresholdFilter.maxRange = 1000
stereoConfig.postProcessing.decimationFilter.decimationFactor = 2
stereo.initialConfig.set(stereoConfig)
# Align depth map to the perspective of RGB camera, on which inference is done
stereo.setDepthAlign(dai.CameraBoardSocket.RGB)
stereo.setLeftRightCheck(True)
stereo.setExtendedDisparity(True)
stereo.setOutputSize(800, 599)
# Linking
monoLeft.out.link(stereo.left)
monoRight.out.link(stereo.right)
camRgb.isp.link(xoutRgb.input)
stereo.depth.link(xoutDepth.input)
sysLog = pipeline.create(dai.node.SystemLogger)
linkOut = pipeline.create(dai.node.XLinkOut)
linkOut.setStreamName("sysinfo")
sysLog.setRate(0.2)
sysLog.out.link(linkOut.input)
# shows the depth at the mouse position when hovering over the cv2 window
point = (0, 0)
def show_distance(event, x, y, args, params):
    global point
    point = (x, y)
    print(f"point: {point}")
cv2.namedWindow("12mp")
cv2.setMouseCallback("12mp", show_distance)
# main ----------------------------------------------------------------------------------------
device.startPipeline(pipeline)
device.setLogLevel(dai.LogLevel.INFO)
device.setLogOutputLevel(dai.LogLevel.INFO)
calibData = device.readCalibration()
outputs = ['rgb', 'depth']
queues = [device.getOutputQueue(name, 4, False) for name in outputs]
qSysInfo = device.getOutputQueue(name="sysinfo", maxSize=4, blocking=False)
sync = HostSync()
color = (220, 220, 220)
fps = FPSHandler()
while True:
    sysInfo = qSysInfo.tryGet()
    if sysInfo is not None:
        printSystemInformation(sysInfo)

    for q in queues:
        if q.has():
            synced_msgs = sync.add_msg(q.getName(), q.get())
            if synced_msgs:
                fps.nextIter()
                print('FPS', fps.fps())
                frame = synced_msgs["rgb"].getCvFrame()
                depthFrame = None  # If debug
                if 'depth' in synced_msgs:
                    depthFrame: dai.ImgFrame = synced_msgs["depth"].getFrame()
                    fov = synced_msgs['depth'].getInstanceNum()  # socket/instance num, used for FOV lookup
                    frame_resized = cv2.resize(frame, (800, 599), interpolation=cv2.INTER_NEAREST)
                    # get frame sizes
                    imgH, imgW, _ = frame.shape
                    resizedh, resizedw, _ = frame_resized.shape

                    # (no object detection in this version; with it, we would run
                    # host obj det on frame, get a bbox, and calc spatials using depthFrame)

                    cv2.circle(frame_resized, point, 10, (0, 0, 255))
                    distance = depthFrame[point[1]][point[0]]  # depth values are in mm
                    cv2.putText(frame_resized, "{}mm".format(distance), (point[0], point[1]), cv2.FONT_HERSHEY_PLAIN, 2, (255, 255, 255), 2)
                    cv2.imshow("12mp", frame_resized)

    if cv2.waitKey(1) == ord('q'):
        break

device.close()
```
Example FPS printed (no object detection on host/device) by depthai_sdk.fps FPSHandler:
```
FPS 14.071720381301297
FPS 13.953488372094823
Ddr used / total - 108.02 / 340.42 MiB
Cmx used / total - 2.09 / 2.50 MiB
LeonCss heap used / total - 47.33 / 77.32 MiB
LeonMss heap used / total - 4.59 / 41.23 MiB
Chip temperature - average: 46.39, css: 47.35, mss: 46.45, upa: 46.68, dss: 45.09
Cpu usage - Leon CSS: 48.04%, Leon MSS: 19.31 %
```
Example FPS printed (object detection on host, spatials calculated on host from device depth) by depthai_sdk.fps FPSHandler:
```
FPS 6.550218340610157
FPS 6.56959024720649
Ddr used / total - 108.02 / 340.42 MiB
Cmx used / total - 2.09 / 2.50 MiB
LeonCss heap used / total - 47.33 / 77.32 MiB
LeonMss heap used / total - 4.59 / 41.23 MiB
Chip temperature - average: 46.45, css: 47.35, mss: 46.45, upa: 46.00, dss: 46.00
Cpu usage - Leon CSS: 47.33%, Leon MSS: 20.71 %
```
Hi glitchyordis, please provide a full MRE (minimal reproducible example): https://docs.luxonis.com/en/latest/pages/support/
Hi erik, here you go.
device: OAK-D Pro AF
latest versions of depthai and depthai-sdk
Note: the code uses `device = dai.Device("111111111111111111", maxUsbSpeed=dai.UsbSpeed.SUPER_PLUS)`; please change it to your own device's MxID.
```python
# %%
from pathlib import Path
import sys
import cv2
import depthai as dai
import numpy as np
import time
import matplotlib.pyplot as plt
import math
import PySimpleGUI as gui
from depthai_sdk.fps import FPSHandler
import torch
# %%
class HostSync:
    # adapted from https://github.com/luxonis/depthai-experiments/blob/master/gen2-depth-driven-focus/main.py
    def __init__(self):
        self.arrays = {}

    def add_msg(self, name, msg):
        if name not in self.arrays:
            self.arrays[name] = []
        # Add msg to array
        self.arrays[name].append({"msg": msg, "seq": msg.getSequenceNum()})

        synced = {}
        for name, arr in self.arrays.items():
            for i, obj in enumerate(arr):
                if msg.getSequenceNum() == obj["seq"]:
                    synced[name] = obj["msg"]

        if len(synced) == 2:  # Color, Depth
            # Remove old msgs
            for name, arr in self.arrays.items():
                for i, obj in enumerate(arr):
                    if obj["seq"] < msg.getSequenceNum():
                        arr.remove(obj)
                    else:
                        break
            return synced
        return False
class TextHelper:
    # from: https://github.com/luxonis/depthai-experiments/blob/master/gen2-calc-spatials-on-host/utility.py
    def __init__(self) -> None:
        self.bg_color = (0, 0, 0)
        self.color = (255, 255, 255)
        self.text_type = cv2.FONT_HERSHEY_SIMPLEX
        self.line_type = cv2.LINE_AA

    def putText(self, frame, text, coords):
        cv2.putText(frame, text, coords, self.text_type, 0.5, self.bg_color, 3, self.line_type)
        cv2.putText(frame, text, coords, self.text_type, 0.5, self.color, 1, self.line_type)

    def rectangle(self, frame, p1, p2):
        cv2.rectangle(frame, p1, p2, self.bg_color, 3)
        cv2.rectangle(frame, p1, p2, self.color, 1)
# %%
def printSystemInformation(info):
    # from https://docs.luxonis.com/projects/api/en/latest/samples/SystemLogger/system_information/#system-information
    m = 1024 * 1024  # MiB
    print(f"Ddr used / total - {info.ddrMemoryUsage.used / m:.2f} / {info.ddrMemoryUsage.total / m:.2f} MiB")
    print(f"Cmx used / total - {info.cmxMemoryUsage.used / m:.2f} / {info.cmxMemoryUsage.total / m:.2f} MiB")
    print(f"LeonCss heap used / total - {info.leonCssMemoryUsage.used / m:.2f} / {info.leonCssMemoryUsage.total / m:.2f} MiB")
    print(f"LeonMss heap used / total - {info.leonMssMemoryUsage.used / m:.2f} / {info.leonMssMemoryUsage.total / m:.2f} MiB")
    t = info.chipTemperature
    print(f"Chip temperature - average: {t.average:.2f}, css: {t.css:.2f}, mss: {t.mss:.2f}, upa: {t.upa:.2f}, dss: {t.dss:.2f}")
    print(f"Cpu usage - Leon CSS: {info.leonCssCpuUsage.average * 100:.2f}%, Leon MSS: {info.leonMssCpuUsage.average * 100:.2f} %")
    print("----------------------------------------")
# %%
def calc_spatials(frame, frame_resized, result, depthFrame, calibData, depthData):
    # adapted from: https://github.com/luxonis/depthai-experiments/blob/master/gen2-calc-spatials-on-host/calc.py
    def calc_angle(frame, offset, HFOV):
        return math.atan(math.tan(HFOV / 2.0) * offset / (frame.shape[1] / 2.0))

    imgH, imgW, _ = frame.shape
    resizedh, resizedw, _ = frame_resized.shape

    # only process the first detection; scale its bbox from the 12MP frame
    # down to the (800, 599) depth/preview size
    scaled_bb = {
        "xmin": int(result.xmin[0] / imgW * resizedw),
        "ymin": int(result.ymin[0] / imgH * resizedh),
        "xmax": int(result.xmax[0] / imgW * resizedw),
        "ymax": int(result.ymax[0] / imgH * resizedh),
    }

    depthROI = depthFrame[scaled_bb["ymin"]:scaled_bb["ymax"], scaled_bb["xmin"]:scaled_bb["xmax"]]
    inRange = (150 <= depthROI) & (depthROI <= 1000)  # threshold, mm
    averageDepth = np.mean(depthROI[inRange])

    centroid = {  # Get centroid of the ROI
        'x': int((scaled_bb["xmax"] + scaled_bb["xmin"]) / 2),
        'y': int((scaled_bb["ymax"] + scaled_bb["ymin"]) / 2)
    }

    midW = int(depthFrame.shape[1] / 2)  # middle of the depth img width
    midH = int(depthFrame.shape[0] / 2)  # middle of the depth img height
    bb_x_pos = centroid['x'] - midW
    bb_y_pos = centroid['y'] - midH

    HFOV = np.deg2rad(calibData.getFov(dai.CameraBoardSocket(depthData.getInstanceNum())))
    angle_x = calc_angle(depthFrame, bb_x_pos, HFOV)
    angle_y = calc_angle(depthFrame, bb_y_pos, HFOV)

    spatials = {  # mm
        'z': averageDepth,
        'x': averageDepth * math.tan(angle_x),
        'y': -averageDepth * math.tan(angle_y)
    }
    return spatials, scaled_bb, centroid
# %%
device = dai.Device("184430102145421300", maxUsbSpeed=dai.UsbSpeed.SUPER_PLUS)
# %%
# Create pipeline
pipeline = dai.Pipeline()
pipeline.setXLinkChunkSize(0)
# Define sources and outputs
camRgb = pipeline.create(dai.node.ColorCamera)
monoLeft = pipeline.create(dai.node.MonoCamera)
monoRight = pipeline.create(dai.node.MonoCamera)
stereo = pipeline.create(dai.node.StereoDepth)
xoutRgb = pipeline.create(dai.node.XLinkOut)
xoutDepth = pipeline.create(dai.node.XLinkOut)
xoutRgb.setStreamName("rgb")
xoutDepth.setStreamName("depth")
# mono and rgb properties
set_fps = 30
camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_12_MP)
camRgb.setFps(set_fps)
# camRgb.setInterleaved(False)
try:
    calibData = device.readCalibration2()
    lensPosition = calibData.getLensPosition(dai.CameraBoardSocket.RGB)
    if lensPosition:
        camRgb.initialControl.setManualFocus(lensPosition)
    else:
        gui.popup("No Calib data for lensposition")
except:
    raise
monoLeft.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
monoLeft.setBoardSocket(dai.CameraBoardSocket.LEFT)
monoLeft.setFps(set_fps)
monoRight.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
monoRight.setBoardSocket(dai.CameraBoardSocket.RIGHT)
monoRight.setFps(set_fps)
# stereoConfig
stereo.setDefaultProfilePreset(dai.node.StereoDepth.PresetMode.HIGH_DENSITY)
stereo.initialConfig.setMedianFilter(dai.MedianFilter.KERNEL_7x7)
stereoConfig = stereo.initialConfig.get()
stereoConfig.postProcessing.speckleFilter.enable = True
stereoConfig.postProcessing.speckleFilter.speckleRange = 50
stereoConfig.postProcessing.spatialFilter.enable = True
stereoConfig.postProcessing.spatialFilter.holeFillingRadius = 2
stereoConfig.postProcessing.spatialFilter.numIterations = 1
stereoConfig.postProcessing.thresholdFilter.minRange = 150
stereoConfig.postProcessing.thresholdFilter.maxRange = 1000
stereoConfig.postProcessing.decimationFilter.decimationFactor = 2
stereo.initialConfig.set(stereoConfig)
stereo.setLeftRightCheck(True)
stereo.setExtendedDisparity(True)
# Align depth map to rgb
stereo.setDepthAlign(dai.CameraBoardSocket.RGB)
stereo.setOutputSize(800, 599)
# Linking
monoLeft.out.link(stereo.left)
monoRight.out.link(stereo.right)
camRgb.isp.link(xoutRgb.input)
stereo.depth.link(xoutDepth.input)
# logger
sysLog = pipeline.create(dai.node.SystemLogger)
linkOut = pipeline.create(dai.node.XLinkOut)
linkOut.setStreamName("sysinfo")
sysLog.setRate(0.5)
sysLog.out.link(linkOut.input)
# gets the mouse coordinates (pixels) when hovering over the cv2 window
point = (0, 0)
def show_distance(event, x, y, args, params):
    global point
    point = (x, y)
    # print(f"point: {point}")
cv2.namedWindow("12mp")
cv2.setMouseCallback("12mp", show_distance)
# %%
# obj detection
model = torch.hub.load('ultralytics/yolov5', "yolov5s")
model.classes = [0] # detects `people` only
# %%
device.startPipeline(pipeline)
device.setLogLevel(dai.LogLevel.INFO)
device.setLogOutputLevel(dai.LogLevel.INFO)
calibData = device.readCalibration()
outputs = ['rgb', 'depth']
queues = [device.getOutputQueue(name, 4, False) for name in outputs]
qSysInfo = device.getOutputQueue(name="sysinfo", maxSize=4, blocking=False)
sync = HostSync()
fps = FPSHandler()
text = TextHelper()
synced_msgs = None
while True:
    sysInfo = qSysInfo.tryGet()
    if sysInfo is not None:
        printSystemInformation(sysInfo)

    for q in queues:
        if q.has():
            synced_msgs = sync.add_msg(q.getName(), q.get())
            if synced_msgs:
                fps.nextIter()
                print('FPS', fps.fps())
                frame = synced_msgs["rgb"].getCvFrame()
                if 'depth' in synced_msgs:
                    depthFrame: dai.ImgFrame = synced_msgs["depth"].getFrame()
                    fov = synced_msgs['depth'].getInstanceNum()  # socket/instance num, used for FOV lookup
                    frame_resized = cv2.resize(frame, (800, 599), interpolation=cv2.INTER_NEAREST)

                    # object det yolov5 on host (BGR -> RGB for the model)
                    result = model(frame[:, :, ::-1], size=840)
                    result = result.pandas().xyxy[0]
                    if len(result) > 0:
                        spatials, scaled_bb, centroid = calc_spatials(frame, frame_resized, result, depthFrame, calibData, synced_msgs["depth"])
                        text.rectangle(frame_resized, (scaled_bb["xmin"], scaled_bb["ymin"]), (scaled_bb["xmax"], scaled_bb["ymax"]))
                        text.putText(frame_resized, "X: " + ("{:.1f}cm".format(spatials['x'] / 10) if not math.isnan(spatials['x']) else "--"), (centroid["x"] + 10, centroid["y"] + 20))
                        text.putText(frame_resized, "Y: " + ("{:.1f}cm".format(spatials['y'] / 10) if not math.isnan(spatials['y']) else "--"), (centroid["x"] + 10, centroid["y"] + 35))
                        text.putText(frame_resized, "Z: " + ("{:.1f}cm".format(spatials['z'] / 10) if not math.isnan(spatials['z']) else "--"), (centroid["x"] + 10, centroid["y"] + 50))

                    distance = depthFrame[point[1]][point[0]]  # depth values are in mm
                    cv2.circle(frame_resized, point, 10, (0, 0, 255))
                    cv2.putText(frame_resized, "{}mm".format(distance), (point[0], point[1]), cv2.FONT_HERSHEY_PLAIN, 2, (255, 255, 255), 2)
                    cv2.imshow("12mp", frame_resized)

    if cv2.waitKey(1) == ord('q'):
        break

device.close()
```
Hi glitchyordis, this is far from an MRE. Please make it as short as possible.
Hi erik, thanks for the feedback. Is the following alright?
```python
import cv2
import depthai as dai
import numpy as np
import matplotlib.pyplot as plt
from depthai_sdk.fps import FPSHandler
import torch
# %%
class HostSync:
    # adapted from https://github.com/luxonis/depthai-experiments/blob/master/gen2-depth-driven-focus/main.py
    def __init__(self):
        self.arrays = {}

    def add_msg(self, name, msg):
        if name not in self.arrays:
            self.arrays[name] = []
        # Add msg to array
        self.arrays[name].append({"msg": msg, "seq": msg.getSequenceNum()})

        synced = {}
        for name, arr in self.arrays.items():
            for i, obj in enumerate(arr):
                if msg.getSequenceNum() == obj["seq"]:
                    synced[name] = obj["msg"]

        if len(synced) == 2:  # Color, Depth
            # Remove old msgs
            for name, arr in self.arrays.items():
                for i, obj in enumerate(arr):
                    if obj["seq"] < msg.getSequenceNum():
                        arr.remove(obj)
                    else:
                        break
            return synced
        return False
# %%
device = dai.Device("184430102145421300", maxUsbSpeed=dai.UsbSpeed.SUPER_PLUS)
# %%
# Create pipeline
pipeline = dai.Pipeline()
pipeline.setXLinkChunkSize(0)
# Define sources and outputs
camRgb = pipeline.create(dai.node.ColorCamera)
monoLeft = pipeline.create(dai.node.MonoCamera)
monoRight = pipeline.create(dai.node.MonoCamera)
stereo = pipeline.create(dai.node.StereoDepth)
xoutRgb = pipeline.create(dai.node.XLinkOut)
xoutDepth = pipeline.create(dai.node.XLinkOut)
xoutRgb.setStreamName("rgb")
xoutDepth.setStreamName("depth")
# mono and rgb properties
set_fps = 30
camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_12_MP)
camRgb.setFps(set_fps)
# camRgb.setInterleaved(False)
try:
    calibData = device.readCalibration2()
    lensPosition = calibData.getLensPosition(dai.CameraBoardSocket.RGB)
    if lensPosition:
        camRgb.initialControl.setManualFocus(lensPosition)
except:
    raise
monoLeft.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
monoLeft.setBoardSocket(dai.CameraBoardSocket.LEFT)
monoLeft.setFps(set_fps)
monoRight.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
monoRight.setBoardSocket(dai.CameraBoardSocket.RIGHT)
monoRight.setFps(set_fps)
# stereoConfig
stereo.setDefaultProfilePreset(dai.node.StereoDepth.PresetMode.HIGH_DENSITY)
stereo.initialConfig.setMedianFilter(dai.MedianFilter.KERNEL_7x7)
stereoConfig = stereo.initialConfig.get()
stereoConfig.postProcessing.speckleFilter.enable = True
stereoConfig.postProcessing.speckleFilter.speckleRange = 50
stereoConfig.postProcessing.spatialFilter.enable = True
stereoConfig.postProcessing.spatialFilter.holeFillingRadius = 2
stereoConfig.postProcessing.spatialFilter.numIterations = 1
stereoConfig.postProcessing.thresholdFilter.minRange = 150
stereoConfig.postProcessing.thresholdFilter.maxRange = 1000
stereoConfig.postProcessing.decimationFilter.decimationFactor = 2
stereo.initialConfig.set(stereoConfig)
stereo.setLeftRightCheck(True)
stereo.setExtendedDisparity(True)
# Align depth map to rgb
stereo.setDepthAlign(dai.CameraBoardSocket.RGB)
stereo.setOutputSize(800, 599)
# Linking
monoLeft.out.link(stereo.left)
monoRight.out.link(stereo.right)
camRgb.isp.link(xoutRgb.input)
stereo.depth.link(xoutDepth.input)
point = (0, 0)
def show_distance(event, x, y, args, params):
    global point
    point = (x, y)
cv2.namedWindow("12mp")
cv2.setMouseCallback("12mp", show_distance)
# %%
# obj detection
model = torch.hub.load('ultralytics/yolov5', "yolov5s")
model.classes = [0] # detects `people` only
# %%
device.startPipeline(pipeline)
calibData = device.readCalibration()
outputs = ['rgb', 'depth']
queues = [device.getOutputQueue(name, 4, False) for name in outputs]
sync = HostSync()
fps = FPSHandler()
synced_msgs = None
while True:
    for q in queues:
        if q.has():
            synced_msgs = sync.add_msg(q.getName(), q.get())
            if synced_msgs:
                fps.nextIter()
                print('FPS', fps.fps())
                frame = synced_msgs["rgb"].getCvFrame()
                if 'depth' in synced_msgs:
                    depthFrame: dai.ImgFrame = synced_msgs["depth"].getFrame()
                    fov = synced_msgs['depth'].getInstanceNum()  # socket/instance num, used for FOV lookup
                    frame_resized = cv2.resize(frame, (800, 599), interpolation=cv2.INTER_NEAREST)

                    # object det yolov5 on host (BGR -> RGB for the model)
                    result = model(frame[:, :, ::-1], size=840)
                    result = result.pandas().xyxy[0]

                    distance = depthFrame[point[1]][point[0]]  # depth values are in mm
                    cv2.circle(frame_resized, point, 10, (0, 0, 255))
                    cv2.putText(frame_resized, "{}mm".format(distance), (point[0], point[1]), cv2.FONT_HERSHEY_PLAIN, 2, (255, 255, 255), 2)
                    cv2.imshow("12mp", frame_resized)

    if cv2.waitKey(1) == ord('q'):
        break

device.close()
```
Hi glitchyordis,
After removing a bunch of unneeded code, I get to about 17 FPS for both depth and RGB. A single 12MP frame (isp output, meaning YUV420, 1.5 bytes/pixel) is about 18MB (144 Mbits), so at 17 FPS that would be roughly 2.5 Gbps. Depth isn't nearly as large.
Looking at the OAK bandwidth test, where I got at most 2273 Mbps for the downlink, 2.5 Gbps (+ depth) is actually quite good.
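For reference, the quick math (a sketch; 4056x3040 is the assumed 12MP sensor resolution):
```python
# Back-of-the-envelope bandwidth estimate for the 12MP isp stream
width, height = 4056, 3040                   # assumed 12MP resolution
bytes_per_frame = width * height * 1.5       # YUV420: 1.5 bytes/pixel, ~18.5 MB
mbit_per_frame = bytes_per_frame * 8 / 1e6   # ~148 Mbit
gbps_at_17fps = mbit_per_frame * 17 / 1e3    # ~2.5 Gbps for RGB alone
print(f"{bytes_per_frame / 1e6:.1f} MB/frame, {gbps_at_17fps:.2f} Gbps at 17 FPS")
```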
So overall, the bandwidth is the bottleneck here. I don't have 10 Gbps support on my laptop, so I can't select SUPER_PLUS speeds. Tested code:
Thanks, Erik
Hi erik, thank you for the explanation. I have a few questions for the OAK-D Pro:
- After performing depth alignment with 12MP RGB (https://gist.github.com/Erol444/25f374fa18efa7939ec9bb848b39249a), if we calculate spatials based on an ROI, are the spatials (x, y, z) relative to the RGB camera or the mono camera?
- Is it recommended to perform RGB-undistorted depth alignment?
Hi glitchyordis,
- They are relative to the RGB camera, as depth was aligned to the RGB camera.
- Usually that's not needed - it was developed for wide-FOV cameras (OAK-D W*), where images are very distorted.
Thanks, Erik
Hey erik, I have the following setup:
a) I am running object detection using YOLO and finding the depth with
stereo.setDepthAlign(dai.CameraBoardSocket.RGB)
spatialDetectionNetwork = pipeline.createYoloSpatialDetectionNetwork()
In this setup the depth coming out seems to be w.r.t. the RGB camera.
b) I am performing ArUco detection without depth align, using
hostSpatials = HostSpatialsCalc(device)
In this the depth coming out is w.r.t. the right camera.
I want a consistent location w.r.t. the RGB camera, and I should be able to change the camera reference whenever I want.
Can you tell me how to achieve this?
Hi ManiRadhakrishnan
Any specific reason why you are not using depth align in the second case as well?
jakaskerl I am using depth align, but I don't know how to verify that… and the location values are very high.
ManiRadhakrishnan I am performing ArUco detection without depth align, using
hostSpatials = HostSpatialsCalc(device)
In this the depth coming out is w.r.t. the right camera.
If you align the depth to the RGB camera, then that depth map will be aligned to that camera. What do you mean the values are high?
ManiRadhakrishnan I should be able to change the camera reference whenever I want
You should be able to change the alignment by altering the stereo config during runtime (not completely sure).
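Something like this might work (an untested sketch against your existing pipeline/stereo/device objects; it assumes StereoDepth.inputConfig accepts depth-align changes, and note the runtime enum targets rectified left/right/center rather than an arbitrary socket):
```python
# At pipeline build time: add a config input to the existing stereo node
cfgIn = pipeline.create(dai.node.XLinkIn)
cfgIn.setStreamName("stereo_cfg")
cfgIn.out.link(stereo.inputConfig)

# At runtime: send an updated StereoDepthConfig to switch the alignment
cfg = dai.StereoDepthConfig()
raw = cfg.get()
raw.algorithmControl.depthAlign = dai.RawStereoDepthConfig.AlgorithmControl.DepthAlign.RECTIFIED_RIGHT
cfg.set(raw)
device.getInputQueue("stereo_cfg").send(cfg)
```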
Thanks,
Jaka
The depth values coming out are 1 metre, 2 metres. Before using the align, it was perfect, with 2 mm accuracy.
Hi ManiRadhakrishnan
Are you talking about the depth map here? - it should stay the same. If the error is in the end result, it means something is wrong with the calculation you are running on the host side.
ManiRadhakrishnan hostSpatials = HostSpatialsCalc(device)
Where is this HostSpatialsCalc from?
Thanks,
Jaka