I'm using an Oak-D Pro Wide and I have noticed that the depth data coming out of the stereoDepth node seems to be wildly inaccurate. I was using a tape measure and laser range finder to compare against the camera and here are some of the distances I was getting:
Actual: 1.00m, Reported by camera: 1.21m
Actual: 2.05m, Reported by camera: 2.64m
Actual: 3.50m, Reported by camera: 4.76m
Actual: 4.32m, Reported by camera: 7.87m
I also tried to use the disparity and manually calculate the depth using the formula, to make sure that it wasn't something funny with the depth output of the stereoDepth node, but it gives me the same result. I wasn't exactly sure if the depth output provides perpendicular depth only, so to avoid any possible issues I took these measurements using the pixels at the center of the image. I also tried the spec-translation settings (setDisparityToDepthUseSpecTranslation / setDepthAlignmentUseSpecTranslation), but they didn't appear to make a noticeable difference.
I thought it may be something to do with my code so I tested this in DepthAI Viewer and unfortunately the horribly inaccurate depths also showed up in that utility as well.
I have an OAK-D and an OAK-D Lite, and both of them are reliably within the 10% depth error that is quoted in the documentation. So I'm not sure whether there is something wrong with the specific unit I have, or whether the wide-FOV devices simply have much worse depth accuracy than the narrower-FOV models.
Any help on this would be greatly appreciated.
import cv2
import numpy as np
import depthai as dai
import argparse
from datetime import timedelta
# Define some functions
def printPixelValue(event, x, y, flags, param):
    """OpenCV mouse callback: print disparity and depth under the cursor.

    Reads the module-level frames ``disp_frame_orig`` (raw disparity) and
    ``depth_frame_orig`` (depth in millimeters) that the display loop
    refreshes every iteration.  Depth is printed in meters.
    """
    if event != cv2.EVENT_LBUTTONDOWN:
        return
    # Guard against a click arriving before the first frame has been
    # received (the globals would not exist yet and raise NameError).
    try:
        disparity_value = disp_frame_orig[y, x]
        depth_value = depth_frame_orig[y, x]
    except NameError:
        return
    # BUGFIX: convert mm -> m BEFORE rounding.  The original rounded the
    # millimeter value to one decimal and THEN divided by 1000, so the
    # rounding had essentially no effect on the printed meters value.
    print('Disparity at clicked cursor location:', disparity_value,
          round(depth_value / 1000, 2))
# ----- Command-line arguments ---------------------------------------------
parser = argparse.ArgumentParser()
parser.add_argument('-alpha', type=float, default=None, help="Alpha scaling parameter to increase float. [0,1] valid interval.")
args = parser.parse_args()
alpha = args.alpha

# Sets the frames per second, also sets the min shutter speed as a consequence
fps = 30

# ----- Pipeline and device ------------------------------------------------
pipeline = dai.Pipeline()
device = dai.Device()

# Define the nodes
camRgb = pipeline.create(dai.node.Camera)
left = pipeline.create(dai.node.MonoCamera)
right = pipeline.create(dai.node.MonoCamera)
stereo = pipeline.create(dai.node.StereoDepth)
imu = pipeline.create(dai.node.IMU)
sync = pipeline.create(dai.node.Sync)
x_out = pipeline.create(dai.node.XLinkOut)

# Set stream names
x_out.setStreamName("xout")

# ----- RGB camera ---------------------------------------------------------
rgbCamSocket = dai.CameraBoardSocket.CAM_A
camRgb.setBoardSocket(rgbCamSocket)
camRgb.setSize(1280, 800)
camRgb.setFps(fps)

# For now, RGB needs fixed focus to properly align with depth.
# NOTE: the original wrapped these three lines in `try: ... except: raise`,
# which is a bare-except no-op — every exception propagates exactly the
# same way without the wrapper, so it has been removed.
calibData = device.readCalibration2()
lensPosition = calibData.getLensPosition(rgbCamSocket)
if lensPosition:
    camRgb.initialControl.setManualFocus(lensPosition)
# ----- Mono (stereo pair) cameras -----------------------------------------
# Disparity is computed at this resolution, then upscaled to the RGB size.
mono_res = dai.MonoCameraProperties.SensorResolution.THE_400_P
for mono_cam, socket_name in ((left, "left"), (right, "right")):
    mono_cam.setResolution(mono_res)
    mono_cam.setCamera(socket_name)
    mono_cam.setFps(fps)

# ----- Stereo depth node --------------------------------------------------
stereo.setDefaultProfilePreset(dai.node.StereoDepth.PresetMode.HIGH_DENSITY)
stereo.initialConfig.setConfidenceThreshold(200)
stereo.setLeftRightCheck(True)   # LR-check is required for depth alignment
stereo.setSubpixel(True)         # adds a lot of lag at 800p mono resolution
stereo.setDepthAlign(rgbCamSocket)
# False => use the calibrated translation rather than the board-spec value
# for disparity->depth conversion and for depth alignment.
stereo.setDisparityToDepthUseSpecTranslation(False)
stereo.setDepthAlignmentUseSpecTranslation(False)
print(stereo.initialConfig.getMaxDisparity())

# ----- IMU node -----------------------------------------------------------
imu.enableIMUSensor(dai.IMUSensor.ROTATION_VECTOR, 120)
imu.setBatchReportThreshold(1)
imu.setMaxBatchReports(1)

# ----- Sync node ----------------------------------------------------------
sync.setSyncThreshold(timedelta(milliseconds=30))
sync.setSyncAttempts(-1)

# ----- Wire the graph together --------------------------------------------
left.out.link(stereo.left)
right.out.link(stereo.right)
stereo.disparity.link(sync.inputs["disparity"])
stereo.depth.link(sync.inputs["depth"])
camRgb.video.link(sync.inputs["video"])
imu.out.link(sync.inputs["imu"])
sync.out.link(x_out.input)

camRgb.setMeshSource(dai.CameraProperties.WarpMeshSource.CALIBRATION)
if alpha is not None:
    camRgb.setCalibrationAlpha(alpha)
    stereo.setAlphaScaling(alpha)

# Scale factor mapping raw disparity onto 0-255 for display normalization.
disparityMultiplier = 255.0 / stereo.initialConfig.getMaxDisparity()
# ----- Connect to the device and run the display loop ----------------------
with device:
    # One window per stream, each with the click-to-inspect callback.
    for window_name in ('disparity', 'video', 'depth'):
        cv2.namedWindow(window_name)
        cv2.setMouseCallback(window_name, printPixelValue)

    # Calibration data for computing depth from disparity by hand.
    calib = device.readCalibration()
    right_intrinsics = calib.getCameraIntrinsics(dai.CameraBoardSocket.CAM_C)
    fx_pixels = right_intrinsics[0][0]  # NOTE: divide by 2 when using 400p
    baseline_cm = 7.5
    print(fx_pixels)

    # Start the pipeline
    device.startPipeline(pipeline)

    # IR dot projector and flood illumination both off.
    device.setIrLaserDotProjectorIntensity(0.0)
    device.setIrFloodLightIntensity(0.0)

    # Non-blocking queue that keeps only the freshest synced message group.
    out_queue = device.getOutputQueue(name='xout')
    out_queue.setMaxSize(1)
    out_queue.setBlocking(False)

    while True:
        group = out_queue.get()
        imu_msg = group['imu']
        video_msg = group['video']
        disp_msg = group['disparity']
        depth_msg = group['depth']

        # Disparity: decimate 2x, normalize to 8-bit, colorize.
        # (disp_frame_orig stays a module-level name — the mouse callback
        # samples it.)
        disp_frame_orig = disp_msg.getCvFrame()[::2, ::2]
        disp_view = cv2.applyColorMap(
            (disp_frame_orig * disparityMultiplier).astype(np.uint8),
            cv2.COLORMAP_JET)

        # Color preview, decimated the same way.
        video_view = video_msg.getCvFrame()[::2, ::2]

        # Depth (uint16 mm): decimate 2x, squash full range into 8 bits,
        # colorize.  (depth_frame_orig is also read by the mouse callback.)
        depth_frame_orig = depth_msg.getCvFrame()[::2, ::2]
        depth_view = cv2.applyColorMap(
            (depth_frame_orig * (255.0 / 65535)).astype(np.uint8),
            cv2.COLORMAP_JET)

        cv2.imshow('disparity', disp_view)
        cv2.imshow('video', video_view)
        cv2.imshow('depth', depth_view)
        if cv2.waitKey(1) == ord('q'):
            break