• DepthAI-v2
  • Aligning full FoV color camera stream with depth camera

A little while ago I was writing a simple script to produce a combined RGB / depth image stream. I first configured the color camera to its maximum resolution, downscaling the output to 1/3 of that size so I could keep the camera's full FoV:

COLOR_CAMERA_SOCKET = dai.CameraBoardSocket.CAM_A

color_camera = pipeline.create(dai.node.ColorCamera)
color_camera.setBoardSocket(COLOR_CAMERA_SOCKET)
color_camera.setResolution(dai.ColorCameraProperties.SensorResolution.THE_12_MP)
color_camera.setIspScale(1, 3)

I then aligned the stereo depth node to the color camera:

depth_camera = pipeline.create(dai.node.StereoDepth)
depth_camera.setDepthAlign(COLOR_CAMERA_SOCKET)

After setting up other configurations and nodes as needed (see full script at the end of the post), I was surprised to find that the script would fail with the following error:

[1944301041162F1200] [1.3] [1.609] [StereoDepth(3)] [error] Disparity/depth width must be multiple of 16, but RGB camera width is 1352. Set output size explicitly using 'setOutputSize(width, height)'.

It turned out that the setDepthAlign() call was trying to set the depth stream to the same 1352x1014 resolution as the color camera stream (a third of the full 12 MP resolution of 4056x3040), and since 1352 isn't a multiple of 16, that's an invalid setting.
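As a quick sanity check of the arithmetic (plain Python, independent of the DepthAI pipeline), the width inherited from the ISP-scaled color stream indeed fails the multiple-of-16 constraint:

```python
# The ISP scale of 1/3 applied to the 12 MP sensor's 4056-pixel width
# yields 1352, which leaves a remainder when divided by 16.
width = 4056 * 1 // 3
print(width, width % 16)  # 1352 8
```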

To work around that, I had to set the depth stream dimensions manually by calling setOutputSize(), as suggested in the error message. I set it to the same resolution as the right monochrome camera (the left would work just as well), which was configured to 640x480. This setting has the advantage of matching the color stream's 4:3 aspect ratio, which makes the images easy to align.
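For the record, a quick check (plain Python, no DepthAI needed) that 640x480 and 1352x1014 reduce to the same 4:3 aspect ratio, which is what lets the depth image later be resized straight to the color resolution without letterboxing:

```python
from fractions import Fraction

# Both the mono stream (640x480) and the ISP-scaled color stream
# (1352x1014) simplify to the same 4:3 ratio.
print(Fraction(640, 480), Fraction(1352, 1014))  # 4/3 4/3
```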

See below for the full script. Note that I don't bother rectifying the color images, since the color camera's distortion is already small enough for my purposes.

#!/usr/bin/env python3

import cv2
import depthai as dai
from datetime import timedelta
import numpy as np

COLOR_CAMERA_SOCKET = dai.CameraBoardSocket.CAM_A

FPS = 18.0

pipeline = dai.Pipeline()

# In order to obtain a video stream with the full FoV from the color camera,
# first set its resolution to 12 MP, then downscale images by 1/3 to avoid cropping.
color_camera = pipeline.create(dai.node.ColorCamera)
color_camera.setBoardSocket(COLOR_CAMERA_SOCKET)
color_camera.setResolution(dai.ColorCameraProperties.SensorResolution.THE_12_MP)
color_camera.setIspScale(1, 3)
color_camera.setFps(FPS)

left_camera = pipeline.create(dai.node.MonoCamera)
left_camera.setResolution(dai.MonoCameraProperties.SensorResolution.THE_480_P)
left_camera.setCamera('left')
left_camera.setFps(FPS)

right_camera = pipeline.create(dai.node.MonoCamera)
right_camera.setResolution(dai.MonoCameraProperties.SensorResolution.THE_480_P)
right_camera.setCamera('right')
right_camera.setFps(FPS)

# Align the stereo depth camera with the color camera. The resolution is manually set to that
# of the right monochrome camera, which is the same 4:3 aspect ratio as the color camera's.
depth_camera = pipeline.create(dai.node.StereoDepth)
depth_camera.setOutputSize(right_camera.getResolutionWidth(), right_camera.getResolutionHeight())
depth_camera.setDefaultProfilePreset(dai.node.StereoDepth.PresetMode.HIGH_ACCURACY)
depth_camera.initialConfig.setMedianFilter(dai.MedianFilter.KERNEL_7x7)
depth_camera.setDepthAlign(COLOR_CAMERA_SOCKET)
depth_camera.setLeftRightCheck(True)
depth_camera.setRectification(True)
depth_camera.setSubpixel(True)

# The synchronization node allows color and depth frames
# to be retrieved from the camera in the same operation.
sync = pipeline.create(dai.node.Sync)
sync.setSyncThreshold(timedelta(seconds=0.5 / FPS))

output = pipeline.create(dai.node.XLinkOut)
output.setStreamName('output')

left_camera.out.link(depth_camera.left)
right_camera.out.link(depth_camera.right)
color_camera.isp.link(sync.inputs['color'])
depth_camera.depth.link(sync.inputs['depth'])
sync.out.link(output.input)

color_width = color_camera.getIspWidth()
color_height = color_camera.getIspHeight()

with dai.Device(pipeline) as device:
    cv2.namedWindow('Overlay', cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO | cv2.WINDOW_GUI_NORMAL)

    queue = device.getOutputQueue(name='output', maxSize=4, blocking=False)

    while True:
        messages = queue.get()
        color_image = messages['color'].getCvFrame()
        depth_image = messages['depth'].getCvFrame()

        # Turn the depth image into a color map.
        depth_image = (depth_image * (255 / 4000)).astype(np.uint8)
        depth_image = cv2.applyColorMap(depth_image, cv2.COLORMAP_JET)
        depth_image = cv2.resize(depth_image, (color_width, color_height))

        # Overlay the color and depth images for display.
        overlay = (color_image // 2) + (depth_image // 2)
        cv2.imshow('Overlay', overlay)

        if cv2.waitKey(1) == ord('q'):
            break

Addendum for wide FoV cameras: because the OV9282 monochrome sensor doesn't support any 4:3 resolutions, in that case we can't simply resize depth images to the color camera's resolution. Instead, we need to resize them proportionally to the color camera's width, then merge them into the color images while accounting for the difference in height. Distortion effects are also more visible in this case, so it makes sense to rectify the color images. See the updated script below:
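To make the ROI arithmetic concrete, here is the computation with the numbers spelled out (a standalone sketch, assuming 640x400 mono frames and the 1352x1014 color stream from above):

```python
# Hypothetical sizes: 640x400 mono (OV9282 at 400P), 1352x1014 color stream.
color_width, color_height = 1352, 1014
depth_width, depth_height = 640, 400

# Scale the depth image proportionally to the full color width...
depth_roi_height = depth_height * color_width // depth_width  # 845
# ...and center the resulting band vertically in the color image.
depth_roi_top = (color_height - depth_roi_height) // 2        # 84

print(depth_roi_top, depth_roi_top + depth_roi_height)  # 84 929
```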

#!/usr/bin/env python3

import cv2
import depthai as dai
from datetime import timedelta
import numpy as np

COLOR_CAMERA_SOCKET = dai.CameraBoardSocket.CAM_A

FPS = 18.0

pipeline = dai.Pipeline()

# In order to obtain a video stream with the full FoV from the color camera,
# first set its resolution to 12 MP, then downscale images by 1/3 to avoid cropping.
# This results in an output resolution of 1352x1014.
color_camera = pipeline.create(dai.node.ColorCamera)
color_camera.setBoardSocket(COLOR_CAMERA_SOCKET)
color_camera.setResolution(dai.ColorCameraProperties.SensorResolution.THE_12_MP)
color_camera.setIspScale(1, 3)
color_camera.setFps(FPS)

left_camera = pipeline.create(dai.node.MonoCamera)
left_camera.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
left_camera.setCamera('left')
left_camera.setFps(FPS)

right_camera = pipeline.create(dai.node.MonoCamera)
right_camera.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
right_camera.setCamera('right')
right_camera.setFps(FPS)

# The resolution of the depth camera will be set to that of the monochrome cameras.
# This has to be done explicitly to override setDepthAlign()'s (see below) attempt
# to set the depth camera resolution to be the same as the color camera's, which
# would fail because the width of the depth image has to be a multiple of 16.
depth_width = right_camera.getResolutionWidth()
depth_height = right_camera.getResolutionHeight()

# Align the stereo depth camera with the color camera.
depth_camera = pipeline.create(dai.node.StereoDepth)
depth_camera.setOutputSize(depth_width, depth_height)
depth_camera.setDefaultProfilePreset(dai.node.StereoDepth.PresetMode.HIGH_ACCURACY)
depth_camera.initialConfig.setMedianFilter(dai.MedianFilter.KERNEL_7x7)
depth_camera.setDepthAlign(COLOR_CAMERA_SOCKET)
depth_camera.setLeftRightCheck(True)
depth_camera.setRectification(True)
depth_camera.setSubpixel(True)

# The synchronization node allows color and depth frames
# to be retrieved from the camera in the same operation.
sync = pipeline.create(dai.node.Sync)
sync.setSyncThreshold(timedelta(seconds=0.5 / FPS))

output = pipeline.create(dai.node.XLinkOut)
output.setStreamName('output')

left_camera.out.link(depth_camera.left)
right_camera.out.link(depth_camera.right)
color_camera.isp.link(sync.inputs['color'])
depth_camera.depth.link(sync.inputs['depth'])
sync.out.link(output.input)

color_width = color_camera.getIspWidth()
color_height = color_camera.getIspHeight()

# Compute the region of the color image where the depth image will be pasted.
depth_roi_height = depth_height * color_width // depth_width
depth_roi_top = (color_height - depth_roi_height) // 2
depth_roi = slice(depth_roi_top, depth_roi_top + depth_roi_height)

with dai.Device(pipeline) as device:
    cv2.namedWindow('Overlay', cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO | cv2.WINDOW_GUI_EXPANDED)

    calibration = device.readCalibration()
    color_distortion = np.array(calibration.getDistortionCoefficients(COLOR_CAMERA_SOCKET))
    color_intrinsics = np.array(calibration.getCameraIntrinsics(COLOR_CAMERA_SOCKET, color_width, color_height))

    queue = device.getOutputQueue(name='output', maxSize=4, blocking=False)

    while True:
        messages = queue.get()
        color_image = cv2.undistort(messages['color'].getCvFrame(), color_intrinsics, color_distortion)
        depth_image = messages['depth'].getCvFrame()

        # Turn the depth image into a color map.
        depth_image = (depth_image * (255 / 4000)).astype(np.uint8)
        depth_image = cv2.applyColorMap(depth_image, cv2.COLORMAP_JET)
        depth_image = cv2.resize(depth_image, (color_width, depth_roi_height))

        # Overlay the color and depth images for display.
        color_image[depth_roi] = (color_image[depth_roi] // 2) + (depth_image // 2)
        cv2.imshow('Overlay', color_image)

        if cv2.waitKey(1) == ord('q'):
            break