Hi erik, here you go.

device: OAK-D Pro AF
latest versions of depthai and depthai-sdk

Note: device = dai.Device("111111111111111111", maxUsbSpeed=dai.UsbSpeed.SUPER_PLUS); please change the ID to your own device's mxID.
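
If you don't know your device's mxID, you can list the connected devices first. A minimal sketch using depthai's device discovery:

import depthai as dai
# Print the mxID and state of every device depthai can see
for dev in dai.Device.getAllAvailableDevices():
    print(dev.getMxId(), dev.state)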

# %%
from pathlib import Path
import sys
import cv2
import depthai as dai
import numpy as np
import time 
import matplotlib.pyplot as plt
import math
import PySimpleGUI as gui
from depthai_sdk.fps import FPSHandler
import torch

# %%
class HostSync:
    # adapted from https://github.com/luxonis/depthai-experiments/blob/master/gen2-depth-driven-focus/main.py
    
    def __init__(self):
        self.arrays = {}

    def add_msg(self, name, msg):
        if name not in self.arrays:
            self.arrays[name] = []
        # Add msg to array
        self.arrays[name].append({"msg": msg, "seq": msg.getSequenceNum()})

        synced = {}
        for name, arr in self.arrays.items():
            for i, obj in enumerate(arr):
                if msg.getSequenceNum() == obj["seq"]:
                    synced[name] = obj["msg"] 

        if len(synced) == 2:  # color + depth
            # Remove older msgs. Rebuilding each list avoids the
            # remove-while-iterating bug, which skips elements.
            for name, arr in self.arrays.items():
                self.arrays[name] = [obj for obj in arr if obj["seq"] >= msg.getSequenceNum()]
            return synced
        return False
    
class TextHelper:
    # from: https://github.com/luxonis/depthai-experiments/blob/master/gen2-calc-spatials-on-host/utility.py
    def __init__(self) -> None:
        self.bg_color = (0, 0, 0)
        self.color = (255, 255, 255)
        self.text_type = cv2.FONT_HERSHEY_SIMPLEX
        self.line_type = cv2.LINE_AA
    def putText(self, frame, text, coords):
        cv2.putText(frame, text, coords, self.text_type, 0.5, self.bg_color, 3, self.line_type)
        cv2.putText(frame, text, coords, self.text_type, 0.5, self.color, 1, self.line_type)
    def rectangle(self, frame, p1, p2):
        cv2.rectangle(frame, p1, p2, self.bg_color, 3)
        cv2.rectangle(frame, p1, p2, self.color, 1)

# %%
def printSystemInformation(info):
    # from https://docs.luxonis.com/projects/api/en/latest/samples/SystemLogger/system_information/#system-information
    
    m = 1024 * 1024 # MiB
    print(f"Ddr used / total - {info.ddrMemoryUsage.used / m:.2f} / {info.ddrMemoryUsage.total / m:.2f} MiB")
    print(f"Cmx used / total - {info.cmxMemoryUsage.used / m:.2f} / {info.cmxMemoryUsage.total / m:.2f} MiB")
    print(f"LeonCss heap used / total - {info.leonCssMemoryUsage.used / m:.2f} / {info.leonCssMemoryUsage.total / m:.2f} MiB")
    print(f"LeonMss heap used / total - {info.leonMssMemoryUsage.used / m:.2f} / {info.leonMssMemoryUsage.total / m:.2f} MiB")
    t = info.chipTemperature
    print(f"Chip temperature - average: {t.average:.2f}, css: {t.css:.2f}, mss: {t.mss:.2f}, upa: {t.upa:.2f}, dss: {t.dss:.2f}")
    print(f"Cpu usage - Leon CSS: {info.leonCssCpuUsage.average * 100:.2f}%, Leon MSS: {info.leonMssCpuUsage.average * 100:.2f} %")
    print("----------------------------------------")

# %%
def calc_spatials(frame, frame_resized, result, depthFrame, calibData, depthData):
    # adapted from : https://github.com/luxonis/depthai-experiments/blob/master/gen2-calc-spatials-on-host/calc.py

    def calc_angle(frame, offset, HFOV):
        return math.atan(math.tan(HFOV / 2.0) * offset / (frame.shape[1] / 2.0))

    imgH, imgW, _ = frame.shape
    resizedh, resizedw, _ = frame_resized.shape

    # only process the first detection
    scaled_bb = {
        "xmin": int(result.xmin[0]/imgW*resizedw),
        "ymin": int(result.ymin[0]/imgH*resizedh),
        "xmax": int(result.xmax[0]/imgW*resizedw),
        "ymax": int(result.ymax[0]/imgH*resizedh),
        } 

    depthROI = depthFrame[scaled_bb["ymin"]:scaled_bb["ymax"], scaled_bb["xmin"]:scaled_bb["xmax"]]
    inRange = (150 <= depthROI) & (depthROI <= 1000) # threshold: keep depth between 150 and 1000 mm

    averageDepth = np.mean(depthROI[inRange])

    centroid = { # Get centroid of the ROI
        'x': int((scaled_bb["xmax"] + scaled_bb["xmin"]) / 2),
        'y': int((scaled_bb["ymax"] + scaled_bb["ymin"]) / 2)
        }
    
    midW = int(depthFrame.shape[1] / 2) # middle of the depth img width
    midH = int(depthFrame.shape[0] / 2) # middle of the depth img height
    bb_x_pos = centroid['x'] - midW
    bb_y_pos = centroid['y'] - midH

    HFOV = np.deg2rad(calibData.getFov(dai.CameraBoardSocket(depthData.getInstanceNum())))

    angle_x = calc_angle(depthFrame, bb_x_pos, HFOV)
    angle_y = calc_angle(depthFrame, bb_y_pos, HFOV)

    spatials = {
                'z': averageDepth,
                'x': averageDepth * math.tan(angle_x),
                'y': -averageDepth * math.tan(angle_y)
            } 

    return spatials, scaled_bb, centroid

# %%
device = dai.Device("184430102145421300", maxUsbSpeed=dai.UsbSpeed.SUPER_PLUS)

# %%
# Create pipeline
pipeline = dai.Pipeline()
pipeline.setXLinkChunkSize(0)

# Define sources and outputs
camRgb = pipeline.create(dai.node.ColorCamera)
monoLeft = pipeline.create(dai.node.MonoCamera)
monoRight = pipeline.create(dai.node.MonoCamera)
stereo = pipeline.create(dai.node.StereoDepth)

xoutRgb = pipeline.create(dai.node.XLinkOut)
xoutDepth = pipeline.create(dai.node.XLinkOut) 

xoutRgb.setStreamName("rgb") 
xoutDepth.setStreamName("depth")

# mono and rgb properties
set_fps = 30
camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_12_MP)
camRgb.setFps(set_fps)
# camRgb.setInterleaved(False) 
try:
    calibData = device.readCalibration2()
    lensPosition = calibData.getLensPosition(dai.CameraBoardSocket.RGB)
    if lensPosition:
        camRgb.initialControl.setManualFocus(lensPosition)
    else:
        gui.popup("No calibration data for lens position")
except:
    raise

monoLeft.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
monoLeft.setBoardSocket(dai.CameraBoardSocket.LEFT)
monoLeft.setFps(set_fps)
monoRight.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
monoRight.setBoardSocket(dai.CameraBoardSocket.RIGHT)
monoRight.setFps(set_fps)

# stereoConfig
stereo.setDefaultProfilePreset(dai.node.StereoDepth.PresetMode.HIGH_DENSITY) 

stereo.initialConfig.setMedianFilter(dai.MedianFilter.KERNEL_7x7)
 
stereoConfig = stereo.initialConfig.get()
stereoConfig.postProcessing.speckleFilter.enable = True
stereoConfig.postProcessing.speckleFilter.speckleRange = 50 
stereoConfig.postProcessing.spatialFilter.enable = True
stereoConfig.postProcessing.spatialFilter.holeFillingRadius = 2
stereoConfig.postProcessing.spatialFilter.numIterations = 1
stereoConfig.postProcessing.thresholdFilter.minRange = 150
stereoConfig.postProcessing.thresholdFilter.maxRange = 1000
stereoConfig.postProcessing.decimationFilter.decimationFactor = 2
stereo.initialConfig.set(stereoConfig)

stereo.setLeftRightCheck(True)
stereo.setExtendedDisparity(True)

# Align depth map to rgb
stereo.setDepthAlign(dai.CameraBoardSocket.RGB)
stereo.setOutputSize(800, 599)
 
# Linking
monoLeft.out.link(stereo.left)
monoRight.out.link(stereo.right)
 
camRgb.isp.link(xoutRgb.input)
stereo.depth.link(xoutDepth.input) 

# logger
sysLog = pipeline.create(dai.node.SystemLogger)
linkOut = pipeline.create(dai.node.XLinkOut)
linkOut.setStreamName("sysinfo")
sysLog.setRate(0.5)   
sysLog.out.link(linkOut.input)

# gets the mouse coordinates (in pixels) when the mouse hovers over the cv2 window
point = (0, 0)
def show_distance(event, x,y, args, params):
    global point
    point = (x,y)
    # print(f"point: {point}")

cv2.namedWindow("12mp")
cv2.setMouseCallback("12mp", show_distance)
 
# %%
# obj detection
model = torch.hub.load('ultralytics/yolov5', "yolov5s") 
model.classes = [0] # detect the `person` class (COCO class 0) only

# %%
device.startPipeline(pipeline)
device.setLogLevel(dai.LogLevel.INFO)
device.setLogOutputLevel(dai.LogLevel.INFO) 

calibData = device.readCalibration() 

outputs = ['rgb', 'depth'] 
queues  = [device.getOutputQueue(name, 4, False) for name in outputs]
qSysInfo = device.getOutputQueue(name="sysinfo", maxSize=4, blocking=False)

sync = HostSync()
fps = FPSHandler()
text = TextHelper()

synced_msgs = None
while True: 
    sysInfo = qSysInfo.tryGet()
    if sysInfo is not None:
        printSystemInformation(sysInfo)

    for q in queues:
        if q.has():
            synced_msgs = sync.add_msg(q.getName(), q.get())

            if synced_msgs:
                fps.nextIter()
                print('FPS', fps.fps())

                frame = synced_msgs["rgb"].getCvFrame()

                if 'depth' in synced_msgs:
                    depthFrame = synced_msgs["depth"].getFrame() # np.ndarray of uint16 depth values in mm
                    fov = synced_msgs['depth'].getInstanceNum() # instance num of the depth source, used to look up the FOV

                frame_resized = cv2.resize(frame,(800,599), interpolation=cv2.INTER_NEAREST)
 
                # object det yolov5 on host
                result = model(frame[:,:,::-1],size=840)
                result = result.pandas().xyxy[0]
                
                if len(result)>0:
                    spatials, scaled_bb, centroid =  calc_spatials(frame, frame_resized, result, depthFrame, calibData, synced_msgs["depth"])
                    text.rectangle(frame_resized, (scaled_bb["xmin"], scaled_bb["ymin"]), (scaled_bb["xmax"], scaled_bb["ymax"]))
                    text.putText(frame_resized, "X: " + ("{:.1f}cm".format(spatials['x']/10) if not math.isnan(spatials['x']) else "--"), (centroid["x"] + 10, centroid["y"] + 20))
                    text.putText(frame_resized, "Y: " + ("{:.1f}cm".format(spatials['y']/10) if not math.isnan(spatials['y']) else "--"), (centroid["x"] + 10, centroid["y"] + 35))
                    text.putText(frame_resized, "Z: " + ("{:.1f}cm".format(spatials['z']/10) if not math.isnan(spatials['z']) else "--"), (centroid["x"] + 10, centroid["y"] + 50))

                distance = depthFrame[point[1]][point[0]] / 10 # depth map is in mm; convert to cm
                cv2.circle(frame_resized, point, 10, (0,0,255))
                cv2.putText(frame_resized, "{:.1f}cm".format(distance), (point[0],point[1]), cv2.FONT_HERSHEY_PLAIN, 2, (255,255,255), 2)
                cv2.imshow("12mp", frame_resized)

    if cv2.waitKey(1) == ord('q'):
        break

device.close()

    Hi erik, thanks for the feedback. Is the following alright?

    import cv2
    import depthai as dai
    import numpy as np 
    import matplotlib.pyplot as plt  
    from depthai_sdk.fps import FPSHandler
    import torch
    
    # %%
    class HostSync:
        # adapted from https://github.com/luxonis/depthai-experiments/blob/master/gen2-depth-driven-focus/main.py
        
        def __init__(self):
            self.arrays = {}
    
        def add_msg(self, name, msg):
            if name not in self.arrays:
                self.arrays[name] = []
            # Add msg to array
            self.arrays[name].append({"msg": msg, "seq": msg.getSequenceNum()})
    
            synced = {}
            for name, arr in self.arrays.items():
                for i, obj in enumerate(arr):
                    if msg.getSequenceNum() == obj["seq"]:
                        synced[name] = obj["msg"] 
    
            if len(synced) == 2:  # color + depth
                # Remove older msgs. Rebuilding each list avoids the
                # remove-while-iterating bug, which skips elements.
                for name, arr in self.arrays.items():
                    self.arrays[name] = [obj for obj in arr if obj["seq"] >= msg.getSequenceNum()]
                return synced
            return False
    
    # %%
    device = dai.Device("184430102145421300", maxUsbSpeed=dai.UsbSpeed.SUPER_PLUS)
    
    # %%
    # Create pipeline
    pipeline = dai.Pipeline()
    pipeline.setXLinkChunkSize(0)
    
    # Define sources and outputs
    camRgb = pipeline.create(dai.node.ColorCamera)
    monoLeft = pipeline.create(dai.node.MonoCamera)
    monoRight = pipeline.create(dai.node.MonoCamera)
    stereo = pipeline.create(dai.node.StereoDepth)
    
    xoutRgb = pipeline.create(dai.node.XLinkOut)
    xoutDepth = pipeline.create(dai.node.XLinkOut) 
    
    xoutRgb.setStreamName("rgb") 
    xoutDepth.setStreamName("depth")
    
    # mono and rgb properties
    set_fps = 30
    camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_12_MP)
    camRgb.setFps(set_fps)
    # camRgb.setInterleaved(False) 
    try:
        calibData = device.readCalibration2()
        lensPosition = calibData.getLensPosition(dai.CameraBoardSocket.RGB)
        if lensPosition:
            camRgb.initialControl.setManualFocus(lensPosition) 
    except:
        raise
    
    monoLeft.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
    monoLeft.setBoardSocket(dai.CameraBoardSocket.LEFT)
    monoLeft.setFps(set_fps)
    monoRight.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
    monoRight.setBoardSocket(dai.CameraBoardSocket.RIGHT)
    monoRight.setFps(set_fps)
    
    # stereoConfig
    stereo.setDefaultProfilePreset(dai.node.StereoDepth.PresetMode.HIGH_DENSITY) 
    
    stereo.initialConfig.setMedianFilter(dai.MedianFilter.KERNEL_7x7)
     
    stereoConfig = stereo.initialConfig.get()
    stereoConfig.postProcessing.speckleFilter.enable = True
    stereoConfig.postProcessing.speckleFilter.speckleRange = 50 
    stereoConfig.postProcessing.spatialFilter.enable = True
    stereoConfig.postProcessing.spatialFilter.holeFillingRadius = 2
    stereoConfig.postProcessing.spatialFilter.numIterations = 1
    stereoConfig.postProcessing.thresholdFilter.minRange = 150
    stereoConfig.postProcessing.thresholdFilter.maxRange = 1000
    stereoConfig.postProcessing.decimationFilter.decimationFactor = 2
    stereo.initialConfig.set(stereoConfig)
    
    stereo.setLeftRightCheck(True)
    stereo.setExtendedDisparity(True)
    
    # Align depth map to rgb
    stereo.setDepthAlign(dai.CameraBoardSocket.RGB)
    stereo.setOutputSize(800, 599)
     
    # Linking
    monoLeft.out.link(stereo.left)
    monoRight.out.link(stereo.right)
     
    camRgb.isp.link(xoutRgb.input)
    stereo.depth.link(xoutDepth.input) 
    
    point = (0, 0)
    def show_distance(event, x,y, args, params):
        global point
        point = (x,y)
     
    cv2.namedWindow("12mp")
    cv2.setMouseCallback("12mp", show_distance)
    
    # %%
    # obj detection
    model = torch.hub.load('ultralytics/yolov5', "yolov5s") 
    model.classes = [0] # detect the `person` class (COCO class 0) only
    
    # %%
    device.startPipeline(pipeline) 
    
    calibData = device.readCalibration() 
    
    outputs = ['rgb', 'depth'] 
    queues  = [device.getOutputQueue(name, 4, False) for name in outputs]
    
    sync = HostSync()
    fps = FPSHandler() 
    
    synced_msgs = None
    while True:  
    
        for q in queues:
            if q.has():
                synced_msgs = sync.add_msg(q.getName(), q.get())
    
                if synced_msgs:
                    fps.nextIter()
                    print('FPS', fps.fps())
    
                    frame = synced_msgs["rgb"].getCvFrame()
    
                    if 'depth' in synced_msgs:
                        depthFrame = synced_msgs["depth"].getFrame() # np.ndarray of uint16 depth values in mm
                        fov = synced_msgs['depth'].getInstanceNum() # instance num of the depth source, used to look up the FOV
    
                    frame_resized = cv2.resize(frame,(800,599), interpolation=cv2.INTER_NEAREST)
     
                    # object det yolov5 on host
                    result = model(frame[:,:,::-1],size=840)
                    result = result.pandas().xyxy[0]
     
                    distance = depthFrame[point[1]][point[0]] / 10 # depth map is in mm; convert to cm
                    cv2.circle(frame_resized, point, 10, (0,0,255))
                    cv2.putText(frame_resized, "{:.1f}cm".format(distance), (point[0],point[1]), cv2.FONT_HERSHEY_PLAIN, 2, (255,255,255), 2)
                    cv2.imshow("12mp", frame_resized)
    
        if cv2.waitKey(1) == ord('q'):
            break
    
    device.close() 

      Hi glitchyordis,
      After removing a bunch of unneeded code, I get about 17 FPS for both depth and RGB. A single 12MP frame (isp output, meaning YUV420, 1.5 bytes/pixel) is 18 MB (144 Mbits), so at 17 FPS that would be about 2.5 Gbps. Depth isn't nearly as large.
      Looking at the OAK bandwidth test, where I got a max of 2273 Mbps for the downlink, 2.5 Gbps (+ depth) is actually quite good.
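
      As a quick sanity check, the arithmetic can be reproduced like this (a minimal sketch; the 4056x3040 12MP isp resolution is an assumption):

      width, height = 4056, 3040                   # assumed 12MP isp resolution
      bytes_per_frame = width * height * 1.5       # YUV420 is 1.5 bytes/pixel: ~18.5 MB
      mbits_per_frame = bytes_per_frame * 8 / 1e6  # ~148 Mbit per frame
      fps = 17
      gbps = mbits_per_frame * fps / 1e3           # ~2.5 Gbps for RGB alone
      print(f"{bytes_per_frame / 1e6:.1f} MB/frame, {gbps:.2f} Gbps at {fps} FPS")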

      So overall, the bandwidth is the bottleneck here. I don't have 10 Gbps support on my laptop, so I can't select SUPER_PLUS speeds. Tested code:

      Thanks, Erik

        7 days later

        Hi glitchyordis,

        1. They are relative to the RGB camera, since depth was aligned to the RGB camera.
        2. Usually that's not needed; it was developed for wide-FOV cameras (OAK-D W*), where the images are very distorted.

        Thanks, Erik

        7 months later

        Hey erik, I have the following setup.

        a) I am running object detection using YOLO and finding the depth:
        stereo.setDepthAlign(dai.CameraBoardSocket.RGB)
        spatialDetectionNetwork = pipeline.createYoloSpatialDetectionNetwork()

        In this setup, the depth coming out seems to be w.r.t. the RGB camera.

        b) I am performing the ArUco detection without depth align and using
        hostSpatials = HostSpatialsCalc(device)
        In this case, the depth coming out seems to be w.r.t. the right camera.

        I want a consistent location w.r.t. the RGB camera, and I should be able to change the camera reference whenever I want.

        Can you tell me how to achieve this?

          jakaskerl I am using depth align only, but I don't know how to verify that … and the location values are very high.

          ManiRadhakrishnan I am performing the ArUco detection without depth align and using
          hostSpatials = HostSpatialsCalc(device)
          In this case, the depth coming out seems to be w.r.t. the right camera.

          If you align the depth to the RGB camera, then that depth map will be aligned to that camera. What do you mean the values are high?

          ManiRadhakrishnan I should be able to change the camera reference whenever I want

          You should be able to change the alignment by altering the stereo config during runtime (not completely sure).
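
          A minimal sketch of that idea, assuming depthai's XLinkIn -> stereo.inputConfig path (untested; note the runtime message takes the StereoDepthConfig.AlgorithmControl.DepthAlign enum, not a CameraBoardSocket):

          # At pipeline build time, add a config input to the stereo node
          configIn = pipeline.create(dai.node.XLinkIn)
          configIn.setStreamName("stereo_cfg")
          configIn.out.link(stereo.inputConfig)

          # At runtime, send a new config to re-align depth, e.g. to the rectified right frame
          cfgQ = device.getInputQueue("stereo_cfg")
          cfg = dai.StereoDepthConfig()
          cfg.setDepthAlign(dai.StereoDepthConfig.AlgorithmControl.DepthAlign.RECTIFIED_RIGHT)
          cfgQ.send(cfg)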

          Thanks,
          Jaka

          The depth values coming out are around 1 metre or 2 metres. Before using the align, it was perfect, with 2 mm accuracy.

            Hi ManiRadhakrishnan
            Are you talking about the depth map here? It should stay the same. If the error is in the end result, it means something is wrong with the calculation you are running on the host side.
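
            One quick check: read the raw depth map directly. StereoDepth's depth output is uint16 millimetres, so a reading of ~2000 means 2 m. A small sketch (roi is a hypothetical (xmin, ymin, xmax, ymax) tuple):

            import numpy as np

            def roi_median_mm(depthFrame, roi):
                xmin, ymin, xmax, ymax = roi
                patch = depthFrame[ymin:ymax, xmin:xmax]
                valid = patch[patch > 0]  # 0 means "no depth measured"
                return float(np.median(valid)) if valid.size else float("nan")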

            ManiRadhakrishnan hostSpatials = HostSpatialsCalc(device)

            Where is this HostSpatialsCalc from?

            Thanks,
            Jaka