Running detection on host along with depth from camera

DARYLTHOMAS

I am trying to run Yolov8 on the device( bcoz yolov8l model ) and trying to get depth value from my Oak-d lite. This is the code I tried, i tried to merge the rgbvideo and gen2-calc-spatials-on-host examples. When I run with the rgbvideo along with the depthstereo input I do not get any value from the depth spatials call. It shows as nan value. Here is my integrated code -

import cv2
import depthai as dai
import datetime
from time import sleep
# Depth library
from calc import HostSpatialsCalc
from utility import *
import math
# YOLO Setup
from ultralytics import YOLO
import os

# define some constants
CONFIDENCE_THRESHOLD = 0.8
GREEN = (0, 255, 0)
model = YOLO("best_corrected_june12.pt")
def on_predict_batch_end(predictor):
    # results -> List[batch_size]
    _, _, im0s, _, _ = predictor.batch
    im0s = im0s if isinstance(im0s, list) else [im0s]
    predictor.results = zip(predictor.results, im0s)


sleeprate = 1.0

def callback_sleeprate(data):
    global sleeprate
    sleeprate = data.data
# YOLO Setup


# Create pipeline
pipeline = dai.Pipeline()

# Define source and output
camRgb = pipeline.create(dai.node.ColorCamera)
xoutVideo = pipeline.create(dai.node.XLinkOut)

xoutVideo.setStreamName("video")



# Properties
#Preview settings
# camRgb.setPreviewSize(640, 480)
# camRgb.setInterleaved(False)
# camRgb.setColorOrder(dai.ColorCameraProperties.ColorOrder.RGB)
# camRgb.preview.link(xoutVideo.input)
#Preview settings
camRgb.setBoardSocket(dai.CameraBoardSocket.CAM_A)
camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P)
camRgb.setVideoSize(1920, 1080)

xoutVideo.input.setBlocking(False)
xoutVideo.input.setQueueSize(1)

# Linking
camRgb.video.link(xoutVideo.input)

# Stereo depth setup
monoLeft = pipeline.create(dai.node.MonoCamera)
monoRight = pipeline.create(dai.node.MonoCamera)
stereo = pipeline.create(dai.node.StereoDepth)
# Properties
monoLeft.setResolution(dai.MonoCameraProperties.SensorResolution.THE_480_P)
monoLeft.setBoardSocket(dai.CameraBoardSocket.LEFT)
monoRight.setResolution(dai.MonoCameraProperties.SensorResolution.THE_480_P)
monoRight.setBoardSocket(dai.CameraBoardSocket.RIGHT)

stereo.initialConfig.setConfidenceThreshold(255)
stereo.setLeftRightCheck(True)
stereo.setSubpixel(False)
# Linking
monoLeft.out.link(stereo.left)
monoRight.out.link(stereo.right)

xoutDepth = pipeline.create(dai.node.XLinkOut)
xoutDepth.setStreamName("depth")
stereo.depth.link(xoutDepth.input)

xoutDepth = pipeline.create(dai.node.XLinkOut)
xoutDepth.setStreamName("disp")
stereo.disparity.link(xoutDepth.input)
xoutDepth.input.setBlocking(False)
xoutDepth.input.setQueueSize(1)
# Stereo depth setup

# Connect to device and start pipeline
with dai.Device(pipeline) as device:

    video = device.getOutputQueue(name="video", maxSize=1, blocking=False)
    depthQueue = device.getOutputQueue(name="depth")
    hostSpatials = HostSpatialsCalc(device)
    delta = 10
    hostSpatials.setDeltaRoi(delta)
    text = TextHelper()

    while True:
        start = datetime.datetime.now()        
        videoIn = video.get()
        # Get BGR frame from NV12 encoded video frame to show with opencv
        # Visualizing the frame on slower hosts might have overhead
        frame = videoIn.getCvFrame()
        depthData = depthQueue.get()
        detections = model(frame)[0]   
        for data in detections.boxes.data.tolist():
            # extract the confidence (i.e., probability) associated with the detection
            confidence = data[4]

            # filter out weak detections by ensuring the 
            # confidence is greater than the minimum confidence
            if float(confidence) < CONFIDENCE_THRESHOLD:
                continue

            # if the confidence is greater than the minimum confidence,
            # draw the bounding box on the frame
            xmin, ymin, xmax, ymax = int(data[0]), int(data[1]), int(data[2]), int(data[3])
      
            # Pixel to mm calculations 
            cpx=int(xmin+xmax)//2
            cpy=int(ymin+ymax)//2
            spatials, centroid = hostSpatials.calc_spatials(depthData, (0,0)) # centroid == x/y in our case
            x,y = cpx,cpy                  
            text.putText(frame, "X: " + ("{:.1f}m".format(spatials['x']/1000) if not math.isnan(spatials['x']) else "--"), (x + 10, y + 20))
            text.putText(frame, "Y: " + ("{:.1f}m".format(spatials['y']/1000) if not math.isnan(spatials['y']) else "--"), (x + 10, y + 35))
            text.putText(frame, "Z: " + ("{:.1f}m".format(spatials['z']/1000) if not math.isnan(spatials['z']) else "--"), (x + 10, y + 50))            

        # end time to compute the fps
        end = datetime.datetime.now()
        # show the time it took to process 1 frame
        total = (end - start).total_seconds()
        print(f"Time to process 1 frame: {total * 1000:.0f} milliseconds")    

        # calculate the frame per second and draw it on the frame
        fps = f"FPS: {1 / total:.2f}"
        cv2.putText(frame, fps, (50, 50),
                    cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 0, 255), 8)                
        
        cv2.imshow("video", frame)

        if cv2.waitKey(1) == ord('q'):
            break

Is there some kind of limitation in running rgbvideo and stereo together in the Oak-D Lite? I tried with rgb_preview too, but the spatials do not show. If i run stereo separately as per demo, it shows the proper distance.

jakaskerl

Hi DARYLTHOMAS
Running rgb and stereo together shouldn't cause any problems. Why are you setting the roi center for calc_spatials to 0,0?

Thanks,
Jaka

DARYLTHOMAS

jakaskerl Hi, I was just checking to see whether i was not getting a value because my rgb camera resolution was not aligned with the depth camera resolution. It was not, even for 0,0 it wasn't giving me any values. Eventually just going through other spatial examples I found it was indeed because those two weren't aligned, I added the following three properties from the spatialdetection examples:

stereo.setDefaultProfilePreset(dai.node.StereoDepth.PresetMode.HIGH_DENSITY)
stereo.setDepthAlign(dai.CameraBoardSocket.CAM_A)
stereo.setOutputSize(monoLeft.getResolutionWidth(), monoLeft.getResolutionHeight())

Now it is working! Leaving this here for anyone who makes the same mistake 😛

Thanks!