I am trying to run Yolov8 on the device( bcoz yolov8l model ) and trying to get depth value from my Oak-d lite. This is the code I tried, i tried to merge the rgbvideo and gen2-calc-spatials-on-host examples. When I run with the rgbvideo along with the depthstereo input I do not get any value from the depth spatials call. It shows as nan value. Here is my integrated code -
import cv2
import depthai as dai
import datetime
from time import sleep
# Depth library
from calc import HostSpatialsCalc
from utility import *
import math
# YOLO Setup
from ultralytics import YOLO
import os
# define some constants
CONFIDENCE_THRESHOLD = 0.8
GREEN = (0, 255, 0)
model = YOLO("best_corrected_june12.pt")
def on_predict_batch_end(predictor):
# results -> List[batch_size]
_, _, im0s, _, _ = predictor.batch
im0s = im0s if isinstance(im0s, list) else [im0s]
predictor.results = zip(predictor.results, im0s)
sleeprate = 1.0
def callback_sleeprate(data):
global sleeprate
sleeprate = data.data
# YOLO Setup
# Create pipeline
pipeline = dai.Pipeline()
# Define source and output
camRgb = pipeline.create(dai.node.ColorCamera)
xoutVideo = pipeline.create(dai.node.XLinkOut)
xoutVideo.setStreamName("video")
# Properties
#Preview settings
# camRgb.setPreviewSize(640, 480)
# camRgb.setInterleaved(False)
# camRgb.setColorOrder(dai.ColorCameraProperties.ColorOrder.RGB)
# camRgb.preview.link(xoutVideo.input)
#Preview settings
camRgb.setBoardSocket(dai.CameraBoardSocket.CAM_A)
camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P)
camRgb.setVideoSize(1920, 1080)
xoutVideo.input.setBlocking(False)
xoutVideo.input.setQueueSize(1)
# Linking
camRgb.video.link(xoutVideo.input)
# Stereo depth setup
monoLeft = pipeline.create(dai.node.MonoCamera)
monoRight = pipeline.create(dai.node.MonoCamera)
stereo = pipeline.create(dai.node.StereoDepth)
# Properties
monoLeft.setResolution(dai.MonoCameraProperties.SensorResolution.THE_480_P)
monoLeft.setBoardSocket(dai.CameraBoardSocket.LEFT)
monoRight.setResolution(dai.MonoCameraProperties.SensorResolution.THE_480_P)
monoRight.setBoardSocket(dai.CameraBoardSocket.RIGHT)
stereo.initialConfig.setConfidenceThreshold(255)
stereo.setLeftRightCheck(True)
stereo.setSubpixel(False)
# Linking
monoLeft.out.link(stereo.left)
monoRight.out.link(stereo.right)
xoutDepth = pipeline.create(dai.node.XLinkOut)
xoutDepth.setStreamName("depth")
stereo.depth.link(xoutDepth.input)
xoutDepth = pipeline.create(dai.node.XLinkOut)
xoutDepth.setStreamName("disp")
stereo.disparity.link(xoutDepth.input)
xoutDepth.input.setBlocking(False)
xoutDepth.input.setQueueSize(1)
# Stereo depth setup
# Connect to device and start pipeline
with dai.Device(pipeline) as device:
video = device.getOutputQueue(name="video", maxSize=1, blocking=False)
depthQueue = device.getOutputQueue(name="depth")
hostSpatials = HostSpatialsCalc(device)
delta = 10
hostSpatials.setDeltaRoi(delta)
text = TextHelper()
while True:
start = datetime.datetime.now()
videoIn = video.get()
# Get BGR frame from NV12 encoded video frame to show with opencv
# Visualizing the frame on slower hosts might have overhead
frame = videoIn.getCvFrame()
depthData = depthQueue.get()
detections = model(frame)[0]
for data in detections.boxes.data.tolist():
# extract the confidence (i.e., probability) associated with the detection
confidence = data[4]
# filter out weak detections by ensuring the
# confidence is greater than the minimum confidence
if float(confidence) < CONFIDENCE_THRESHOLD:
continue
# if the confidence is greater than the minimum confidence,
# draw the bounding box on the frame
xmin, ymin, xmax, ymax = int(data[0]), int(data[1]), int(data[2]), int(data[3])
# Pixel to mm calculations
cpx=int(xmin+xmax)//2
cpy=int(ymin+ymax)//2
spatials, centroid = hostSpatials.calc_spatials(depthData, (0,0)) # centroid == x/y in our case
x,y = cpx,cpy
text.putText(frame, "X: " + ("{:.1f}m".format(spatials['x']/1000) if not math.isnan(spatials['x']) else "--"), (x + 10, y + 20))
text.putText(frame, "Y: " + ("{:.1f}m".format(spatials['y']/1000) if not math.isnan(spatials['y']) else "--"), (x + 10, y + 35))
text.putText(frame, "Z: " + ("{:.1f}m".format(spatials['z']/1000) if not math.isnan(spatials['z']) else "--"), (x + 10, y + 50))
# end time to compute the fps
end = datetime.datetime.now()
# show the time it took to process 1 frame
total = (end - start).total_seconds()
print(f"Time to process 1 frame: {total * 1000:.0f} milliseconds")
# calculate the frame per second and draw it on the frame
fps = f"FPS: {1 / total:.2f}"
cv2.putText(frame, fps, (50, 50),
cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 0, 255), 8)
cv2.imshow("video", frame)
if cv2.waitKey(1) == ord('q'):
break
Is there some kind of limitation in running rgbvideo and stereo together in the Oak-D Lite? I tried with rgb_preview too, but the spatials do not show. If i run stereo separately as per demo, it shows the proper distance.