Hello,
I'm writing a script for an OAK-D LR camera to run a custom YOLOv8 model with the YoloSpatialDetectionNetwork node. I converted my model using this converter: https://tools.luxonis.com/ with 5 shaves.
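For reference, I believe the rough offline equivalent of that conversion with the blobconverter package looks like the sketch below (the ONNX path is a placeholder, and the online tool also rewrites the YOLOv8 output layers for on-device decoding, so this is only an approximation):

import blobconverter

# Rough local equivalent of the tools.luxonis.com conversion (ONNX path is hypothetical)
blob_path = blobconverter.from_onnx(
    model="models/8n/8n_75mAP50.onnx",  # assumed ONNX export of the model
    data_type="FP16",
    shaves=5,
    version="2022.1",
)

My code looks like this: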
import depthai as dai
import cv2
import numpy as np
import time
nnBlobPath = "models/8n/8n_75mAP50_openvino_2022.1_5shave.blob"
labelMap = ['red_gate_buoy', 'green_gate_buoy', 'red_buoy', 'green_buoy', 'yellow_buoy', 'black_buoy', 'blue_circle', 'green_circle', 'red_circle', 'blue_triangle', 'green_triangle', 'red_triangle', 'blue_square', 'green_square', 'red_square',
'blue_plus', 'green_plus', 'red_plus', 'duck_image', 'blue_buoy', 'banner', 'other', 'black_circle', 'black_plus', 'black_triangle', 'black_plus', 'blue_racquet_ball', 'dock', 'rubber_duck', 'misc_buoy', 'red_racquet_ball', 'yellow_racquet_ball']
syncNN = True
pipeline = dai.Pipeline()
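# Chunk size 0 disables XLink chunking, which (as far as I understand) lowers host-transfer overhead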
pipeline.setXLinkChunkSize(0)
yoloSpatial = pipeline.create(dai.node.YoloSpatialDetectionNetwork)
yoloSpatial.setBlobPath(nnBlobPath)
# Spatial detection specific parameters
yoloSpatial.setConfidenceThreshold(0.5)
yoloSpatial.input.setBlocking(False)
yoloSpatial.input.setQueueSize(1)
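# Shrink each detection's ROI to 50% before averaging depth, so less background bleeds into the spatial coords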
yoloSpatial.setBoundingBoxScaleFactor(0.5)
yoloSpatial.setDepthLowerThreshold(100) # Min 10 centimeters
yoloSpatial.setDepthUpperThreshold(5000) # Max 5 meters
# Yolo specific parameters
yoloSpatial.setNumClasses(len(labelMap))
yoloSpatial.setCoordinateSize(4)
yoloSpatial.setIouThreshold(0.5)
yoloSpatial.setNumInferenceThreads(1)
yoloSpatial.setNumNCEPerInferenceThread(2)
# Define sources and outputs
camRgb = pipeline.create(dai.node.ColorCamera)
leftRgb = pipeline.create(dai.node.ColorCamera)
rightRgb = pipeline.create(dai.node.ColorCamera)
stereo = pipeline.create(dai.node.StereoDepth)
nnNetworkOut = pipeline.create(dai.node.XLinkOut)
xoutRgb = pipeline.create(dai.node.XLinkOut)
xoutNN = pipeline.create(dai.node.XLinkOut)
xoutDepth = pipeline.create(dai.node.XLinkOut)
xoutRgb.setStreamName("rgb")
xoutNN.setStreamName("detections")
xoutDepth.setStreamName("depth")
nnNetworkOut.setStreamName("nnNetwork")
# Properties
camRgb.setPreviewSize(640, 640)
camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1200_P)
camRgb.setInterleaved(False)
camRgb.setColorOrder(dai.ColorCameraProperties.ColorOrder.BGR)
camRgb.setFps(10)
leftRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1200_P)
leftRgb.setCamera("left")
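# 2/3 ISP scale: 1920x1200 -> 1280x800, a lighter input for stereo (mirrored for the right camera below)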
leftRgb.setIspScale(2, 3)
rightRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1200_P)
rightRgb.setCamera("right")
rightRgb.setIspScale(2, 3)
stereo.setDefaultProfilePreset(dai.node.StereoDepth.PresetMode.HIGH_DENSITY)
stereo.setDepthAlign(dai.CameraBoardSocket.CAM_A)
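# Subpixel mode improves long-range depth accuracy at some extra compute cost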
stereo.setSubpixel(True)
# Linking
leftRgb.isp.link(stereo.left)
rightRgb.isp.link(stereo.right)
camRgb.preview.link(yoloSpatial.input)
if syncNN:
    yoloSpatial.passthrough.link(xoutRgb.input)
else:
    camRgb.preview.link(xoutRgb.input)
yoloSpatial.out.link(xoutNN.input)
stereo.depth.link(yoloSpatial.inputDepth)
yoloSpatial.passthroughDepth.link(xoutDepth.input)
yoloSpatial.outNetwork.link(nnNetworkOut.input)
# Connect to device and start pipeline
with dai.Device(pipeline) as device:
    device.setLogLevel(dai.LogLevel.DEBUG)
    device.setLogOutputLevel(dai.LogLevel.DEBUG)

    # Output queues will be used to get the rgb frames and nn data from the outputs defined above
    previewQueue = device.getOutputQueue(name="rgb", maxSize=4, blocking=False)
    detectionNNQueue = device.getOutputQueue(name="detections", maxSize=4, blocking=False)
    depthQueue = device.getOutputQueue(name="depth", maxSize=4, blocking=False)
    networkQueue = device.getOutputQueue(name="nnNetwork", maxSize=4, blocking=False)

    startTime = time.monotonic()
    counter = 0
    fps = 0
    color = (255, 255, 255)
    printOutputLayersOnce = True

    while True:
        inPreview = previewQueue.get()
        inDet = detectionNNQueue.get()
        depth = depthQueue.get()
        inNN = networkQueue.get()

        if printOutputLayersOnce:
            toPrint = 'Output layer names:'
            for ten in inNN.getAllLayerNames():
                toPrint = f'{toPrint} {ten},'
            print(toPrint)
            printOutputLayersOnce = False

        frame = inPreview.getCvFrame()
        depthFrame = depth.getFrame()  # depthFrame values are in millimeters

        depth_downscaled = depthFrame[::4]
        if np.all(depth_downscaled == 0):
            min_depth = 0  # Set a default minimum depth value when all elements are zero
        else:
            min_depth = np.percentile(depth_downscaled[depth_downscaled != 0], 1)
        max_depth = np.percentile(depth_downscaled, 99)
        depthFrameColor = np.interp(depthFrame, (min_depth, max_depth), (0, 255)).astype(np.uint8)
        depthFrameColor = cv2.applyColorMap(depthFrameColor, cv2.COLORMAP_HOT)

        counter += 1
        current_time = time.monotonic()
        if (current_time - startTime) > 1:
            fps = counter / (current_time - startTime)
            counter = 0
            startTime = current_time

        detections = inDet.detections

        # If the frame is available, draw bounding boxes on it and show the frame
        height = frame.shape[0]
        width = frame.shape[1]
        for detection in detections:
            roiData = detection.boundingBoxMapping
            roi = roiData.roi
            roi = roi.denormalize(depthFrameColor.shape[1], depthFrameColor.shape[0])
            topLeft = roi.topLeft()
            bottomRight = roi.bottomRight()
            xmin = int(topLeft.x)
            ymin = int(topLeft.y)
            xmax = int(bottomRight.x)
            ymax = int(bottomRight.y)
            cv2.rectangle(depthFrameColor, (xmin, ymin), (xmax, ymax), color, 1)

            # Denormalize bounding box
            x1 = int(detection.xmin * width)
            x2 = int(detection.xmax * width)
            y1 = int(detection.ymin * height)
            y2 = int(detection.ymax * height)
            try:
                label = labelMap[detection.label]
            except IndexError:
                label = detection.label
            cv2.putText(frame, str(label), (x1 + 10, y1 + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.putText(frame, "{:.2f}".format(detection.confidence * 100), (x1 + 10, y1 + 35), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.putText(frame, f"X: {int(detection.spatialCoordinates.x)} mm", (x1 + 10, y1 + 50), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.putText(frame, f"Y: {int(detection.spatialCoordinates.y)} mm", (x1 + 10, y1 + 65), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.putText(frame, f"Z: {int(detection.spatialCoordinates.z)} mm", (x1 + 10, y1 + 80), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 1)

        cv2.putText(frame, "NN fps: {:.2f}".format(fps), (2, frame.shape[0] - 4), cv2.FONT_HERSHEY_TRIPLEX, 0.4, color)
        cv2.imshow("rgb", frame)
        if cv2.waitKey(1) == ord('q'):
            break
The problem I have is that the FPS is pretty low, around 5 FPS. However, the debug info shows that only about 40% of the LeonOS CPU and 30% of LeonRT are being used:
[14442C10F1121AD000] [3.1] [23.080] [system] [info] Memory Usage - DDR: 115.15 / 333.28 MiB, CMX: 2.47 / 2.50 MiB, LeonOS Heap: 48.23 / 81.76 MiB, LeonRT Heap: 5.68 / 39.90 MiB / NOC ddr: 2573 MB/s
[14442C10F1121AD000] [3.1] [23.080] [system] [info] Temperatures - Average: 47.86C, CSS: 48.92C, MSS 47.13C, UPA: 48.47C, DSS: 46.90C
[14442C10F1121AD000] [3.1] [23.080] [system] [info] Cpu Usage - LeonOS 40.28%, LeonRT: 29.87%
Is it possible to increase my FPS by increasing CPU usage? If so, how can I do it?
I tried changing values like setNumInferenceThreads, setNumNCEPerInferenceThread, and the camera FPS, but I couldn't achieve anything better than what I have now.
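For example, the kinds of variations I tried look roughly like this (the exact values are illustrative, not a fix):

# Illustrative examples of the parameter variations I experimented with
yoloSpatial.setNumInferenceThreads(2)       # 2 inference threads instead of 1
yoloSpatial.setNumNCEPerInferenceThread(1)  # 1 NCE per thread instead of 2
camRgb.setFps(30)                           # raise the sensor FPS cap above 10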
Thank you in advance for your help!