Description
I'm experiencing a significant performance issue when using a custom converted YOLOv8n model compared to the Luxonis model zoo version. The model zoo yolov6-nano runs at ~35 FPS, but my custom YOLOv8n (same nano size) only achieves 6-7 FPS using the exact same pipeline.
Setup:
Device: OAK-D (RVC2)
Custom model: YOLOv8n converted via HubAI SDK
Conversion settings: INT8 quantization, 640x640 input, GENERAL quantization data
The conversion completes successfully without errors
What I've tried:
Using the same pipeline code for both models
Verified the model is properly quantized (INT8)
Same confidence threshold (0.5)
Same input resolution for my model (640x640) — although, on closer inspection, the model zoo ID says 512x288, so the two models may not actually run at the same resolution
I'm not sure what could be causing such a dramatic performance difference. Any insights would be greatly appreciated!
Conversion Script
$$
import os

from hubai_sdk import HubAIClient
from ultralytics import YOLO

# HubAI client authenticated via the HUBAI_API_KEY environment variable.
client = HubAIClient(api_key=os.getenv("HUBAI_API_KEY"))

# Load the SAME checkpoint that is converted below.
# BUG FIX: the original loaded models/yolov8l.pt here while converting
# models/yolov8n.pt. The class names happen to coincide for stock COCO
# checkpoints, but for custom-trained weights this mismatch would embed
# the wrong labels into the converted archive.
model = YOLO("models/yolov8n.pt")

# model.names maps class id -> name in id order, so the values are the
# class-name list directly (replaces the manual loop that shadowed the
# builtin `id`).
class_names = list(model.names.values())

# Submit the conversion job targeting the RVC2 platform (OAK-D).
response = client.convert.RVC2(
    path="models/yolov8n.pt",
    name="quantized-yolov8n",
    target_precision="INT8",
    quantization_data="GENERAL",
    yolo_input_shape=[640, 640],
    yolo_class_names=class_names,
    yolo_version="yolov8",
    superblob=False,
)
print(f"Converted model downloaded to: {response.downloaded_path}")
$$
Pipeline Code
$$
import time
import depthai as dai
import cv2
import numpy as np
def frameNorm(frame, bbox):
    """Convert normalized <0..1> bbox values into absolute pixel coordinates.

    Even-indexed entries (x values) scale by the frame width, odd-indexed
    entries (y values) by the frame height. Values are clipped to [0, 1]
    first so out-of-range detections stay inside the image.
    """
    height, width = frame.shape[:2]
    pixels = []
    for idx, value in enumerate(np.clip(bbox, 0, 1)):
        extent = width if idx % 2 == 0 else height
        pixels.append(int(value * extent))
    return np.array(pixels)
# Open the first available DepthAI device (OAK-D / RVC2).
device = dai.Device()
with dai.Pipeline(device) as pipeline:
print('Creating pipeline...')
# Color camera source: 1352x1012 sensor mode at 52 FPS feeds the network.
cam = pipeline.create(dai.node.Camera)
cam.build(dai.CameraBoardSocket.CAM_A, sensorResolution=(1352, 1012), sensorFps=52)
# Custom model (6-7 FPS): converted YOLOv8n archive with a 640x640 input.
nn_archive = dai.NNArchive('quantized-yolov8n-exported-to-target-rvc2/yolov8n.rvc2.tar.xz')
# Model zoo version (35 FPS)
# NOTE(review): the zoo model ID below says its input is 512x288 — roughly
# 2.8x fewer pixels per inference than 640x640, which alone could explain a
# large share of the FPS gap. Confirm by re-converting the custom model at
# a matching resolution before comparing throughput.
# model_description = dai.NNModelDescription("luxonis/yolov6-nano:r2-coco-512x288")
# model_description.platform = device.getPlatformAsString()
# nn_archive = dai.NNArchive(dai.getModelFromZoo(model_description))
# DetectionNetwork wires the camera into the NN and parses its detections.
detection = pipeline.create(dai.node.DetectionNetwork).build(input=cam, nnArchive=nn_archive)
detection.setConfidenceThreshold(0.5)
# Host-side queues: passthrough frames for display, parsed detection messages.
videoQueue = detection.passthrough.createOutputQueue()
detectionQueue = detection.out.createOutputQueue()
# Class-id -> label mapping taken from the NN archive.
labelMap = detection.getClasses()
# Shared display state mutated by the main loop below.
frame = None
detections = []
startTime = time.monotonic()
counter = 0
color2 = (255, 255, 255)
def displayFrame(name, frame):
    """Overlay every cached detection (label, confidence, box) on *frame* and show it."""
    box_color = (255, 0, 0)
    for det in detections:
        # Detection coords are normalized [0,1]; map them to pixels.
        x1, y1, x2, y2 = frameNorm(frame, (det.xmin, det.ymin, det.xmax, det.ymax))
        cv2.putText(
            frame,
            labelMap[det.label],
            (x1 + 10, y1 + 20),
            cv2.FONT_HERSHEY_TRIPLEX,
            0.5,
            255,
        )
        cv2.putText(
            frame,
            f"{int(det.confidence * 100)}%",
            (x1 + 10, y1 + 40),
            cv2.FONT_HERSHEY_TRIPLEX,
            0.5,
            255,
        )
        cv2.rectangle(frame, (x1, y1), (x2, y2), box_color, 2)
    cv2.imshow(name, frame)
print('Pipeline created.')
pipeline.start()
while pipeline.isRunning():
# Non-blocking reads: either queue may return None on any iteration.
videoIn = videoQueue.tryGet()
img_detections = detectionQueue.tryGet()
if videoIn is not None:
frame = videoIn.getCvFrame()
# Overlay the measured NN throughput: detection messages per elapsed second.
cv2.putText(frame,
f"NN fps: {counter / (time.monotonic() - startTime):.2f}",
(2, frame.shape[0] - 4),
cv2.FONT_HERSHEY_TRIPLEX,
0.4,
color2,
)
# Cache the latest detections; counter counts NN result messages for the
# FPS estimate above.
if img_detections:
detections = img_detections.detections
counter += 1
# Redraw with the most recent frame + detections (they may be from
# different loop iterations).
if frame is not None:
displayFrame("Detections", frame)
# 'q' stops the pipeline and exits the loop.
if cv2.waitKey(1) == ord('q'):
pipeline.stop()
break
$$
I'm trying to figure out whether the issue is with my conversion settings or my pipeline configuration. Any help identifying the root cause would be greatly appreciated! Also, if anyone has resources on optimizing either the model conversion process or the pipeline setup for better performance, I'd love to learn more.