Hi KlemenSkrlj
I am using YOLOv6 nano and converted the PyTorch model weights to a blob using tools.luxonis.com. Below is a screenshot of the settings used to convert the model file to a blob. The input height is 320 and the input width is 416.

I also tried converting with the HubAI platform, using the settings shown in the screenshots below, but I still see the same behavior in the DepthAI v3 script.



Below is the DepthAI v2 script that works for this exact model. My aim for DepthAI v3 was to first get a basic script in which my model works, and then extend it to match the functionality of the v2 script below.
from pathlib import Path
import sys
import cv2
import depthai as dai
import numpy as np
import time
import argparse
import json
import blobconverter
from datetime import timedelta
# Parse command-line arguments: the model blob (or model-zoo name) and the
# JSON config exported alongside the blob.
parser = argparse.ArgumentParser()
parser.add_argument("-m", "--model", help="Provide model name or model path for inference",
                    default="C:/EMVS_Development_Camera_Scripts/Box & Label Detection/best_ckpt_openvino_2022.1_5shave.blob", type=str)
parser.add_argument("-c", "--config", help="Provide config path for inference",
                    default='C:/EMVS_Development_Camera_Scripts/Box & Label Detection/best_ckpt.json', type=str)
args = parser.parse_args()

# Load the JSON config; fail early if the file is missing.
configPath = Path(args.config)
if not configPath.exists():
    raise ValueError("Path {} does not exist!".format(configPath))
with configPath.open() as f:
    config = json.load(f)
nnConfig = config.get("nn_config", {})

# Parse the network input shape, stored as the string "<width>x<height>".
# W and H are required further down (ImageManip resize), so a config without
# "input_size" is rejected here instead of surfacing later as a NameError.
if "input_size" not in nnConfig:
    raise ValueError(
        "Config {} has no nn_config.input_size entry (expected '<width>x<height>')".format(configPath)
    )
W, H = tuple(map(int, nnConfig["input_size"].split('x')))

# Extract the YOLO decoding metadata that is passed to the detection network.
# NOTE(review): every entry falls back to an empty dict when the key is
# absent; the network setters presumably need real values - confirm the
# config always provides them.
metadata = nnConfig.get("NN_specific_metadata", {})
classes = metadata.get("classes", {})
coordinates = metadata.get("coordinates", {})
anchors = metadata.get("anchors", {})
anchorMasks = metadata.get("anchor_masks", {})
iouThreshold = metadata.get("iou_threshold", {})
confidenceThreshold = metadata.get("confidence_threshold", {})

# Parse the label names; indexed by detection.label when drawing results.
nnMappings = config.get("mappings", {})
labels = nnMappings.get("labels", {})

# Resolve the model path: use the local blob when it exists, otherwise treat
# the argument as a model name and fetch it through blobconverter.
nnPath = args.model
if not Path(nnPath).exists():
    print("No blob found at {}. Looking into DepthAI model zoo.".format(nnPath))
    nnPath = str(blobconverter.from_zoo(args.model, shaves = 6, zoo_type = "depthai", use_cache=True))

# sync outputs
syncNN = True
# Create pipeline
pipeline = dai.Pipeline()

# Define sources and outputs
camRgb = pipeline.create(dai.node.ColorCamera)
detectionNetwork = pipeline.create(dai.node.YoloDetectionNetwork)
# ImageManip resizes the ISP frames to the network input size (By Yishu)
manip = pipeline.create(dai.node.ImageManip)
# Sync node groups the NN result with the ISP frame it belongs to
sync = pipeline.create(dai.node.Sync)
xoutGrp = pipeline.create(dai.node.XLinkOut)
xoutGrp.setStreamName("xout")

# Sync node properties
sync.setSyncThreshold(timedelta(milliseconds=100))

# Camera properties
#camRgb.setPreviewSize(W, H)
camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_12_MP) #THE_1080_P
camRgb.setInterleaved(False)
camRgb.setColorOrder(dai.ColorCameraProperties.ColorOrder.RGB)
camRgb.setFps(25) #40

# ImageManip properties (By Yishu): stretch to W x H without preserving the
# aspect ratio.
manip.initialConfig.setKeepAspectRatio(False) #True
manip.initialConfig.setResize(W, H)
# Request planar BGR output for the network input.
# NOTE(review): the camera color order above is RGB while this asks for
# BGR888p - confirm this matches what the model was exported for.
manip.initialConfig.setFrameType(dai.ImgFrame.Type.BGR888p) #dai.ImgFrame.Type.RGB888p
# setMaxOutputFrameSize avoids the "image bigger than max frame size" error.
# Keep the original 1228800-byte budget as a floor, but grow it when the
# configured input needs more than that (3 planes of W x H bytes).
manip.setMaxOutputFrameSize(max(1228800, W * H * 3))

# By Yishu
detectionNetwork.input.setQueueSize(10)
# Network specific settings (values come from the JSON config)
detectionNetwork.setConfidenceThreshold(confidenceThreshold)
detectionNetwork.setNumClasses(classes)
detectionNetwork.setCoordinateSize(coordinates)
detectionNetwork.setAnchors(anchors)
detectionNetwork.setAnchorMasks(anchorMasks)
detectionNetwork.setIouThreshold(iouThreshold)
detectionNetwork.setBlobPath(nnPath)
detectionNetwork.setNumInferenceThreads(2)
detectionNetwork.input.setBlocking(False)

# Linking: ISP -> ImageManip -> detection network
camRgb.isp.link(manip.inputImage)
manip.out.link(detectionNetwork.input)
# Syncing NN results with the full-resolution ISP frames
detectionNetwork.out.link(sync.inputs["NN_Sync"])
camRgb.isp.link(sync.inputs["ISP_Sync"])
sync.inputs["NN_Sync"].setBlocking(False)
sync.inputs["ISP_Sync"].setBlocking(False)
sync.out.link(xoutGrp.input)
xoutGrp.input.setBlocking(False)

# JET color lookup table with entry 0 forced to black.
cvColorMap = cv2.applyColorMap(np.arange(256, dtype=np.uint8), cv2.COLORMAP_JET)
cvColorMap[0] = [0, 0, 0]

# TakeUp Camera Static IP address
device_info = dai.DeviceInfo("192.168.220.10")
def frameNorm(frame, bbox):
    """Convert a normalized bounding box to pixel coordinates of *frame*.

    NN bounding boxes are in the <0..1> range. Even positions of *bbox*
    (x values) are scaled by the frame width and odd positions (y values)
    by the frame height. Inputs are clipped to [0, 1] before scaling and
    the result is returned as an integer NumPy array.
    """
    normVals = np.full(len(bbox), frame.shape[0])
    normVals[::2] = frame.shape[1]
    return (np.clip(np.array(bbox), 0, 1) * normVals).astype(int)


def displayFrame(name, frame, detections, i):
    """Draw every detection onto *frame* and show it in window *name*.

    Detections labelled 'Box' are outlined with ``color_box``; everything
    else uses ``color_label``. The label text and the confidence (as a
    percentage) are written inside the top-left corner of each box.
    *frame* is modified in place. *i* is accepted for call compatibility
    and is not used.
    """
    color_box = (255, 0, 0)
    color_label = (0, 255, 0)
    for detection in detections:
        bbox = frameNorm(frame, (detection.xmin, detection.ymin, detection.xmax, detection.ymax))
        color = color_box if labels[detection.label] == 'Box' else color_label
        cv2.putText(frame, labels[detection.label], (bbox[0] + 10, bbox[1] + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, (255,255,255))
        cv2.putText(frame, f"{int(detection.confidence * 100)}%", (bbox[0] + 10, bbox[1] + 40), cv2.FONT_HERSHEY_TRIPLEX, 0.5, (255,255,255))
        cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color, 2)
    cv2.namedWindow(name, cv2.WINDOW_NORMAL)
    cv2.imshow(name, frame)


try:
    # Connect to device and start pipeline
    with dai.Device(pipeline, device_info) as device:
        # Output queue delivering the synced message groups (ISP frame + NN
        # result) from the "xout" stream; arguments are passed positionally
        # as in the original call.
        qSync = device.getOutputQueue("xout", 4, False)
        syncframe = None
        sync_detections = None
        startTime = time.monotonic()
        counter = 0  # number of NN results received so far

        while True:
            msgGrp = qSync.get()
            if msgGrp is not None:
                for name, msg in msgGrp:
                    if name == "ISP_Sync":
                        if msg is not None:
                            syncframe = msg.getCvFrame()
                            nn_fps = counter / (time.monotonic() - startTime)
                            print("nn_SYNC_fps: ", nn_fps)
                    if name == "NN_Sync":
                        if msg is not None:
                            sync_detections = msg.detections
                            counter += 1
                # Only draw once both halves of a synced pair have been seen.
                if syncframe is not None and sync_detections is not None:
                    # The original passed an undefined name `j` here; the
                    # running NN-result counter is passed instead.
                    displayFrame("ISP_Sync", syncframe, sync_detections, counter)
            if cv2.waitKey(1) == ord('q'):
                break
except Exception as e:
    print(f"An error occurred: {e}")
    # Exit with a non-zero status so callers can tell the run failed.
    sys.exit(1)
Thanks,
Yishu