cycob Hi, hello! I tried your code with a custom blob (YOLOv5 converted to blob). On review: if you want to use a custom YOLO model, you have to change camRgb.setPreviewSize(416, 416) on line 58 of your code to match the NN input size, then change the anchors on line 92 and the anchor masks on line 93. You can find the anchor values in the .json file that gets written alongside the blob when you convert the .pt model. Also change the number of classes. Anyway, here is the altered code for my case (with a small sketch after it for loading those values straight from the .json).
#!/usr/bin/env python3
from pathlib import Path
import sys
import cv2
import depthai as dai
import numpy as np
import time
'''
Spatial YOLO example (custom YOLOv5 model)
Performs inference on the RGB camera and retrieves spatial location coordinates x,y,z relative to the device.
Adapted from the spatial tiny-yolo-v4 example for a custom YOLOv5 blob.
USAGE: console_spatial_tiny_yolo.py
REQ: model blob located at the path set in nnBlobPath below
'''
# nnBlobPath = str((Path(__file__).parent / Path('../models/yolo-v4-tiny-tf_openvino_2021.4_6shave.blob')).resolve().absolute())
nnBlobPath = r"C:\Users\admin\Downloads\Thesis Lester\320\bestaug25_openvino_2022.1_6shave.blob"
if not Path(nnBlobPath).exists():
    raise FileNotFoundError(f'Required YOLO blob not found at {nnBlobPath}')
# Custom model label texts
labelMap = [
    "Above",
    "Door",
    "Human",
    "Tripping",
    "Wall",
    "Wall_Bike",
    "Wall_Car",
    "Wall_Pole",
    "stairs"
]
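# NOTE: keep the labels in the same class order as training; the exported
# .json usually lists them too (typically under mappings -> labels)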
# Create pipeline
pipeline = dai.Pipeline()
# Define sources and outputs
camRgb = pipeline.create(dai.node.ColorCamera)
spatialDetectionNetwork = pipeline.create(dai.node.YoloSpatialDetectionNetwork)
monoLeft = pipeline.create(dai.node.MonoCamera)
monoRight = pipeline.create(dai.node.MonoCamera)
stereo = pipeline.create(dai.node.StereoDepth)
xoutNN = pipeline.create(dai.node.XLinkOut)
xoutNN.setStreamName("detections")
# Properties
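# NOTE: the preview size must match the input resolution the blob was compiled for (320x320 here)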
camRgb.setPreviewSize(320, 320)
camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P)
camRgb.setInterleaved(False)
camRgb.setColorOrder(dai.ColorCameraProperties.ColorOrder.BGR)
monoLeft.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
monoLeft.setBoardSocket(dai.CameraBoardSocket.LEFT)
monoRight.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
monoRight.setBoardSocket(dai.CameraBoardSocket.RIGHT)
# setting node configs
# to eliminate warning 2024-01-17 alan
"""
[184430101175A41200] [3.1] [147.531] [SpatialDetectionNetwork(1)] [warning]
Neural network inference was performed on socket 'RGB', depth frame is aligned to socket 'RIGHT'.
Bounding box mapping will not be correct, and will lead to erroneus spatial values.
Align depth map to socket 'RGB' using 'setDepthAlign'.
"""
stereo.setDepthAlign(dai.CameraBoardSocket.CAM_A)
# stereo.setDefaultProfilePreset(dai.node.StereoDepth.PresetMode.HIGH_DENSITY)
# Commented out 6Jan2022 for Oak-D-Lite
spatialDetectionNetwork.setBlobPath(nnBlobPath)
spatialDetectionNetwork.setConfidenceThreshold(0.5)
spatialDetectionNetwork.input.setBlocking(False)
spatialDetectionNetwork.setBoundingBoxScaleFactor(0.5)
spatialDetectionNetwork.setDepthLowerThreshold(100)
spatialDetectionNetwork.setDepthUpperThreshold(5000)
# Yolo specific parameters
spatialDetectionNetwork.setNumClasses(9)
spatialDetectionNetwork.setCoordinateSize(4)
spatialDetectionNetwork.setAnchors(np.array([
    10.0, 13.0, 16.0, 30.0, 33.0, 23.0,       # anchor pairs 0-2, mask "side40"
    30.0, 61.0, 62.0, 45.0, 59.0, 119.0,      # anchor pairs 3-5, mask "side20"
    116.0, 90.0, 156.0, 198.0, 373.0, 326.0   # anchor pairs 6-8, mask "side10"
]))
spatialDetectionNetwork.setAnchorMasks({
    "side40": [0, 1, 2],
    "side20": [3, 4, 5],
    "side10": [6, 7, 8]
})
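# The "sideN" mask keys name the YOLO output grid sizes: N = input size / stride,
# so a 320x320 input gives side40/side20/side10 (strides 8/16/32); a 416x416
# model would use side52/side26/side13 instead.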
spatialDetectionNetwork.setIouThreshold(0.5)
# Linking
monoLeft.out.link(stereo.left)
monoRight.out.link(stereo.right)
camRgb.preview.link(spatialDetectionNetwork.input)
spatialDetectionNetwork.out.link(xoutNN.input)
stereo.depth.link(spatialDetectionNetwork.inputDepth)
# Connect to device and start pipeline
with dai.Device(pipeline) as device:
    # Output queue will be used to get the nn data from the output defined above
    detectionNNQueue = device.getOutputQueue(name="detections", maxSize=4, blocking=False)

    startTime = time.monotonic()
    counter = 0
    fps = 0

    while True:
        inDet = detectionNNQueue.get()

        counter += 1
        current_time = time.monotonic()
        if (current_time - startTime) > 1:
            fps = counter / (current_time - startTime)
            counter = 0
            startTime = current_time

        detections = inDet.detections

        # Output FPS and detections to console
        print("NN fps: {:<5.1f} ".format(fps), end="\r")
        if len(detections) != 0:
            for detection in detections:
                try:
                    label = labelMap[detection.label]
                except IndexError:
                    label = str(detection.label)
                x = int(detection.spatialCoordinates.x)
                y = int(detection.spatialCoordinates.y)
                z = int(detection.spatialCoordinates.z)
                print("\n{:<10s} X:{:<5d} Y:{:<5d} Z:{:<5d} mm".format(label, x, y, z))
            print(" ")