Hi, hello. I'm really lost and need help creating code where the OAK-D Lite gets the XYZ coordinates and labels of the detected objects so I can build logic on top of them. For a simple demo, let's just print the XYZ coordinates and the label of each detected object. Also, how do I point the SDK at a custom blob?
How to get DepthAI SDK XYZ and label data
ALwen I built a console_spatial_tiny_yolo.py that demonstrates exactly what you are asking (I believe):
cycob Hi, hello. I tried your code with a custom blob (a YOLOv5 model converted to a blob). Upon review: if you want to use a custom YOLO model, you have to change camRgb.setPreviewSize(416, 416) on line 58 of your code to match the NN input size. Then you have to change the anchors on line 92 and the anchor masks on line 93. You can see the anchor values when you convert the .pt model; the .json file generated alongside the blob holds all the anchor values. Also change the number of classes. Anyway, here is the altered code for my case (a sketch for reading those values from the .json follows the listing).
#!/usr/bin/env python3
from pathlib import Path
import sys
import cv2
import depthai as dai
import numpy as np
import time
'''
Spatial Tiny-YOLO example, altered for a custom 9-class YOLOv5 blob.
Performs inference on the RGB camera and retrieves spatial location coordinates x, y, z relative to the device.
USAGE: console_spatial_tiny_yolo.py
REQ: blob located at the path set in nnBlobPath below
'''
# nnBlobPath = str((Path(__file__).parent / Path('../models/yolo-v4-tiny-tf_openvino_2021.4_6shave.blob')).resolve().absolute())
nnBlobPath = r"C:\Users\admin\Downloads\Thesis Lester\320\bestaug25_openvino_2022.1_6shave.blob"
if not Path(nnBlobPath).exists():
    raise FileNotFoundError('Required YOLO blob not found: ' + nnBlobPath)
# Label texts for the custom 9-class model
labelMap = [
    "Above", "Door", "Human", "Tripping", "Wall",
    "Wall_Bike", "Wall_Car", "Wall_Pole", "stairs"
]
# Create pipeline
pipeline = dai.Pipeline()
# Define sources and outputs
camRgb = pipeline.create(dai.node.ColorCamera)
spatialDetectionNetwork = pipeline.create(dai.node.YoloSpatialDetectionNetwork)
monoLeft = pipeline.create(dai.node.MonoCamera)
monoRight = pipeline.create(dai.node.MonoCamera)
stereo = pipeline.create(dai.node.StereoDepth)
xoutNN = pipeline.create(dai.node.XLinkOut)
xoutNN.setStreamName("detections")
# Properties
camRgb.setPreviewSize(320, 320)  # must match the input size of the custom blob (320x320 here)
camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P)
camRgb.setInterleaved(False)
camRgb.setColorOrder(dai.ColorCameraProperties.ColorOrder.BGR)
monoLeft.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
monoLeft.setBoardSocket(dai.CameraBoardSocket.LEFT)
monoRight.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
monoRight.setBoardSocket(dai.CameraBoardSocket.RIGHT)
# setting node configs
# to eliminate warning 2024-01-17 alan
"""
[184430101175A41200] [3.1] [147.531] [SpatialDetectionNetwork(1)] [warning]
Neural network inference was performed on socket 'RGB', depth frame is aligned to socket 'RIGHT'.
Bounding box mapping will not be correct, and will lead to erroneus spatial values.
Align depth map to socket 'RGB' using 'setDepthAlign'.
"""
stereo.setDepthAlign(dai.CameraBoardSocket.CAM_A)
# stereo.setDefaultProfilePreset(dai.node.StereoDepth.PresetMode.HIGH_DENSITY)
# Commented out 6Jan2022 for Oak-D-Lite
spatialDetectionNetwork.setBlobPath(nnBlobPath)
spatialDetectionNetwork.setConfidenceThreshold(0.5)
spatialDetectionNetwork.input.setBlocking(False)
spatialDetectionNetwork.setBoundingBoxScaleFactor(0.5)
spatialDetectionNetwork.setDepthLowerThreshold(100)
spatialDetectionNetwork.setDepthUpperThreshold(5000)
# Yolo specific parameters
spatialDetectionNetwork.setNumClasses(9)
spatialDetectionNetwork.setCoordinateSize(4)
spatialDetectionNetwork.setAnchors([
    10.0, 13.0, 16.0, 30.0, 33.0, 23.0,
    30.0, 61.0, 62.0, 45.0, 59.0, 119.0,
    116.0, 90.0, 156.0, 198.0, 373.0, 326.0
])
spatialDetectionNetwork.setAnchorMasks({
    "side40": [0, 1, 2],
    "side20": [3, 4, 5],
    "side10": [6, 7, 8]
})
spatialDetectionNetwork.setIouThreshold(0.5)
# Linking
monoLeft.out.link(stereo.left)
monoRight.out.link(stereo.right)
camRgb.preview.link(spatialDetectionNetwork.input)
spatialDetectionNetwork.out.link(xoutNN.input)
stereo.depth.link(spatialDetectionNetwork.inputDepth)
# Connect to device and start pipeline
with dai.Device(pipeline) as device:
    # Output queue will be used to get the nn data from the output defined above
    detectionNNQueue = device.getOutputQueue(name="detections", maxSize=4, blocking=False)

    startTime = time.monotonic()
    counter = 0
    fps = 0

    while True:
        inDet = detectionNNQueue.get()

        counter += 1
        current_time = time.monotonic()
        if (current_time - startTime) > 1:
            fps = counter / (current_time - startTime)
            counter = 0
            startTime = current_time

        detections = inDet.detections

        # Output FPS and detections to console
        print("NN fps: {:<5.1f} ".format(fps), end="\r")
        if len(detections) != 0:
            for detection in detections:
                try:
                    label = labelMap[detection.label]
                except IndexError:
                    label = str(detection.label)
                x = int(detection.spatialCoordinates.x)
                y = int(detection.spatialCoordinates.y)
                z = int(detection.spatialCoordinates.z)
                print("\n{:<10s} X:{:<5d} Y:{:<5d} Z:{:<5d} mm".format(label, x, y, z))
            print(" ")
Update: you can get the confidence with this code:
confidence = detection.confidence
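If you want it in the console output as well, the print line inside the detection loop above can be extended, for example:

print("\n{:<10s} {:>4.0%}  X:{:<5d} Y:{:<5d} Z:{:<5d} mm".format(
    label, detection.confidence, x, y, z))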
ALwen Now my only concern is how to visualize it, i.e. frames annotated with the XYZ, label, and confidence values.
No, your only concern should be to put in the work to understand this very large subject area by reading and rereading the intro-to-DepthAI demo, and really understanding the spatial tiny-YOLO demo program that does exactly what you are now asking. I consider your latest ask to be very vague thread creep. This thread answered the question you posed.