Hi, I'm really lost and need help creating code where the OAK-D Lite gets the XYZ coordinates and labels of detected objects so I can build logic on top of them. For a simple demo, let's just print the XYZ coordinates and the label of each detected object. Also, how do I point the SDK at a custom blob?

    cycob Hi, I tried your code with a custom blob (a YOLOv5 model converted to blob). On review: if you want to use a custom YOLO model, you have to change camRgb.setPreviewSize(416, 416) on line 58 of your code to match the NN input size, then change the anchors on line 92 and the anchor masks on line 93. You can see the anchor values when you convert the .pt model; the export produces a .json file alongside the blob that contains all the anchor values. Also change the number of classes. Anyway, here is the altered code for my case, with a sketch after the script for reading those values from the .json automatically.

    #!/usr/bin/env python3

    from pathlib import Path
    import sys
    import cv2
    import depthai as dai
    import numpy as np
    import time

    '''
    Spatial YOLO console example
    Performs inference on the RGB camera and retrieves spatial location coordinates (x, y, z) relative to the device.
    Adapted from the tiny-yolo-v4 spatial example for a custom YOLOv5 blob.

    USAGE: console_spatial_tiny_yolo.py

    REQ: model blob located at the path set in nnBlobPath below
    '''

    # nnBlobPath = str((Path(__file__).parent / Path('../models/yolo-v4-tiny-tf_openvino_2021.4_6shave.blob')).resolve().absolute())
    nnBlobPath = r"C:\Users\admin\Downloads\Thesis Lester\320\bestaug25_openvino_2022.1_6shave.blob"

    if not Path(nnBlobPath).exists():
        raise FileNotFoundError(f'Required YOLO blob not found at {nnBlobPath}')

    # Custom model label texts
    labelMap = [
        "Above", "Door", "Human", "Tripping", "Wall",
        "Wall_Bike", "Wall_Car", "Wall_Pole", "stairs",
    ]
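    # NOTE: the order of these labels must match the class indices used at training/export time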

    # Create pipeline
    pipeline = dai.Pipeline()

    # Define sources and outputs
    camRgb = pipeline.create(dai.node.ColorCamera)
    spatialDetectionNetwork = pipeline.create(dai.node.YoloSpatialDetectionNetwork)
    monoLeft = pipeline.create(dai.node.MonoCamera)
    monoRight = pipeline.create(dai.node.MonoCamera)
    stereo = pipeline.create(dai.node.StereoDepth)

    xoutNN = pipeline.create(dai.node.XLinkOut)

    xoutNN.setStreamName("detections")

    # Properties
    camRgb.setPreviewSize(320, 320)
    camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P)
    camRgb.setInterleaved(False)
    camRgb.setColorOrder(dai.ColorCameraProperties.ColorOrder.BGR)

    monoLeft.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
    monoLeft.setBoardSocket(dai.CameraBoardSocket.LEFT)
    monoRight.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
    monoRight.setBoardSocket(dai.CameraBoardSocket.RIGHT)

    # setting node configs

    # to eliminate warning 2024-01-17 alan
    """
    [184430101175A41200] [3.1] [147.531] [SpatialDetectionNetwork(1)] [warning]
    Neural network inference was performed on socket 'RGB', depth frame is aligned to socket 'RIGHT'.
    Bounding box mapping will not be correct, and will lead to erroneus spatial values.
    Align depth map to socket 'RGB' using 'setDepthAlign'.
    """
    stereo.setDepthAlign(dai.CameraBoardSocket.CAM_A)

    # stereo.setDefaultProfilePreset(dai.node.StereoDepth.PresetMode.HIGH_DENSITY)
    # Commented out 6Jan2022 for Oak-D-Lite

    spatialDetectionNetwork.setBlobPath(nnBlobPath)
    spatialDetectionNetwork.setConfidenceThreshold(0.5)
    spatialDetectionNetwork.input.setBlocking(False)
    spatialDetectionNetwork.setBoundingBoxScaleFactor(0.5)
    spatialDetectionNetwork.setDepthLowerThreshold(100)
    spatialDetectionNetwork.setDepthUpperThreshold(5000)
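    # Note on the settings above: the bounding box scale factor shrinks the ROI used for
    # depth averaging (0.5 = the central half of the box), and the depth thresholds limit
    # valid spatial measurements to the 100-5000 mm range.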

    # Yolo specific parameters
    spatialDetectionNetwork.setNumClasses(9)
    spatialDetectionNetwork.setCoordinateSize(4)
    spatialDetectionNetwork.setAnchors(np.array([
        10.0, 13.0, 16.0, 30.0, 33.0, 23.0,
        30.0, 61.0, 62.0, 45.0, 59.0, 119.0,
        116.0, 90.0, 156.0, 198.0, 373.0, 326.0,
    ]))
    spatialDetectionNetwork.setAnchorMasks({
        "side40": [0, 1, 2],
        "side20": [3, 4, 5],
        "side10": [6, 7, 8],
    })
    spatialDetectionNetwork.setIouThreshold(0.5)

    # Linking
    monoLeft.out.link(stereo.left)
    monoRight.out.link(stereo.right)

    camRgb.preview.link(spatialDetectionNetwork.input)

    spatialDetectionNetwork.out.link(xoutNN.input)

    stereo.depth.link(spatialDetectionNetwork.inputDepth)

    # Connect to device and start pipeline
    with dai.Device(pipeline) as device:

        # Output queue used to get the NN detection data from the output defined above
        detectionNNQueue = device.getOutputQueue(name="detections", maxSize=4, blocking=False)

        startTime = time.monotonic()
        counter = 0
        fps = 0

        while True:
            inDet = detectionNNQueue.get()

            counter += 1
            current_time = time.monotonic()
            if (current_time - startTime) > 1:
                fps = counter / (current_time - startTime)
                counter = 0
                startTime = current_time
    
            detections = inDet.detections

            # Output FPS and detections to the console
            print("NN fps: {:<5.1f}    ".format(fps), end="\r")
            if len(detections) != 0:
                for detection in detections:
                    try:
                        label = labelMap[detection.label]
                    except IndexError:
                        label = str(detection.label)
                    x = int(detection.spatialCoordinates.x)
                    y = int(detection.spatialCoordinates.y)
                    z = int(detection.spatialCoordinates.z)
                    print("\n{:<10s} X:{:<5d}  Y:{:<5d}  Z:{:<5d} mm".format(label, x, y, z))
                print(" ")

    cycob Now my only concern is how to visualize it, like frames with the XYZ, labels, and confidence values.

    thank you sir

      Update: you can get the confidence with this line:

      confidence = detection.confidence
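
      For visualization, here is a minimal sketch based on Luxonis' official spatial_tiny_yolo example: add an XLinkOut for the NN passthrough frames (they stay in sync with the detections), then draw each box with its label, confidence, and XYZ using OpenCV. It assumes the pipeline, spatialDetectionNetwork, labelMap, and detectionNNQueue from the script above, and the loop below replaces the print loop there:

      # extra nodes, added before `with dai.Device(pipeline) as device:`
      xoutRgb = pipeline.create(dai.node.XLinkOut)
      xoutRgb.setStreamName("rgb")
      spatialDetectionNetwork.passthrough.link(xoutRgb.input)

      # inside the device block, next to detectionNNQueue:
      previewQueue = device.getOutputQueue(name="rgb", maxSize=4, blocking=False)

      while True:
          inPreview = previewQueue.get()
          inDet = detectionNNQueue.get()
          frame = inPreview.getCvFrame()
          height, width = frame.shape[:2]

          for detection in inDet.detections:
              # bounding boxes are normalized 0..1; scale them to pixels
              x1, y1 = int(detection.xmin * width), int(detection.ymin * height)
              x2, y2 = int(detection.xmax * width), int(detection.ymax * height)
              try:
                  label = labelMap[detection.label]
              except IndexError:
                  label = str(detection.label)
              cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
              cv2.putText(frame, f"{label} {detection.confidence:.2f}",
                          (x1 + 5, y1 + 15), cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0, 255, 0))
              cv2.putText(frame, "X:{} Y:{} Z:{} mm".format(
                              int(detection.spatialCoordinates.x),
                              int(detection.spatialCoordinates.y),
                              int(detection.spatialCoordinates.z)),
                          (x1 + 5, y1 + 30), cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0, 255, 0))

          cv2.imshow("preview", frame)
          if cv2.waitKey(1) == ord('q'):
              break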

      ALwen Now my only concern is how to visualize it, like frames with the XYZ, labels, and confidence values.

      No, your only concern should be to put in the work to understand this very large subject area by reading and rereading the intro-to-DepthAI demo, and by really understanding the spatial tiny YOLO demo program, which does exactly what you are now asking for. I consider your latest ask very vague thread creep. This thread answered the question you posed.