jakaskerl
Did you miss spatial_calc.initialConfig.addROI(config)
in your code?
No, I already have that line in my code.
'''import cv2
import depthai as dai
import socket
import json
import errno
from ultralytics import YOLO
import numpy as np
stepSize = 0.05
newConfig = False
# Create pipelinepipeline = dai.Pipeline()
# Define sources and outputsmonoLeft = pipeline.create(dai.node.MonoCamera)
monoRight = pipeline.create(dai.node.MonoCamera)
stereo = pipeline.create(dai.node.StereoDepth)
spatialLocationCalculator = pipeline.create(dai.node.SpatialLocationCalculator)
xoutDepth = pipeline.create(dai.node.XLinkOut)
xoutSpatialData = pipeline.create(dai.node.XLinkOut)
xinSpatialCalcConfig = pipeline.create(dai.node.XLinkIn)
xoutDepth.setStreamName("depth")
xoutSpatialData.setStreamName("spatialData")
xinSpatialCalcConfig.setStreamName("spatialCalcConfig")
# Properties
monoLeft.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
monoLeft.setCamera("left")
monoRight.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
monoRight.setCamera("right")
stereo.setDefaultProfilePreset(dai.node.StereoDepth.PresetMode.HIGH_DENSITY)
stereo.setLeftRightCheck(True)stereo.setSubpixel(True)
# Create YOLO
modelmodel = YOLO('yolov8n.pt')
# Define source and outputcamRgb = pipeline.create(dai.node.ColorCamera)
xoutVideo = pipeline.create(dai.node.XLinkOut)
xoutVideo.setStreamName("video")
# Properties
camRgb.setBoardSocket(dai.CameraBoardSocket.CAM_A)
camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P)
camRgb.setVideoSize(640,400)
xoutVideo.input.setBlocking(False)
xoutVideo.input.setQueueSize(1)
# LinkingcamRgb.video.link(xoutVideo.input) ###for RGB
monoLeft.out.link(stereo.left)
monoRight.out.link(stereo.right)
spatialLocationCalculator.passthroughDepth.link(xoutDepth.input)
stereo.depth.link(spatialLocationCalculator.inputDepth)
spatialLocationCalculator.out.link(xoutSpatialData.input)
xinSpatialCalcConfig.out.link(spatialLocationCalculator.inputConfig)
# Connect to device and start pipeline
while True:
     Â
       videoIn = video.get()
       # Get BGR frame from NV12 encoded video frame to show with OpenCV
       frame = videoIn.getCvFrame()
       # Perform YOLOv8 inference on the frame
       results = model(frame)
       # Get annotated frame with YOLOv8 predictions
       annotated_frame = results[0].plot()
       # All data needed to be transferred
       result=results[0]
       output=[]
       # roi
       inDepth = depthQueue.get() # Blocking call, will wait until a new data has arrived
       depthFrame = inDepth.getFrame() # depthFrame values are in millimeters
       depth_downscaled = depthFrame[::4]
       if np.all(depth_downscaled == 0):
           min_depth = 0 # Set a default minimum depth value when all elements are zero
       else:
           min_depth = np.percentile(depth_downscaled[depth_downscaled != 0], 1)
       max_depth = np.percentile(depth_downscaled, 99)
       depthFrameColor = np.interp(depthFrame, (min_depth, max_depth), (0, 255)).astype(np.uint8)
       depthFrameColor = cv2.applyColorMap(depthFrameColor, cv2.COLORMAP_HOT)
       spatialData = spatialCalcQueue.get().getSpatialLocations()
      # YOLO
       for box in result.boxes:
           # coordinates of the bounding box
           x1,y1,x2,y2=[round(x) for x in box.xyxy[0].tolist()]
           # obj id
           class_id=box.cls[0].item()
           id=result.names[class_id]
           output.append([x1,y1,x2,y2, id])
           print(output)
         Â
           #### x1,y1=topLeft, x2,y2=bottomRight
           # config
           # size of the box
           '''topLeft = dai.Point2f(x1/depthFrameColor.shape[1], y1/depthFrameColor.shape[0])
           bottomRight = dai.Point2f(x2/depthFrameColor.shape[1], y2/depthFrameColor.shape[0])'''
           topLeft = dai.Point2f(x1/630, y1/390)
           bottomRight = dai.Point2f(x2/630, y2/390)
           config = dai.SpatialLocationCalculatorConfigData()
           config.depthThresholds.lowerThreshold = 100
           config.depthThresholds.upperThreshold = 10000
           calculationAlgorithm = dai.SpatialLocationCalculatorAlgorithm.MEDIAN
           ##  size is using from here
           config.roi = dai.Rect(topLeft, bottomRight)
           spatialLocationCalculator.inputConfig.setWaitForMessage(False)
           spatialLocationCalculator.initialConfig.addROI(config)
           for depthData in spatialData:
               roi = depthData.config.roi
               roi = roi.denormalize(width=depthFrameColor.shape[1], height=depthFrameColor.shape[0])
               xmin = int(roi.topLeft().x)
               ymin = int(roi.topLeft().y)
               xmax = int(roi.bottomRight().x)
               ymax = int(roi.bottomRight().y)
               depthMin = depthData.depthMin
               depthMax = depthData.depthMax
               fontType = cv2.FONT_HERSHEY_TRIPLEX
               cv2.rectangle(depthFrameColor, (xmin, ymin), (xmax, ymax), color, 1)
               cv2.putText(depthFrameColor, f"X: {int(depthData.spatialCoordinates.x)} mm", (xmin + 10, ymin + 20), fontType, 0.5, color)
               cv2.putText(depthFrameColor, f"Y: {int(depthData.spatialCoordinates.y)} mm", (xmin + 10, ymin + 35), fontType, 0.5, color)
               cv2.putText(depthFrameColor, f"Z: {int(depthData.spatialCoordinates.z)} mm", (xmin + 10, ymin + 50), fontType, 0.5, color)
         Â
       # Display the annotated frame
       cv2.imshow("YOLOv8 Inference", annotated_frame)
       cv2.imshow("depth", depthFrameColor)
       if cv2.waitKey(1) == ord('q'):
           break
cv2.destroyAllWindows()'''