• DepthAI-v2
  • can I calculate the depth of ROI instead of 1 point?

Hello,
I am trying to output the distance, using the spatialgen2host example as a starting point. In that file there is the line "spatials, centroid = hostSpatials.calc_spatials(depthData, (x1, y1))", which uses calc_spatials() from calc.py. However, I want to feed this function not the centroid but the whole ROI (x1, y1, x2, y2). The camera shows a few frames at first and then freezes, and I have to force quit to stop it. My work is shown below; did I get anything wrong? Any help will be appreciated.

    import cv2
    import depthai as dai
    from calc import HostSpatialsCalc
    import numpy as np
    import math
    import socket
    import json
    from ultralytics import YOLO

    # Create YOLO model
    model = YOLO('yolov8n-seg.pt')

    # Create a DepthAI pipeline
    pipeline = dai.Pipeline()

    # Define source and output
    camRgb = pipeline.create(dai.node.ColorCamera)
    xoutVideo = pipeline.create(dai.node.XLinkOut)
    xoutVideo.setStreamName("video")

    # Define source and output for stereo cam
    monoLeft = pipeline.create(dai.node.MonoCamera)
    monoRight = pipeline.create(dai.node.MonoCamera)
    stereo = pipeline.create(dai.node.StereoDepth)

    # Properties
    camRgb.setBoardSocket(dai.CameraBoardSocket.CAM_A)
    camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P)
    camRgb.setVideoSize(640, 480)
    xoutVideo.input.setBlocking(False)
    xoutVideo.input.setQueueSize(1)

    # Properties for depth map
    monoLeft.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
    monoLeft.setBoardSocket(dai.CameraBoardSocket.CAM_B)
    monoRight.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
    monoRight.setBoardSocket(dai.CameraBoardSocket.CAM_C)
    stereo.initialConfig.setConfidenceThreshold(255)
    stereo.setLeftRightCheck(True)
    stereo.setSubpixel(False)

    # Linking
    camRgb.video.link(xoutVideo.input)

    # Linking for depth map
    monoLeft.out.link(stereo.left)
    monoRight.out.link(stereo.right)

    xoutDepth = pipeline.create(dai.node.XLinkOut)
    xoutDepth.setStreamName("depth")
    stereo.depth.link(xoutDepth.input)

    xoutDepth = pipeline.create(dai.node.XLinkOut)
    xoutDepth.setStreamName("disp")
    stereo.disparity.link(xoutDepth.input)

    # Connect to device and start pipeline
    with dai.Device(pipeline) as device:
        video = device.getOutputQueue(name="video", maxSize=1, blocking=False)
        # Output queue will be used to get the depth frames from the outputs defined above
        depth = device.getOutputQueue(name="depth")

        hostSpatials = HostSpatialsCalc(device)
        step = 3
        delta = 5
        hostSpatials.setDeltaRoi(delta)

        color = (255, 255, 255)

        while True:
            videoIn = video.get()
            # Get BGR frame from NV12 encoded video frame to show with OpenCV
            frame = videoIn.getCvFrame()

            # Perform YOLOv8 inference on the frame
            results = model(frame)
            # Get annotated frame with YOLOv8 predictions
            annotated_frame = results[0].plot()

            # All data needed to be transferred
            result = results[0]
            output = []

            # DEPTHFRAME
            depthFrame = depth.get().getFrame()
            depthData = depth.get()
            depth_downscaled = depthFrame[::4]
            if np.all(depth_downscaled == 0):
                min_depth = 0  # Set a default minimum depth value when all elements are zero
            else:
                min_depth = np.percentile(depth_downscaled[depth_downscaled != 0], 1)
            max_depth = np.percentile(depth_downscaled, 99)
            depthFrameColor = np.interp(depthFrame, (min_depth, max_depth), (0, 255)).astype(np.uint8)
            depthFrameColor = cv2.applyColorMap(depthFrameColor, cv2.COLORMAP_HOT)

            height = frame.shape[0]
            width = frame.shape[1]

            for box in result.boxes:
                # coordinates of the bounding box
                x1, y1, x2, y2 = [round(x) for x in box.xyxy[0].tolist()]
                roi = (x1, y1, x2, y2)
                # obj id
                class_id = box.cls[0].item()
                id = result.names[class_id]
                output.append([x1, y1, x2, y2, id])
                print(output)

                center_box_x = int((x1 + x2) / 2)
                center_box_y = int((y1 + y2) / 2)

                #### starting ROI, NEED TO DEFINE ROI AS THE BOUNDING BOX
                # Create a dai.Rect object with the normalized coordinates
                roi_rect = dai.Rect(dai.Point2f(x1, y1), dai.Point2f(x2, y2))
                roi_rect = roi_rect.denormalize(depthFrameColor.shape[1], depthFrameColor.shape[0])
                topLeft = roi_rect.topLeft()
                bottomRight = roi_rect.bottomRight()
                xmin = int(topLeft.x)
                ymin = int(topLeft.y)
                xmax = int(bottomRight.x)
                ymax = int(bottomRight.y)
                cv2.rectangle(depthFrameColor, (xmin, ymin), (xmax, ymax), color, 1)

                # Denormalize bounding box
                xL = int(xmin * width)
                xR = int(xmax * width)
                yL = int(ymin * height)
                yR = int(ymax * height)

                cv2.circle(annotated_frame, (x1, y1), 10, (0, 255, 255), -1)  # yellow dot
                # this line from spatial mobile net
                # cv2.putText(frame, f"Z: {int(box.spatialCoordinates.z)} mm", (xL + 10, yL + 80), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
                # this line is from 2d map
                spatials, centroid = hostSpatials.calc_spatials(depthData, roi)
                cv2.putText(annotated_frame, "Z: " + ("{:.1f}m".format(spatials['z'] / 1000) if not math.isnan(spatials['z']) else "--"), (xL + 10, yL + 50), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)

            # Display the annotated frame
            cv2.imshow("YOLOv8 Inference", annotated_frame)
            # Display Depth Frame
            # cv2.imshow("depth", disp)

            if cv2.waitKey(1) == ord('q'):
                break

    cv2.destroyAllWindows()

KwinT Additional info: the spatial calculation never goes through. It only shows a few frames with object detection, and I guess that when it hits the spatials, centroid = hostSpatials.calc_spatials(depthData, roi) line it cannot get past it, so it freezes.
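A quick way to check where it actually gets stuck is to make the queue reads non-blocking; tryGet() returns None when no message is waiting, so the loop itself can never hang on a queue. A minimal sketch of that check:

    # Sketch: non-blocking reads, to see whether the loop stalls on the queues
    # or inside the calc_spatials call. tryGet() returns None if nothing arrived.
    videoIn = video.tryGet()
    depthData = depth.tryGet()
    if videoIn is None or depthData is None:
        continue  # nothing new yet, keep the loop alive
    frame = videoIn.getCvFrame()
    depthFrame = depthData.getFrame()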

Hi @KwinT
Just change the calc.py function to accept an ROI. Either way, when you pass a point to calc_spatials, it will create an ROI around that point and average the depth values inside it.
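For reference, a minimal sketch of the ROI-averaging part (the helper name calc_roi_depth and the 200-30000 mm thresholds are just for illustration, not the exact code in calc.py):

    import numpy as np

    def calc_roi_depth(depthFrame, roi, thresh_low=200, thresh_high=30000):
        """Average depth (mm) inside a pixel ROI (xmin, ymin, xmax, ymax) of the depth frame."""
        xmin, ymin, xmax, ymax = [int(v) for v in roi]
        # Clamp the ROI to the frame so the slice never goes out of bounds
        h, w = depthFrame.shape
        xmin, xmax = max(0, xmin), min(w, xmax)
        ymin, ymax = max(0, ymin), min(h, ymax)

        depthROI = depthFrame[ymin:ymax, xmin:xmax]
        # Ignore invalid pixels: 0 means "no depth measured", and values outside
        # a plausible range are treated as noise
        inRange = (depthROI >= thresh_low) & (depthROI <= thresh_high)
        if not np.any(inRange):
            return float('nan')
        return float(np.mean(depthROI[inRange]))

Note that the YOLO boxes are in the 640x480 RGB frame, while the 400P depth frame is 640x400 and is not aligned to the RGB camera in your pipeline (no stereo.setDepthAlign(dai.CameraBoardSocket.CAM_A)), so the box coordinates would need to be mapped into depth-frame coordinates before slicing.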

Also, here is a GPT "fixed" version of the code that I haven't checked, but it might solve the blocking.

    import cv2
    import depthai as dai
    from calc import HostSpatialsCalc
    import numpy as np
    import math
    from ultralytics import YOLO
    
    # Create YOLO model
    model = YOLO('yolov8n-seg.pt')
    
    # Create a DepthAI pipeline
    pipeline = dai.Pipeline()
    
    # Define source and output for RGB camera
    camRgb = pipeline.create(dai.node.ColorCamera)
    xoutVideo = pipeline.create(dai.node.XLinkOut)
    xoutVideo.setStreamName("video")
    
    # Define source and output for stereo camera
    monoLeft = pipeline.create(dai.node.MonoCamera)
    monoRight = pipeline.create(dai.node.MonoCamera)
    stereo = pipeline.create(dai.node.StereoDepth)
    
    # RGB camera properties
    camRgb.setBoardSocket(dai.CameraBoardSocket.RGB)
    camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P)
    camRgb.setVideoSize(640, 480)
    xoutVideo.input.setBlocking(False)
    xoutVideo.input.setQueueSize(1)
    
    # Stereo camera properties
    monoLeft.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
    monoLeft.setBoardSocket(dai.CameraBoardSocket.LEFT)
    monoRight.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
    monoRight.setBoardSocket(dai.CameraBoardSocket.RIGHT)
    stereo.initialConfig.setConfidenceThreshold(255)
    stereo.setLeftRightCheck(True)
    stereo.setSubpixel(False)
    
    # Linking
    camRgb.video.link(xoutVideo.input)
    monoLeft.out.link(stereo.left)
    monoRight.out.link(stereo.right)
    
    xoutDepth = pipeline.create(dai.node.XLinkOut)
    xoutDepth.setStreamName("depth")
    stereo.depth.link(xoutDepth.input)
    
    # Connect to device and start pipeline
    with dai.Device(pipeline) as device:
        videoQueue = device.getOutputQueue(name="video", maxSize=1, blocking=False)
        depthQueue = device.getOutputQueue(name="depth", maxSize=1, blocking=False)
        hostSpatials = HostSpatialsCalc(device)
    
        color = (255, 255, 255)
    
        while True:
            inDepth = depthQueue.get()  # Get depth frame
            depthFrame = inDepth.getFrame()
    
            videoIn = videoQueue.get()  # Get video frame
            frame = videoIn.getCvFrame()
    
            # Perform YOLOv8 inference on the frame
            results = model(frame)
    
            # Annotate frame with YOLO predictions
            annotated_frame = results[0].plot()
    
            # Process detections and calculate spatial information
            for box in results[0].boxes:
                # Coordinates of the bounding box (pixels in the RGB frame)
                x1, y1, x2, y2 = box.xyxy[0].tolist()
                
                # Calculate spatial information for the ROI
                spatials, centroid = hostSpatials.calc_spatials(inDepth, (x1, y1, x2, y2))
    
                # Display spatial information on the frame
                cv2.putText(annotated_frame, f"Z: {spatials['z']/1000:.2f}m", (int(x1), int(y1)), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color)
    
            # Display the annotated frame
            cv2.imshow("YOLOv8 Inference", annotated_frame)
    
            if cv2.waitKey(1) == ord('q'):
                break
    
    cv2.destroyAllWindows()
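If I had to guess at the cause of the freeze: your original loop calls depth.get() twice per iteration on a queue created with the default (blocking) settings, so once YOLO inference slows the loop down the queue fills up and the device side stalls waiting for the host, which would explain the stream freezing after a few frames. The code above avoids that by creating the host queues with maxSize=1, blocking=False and reading each queue once per iteration, along these lines:

    # Original: two blocking reads from the same queue every iteration
    # depthFrame = depth.get().getFrame()
    # depthData  = depth.get()

    # Instead: one message per iteration, from a small non-blocking queue
    depthQueue = device.getOutputQueue(name="depth", maxSize=1, blocking=False)
    depthData = depthQueue.get()
    depthFrame = depthData.getFrame()  # reuse the same message for both uses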

Thoughts?