TinyYOLOv7 with spatial information

Aix

Hello,

I am trying to develop a method to tree mensuration using depth information. I have a tiny yolo v7 model trained, I am trying to build a script inspired from the spatial with yolo example you have. How ever I mostly get something like this:
Traceback (most recent call last):

File "pipeline.py", line 128, in <module>

inRgb = qRgb.get()

RuntimeError: Communication exception - possible device error/misconfiguration. Original message 'Couldn't read data from stream: 'rgb' (X_LINK_ERROR)'

from what I understand this maybe due to the bandwidth limitation, so I tried to decrease the fps down to 2, however a similar problem occurs , where the cv preview get stuck.

I am using OAK-D with usb 3 connection.

The input to the model takes 640x640. Would you have any suggestions on this matter?

erik

Hi Aix ,
Please provide full MRE. This sounds like either Pipeline issue, or Connectivity issue, so please provide the information for both.
Thanks, Erik

Aix

Hello erik,

I am providing the MRE with the converted model and the pipeline creation script (please find the yolo7 model files through this repo: https://github.com/Alp-1/oakdyolov7) I would appreciate your comments.
Thank you.

Pipeline Creation Script:

import sys
import cv2
import depthai as dai
import numpy as np
import time


nnBlobPath = "best_openvino_2022.1_4shave.blob"

labelMap = ["trunk"]

syncNN = True

# Create pipeline
pipeline = dai.Pipeline()

# Define sources and outputs
camRgb = pipeline.create(dai.node.ColorCamera)
spatialDetectionNetwork = pipeline.create(dai.node.YoloSpatialDetectionNetwork)
monoLeft = pipeline.create(dai.node.MonoCamera)
monoRight = pipeline.create(dai.node.MonoCamera)
stereo = pipeline.create(dai.node.StereoDepth)
nnNetworkOut = pipeline.create(dai.node.XLinkOut)

xoutRgb = pipeline.create(dai.node.XLinkOut)
xoutNN = pipeline.create(dai.node.XLinkOut)
xoutDepth = pipeline.create(dai.node.XLinkOut)

xoutRgb.setStreamName("rgb")
xoutNN.setStreamName("detections")
xoutDepth.setStreamName("depth")
nnNetworkOut.setStreamName("nnNetwork")

# Properties
camRgb.setPreviewSize(640, 640)
camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_12_MP)
camRgb.setInterleaved(False)
camRgb.setColorOrder(dai.ColorCameraProperties.ColorOrder.BGR)
camRgb.setFps(2)

monoLeft.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
monoLeft.setBoardSocket(dai.CameraBoardSocket.LEFT)
monoRight.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
monoRight.setBoardSocket(dai.CameraBoardSocket.RIGHT)

# setting node configs
stereo.setDefaultProfilePreset(dai.node.StereoDepth.PresetMode.HIGH_DENSITY)
# Align depth map to the perspective of RGB camera, on which inference is done
stereo.setDepthAlign(dai.CameraBoardSocket.RGB)
stereo.setOutputSize(monoLeft.getResolutionWidth(), monoLeft.getResolutionHeight())

spatialDetectionNetwork.setBlobPath(nnBlobPath)
spatialDetectionNetwork.setConfidenceThreshold(0.5)
spatialDetectionNetwork.input.setBlocking(False)
spatialDetectionNetwork.setBoundingBoxScaleFactor(0.5)
spatialDetectionNetwork.setDepthLowerThreshold(100)
spatialDetectionNetwork.setDepthUpperThreshold(5000)

# Yolo specific parameters
spatialDetectionNetwork.setNumClasses(len(labelMap))
spatialDetectionNetwork.setCoordinateSize(4)
spatialDetectionNetwork.setAnchors([10.0,
                13.0,
                16.0,
                30.0,
                33.0,
                23.0,
                30.0,
                61.0,
                62.0,
                45.0,
                59.0,
                119.0,
                116.0,
                90.0,
                156.0,
                198.0,
                373.0,
                326.0])
spatialDetectionNetwork.setAnchorMasks({"side80": [
                    0,
                    1,
                    2
                ],
                "side40": [
                    3,
                    4,
                    5
                ],
                "side20": [
                    6,
                    7,
                    8
                ]})
spatialDetectionNetwork.setIouThreshold(0.5)

# Linking
monoLeft.out.link(stereo.left)
monoRight.out.link(stereo.right)

camRgb.preview.link(spatialDetectionNetwork.input)
if syncNN:
    spatialDetectionNetwork.passthrough.link(xoutRgb.input)
else:
    camRgb.preview.link(xoutRgb.input)

spatialDetectionNetwork.out.link(xoutNN.input)

stereo.depth.link(spatialDetectionNetwork.inputDepth)
spatialDetectionNetwork.passthroughDepth.link(xoutDepth.input)
spatialDetectionNetwork.outNetwork.link(nnNetworkOut.input)

# Connect to device and start pipeline
with dai.Device(pipeline) as device:

    # Output queues will be used to get the rgb frames and the spatial detection data from the outputs defined above
    qRgb = device.getOutputQueue(name="rgb", maxSize=4, blocking=False)
    qDet = device.getOutputQueue(name="detections", maxSize=4, blocking=False)
    qDepth = device.getOutputQueue(name="depth", maxSize=4, blocking=False)

    frame = None
    detections = []
    startTime = time.monotonic()
    counter = 0

    while True:
        if syncNN:
            inRgb = qRgb.get()
            inDet = qDet.get()
            inDepth = qDepth.get()

            frame = inRgb.getCvFrame()
            depthFrame = inDepth.getFrame()
            detections = inDet.detections
        else:
            inRgb = qRgb.tryGet()
            inDet = qDet.tryGet()
            inDepth = qDepth.tryGet()

            if inRgb is not None:
                frame = inRgb.getCvFrame()

            if inDet is not None:
                detections = inDet.detections

            if inDepth is not None:
                depthFrame = inDepth.getFrame()

        if frame is not None:
            # Display the resulting frame
            for detection in detections:
                x1, y1, x2, y2 = int(detection.xmin), int(detection.ymin), int(detection.xmax), int(detection.ymax)
                centerX = int((detection.xmin + detection.xmax) / 2)
                centerY = int((detection.ymin + detection.ymax) / 2)

                if detection.label < len(labelMap):
                    label = labelMap[detection.label]
                else:
                    label = "unknown"

                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
                cv2.circle(frame, (centerX, centerY), 2, (0, 0, 255), 2)

                # Get depth value for the center point of the object
                depth_value = depthFrame[centerY, centerX]
                cv2.putText(frame, f"Depth: {depth_value}mm", (x1, y2 + 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

            cv2.imshow("Spatial Detection", frame)

        counter += 1
        if (time.monotonic() - startTime) > 1:
            print(f"FPS: {counter / (time.monotonic() - startTime)}")
            counter = 0
            startTime = time.monotonic()

        # Press 'q' to exit the loop
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

# Release resources and close windows
cv2.destroyAllWindows()

jakaskerl

Hi Aix
It doesn't seem like it's a pipeline issue since everything works find on my side with OAK-D Pro. Like Erik said, looks like a connectivity issue. Can you try to isolate the part of the code that causes the issue? I'd also suggest reinstalling/updating depthai to latest version (I tested it on the current main branch).

Thanks,
Jaka

Aix

Hello jakaskerl

I tried to run it on a fresh install on Raspberry Pi this time around instead of Ubuntu, instead of getting the Trackback now the program runs and freezes. Have this behaviour has been present with typical connectivity issues prior?

Thank you

erik

Hi Aix , once again, please follow Requesting support guidelines.