Hello, I trained a model for detecting an object. I used YOLOv7 for training. Then I transformed the weights file(best.pt) to blob file by using this tool: https://tools.luxonis.com/ . As an initial step, I tried to use this blob file in one of the example codes which is: depthai-python/examples/SpatialDetection/spatial_mobilenet.py. I just changed the model file, Labels, and preview size. Then the code became like this: from pathlib import Path import sys import cv2 import depthai as dai import numpy as np import time ''' Spatial detection network demo. Performs inference on RGB camera and retrieves spatial location coordinates: x,y,z relative to the center of depth map. if(window.hljsLoader && !document.currentScript.parentNode.hasAttribute('data-s9e-livepreview-onupdate')) { window.hljsLoader.highlightBlocks(document.currentScript.parentNode); } ''' # Get argument first nnBlobPath = str((Path( file ).parent / Path('/home/apakgrup/depthai-python/examples/spatial/best_openvino_2022.1_6shave.blob')).resolve().absolute()) if len(sys.argv) > 1: nnBlobPath = sys.argv[1] if(window.hljsLoader && !document.currentScript.parentNode.hasAttribute('data-s9e-livepreview-onupdate')) { window.hljsLoader.highlightBlocks(document.currentScript.parentNode); } if not Path(nnBlobPath).exists(): import sys raise FileNotFoundError(f'Required file/s not found, please run "{sys.executable} install_requirements.py"') if(window.hljsLoader && !document.currentScript.parentNode.hasAttribute('data-s9e-livepreview-onupdate')) { window.hljsLoader.highlightBlocks(document.currentScript.parentNode); } # MobilenetSSD label texts labelMap = ["fire", "smoke"] syncNN = True # Create pipeline pipeline = dai.Pipeline() # Define sources and outputs camRgb = pipeline.create(dai.node.ColorCamera) spatialDetectionNetwork = pipeline.create(dai.node.MobileNetSpatialDetectionNetwork) monoLeft = pipeline.create(dai.node.MonoCamera) monoRight = pipeline.create(dai.node.MonoCamera) stereo = pipeline.create(dai.node.StereoDepth) xoutRgb = pipeline.create(dai.node.XLinkOut) xoutNN = pipeline.create(dai.node.XLinkOut) xoutDepth = pipeline.create(dai.node.XLinkOut) xoutRgb.setStreamName("rgb") xoutNN.setStreamName("detections") xoutDepth.setStreamName("depth") # Properties camRgb.setPreviewSize(640, 640) camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P) camRgb.setInterleaved(False) camRgb.setColorOrder(dai.ColorCameraProperties.ColorOrder.BGR) monoLeft.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P) monoLeft.setCamera("left") monoRight.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P) monoRight.setCamera("right") # Setting node configs stereo.setDefaultProfilePreset(dai.node.StereoDepth.PresetMode.HIGH_DENSITY) # Align depth map to the perspective of RGB camera, on which inference is done stereo.setDepthAlign(dai.CameraBoardSocket.CAM_A) stereo.setSubpixel(True) stereo.setOutputSize(monoLeft.getResolutionWidth(), monoLeft.getResolutionHeight()) spatialDetectionNetwork.setBlobPath(nnBlobPath) spatialDetectionNetwork.setConfidenceThreshold(0.5) spatialDetectionNetwork.input.setBlocking(False) spatialDetectionNetwork.setBoundingBoxScaleFactor(0.5) spatialDetectionNetwork.setDepthLowerThreshold(100) spatialDetectionNetwork.setDepthUpperThreshold(5000) # Linking monoLeft.out.link(stereo.left) monoRight.out.link(stereo.right) camRgb.preview.link(spatialDetectionNetwork.input) if syncNN: spatialDetectionNetwork.passthrough.link(xoutRgb.input) if(window.hljsLoader && !document.currentScript.parentNode.hasAttribute('data-s9e-livepreview-onupdate')) { window.hljsLoader.highlightBlocks(document.currentScript.parentNode); } else: camRgb.preview.link(xoutRgb.input) if(window.hljsLoader && !document.currentScript.parentNode.hasAttribute('data-s9e-livepreview-onupdate')) { window.hljsLoader.highlightBlocks(document.currentScript.parentNode); } spatialDetectionNetwork.out.link(xoutNN.input) stereo.depth.link(spatialDetectionNetwork.inputDepth) spatialDetectionNetwork.passthroughDepth.link(xoutDepth.input) # Connect to device and start pipeline with dai.Device(pipeline) as device: # Output queues will be used to get the rgb frames and nn data from the outputs defined above previewQueue = device.getOutputQueue(name="rgb", maxSize=4, blocking=False) detectionNNQueue = device.getOutputQueue(name="detections", maxSize=4, blocking=False) depthQueue = device.getOutputQueue(name="depth", maxSize=4, blocking=False) startTime = time.monotonic() counter = 0 fps = 0 color = (255, 255, 255) while True: inPreview = previewQueue.get() inDet = detectionNNQueue.get() depth = depthQueue.get() counter+=1 current_time = time.monotonic() if (current_time - startTime) > 1 : fps = counter / (current_time - startTime) counter = 0 startTime = current_time frame = inPreview.getCvFrame() depthFrame = depth.getFrame() # depthFrame values are in millimeters depth_downscaled = depthFrame[::4] min_depth = np.percentile(depth_downscaled[depth_downscaled != 0], 1) max_depth = np.percentile(depth_downscaled, 99) depthFrameColor = np.interp(depthFrame, (min_depth, max_depth), (0, 255)).astype(np.uint8) depthFrameColor = cv2.applyColorMap(depthFrameColor, cv2.COLORMAP_HOT) detections = inDet.detections # If the frame is available, draw bounding boxes on it and show the frame height = frame.shape[0] width = frame.shape[1] for detection in detections: roiData = detection.boundingBoxMapping roi = roiData.roi roi = roi.denormalize(depthFrameColor.shape[1], depthFrameColor.shape[0]) topLeft = roi.topLeft() bottomRight = roi.bottomRight() xmin = int(topLeft.x) ymin = int(topLeft.y) xmax = int(bottomRight.x) ymax = int(bottomRight.y) cv2.rectangle(depthFrameColor, (xmin, ymin), (xmax, ymax), color, 1) # Denormalize bounding box x1 = int(detection.xmin \* width) x2 = int(detection.xmax \* width) y1 = int(detection.ymin \* height) y2 = int(detection.ymax \* height) try: label = labelMap[detection.label] except: label = detection.label cv2.putText(frame, str(label), (x1 + 10, y1 + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255) cv2.putText(frame, "{:.2f}".format(detection.confidence\*100), (x1 + 10, y1 + 35), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255) cv2.putText(frame, f"X: {int(detection.spatialCoordinates.x)} mm", (x1 + 10, y1 + 50), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255) cv2.putText(frame, f"Y: {int(detection.spatialCoordinates.y)} mm", (x1 + 10, y1 + 65), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255) cv2.putText(frame, f"Z: {int(detection.spatialCoordinates.z)} mm", (x1 + 10, y1 + 80), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255) cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), cv2.FONT_HERSHEY_SIMPLEX) cv2.putText(frame, "NN fps: {:.2f}".format(fps), (2, frame.shape[0] - 4), cv2.FONT_HERSHEY_TRIPLEX, 0.4, (255,255,255)) cv2.imshow("depth", depthFrameColor) cv2.imshow("preview", frame) if cv2.waitKey(1) == ord('q'): break if(window.hljsLoader && !document.currentScript.parentNode.hasAttribute('data-s9e-livepreview-onupdate')) { window.hljsLoader.highlightBlocks(document.currentScript.parentNode); } At the beginning, the code runs normally, then after few seconds it gives me this error:RuntimeError: Communication exception - possible device error/misconfiguration. Original message 'Couldn't read data from stream: 'detections' (X_LINK_ERROR)' the example code run normally, this error happened after making these modifications. Can someone help me? thank you

[unknown] I have put them in the same folder still not working. I knew that would not be the solution. It was just a trial.

RuntimeError: Communication exception

MhmdBarazi

Hello,

I trained a model for detecting an object. I used YOLOv7 for training. Then I transformed the weights file(best.pt) to blob file by using this tool: https://tools.luxonis.com/. As an initial step, I tried to use this blob file in one of the example codes which is: depthai-python/examples/SpatialDetection/spatial_mobilenet.py. I just changed the model file, Labels, and preview size. Then the code became like this:

from pathlib import Path

import sys

import cv2

import depthai as dai

import numpy as np

import time

'''

Spatial detection network demo.

Performs inference on RGB camera and retrieves spatial location coordinates: x,y,z relative to the center of depth map.

'''

# Get argument first

nnBlobPath = str((Path(file).parent / Path('/home/apakgrup/depthai-python/examples/spatial/best_openvino_2022.1_6shave.blob')).resolve().absolute())

if len(sys.argv) > 1:

nnBlobPath = sys.argv[1]

if not Path(nnBlobPath).exists():

import sys

raise FileNotFoundError(f'Required file/s not found, please run "{sys.executable} install_requirements.py"')

# MobilenetSSD label texts

labelMap = ["fire", "smoke"]

syncNN = True

# Create pipeline

pipeline = dai.Pipeline()

# Define sources and outputs

camRgb = pipeline.create(dai.node.ColorCamera)

spatialDetectionNetwork = pipeline.create(dai.node.MobileNetSpatialDetectionNetwork)

monoLeft = pipeline.create(dai.node.MonoCamera)

monoRight = pipeline.create(dai.node.MonoCamera)

stereo = pipeline.create(dai.node.StereoDepth)

xoutRgb = pipeline.create(dai.node.XLinkOut)

xoutNN = pipeline.create(dai.node.XLinkOut)

xoutDepth = pipeline.create(dai.node.XLinkOut)

xoutRgb.setStreamName("rgb")

xoutNN.setStreamName("detections")

xoutDepth.setStreamName("depth")

# Properties

camRgb.setPreviewSize(640, 640)

camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P)

camRgb.setInterleaved(False)

camRgb.setColorOrder(dai.ColorCameraProperties.ColorOrder.BGR)

monoLeft.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)

monoLeft.setCamera("left")

monoRight.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)

monoRight.setCamera("right")

# Setting node configs

stereo.setDefaultProfilePreset(dai.node.StereoDepth.PresetMode.HIGH_DENSITY)

# Align depth map to the perspective of RGB camera, on which inference is done

stereo.setDepthAlign(dai.CameraBoardSocket.CAM_A)

stereo.setSubpixel(True)

stereo.setOutputSize(monoLeft.getResolutionWidth(), monoLeft.getResolutionHeight())

spatialDetectionNetwork.setBlobPath(nnBlobPath)

spatialDetectionNetwork.setConfidenceThreshold(0.5)

spatialDetectionNetwork.input.setBlocking(False)

spatialDetectionNetwork.setBoundingBoxScaleFactor(0.5)

spatialDetectionNetwork.setDepthLowerThreshold(100)

spatialDetectionNetwork.setDepthUpperThreshold(5000)

# Linking

monoLeft.out.link(stereo.left)

monoRight.out.link(stereo.right)

camRgb.preview.link(spatialDetectionNetwork.input)

if syncNN:

spatialDetectionNetwork.passthrough.link(xoutRgb.input)

else:

camRgb.preview.link(xoutRgb.input)

spatialDetectionNetwork.out.link(xoutNN.input)

stereo.depth.link(spatialDetectionNetwork.inputDepth)

spatialDetectionNetwork.passthroughDepth.link(xoutDepth.input)

# Connect to device and start pipeline

with dai.Device(pipeline) as device:

# Output queues will be used to get the rgb frames and nn data from the outputs defined above

previewQueue = device.getOutputQueue(name="rgb", maxSize=4, blocking=False)

detectionNNQueue = device.getOutputQueue(name="detections", maxSize=4, blocking=False)

depthQueue = device.getOutputQueue(name="depth", maxSize=4, blocking=False)

startTime = time.monotonic()

counter = 0

fps = 0

color = (255, 255, 255)

while True:

    inPreview = previewQueue.get()

    inDet = detectionNNQueue.get()

    depth = depthQueue.get()

    counter+=1

    current_time = time.monotonic()

    if (current_time - startTime) > 1 :

        fps = counter / (current_time - startTime)

        counter = 0

        startTime = current_time

    frame = inPreview.getCvFrame()

    depthFrame = depth.getFrame() # depthFrame values are in millimeters

    depth_downscaled = depthFrame[::4]

    min_depth = np.percentile(depth_downscaled[depth_downscaled != 0], 1)

    max_depth = np.percentile(depth_downscaled, 99)

    depthFrameColor = np.interp(depthFrame, (min_depth, max_depth), (0, 255)).astype(np.uint8)

    depthFrameColor = cv2.applyColorMap(depthFrameColor, cv2.COLORMAP_HOT)

    detections = inDet.detections

    # If the frame is available, draw bounding boxes on it and show the frame

    height = frame.shape[0]

    width  = frame.shape[1]

    for detection in detections:

        roiData = detection.boundingBoxMapping

        roi = roiData.roi

        roi = roi.denormalize(depthFrameColor.shape[1], depthFrameColor.shape[0])

        topLeft = roi.topLeft()

        bottomRight = roi.bottomRight()

        xmin = int(topLeft.x)

        ymin = int(topLeft.y)

        xmax = int(bottomRight.x)

        ymax = int(bottomRight.y)

        cv2.rectangle(depthFrameColor, (xmin, ymin), (xmax, ymax), color, 1)

        # Denormalize bounding box

        x1 = int(detection.xmin \* width)

        x2 = int(detection.xmax \* width)

        y1 = int(detection.ymin \* height)

        y2 = int(detection.ymax \* height)

        try:

            label = labelMap[detection.label]

        except:

            label = detection.label

        cv2.putText(frame, str(label), (x1 + 10, y1 + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)

        cv2.putText(frame, "{:.2f}".format(detection.confidence\*100), (x1 + 10, y1 + 35), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)

        cv2.putText(frame, f"X: {int(detection.spatialCoordinates.x)} mm", (x1 + 10, y1 + 50), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)

        cv2.putText(frame, f"Y: {int(detection.spatialCoordinates.y)} mm", (x1 + 10, y1 + 65), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)

        cv2.putText(frame, f"Z: {int(detection.spatialCoordinates.z)} mm", (x1 + 10, y1 + 80), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)

        cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), cv2.FONT_HERSHEY_SIMPLEX)

    cv2.putText(frame, "NN fps: {:.2f}".format(fps), (2, frame.shape[0] - 4), cv2.FONT_HERSHEY_TRIPLEX, 0.4, (255,255,255))

    cv2.imshow("depth", depthFrameColor)

    cv2.imshow("preview", frame)

    if cv2.waitKey(1) == ord('q'):

        break

At the beginning, the code runs normally, then after few seconds it gives me this error:RuntimeError: Communication exception - possible device error/misconfiguration. Original message 'Couldn't read data from stream: 'detections' (X_LINK_ERROR)'

the example code run normally, this error happened after making these modifications. Can someone help me? thank you

jakaskerl

Hi MhmdBarazi
Could you swap your blob for the original blob to see if the model is causing the problems. Are you able to retrieve detections and preview the frame before the error occurs?

Thanks,
Jaka

MhmdBarazi

jakaskerl Hello,
Yes I tested the original code(with the original blob), it worked without any issues. I also tested the model before transformation(as pt file), it was detecting some results on google colab.

Concerning detection and preview, before getting the error message the camera view screen didn't even appear.

thank you for your reply,

jakaskerl

Hi MhmdBarazi
Seems like something is wrong with the conversion process most likely. Could you upload the .pt file (before conversion) here so we can see what the problem could be?

Thanks,
Jaka

MhmdBarazi

jakaskerl Hello, I attached my weights file link below. please let me know if something else can help you figure out this error. Thank you so much for your efforts.

MhmdBarazi

jakaskerl Could you access the file?

erik

MhmdBarazi I couldn't, could you make it accessible to anyone?

MhmdBarazi

erik

Could you access it now? 71MB file I have attached a smaller one firstly.

MhmdBarazi

Do I need the file (best.json) to run the blob file? there are also another files like: best.bin, best-simplified.on, best.xml.
I need to figure out the main reason for this.

MhmdBarazi

MhmdBarazi I've put them in the same folder, but it is still not working with the same error message. I know it would not be a solution, it was just a trial.

MhmdBarazi

can someone help me please it is urgent

MhmdBarazi

just an update. I built a yolov5 model, then I converted the file using the listed steps here: https://colab.research.google.com/github/luxonis/depthai-ml-training/blob/master/colab-notebooks/YoloV5_training.ipynb#scrollTo=g1eUou1F3O1z

I did every single step successfully, when I run the final code, I got this error:

for the conversion process I entered a train code on the google colab like this:

then, the converted file:

Any help please?

jakaskerl

Hi MhmdBarazi
Looks like you incorrectly specified the blob path, that's why it's trying to pull one from artefacts. Change to code to fix this.

Thanks,
Jaka

MhmdBarazi

jakaskerl Hello

When the blob path is incorrect it gives an error like this:

FileNotFoundError: Required file/s not found, please run "/usr/bin/python3 install_requirements.py"

Furthermore, I defined the path in the same way as the original one is. Thank you

jakaskerl

Hi MhmdBarazi
Hmm, might be an outdated version, could you check that please.

Thanks,
Jaka

MhmdBarazi

jakaskerl Hello,

I changed the file name from best_openvino_2022.1_6shave.blob to best_openvino_2021.4_6shave.blob. Then I changed the whole line of opening path like this:

nnBlobPath = str((os.path.dirname(os.path.abspath("file")) / Path('/home/apakgrup/depthai-python/examples/spatial/best_openvino_2021.4_6shave.blob')).resolve().absolute())

Unfortunately, still not work. But the error has changed like this:

[194430105190FE1200] [1.1.2] [3.951] [SpatialDetectionNetwork(1)] [error] ROI x:0.66726685 y:0.8051758 width:0.034057617 height:0 is not a valid rectangle.

[194430105190FE1200] [1.1.2] [3.952] [SpatialDetectionNetwork(1)] [error] ROI x:0.716774 y:0.9003906 width:0.03199768 height:0 is not a valid rectangle.

[194430105190FE1200] [1.1.2] [4.149] [XLinkOut(6)] [error] Message has too much metadata (665313B) to serialize. Maximum is 51200B. Dropping message

[194430105190FE1200] [1.1.2] [4.472] [system] [critical] Fatal error. Please report to developers. Log: 'TlsfMemoryManager' '255'

Traceback (most recent call last):

File "<string>", line 103, in <module>

RuntimeError: Communication exception - possible device error/misconfiguration. Original message 'Couldn't read data from stream: 'detections' (X_LINK_ERROR)'

Any help ,please jakaskerl @erik

Thanks

erik

MhmdBarazi What you shared above (result(1).zip) works for me.
python3 main.py --config barazi/barazi.json (from depthai-experiments/gen2-yolo/device-decoding), I just removed the Spatial=True inside the create_nn, to improve the FPS.

MhmdBarazi

erik thank you so much, But the problem is that I need spatial data(x,y,z) actually that is why I choose OAK device because I can locate the fire. Can I merge between fire detection and spatial data?

erik

Yes, you can. I am using latest develop version of depthai-sdk, and have limited FPS to 8:

from depthai_sdk import OakCamera, ArgsParser
import argparse

def a(packet):
    print(packet.detections)
# parse arguments
parser = argparse.ArgumentParser()
parser.add_argument("-conf", "--config", help="Trained YOLO json config path", default='model/yolo.json', type=str)
args = ArgsParser.parseArgs(parser)

with OakCamera(args=args) as oak:
    stereo = oak.stereo(fps=8)
    color = oak.create_camera('color', fps=8)
    nn = oak.create_nn(args['config'], color, nn_type='yolo', spatial=stereo)
    oak.visualize(nn, fps=True, scale=2/3)
    oak.callback(nn.out.passthrough, a)
    oak.start(blocking=True)

MhmdBarazi

erik It worked, thank you so much, just last thing, you have used a different code, the code I have uploaded doesn't work at all it means. Does it? I want to get this spatial data to use it in another code. That is why am asking. thank you so much for helping me out.

erik

MhmdBarazi I don't now, the code you used wasn't MRE and was badly formatted, so I haven't dug into it trying to debug it.