Error when deploy Roboflow model to depthai_sdk

KhnhTrungTrn · Feb 15, 2024

Hi,

I'm using this demo here to deploy a model trained from Roboflow but an error show up like this

{

"exit_code": 1,

"message": "Command failed with exit code 1, command: /opt/intel/openvino2021_4/deployment_tools/inference_engine/lib/intel64/myriad_compile -m /tmp/blobconverter/3ff28ee84d3c414bac9cfbd7531346d9/ripetomatodetection_1/FP16/ripetomatodetection_1.xml -o /tmp/blobconverter/3ff28ee84d3c414bac9cfbd7531346d9/ripetomatodetection_1/FP16/ripetomatodetection_1.blob -c /tmp/blobconverter/3ff28ee84d3c414bac9cfbd7531346d9/myriad_compile_config.txt -ip U8",

"stderr": "Unknown model format! Cannot find reader for model format: xml and read the model: /tmp/blobconverter/3ff28ee84d3c414bac9cfbd7531346d9/ripetomatodetection_1/FP16/ripetomatodetection_1.xml. Please check that reader library exists in your PATH.\\n",

"stdout": "Inference Engine: \\n\\tIE version ......... 2021.4.2\\n\\tBuild ........... 2021.4.2-3974-e2a469a3450-releases/2021/4\\n"

}

Raise HTTPError(http_error_msg, response=self)

requests.exceptions.HTTPError: 400 Client Error: BAD REQUEST for url: https://blobconverter.luxonis.com/compile?version=2021.4&no_cache=False

How to fix it? Thank you very much!

jakaskerl · Feb 15, 2024

Hi @KhnhTrungTrn
What model are you trying to convert? Only object detection models work with OAK devices.

Thanks,
Jaka

KhnhTrungTrn · Feb 16, 2024

I trained an object detection model on Roboflow

Model Type: Roboflow 3.0 Object Detection (Fast)

Checkpoint: COCOn

I also tried to use the "blobconverter.from_zoo()" to deploy the pt "face-detection-retail-0004" model from depthai zoo and it's kind of showing the similar error

Model face-detection-retail-0004 not found in model zoo

Conversion failed due to 400 Client Error: BAD REQUEST for url: https://blobconverter.luxonis.com/compile?version=2022.1&no_cache=False

Trying to find backup... (model="face-detection-retail-0004", shaves="6", version="2022.1")

Unable to fetch model from backup server due to: 404 Client Error: Not Found for url: https://artifacts.luxonis.com/artifactory/blobconverter-backup/blobs/face-detection-retail-0004_openvino_2022.1_6shave.blob

jakaskerl · Feb 16, 2024

Hi @KhnhTrungTrn
Looks like the precompiled blobs are available only for openvino version 2021.4.
I'd suggest manually compiling by going to https://blobconverter.luxonis.com/

Thanks,
Jaka

KhnhTrungTrn · Feb 16, 2024

If I have already had a blob file for my own custom model ( I convert from yolov8 .pt file), then how can I simply deploy it with depth measurement ? Is there any example ?

KhnhTrungTrn · Feb 16, 2024

I have tried this code to deploy my blob file but it did not work, the camera frame froze. Though, my custom model worked perfectly when I deployed it with the depthai_demo.py code.

import cv2
import depthai as dai
import time
import blobconverter


FRAME_SIZE = (640, 360)
DET_INPUT_SIZE = (320, 320)

blob_path = "/home/trungtran/catkin_ws/src/task_exe/src/best.blob"

pipeline = dai.Pipeline()

# Define a source - color camera
cam = pipeline.createColorCamera()
cam.setPreviewSize(FRAME_SIZE[0], FRAME_SIZE[1])
cam.setInterleaved(False)
cam.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P)
cam.setBoardSocket(dai.CameraBoardSocket.RGB)

mono_left = pipeline.createMonoCamera()
mono_left.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
mono_left.setBoardSocket(dai.CameraBoardSocket.LEFT)
mono_right = pipeline.createMonoCamera()
mono_right.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
mono_right.setBoardSocket(dai.CameraBoardSocket.RIGHT)

	
stereo = pipeline.createStereoDepth()
mono_left.out.link(stereo.left)
mono_right.out.link(stereo.right)


# Define a neural network model
tomato_spac_det_nn = pipeline.createYoloSpatialDetectionNetwork()
tomato_spac_det_nn.setConfidenceThreshold(0.5)
tomato_spac_det_nn.setBlobPath(blob_path)
tomato_spac_det_nn.setDepthLowerThreshold(100)
tomato_spac_det_nn.setDepthUpperThreshold(5000)

tomato_det_manip = pipeline.createImageManip()
tomato_det_manip.initialConfig.setResize(DET_INPUT_SIZE[0], DET_INPUT_SIZE[1])
tomato_det_manip.setMaxOutputFrameSize(1244160)
tomato_det_manip.setWaitForConfigInput(True)
tomato_det_manip.initialConfig.setKeepAspectRatio(False)

cam.preview.link(face_det_manip.inputImage)
tomato_det_manip.out.link(tomato_spac_det_nn.input)
stereo.depth.link(tomato_spac_det_nn.inputDepth)
# Define the link between the color camera and the neural network
x_preview_out = pipeline.createXLinkOut()
x_preview_out.setStreamName("preview")
cam.preview.link(x_preview_out.input)
# rgb.preview.link(rgb_out.input)

# Create an output for the object detection results
det_out = pipeline.createXLinkOut()
det_out.setStreamName('det_out')
face_spac_det_nn.out.link(det_out.input)

def display_info(frame, bbox, coordinates, status, status_color, fps):
    # Display bounding box
    cv2.rectangle(frame, bbox, status_color[status], 2)
 
    # Display coordinates
    if coordinates is not None:
        coord_x, coord_y, coord_z = coordinates
        cv2.putText(frame, f"X: {int(coord_x)} mm", (bbox[0] + 10, bbox[1] + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
        cv2.putText(frame, f"Y: {int(coord_y)} mm", (bbox[0] + 10, bbox[1] + 35), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
        cv2.putText(frame, f"Z: {int(coord_z)} mm", (bbox[0] + 10, bbox[1] + 50), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
 
    # Create background for showing details
    cv2.rectangle(frame, (5, 5, 175, 100), (50, 0, 0), -1)
 
    # Display authentication status on the frame
    cv2.putText(frame, status, (20, 40), cv2.FONT_HERSHEY_SIMPLEX, 0.5, status_color[status])
 
    # Display instructions on the frame
    cv2.putText(frame, f'FPS: {fps:.2f}', (20, 80), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255))
# Frame count
frame_count = 0
 
# Placeholder fps value
fps = 0
 
# Used to record the time when we processed last frames
prev_frame_time = 0
 
# Used to record the time at which we processed current frames
new_frame_time = 0
 
# Set status colors
status_color = {
    'Tomato Detected': (0, 255, 0),
    'No Tomato Detected': (0, 0, 255)
}
# Create a DepthAI device and start the pipeline
with dai.Device(pipeline) as device:
 
    # Output queue will be used to get the right camera frames from the outputs defined above
    q_cam = device.getOutputQueue(name="preview", maxSize=4, blocking=False)
 
    # Output queue will be used to get nn data from the video frames.
    q_det = device.getOutputQueue(name="det_out")
 
    # # Output queue will be used to get nn data from the video frames.
    # q_bbox_depth_mapping = device.getOutputQueue(name="bbox_depth_mapping_out", maxSize=4, blocking=False)
 
    while True:
        # Get right camera frame
        in_cam = q_cam.get()
        frame = in_cam.getCvFrame()
 
        bbox = None
        coordinates = None
 
        inDet = q_det.tryGet()
 
        if inDet is not None:
            detections = inDet.detections
 
            # if face detected
            if len(detections) != 0:
                detection = detections[0]
 
                # Correct bounding box
                xmin = max(0, detection.xmin)
                ymin = max(0, detection.ymin)
                xmax = min(detection.xmax, 1)
                ymax = min(detection.ymax, 1)
 
                # Calculate coordinates
                x = int(xmin*FRAME_SIZE[0])
                y = int(ymin*FRAME_SIZE[1])
                w = int(xmax*FRAME_SIZE[0]-xmin*FRAME_SIZE[0])
                h = int(ymax*FRAME_SIZE[1]-ymin*FRAME_SIZE[1])
 
                bbox = (x, y, w, h)
 
                # Get spacial coordinates
                coord_x = detection.spatialCoordinates.x
                coord_y = detection.spatialCoordinates.y
                coord_z = detection.spatialCoordinates.z
 
                coordinates = (coord_x, coord_y, coord_z)
 
       
        if bbox:
            # Tomato detected
            status = 'Tomato Detected'
        else:
            # No tomato detected
            status = 'No Tomato Detected'
 
        # Display info on frame
        display_info(frame, bbox, coordinates, status, status_color, fps)
 
        # Calculate average fps
        if frame_count % 10 == 0:
            # Time when we finish processing last 100 frames
            new_frame_time = time.time()
 
            # Fps will be number of frame processed in one second
            fps = 1 / ((new_frame_time - prev_frame_time)/10)
            prev_frame_time = new_frame_time
 
        # Capture the key pressed
        key_pressed = cv2.waitKey(1) & 0xff
 
        # Stop the program if Esc key was pressed
        if key_pressed == 27:
            break
 
        # Display the final frame
        cv2.imshow("Face Cam", frame)
 
        # Increment frame count
        # frame_count += 1
 
cv2.destroyAllWindows()

jakaskerl · Feb 17, 2024

KhnhTrungTrn
You mean like YoloSpatialDetection? https://docs.luxonis.com/projects/api/en/latest/components/nodes/yolo_spatial_detection_network/

KhnhTrungTrn
Are you getting any errors? Could you try lowering the FPS to 5 to check if the NN is not able to run fast enough?

Thanks,
Jaka