I'm working on a DepthAI project using an OAK-D 4 Pro camera, and I'm trying to run a custom person detection model called scrfd_person.rvc4.tar.xz. I followed the official RVC4 documentation and used the code example that loads an NN Archive with dai.NNArchive() and passes it to a SpatialDetectionNetwork. But when I run the code, I get an error saying Unsupported parser: SCRFDParser. I'm not sure why this is happening since I thought the archive would include everything needed. Has anyone else run into this with custom models from the NN Archive? Do I need to do something different to make the SCRFD model work?

https://rvc4.docs.luxonis.com/software/ai-inference/inference/

#!/usr/bin/env python3

from pathlib import Path
import sys
import cv2
import depthai as dai
import numpy as np
import time

nn_archive = dai.NNArchive("/home/oshkosh-host/Downloads/scrfd_person.rvc4.tar.xz")

FPS = 30

class SpatialVisualizer(dai.node.HostNode):
    def __init__(self):
        dai.node.HostNode.__init__(self)
        self.sendProcessingToPipeline(True)

    def build(self, depth: dai.Node.Output, detections: dai.Node.Output, rgb: dai.Node.Output):
        self.link_args(depth, detections, rgb)  # Must match the inputs to the process method

    def process(self, depthPreview, detections, rgbPreview):
        depthPreview = depthPreview.getCvFrame()
        rgbPreview = rgbPreview.getCvFrame()
        depthFrameColor = self.processDepthFrame(depthPreview)
        self.displayResults(rgbPreview, depthFrameColor, detections.detections)

    def processDepthFrame(self, depthFrame):
        depth_downscaled = depthFrame[::4]
        if np.all(depth_downscaled == 0):
            min_depth = 0
        else:
            min_depth = np.percentile(depth_downscaled[depth_downscaled != 0], 1)
        max_depth = np.percentile(depth_downscaled, 99)
        depthFrameColor = np.interp(depthFrame, (min_depth, max_depth), (0, 255)).astype(np.uint8)
        return cv2.applyColorMap(depthFrameColor, cv2.COLORMAP_HOT)

    def displayResults(self, rgbFrame, depthFrameColor, detections):
        height, width, _ = rgbFrame.shape
        for detection in detections:
            self.drawBoundingBoxes(depthFrameColor, detection)
            self.drawDetections(rgbFrame, detection, width, height)
        cv2.imshow("depth", depthFrameColor)
        cv2.imshow("rgb", rgbFrame)
        if cv2.waitKey(1) == ord('q'):
            self.stopPipeline()

    def drawBoundingBoxes(self, depthFrameColor, detection):
        roiData = detection.boundingBoxMapping
        roi = roiData.roi
        roi = roi.denormalize(depthFrameColor.shape[1], depthFrameColor.shape[0])
        topLeft = roi.topLeft()
        bottomRight = roi.bottomRight()
        cv2.rectangle(depthFrameColor, (int(topLeft.x), int(topLeft.y)), (int(bottomRight.x), int(bottomRight.y)), (255, 255, 255), 1)

    def drawDetections(self, frame, detection, frameWidth, frameHeight):
        x1 = int(detection.xmin * frameWidth)
        x2 = int(detection.xmax * frameWidth)
        y1 = int(detection.ymin * frameHeight)
        y2 = int(detection.ymax * frameHeight)
        try:
            label = self.labelMap[detection.label]  # Ensure labelMap is accessible
        except IndexError:
            label = detection.label
        color = (255, 255, 255)
        cv2.putText(frame, str(label), (x1 + 10, y1 + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color)
        cv2.putText(frame, "{:.2f}".format(detection.confidence * 100), (x1 + 10, y1 + 35), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color)
        cv2.putText(frame, f"X: {int(detection.spatialCoordinates.x)} mm", (x1 + 10, y1 + 50), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color)
        cv2.putText(frame, f"Y: {int(detection.spatialCoordinates.y)} mm", (x1 + 10, y1 + 65), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color)
        cv2.putText(frame, f"Z: {int(detection.spatialCoordinates.z)} mm", (x1 + 10, y1 + 80), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color)
        cv2.rectangle(frame, (x1, y1), (x2, y2), color, 1)

# Creates the pipeline and a default device implicitly
with dai.Pipeline() as p:
    # Define sources and outputs
    camRgb = p.create(dai.node.Camera).build(dai.CameraBoardSocket.CAM_A)
    monoLeft = p.create(dai.node.Camera).build(dai.CameraBoardSocket.CAM_B)
    monoRight = p.create(dai.node.Camera).build(dai.CameraBoardSocket.CAM_C)
    stereo = p.create(dai.node.StereoDepth)
    spatialDetectionNetwork = p.create(dai.node.SpatialDetectionNetwork).build(camRgb, stereo, nn_archive, fps=FPS)
    visualizer = p.create(SpatialVisualizer)

    # Setting node configs
    stereo.setExtendedDisparity(True)
    platform = p.getDefaultDevice().getPlatform()
    if platform == dai.Platform.RVC2:
        # For RVC2, width must be divisible by 16
        stereo.setOutputSize(640, 400)

    spatialDetectionNetwork.input.setBlocking(False)
    spatialDetectionNetwork.setBoundingBoxScaleFactor(0.5)
    spatialDetectionNetwork.setDepthLowerThreshold(100)
    spatialDetectionNetwork.setDepthUpperThreshold(5000)

    # Linking
    monoLeft.requestOutput((640, 400)).link(stereo.left)
    monoRight.requestOutput((640, 400)).link(stereo.right)
    visualizer.labelMap = spatialDetectionNetwork.getClasses()
    visualizer.build(stereo.depth, spatialDetectionNetwork.out, spatialDetectionNetwork.passthrough)

    p.run()

This is the code I'm running. What should I fix so it can run with that tar.xz file?

Hi, glad to see you using your OAK-D 4 Pro camera!
The reason the SCRFD Person model doesn't work in SpatialDetectionNetwork is that the two aren't compatible with each other. SpatialDetectionNetwork can currently only be used with SSD or YOLO models (e.g. YOLOv6-nano), because the node uses the DAI-native DetectionNetwork underneath, whereas the SCRFD model needs a different kind of postprocessing. So there are two paths you can take:

  • If you want to perform the spatial calculation on the device, I would recommend using SpatialDetectionNetwork in combination with a YOLOv6 model from our ZOO and keeping only the detections with the "person" class (see the sketch after this list).
  • The other option is to use the SCRFD Person model in a pipeline like this, merge the detections with stereo depth, and then compute the spatial coordinates on the host.
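
To make the first option concrete, here is a rough sketch of loading a YOLOv6-nano archive from the Zoo and keeping only the "person" detections on the host. The model slug and the dai.getModelFromZoo()/dai.NNModelDescription calls are assumptions based on the Zoo workflow, so please verify them against the docs for your depthai version:

import depthai as dai

# Assumption: "yolov6-nano" resolves in the model Zoo for your device; the exact slug may differ.
nn_archive = dai.NNArchive(dai.getModelFromZoo(dai.NNModelDescription("yolov6-nano")))

def keep_person_detections(detections, label_map):
    # Hypothetical helper: keep only detections whose class name is "person".
    return [
        d for d in detections
        if 0 <= d.label < len(label_map) and label_map[d.label] == "person"
    ]

In your script, this archive would replace the SCRFD one passed to SpatialDetectionNetwork, and the helper could be applied to detections.detections inside process() so that only person boxes are drawn and used for the spatial coordinates.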

Let me know if you have any questions or issues.
Best,
Klemen

Hello,

Thank you so much for your response, your guidance was very helpful.

2 months later

I am trying to create an archive for my model, and this is the code:
from luxonis_ml.nn_archive import ArchiveGenerator
from luxonis_ml.nn_archive.config_building_blocks import (DataType, InputType)
from luxonis_ml.nn_archive.config import CONFIG_VERSION

labels = ["a", "b", "c", "d", "e", "f"]

config = {
    "config_version": CONFIG_VERSION,
    "model": {
        "metadata": {
            "name":      "last",
            "path":      "last.onnx",
            "precision": DataType.FLOAT32,
        },
        "inputs": [{
            "name":       "images",
            "dtype":      DataType.FLOAT32,
            "input_type": InputType.IMAGE,
            "shape":      [1, 3, 896, 896],
            "layout":     "NCHW",
            "preprocessing": {
                "mean":   [0.0, 0.0, 0.0],
                "scale":  [255.0, 255.0, 255.0],
            },
        }],
        "outputs": [{
            "name":  "output0",
            "dtype": DataType.FLOAT32,
            "shape": [1, 10, 16464],
        }],
        "heads": [{
            "parser": "DetectionParser",
            "metadata": {
                "postprocessor_path": None,
                "conf_threshold": 0.50,
                "iou_threshold":  0.50,
                "max_det":        300,
                "label_names":    labels,
            },
            "outputs": ["output0"],
        }],
    },
}

archive = ArchiveGenerator(
    archive_name      = "last",
    save_path         = "./",
    cfg_dict          = config,
    executables_paths = ["last.onnx"],
)

archive_path = archive.make_archive()

Is this correct? I am facing the error below, please help me resolve it; this is a very important project.

parsers[index] = pipeline.create(parser).build(head_config)

TypeError: DetectionParser.__init__() missing 3 required positional arguments: 'conf_threshold', 'iou_threshold', and 'max_det'

The error is raised starting from this call:

nn_with_parser = pipeline.create(ParsingNeuralNetwork).build(

@jakaskerl

    Hi @KevinSani,

    the creation of the NN Archive seems to be correct.

    parsers[index] = pipeline.create(parser).build(head_config)

    What do you provide here as head_config? I don't see this specified in your message.

    To simplify things, why don't you use the ParsingNeuralNetwork to automatically load the parser? The whole point of creating the NN Archive is that the parser creation step is abstracted. You can consult a simple example of a pipeline using the ParsingNeuralNetwork here. For example, you could do something as follows:

    input_node = pipeline.create(dai.node.Camera).build()
    nn_archive = dai.NNArchive("path/to/your/archive.tar.xz")
    nn_with_parser = pipeline.create(ParsingNeuralNetwork).build(
        input_node, nn_archive
    )
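
    For completeness, a minimal standalone sketch of reading the parsed output on the host could look roughly like this. The ParsingNeuralNetwork import path and the exact message type returned by the parser depend on your depthai-nodes version, so treat this as an assumption to verify:

    import depthai as dai
    from depthai_nodes.node import ParsingNeuralNetwork  # import path may differ between depthai-nodes versions

    with dai.Pipeline() as pipeline:
        input_node = pipeline.create(dai.node.Camera).build()
        nn_archive = dai.NNArchive("path/to/your/archive.tar.xz")
        nn_with_parser = pipeline.create(ParsingNeuralNetwork).build(input_node, nn_archive)

        # Queue for the parsed output; the message type (dai.ImgDetections or an extended
        # detections message from depthai-nodes) depends on the parser defined in the archive.
        detections_queue = nn_with_parser.out.createOutputQueue()

        pipeline.start()
        while pipeline.isRunning():
            detections = detections_queue.get()
            print([(d.label, round(d.confidence, 2)) for d in detections.detections])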

    Hello @jakob, I found the cause of that error: the DetectionParser I was using came from an old version. However, my model doesn't fit DetectionParser anyway, since it needs 2 outputs and my model has a single output tensor with 10 values, so I am trying to use the YOLOExtendedParser. Are there any examples of using that particular parser with a custom model?

    6 days later

    Hi @KevinSani,

    Not sure I understand what exactly you need, but if you are looking for a YOLOExtendedParser example, check out our yolo-p example.
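
    If you want to try it on your own archive, one way is to change the head entry in the config you posted above so that it names YOLOExtendedParser instead of DetectionParser. A rough sketch follows; the metadata field names used here (n_classes, classes, thresholds) are assumptions and should be checked against the depthai-nodes / NN Archive documentation before use:

    labels = ["a", "b", "c", "d", "e", "f"]

    # Assumed metadata field names; verify against the depthai-nodes parser docs.
    heads = [{
        "parser": "YOLOExtendedParser",
        "metadata": {
            "n_classes":      len(labels),
            "classes":        labels,
            "conf_threshold": 0.50,
            "iou_threshold":  0.50,
            "max_det":        300,
        },
        "outputs": ["output0"],
    }]

    The rest of the config and the ArchiveGenerator call can stay as in your earlier snippet; once the archive is regenerated, ParsingNeuralNetwork should pick up the new parser from it automatically.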