I'm working on a DepthAI project using an OAK-D 4 Pro camera, and I'm trying to run a custom person detection model called scrfd_person.rvc4.tar.xz. I followed the official RVC4 documentation and used the code example that loads an NN Archive with dai.NNArchive() and passes it to a SpatialDetectionNetwork. But when I run the code, I get the error "Unsupported parser: SCRFDParser". I'm not sure why this is happening, since I thought the archive would include everything needed. Has anyone else run into this with custom models from the NN Archive? Do I need to do something different to make the SCRFD model work?

https://rvc4.docs.luxonis.com/software/ai-inference/inference/

#!/usr/bin/env python3

import cv2
import depthai as dai
import numpy as np

nn_archive = dai.NNArchive("/home/oshkosh-host/Downloads/scrfd_person.rvc4.tar.xz")

FPS = 30


class SpatialVisualizer(dai.node.HostNode):
    def __init__(self):
        dai.node.HostNode.__init__(self)
        self.sendProcessingToPipeline(True)

    def build(self, depth: dai.Node.Output, detections: dai.Node.Output, rgb: dai.Node.Output):
        self.link_args(depth, detections, rgb)  # Must match the inputs to the process method

    def process(self, depthPreview, detections, rgbPreview):
        depthPreview = depthPreview.getCvFrame()
        rgbPreview = rgbPreview.getCvFrame()
        depthFrameColor = self.processDepthFrame(depthPreview)
        self.displayResults(rgbPreview, depthFrameColor, detections.detections)

    def processDepthFrame(self, depthFrame):
        depth_downscaled = depthFrame[::4]
        if np.all(depth_downscaled == 0):
            min_depth = 0
        else:
            min_depth = np.percentile(depth_downscaled[depth_downscaled != 0], 1)
        max_depth = np.percentile(depth_downscaled, 99)
        depthFrameColor = np.interp(depthFrame, (min_depth, max_depth), (0, 255)).astype(np.uint8)
        return cv2.applyColorMap(depthFrameColor, cv2.COLORMAP_HOT)

    def displayResults(self, rgbFrame, depthFrameColor, detections):
        height, width, _ = rgbFrame.shape
        for detection in detections:
            self.drawBoundingBoxes(depthFrameColor, detection)
            self.drawDetections(rgbFrame, detection, width, height)
        cv2.imshow("depth", depthFrameColor)
        cv2.imshow("rgb", rgbFrame)
        if cv2.waitKey(1) == ord('q'):
            self.stopPipeline()

    def drawBoundingBoxes(self, depthFrameColor, detection):
        roiData = detection.boundingBoxMapping
        roi = roiData.roi
        roi = roi.denormalize(depthFrameColor.shape[1], depthFrameColor.shape[0])
        topLeft = roi.topLeft()
        bottomRight = roi.bottomRight()
        cv2.rectangle(depthFrameColor, (int(topLeft.x), int(topLeft.y)), (int(bottomRight.x), int(bottomRight.y)), (255, 255, 255), 1)

    def drawDetections(self, frame, detection, frameWidth, frameHeight):
        x1 = int(detection.xmin * frameWidth)
        x2 = int(detection.xmax * frameWidth)
        y1 = int(detection.ymin * frameHeight)
        y2 = int(detection.ymax * frameHeight)
        try:
            label = self.labelMap[detection.label]  # Ensure labelMap is accessible
        except IndexError:
            label = detection.label
        color = (255, 255, 255)
        cv2.putText(frame, str(label), (x1 + 10, y1 + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color)
        cv2.putText(frame, "{:.2f}".format(detection.confidence * 100), (x1 + 10, y1 + 35), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color)
        cv2.putText(frame, f"X: {int(detection.spatialCoordinates.x)} mm", (x1 + 10, y1 + 50), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color)
        cv2.putText(frame, f"Y: {int(detection.spatialCoordinates.y)} mm", (x1 + 10, y1 + 65), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color)
        cv2.putText(frame, f"Z: {int(detection.spatialCoordinates.z)} mm", (x1 + 10, y1 + 80), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color)
        cv2.rectangle(frame, (x1, y1), (x2, y2), color, 1)


# Creates the pipeline and a default device implicitly
with dai.Pipeline() as p:
    # Define sources and outputs
    camRgb = p.create(dai.node.Camera).build(dai.CameraBoardSocket.CAM_A)
    monoLeft = p.create(dai.node.Camera).build(dai.CameraBoardSocket.CAM_B)
    monoRight = p.create(dai.node.Camera).build(dai.CameraBoardSocket.CAM_C)
    stereo = p.create(dai.node.StereoDepth)
    spatialDetectionNetwork = p.create(dai.node.SpatialDetectionNetwork).build(camRgb, stereo, nn_archive, fps=FPS)
    visualizer = p.create(SpatialVisualizer)

    # Setting node configs
    stereo.setExtendedDisparity(True)
    platform = p.getDefaultDevice().getPlatform()
    if platform == dai.Platform.RVC2:
        # For RVC2, width must be divisible by 16
        stereo.setOutputSize(640, 400)

    spatialDetectionNetwork.input.setBlocking(False)
    spatialDetectionNetwork.setBoundingBoxScaleFactor(0.5)
    spatialDetectionNetwork.setDepthLowerThreshold(100)
    spatialDetectionNetwork.setDepthUpperThreshold(5000)

    # Linking
    monoLeft.requestOutput((640, 400)).link(stereo.left)
    monoRight.requestOutput((640, 400)).link(stereo.right)
    visualizer.labelMap = spatialDetectionNetwork.getClasses()
    visualizer.build(stereo.depth, spatialDetectionNetwork.out, spatialDetectionNetwork.passthrough)

    p.run()

This is the code I'm running. What should I fix to get that tar.xz file working?

Hi, glad to see you using your OAK-D 4 Pro camera!
The reason the SCRFD Person model doesn't work when used in the SpatialDetectionNetwork is that the two aren't compatible with each other. The SpatialDetectionNetwork can currently be used only with SSD or YOLO models (e.g. YOLOv6-nano), because the node uses the native DAI DetectionNetwork underneath, whereas the SCRFD model needs a different kind of postprocessing. The "SCRFDParser" in your error is simply the parser that the archive's own metadata requests (first sketch below). So there are two paths you can take:

  • If you want to perform the spatial calculation on the device, I would recommend using the SpatialDetectionNetwork in combination with a YOLOv6 model from our ZOO and keeping only the detections with the "person" class (second sketch below).
  • The other option is to use the SCRFD Person model in a pipeline like this, merge the detections with stereo depth, and then compute the spatial coordinates on the host (third sketch below).
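To see where the parser name in your error comes from, you can inspect the archive itself. A minimal check using only the standard library, assuming the standard NN Archive layout (a config.json at the root whose heads each declare a parser):

import json
import tarfile

archive_path = "/home/oshkosh-host/Downloads/scrfd_person.rvc4.tar.xz"

with tarfile.open(archive_path, "r:xz") as tar:
    # An NN Archive bundles the model binary together with a config.json
    # that describes inputs, outputs and the required output parser(s).
    member = next(m for m in tar.getmembers() if m.name.endswith("config.json"))
    config = json.load(tar.extractfile(member))

for head in config["model"]["heads"]:
    print(head.get("parser"))  # expected to print "SCRFDParser" for this archive

SpatialDetectionNetwork only knows how to decode SSD- and YOLO-style heads, so any other parser value in the config produces the "Unsupported parser" error you saw.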
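For the first path, the only real change to your script is what you hand to the SpatialDetectionNetwork, plus a small filter on the detections. A sketch, assuming "yolov6-nano" as the model slug (that is the one used in the DepthAI v3 examples; check the ZOO for the exact slug you want) and that SpatialDetectionNetwork.build accepts an NNModelDescription the same way it accepts an archive:

import depthai as dai

FPS = 30

with dai.Pipeline() as p:
    camRgb = p.create(dai.node.Camera).build(dai.CameraBoardSocket.CAM_A)
    monoLeft = p.create(dai.node.Camera).build(dai.CameraBoardSocket.CAM_B)
    monoRight = p.create(dai.node.Camera).build(dai.CameraBoardSocket.CAM_C)
    stereo = p.create(dai.node.StereoDepth)
    monoLeft.requestOutput((640, 400)).link(stereo.left)
    monoRight.requestOutput((640, 400)).link(stereo.right)

    # A YOLO model the node can decode natively, fetched from the ZOO,
    # instead of the local SCRFD archive.
    model = dai.NNModelDescription("yolov6-nano")
    sdn = p.create(dai.node.SpatialDetectionNetwork).build(camRgb, stereo, model, fps=FPS)

    labelMap = sdn.getClasses()
    detQ = sdn.out.createOutputQueue()

    p.start()
    while p.isRunning():
        dets = detQ.get()
        # Keep only detections whose class name is "person"; the other
        # COCO classes are simply dropped.
        persons = [d for d in dets.detections if labelMap[d.label] == "person"]
        for d in persons:
            print(f"person  Z = {int(d.spatialCoordinates.z)} mm")

Plugged into your script, the same filter would go at the top of SpatialVisualizer.process, before displayResults is called.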
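For the second path, the depthai-nodes package (pip install depthai-nodes) ships the SCRFDParser that the archive asks for, and its ParsingNeuralNetwork node wires it up from the archive automatically. A sketch of the host-side spatial calculation, assuming the parsed detections carry the usual normalized xmin/ymin/xmax/ymax fields (depending on your depthai-nodes version the parser may emit rotated rects instead) and that the depth map overlaps the RGB frame closely enough for a per-box median:

import depthai as dai
import numpy as np
from depthai_nodes.node import ParsingNeuralNetwork  # pip install depthai-nodes

with dai.Pipeline() as p:
    camRgb = p.create(dai.node.Camera).build(dai.CameraBoardSocket.CAM_A)
    monoLeft = p.create(dai.node.Camera).build(dai.CameraBoardSocket.CAM_B)
    monoRight = p.create(dai.node.Camera).build(dai.CameraBoardSocket.CAM_C)
    stereo = p.create(dai.node.StereoDepth)
    monoLeft.requestOutput((640, 400)).link(stereo.left)
    monoRight.requestOutput((640, 400)).link(stereo.right)

    # ParsingNeuralNetwork reads the required parser from the archive's
    # config, so the SCRFD head is decoded on the host.
    nn_archive = dai.NNArchive("/home/oshkosh-host/Downloads/scrfd_person.rvc4.tar.xz")
    nn = p.create(ParsingNeuralNetwork).build(camRgb, nn_archive)

    detQ = nn.out.createOutputQueue()
    depthQ = stereo.depth.createOutputQueue()

    # Intrinsics of the camera the depth map is aligned to (right mono by
    # default), used to back-project pixel + depth into X/Y/Z.
    calib = p.getDefaultDevice().readCalibration()
    M = np.array(calib.getCameraIntrinsics(dai.CameraBoardSocket.CAM_C, 640, 400))
    fx, fy, cx, cy = M[0, 0], M[1, 1], M[0, 2], M[1, 2]

    p.start()
    while p.isRunning():
        dets = detQ.get()
        depth = depthQ.get().getFrame()  # uint16 depth in millimetres
        h, w = depth.shape
        for d in dets.detections:
            x1, y1 = max(int(d.xmin * w), 0), max(int(d.ymin * h), 0)
            x2, y2 = min(int(d.xmax * w), w), min(int(d.ymax * h), h)
            roi = depth[y1:y2, x1:x2]
            valid = roi[roi != 0]
            if valid.size == 0:
                continue
            z = float(np.median(valid))          # robust depth for the box
            u, v = (x1 + x2) / 2, (y1 + y2) / 2  # box centre in pixels
            x = (u - cx) * z / fx                # pinhole back-projection
            y = (v - cy) * z / fy
            print(f"person  X={x:.0f}  Y={y:.0f}  Z={z:.0f} mm")

This is essentially what the SpatialDetectionNetwork does on-device (median/average depth inside a scaled ROI plus back-projection), just moved to the host.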

Let me know if you have any questions or issues.
Best,
Klemen

Hello,

Thank you so much for your response; your guidance was very helpful.