Two Stage Object Detection

Aawetzel · Sep 15, 2022

I’ve been attempting to get this two-stage neural network model working, and I have been simplifying it, but nothing works. It appears to get 4 frames in and then hangs. I have tried the loopback approach and other sync approaches found in different online examples and experiments. Would you mind looking at the simplified pipeline I am running and letting me know if anything is broken, or if it is simply hanging because the second network is too slow?

import os
from os.path import dirname

import cv2
import depthai as dai
import numpy as np


def frame_norm(img, bounding_box):
    """Scale a normalized bounding box to integer pixel coordinates.

    Values at even positions of ``bounding_box`` are multiplied by
    ``img.shape[1]`` and values at odd positions by ``img.shape[0]``.
    Every value is clipped to the range [0, 1] before scaling.

    Args:
        img: Array-like object exposing ``shape`` with at least two entries.
        bounding_box: Sequence of normalized coordinates, e.g.
            ``(xmin, ymin, xmax, ymax)``.

    Returns:
        A NumPy integer array with the same length as ``bounding_box``.
    """
    dim0, dim1 = img.shape[0], img.shape[1]
    positions = np.arange(len(bounding_box))
    # Even slots scale by shape[1], odd slots by shape[0].
    scale = np.where(positions % 2 == 0, dim1, dim0)
    clipped = np.clip(np.array(bounding_box), 0, 1)
    return (clipped * scale).astype(int)


# Directory the script is launched from; the .blob files are looked up here.
PATH = os.path.abspath(os.getcwd())

# Flat list of anchor values, shared by both YOLO networks.
anchors = [
    10.0, 13.0, 16.0, 30.0, 33.0, 23.0,
    30.0, 61.0, 62.0, 45.0, 59.0, 119.0,
    116.0, 90.0, 156.0, 198.0, 373.0, 326.0,
]

# Anchor indices per output layer.
# NOTE(review): the "sideNN" keys presumably match the output grid sizes of a
# 320x320 model (40/20/10) -- confirm against the exported blobs.
masks = {
    "side40": [0, 1, 2],
    "side20": [3, 4, 5],
    "side10": [6, 7, 8],
}

# Create Pipeline
# Two-stage detection: the camera preview is resized and fed to the "mouse"
# network; each mouse detection is turned into a crop config by the Script
# node, and the cropped preview is fed to the "scroll_wheel" network.
pipeline = dai.Pipeline()

# Create Cam
cam = pipeline.create(dai.node.ColorCamera)
cam.setPreviewSize(1920, 1080)
cam.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P)
cam.setColorOrder(dai.ColorCameraProperties.ColorOrder.RGB)
cam.setInterleaved(False)
cam.setFps(10)

# Image Manip: resize the full preview to the 320x320 first-stage input.
manip = pipeline.create(dai.node.ImageManip)
manip.initialConfig.setResize(320, 320)
manip.setMaxOutputFrameSize(320 * 320 * 3)
manip.initialConfig.setKeepAspectRatio(False)

# Create mouse Neural Network
mouse_nn = pipeline.create(dai.node.YoloDetectionNetwork)
mouse_nn.setBlob(os.path.join(PATH, 'mouse.blob'))
mouse_nn.setConfidenceThreshold(.7)
mouse_nn.setNumClasses(1)
mouse_nn.setCoordinateSize(4)
mouse_nn.setAnchors(anchors)
mouse_nn.setAnchorMasks(masks)

# Create scroll_wheel Neural Network
scroll_wheel_nn = pipeline.create(dai.node.YoloDetectionNetwork)
scroll_wheel_nn.setBlob(os.path.join(PATH, 'scroll.blob'))
scroll_wheel_nn.setConfidenceThreshold(.7)
scroll_wheel_nn.setNumClasses(1)
scroll_wheel_nn.setCoordinateSize(4)
scroll_wheel_nn.setAnchors(anchors)
scroll_wheel_nn.setAnchorMasks(masks)

# Create Script
# Runs on the device: for every mouse detection, clamp the box just inside
# the frame and send one crop+resize config to the second-stage ImageManip.
script = pipeline.create(dai.node.Script)
script.setScript("""
def limit_roi(det):
    if det.xmin <= 0: det.xmin = 0.001
    if det.ymin <= 0: det.ymin = 0.001
    if det.xmax >= 1: det.xmax = 0.999
    if det.ymax >= 1: det.ymax = 0.999
    

while True:
    mouse_det = node.io['mouse'].get().detections
    for det in mouse_det:
        limit_roi(det)
        cfg = ImageManipConfig()
        cfg.setCropRect(det.xmin, det.ymin, det.xmax, det.ymax)
        cfg.setResize(320, 320)
        cfg.setKeepAspectRatio(False)
        node.io['manip_cfg'].send(cfg)
        

""")


# Create scroll_wheel Manip
scroll_wheel_manip = pipeline.create(dai.node.ImageManip)
scroll_wheel_manip.initialConfig.setResize(320, 320)
# Only emit a crop when the Script node has sent a config for it.
scroll_wheel_manip.inputConfig.setWaitForMessage(True)
# This manip receives every preview frame but only consumes one when a config
# arrives. With a blocking image input the queue fills after a few frames
# while no detection (and therefore no config) is available, and the whole
# pipeline hangs. Make the input non-blocking and keep only the newest frame.
# NOTE: frames and detections are not synchronized here, so the crop may be
# applied to a newer frame than the one the detection came from.
scroll_wheel_manip.inputImage.setBlocking(False)
scroll_wheel_manip.inputImage.setQueueSize(1)

# Label
cam_x = pipeline.create(dai.node.XLinkOut)
cam_x.setStreamName('cam')
scroll_wheel_x = pipeline.create(dai.node.XLinkOut)
scroll_wheel_x.setStreamName('scroll_wheel')

# Link
cam.preview.link(cam_x.input)
scroll_wheel_nn.out.link(scroll_wheel_x.input)
cam.preview.link(manip.inputImage)
cam.preview.link(scroll_wheel_manip.inputImage)
manip.out.link(mouse_nn.input)  # manip to mouse_nn input
scroll_wheel_manip.out.link(scroll_wheel_nn.input)
mouse_nn.out.link(script.inputs['mouse'])
script.outputs['manip_cfg'].link(scroll_wheel_manip.inputConfig)  # sends manip_cfg to scroll_wheel input

# Host side: upload the pipeline, then read both output streams forever.
with dai.Device(pipeline) as device:
    # Host queues for the two XLinkOut streams defined in the pipeline.
    cam_q = device.getOutputQueue('cam', maxSize=4, blocking=False)
    scroll_wheel_q = device.getOutputQueue('scroll_wheel', maxSize=4, blocking=False)

    # Drawing colors; not used by the loop below.
    mouse_color = (0, 255, 0)
    scroll_wheel_color = (255, 0, 255)

    while True:
        cam_msg = cam_q.get()
        frame = cam_msg.getCvFrame()
        nn_msg = scroll_wheel_q.get()
        scroll_wheel_det = nn_msg.detections

        for detection in scroll_wheel_det:
            # NOTE(review): these detections come from the network fed by the
            # cropped image, so they are presumably relative to the crop, not
            # to the full frame used for scaling here -- confirm.
            roi = (detection.xmin, detection.ymin, detection.xmax, detection.ymax)
            bbox = frame_norm(frame, roi)
            print(f'BBOX: {bbox}')

`

erik · Sep 15, 2022

Hi awetzel,
ImageManip(5).inputImage is blocking, which means that it will take 4 frames and then block, because it doesn't get any config (for a config you need detections). A simple solution would be to set inputImage to non-blocking with a queue size of 1 (so it always holds the latest image). Even better would be a proper 2-stage NN pipeline (with frame-detection syncing).

scroll_wheel_manip.inputImage.setBlocking(False)
scroll_wheel_manip.inputImage.setQueueSize(1)

This should fix the issue.
Thanks, Erik

Aawetzel · Sep 16, 2022

That makes a lot of sense and fixed it. I was unable to get the frame syncing to work, so I figured I'd get the simplest possible model working with two-stage object detection and then work my way back up.