Hi,

I am using an OAK-D Pro with IR and trying to get YOLO working. Right now I have this code:

#!/usr/bin/env python3

import cv2
import depthai as dai
import time
import numpy as np

if 1:  # PoE config
    fps = 30
    res = dai.MonoCameraProperties.SensorResolution.THE_400_P
    poolSize = 24  # default 3, increased to prevent desync
else:  # USB
    fps = 30
    res = dai.MonoCameraProperties.SensorResolution.THE_720_P
    poolSize = 8  # default 3, increased to prevent desync
    
def frameNorm(frame, bbox):
    # Scale normalized [0..1] bbox coordinates to pixel coordinates:
    # even indices (x) scale by frame width, odd indices (y) by frame height
    normVals = np.full(len(bbox), frame.shape[0])
    normVals[::2] = frame.shape[1]
    return (np.clip(np.array(bbox), 0, 1) * normVals).astype(int)

# Path to the model
pathBase = ""
pathYoloBlob = pathBase + "/yolov8n_openvino_2022.1_6shave.blob"

# YOLO class-label dictionary
translate = {0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorbike', 4: 'aeroplane', 5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', 20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase', 29: 'frisbee', 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle', 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli', 51: 'carrot', 52: 'hot dog', 53: 'pizza', 54: 'donut', 55: 'cake', 56: 'chair', 57: 'sofa', 58: 'pottedplant', 59: 'bed', 60: 'diningtable', 61: 'toilet', 62: 'tvmonitor', 63: 'laptop', 64: 'mouse', 65: 'remote', 66: 'keyboard', 67: 'cell phone', 68: 'microwave', 69: 'oven', 70: 'toaster', 71: 'sink', 72: 'refrigerator', 73: 'book', 74: 'clock', 75: 'vase', 76: 'scissors', 77: 'teddy bear', 78: 'hair drier', 79: 'toothbrush'}

# Create pipeline
pipeline = dai.Pipeline()

# Define sources and outputs
monoL = pipeline.create(dai.node.MonoCamera)
monoR = pipeline.create(dai.node.MonoCamera)

monoL.setCamera("left")
monoL.setResolution(res)
monoL.setFps(fps)
monoL.setNumFramesPool(poolSize)


monoR.setCamera("right")
monoR.setResolution(res)
monoR.setFps(fps)
monoR.setNumFramesPool(poolSize)


xoutFloodL = pipeline.create(dai.node.XLinkOut)
xoutFloodR = pipeline.create(dai.node.XLinkOut)


xoutFloodL.setStreamName('flood-left')
xoutFloodR.setStreamName('flood-right')
streams = ['flood-left', 'flood-right', 'nn_yolo']

# Script node for frame routing and IR dot/flood alternate
script = pipeline.create(dai.node.Script)
script.setProcessor(dai.ProcessorType.LEON_CSS)
script.setScript("""
    #dotBright = 0.8
    floodBright = 0.1
    LOGGING = False  # Set `True` for latency/timings debugging

    node.warn(f'IR drivers detected: {str(Device.getIrDrivers())}')

    while True:
        # Wait first for a frame event, received at MIPI start-of-frame
        event = node.io['event'].get()
        if LOGGING: tEvent = Clock.now()

        # Immediately reconfigure the IR driver.
        # Note the logic is inverted, as it applies for next frame
        Device.setIrFloodLightIntensity(floodBright)
        if LOGGING: tIrSet = Clock.now()

        # Wait for the actual frames (after MIPI capture and ISP proc is done)
        frameL = node.io['frameL'].get()
        if LOGGING: tLeft = Clock.now()
        frameR = node.io['frameR'].get()
        if LOGGING: tRight = Clock.now()
        if LOGGING:
            latIR      = (tIrSet - tEvent               ).total_seconds() * 1000
            latEv      = (tEvent - event.getTimestamp() ).total_seconds() * 1000
            latProcL   = (tLeft  - event.getTimestamp() ).total_seconds() * 1000
            diffRecvRL = (tRight - tLeft                ).total_seconds() * 1000
            node.warn(f'T[ms] latEv:{latEv:5.3f} latIR:{latIR:5.3f} latProcL:{latProcL:6.3f} '
                    + f' diffRecvRL:{diffRecvRL:5.3f}')


        # Route the frames to their respective outputs
        node.io['floodL'].send(frameL)
        node.io['floodR'].send(frameR)

""")

# Linking

# TODO: it crashes after a couple of seconds if monoR.frameEvent.link(script.inputs['event']) is used instead
# Same if monoR.out.link(manip.inputImage) below is changed to monoL.out.link(manip.inputImage)
monoL.frameEvent.link(script.inputs['event'])
monoL.out.link(script.inputs['frameL'])
monoR.out.link(script.inputs['frameR'])

script.outputs['floodL'].link(xoutFloodL.input)
script.outputs['floodR'].link(xoutFloodR.input)

# YOLO part

nn = pipeline.createYoloDetectionNetwork()
nn.setBlobPath(pathYoloBlob)

nn.setNumClasses(80)
nn.setCoordinateSize(4)
nn.setAnchors([])    # YOLOv8 is anchor-free, so anchors and masks stay empty
nn.setAnchorMasks({})
nn.setIouThreshold(0.5)
nn.setConfidenceThreshold(0.5)


# Take the image, resize it for the NN and convert it to the frame type the NN expects
manip = pipeline.create(dai.node.ImageManip)

manip.initialConfig.setResize(640, 640)
manip.initialConfig.setFrameType(dai.ImgFrame.Type.BGR888p)
manip.setMaxOutputFrameSize(1228800)  # 640 * 640 * 3
# Link the camera to manip
monoR.out.link(manip.inputImage)

# Link manip to the NN
manip.out.link(nn.input)

xout_nn_yolo = pipeline.createXLinkOut()
xout_nn_yolo.setStreamName("nn_yolo")
nn.out.link(xout_nn_yolo.input)

# Connect to device and start pipeline
time_start = time.time()
with dai.Device(pipeline) as device:
      
    q_nn_yolo = device.getOutputQueue("nn_yolo")
    q_flood_right = device.getOutputQueue("flood-right")
    
    frame = None
    detections_yolo = []  # initialized so the draw loop is safe before the first NN result arrives
    while True:
    
        in_nn_yolo = q_nn_yolo.tryGet()
        in_flood_right = q_flood_right.tryGet()
        
        if in_flood_right is not None:
            # If a packet from the flood-right stream is present, retrieve the frame in OpenCV format using getCvFrame
            frame = in_flood_right.getCvFrame()


        if in_nn_yolo is not None:
            # when data from the NN is received, take the detections array that contains the YOLO results
            detections_yolo = in_nn_yolo.detections
        
        if frame is not None:
            for detection in detections_yolo:
                # for each bounding box, we first normalize it to match the frame size
                bbox = frameNorm(frame, (detection.xmin, detection.ymin, detection.xmax, detection.ymax))
                # and then draw a rectangle on the frame to show the actual result
                cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0, 0, 255), 2)
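                # Hypothetical extra step (not in the original): label each box
                # using the `translate` dict defined above, assuming
                # detection.label indexes the same 80 COCO class ids
                cv2.putText(frame, translate.get(detection.label, str(detection.label)),
                            (bbox[0] + 5, bbox[1] + 20), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255))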

            cv2.imshow("preview", frame)

        if cv2.waitKey(5) == ord('q'):
            break

I have two problems:

  • I cannot delete monoR or monoL because, for some reason, if I change monoL.frameEvent.link(script.inputs['event']) to monoR.frameEvent.link(script.inputs['event']), it only works for one or two seconds; at least it works when using both.

  • With this code it works for 2 seconds, and this is the output:

I understand that the problem is the frame, because it draws more than one detection rectangle, so I guess the YOLO part is working fine. Any clues about what the problem is?

Thanks,
Pedro.

It works if I also get the flood-left queue; I guess the library does not clear the queue until it is retrieved.

q_flood_left = device.getOutputQueue("flood-left")
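
Alternatively, I think the host queue could be made non-blocking, so old frames are dropped instead of stalling the pipeline. A minimal sketch (assuming the default blocking queue is what causes the freeze):

q_flood_left = device.getOutputQueue("flood-left", maxSize=4, blocking=False)  # oldest frames are dropped once the queue is full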

But I still wonder whether it is possible to use only one mono camera (monoR, for example) and not waste resources.

    Hi pedro-UCA
    If you create a host queue and don't read from it, frames will not be dropped; the queue will block instead.

    Remove the sending of the left frame to the host and you won't need to create a host queue for it. The frame will be discarded inside the script node.

    # Script node for frame routing and IR dot/flood alternate
    script = pipeline.create(dai.node.Script)
    script.setProcessor(dai.ProcessorType.LEON_CSS)
    script.setScript("""
        #dotBright = 0.8
        floodBright = 0.1
        LOGGING = False  # Set `True` for latency/timings debugging
    
        node.warn(f'IR drivers detected: {str(Device.getIrDrivers())}')
    
        while True:
            # Wait first for a frame event, received at MIPI start-of-frame
            event = node.io['event'].get()
            if LOGGING: tEvent = Clock.now()
    
            # Immediately reconfigure the IR driver.
            # Note the logic is inverted, as it applies for next frame
            Device.setIrFloodLightIntensity(floodBright)
            if LOGGING: tIrSet = Clock.now()
    
            # Wait for the actual frames (after MIPI capture and ISP proc is done)
            frameL = node.io['frameL'].get()
            if LOGGING: tLeft = Clock.now()
            frameR = node.io['frameR'].get()
            if LOGGING: tRight = Clock.now()
            if LOGGING:
                latIR      = (tIrSet - tEvent               ).total_seconds() * 1000
                latEv      = (tEvent - event.getTimestamp() ).total_seconds() * 1000
                latProcL   = (tLeft  - event.getTimestamp() ).total_seconds() * 1000
                diffRecvRL = (tRight - tLeft                ).total_seconds() * 1000
                node.warn(f'T[ms] latEv:{latEv:5.3f} latIR:{latIR:5.3f} latProcL:{latProcL:6.3f} '
                        + f' diffRecvRL:{diffRecvRL:5.3f}')
    
    
            # Route the frames to their respective outputs
            # node.io['floodL'].send(frameL)
            node.io['floodR'].send(frameR)
    
    """)
    
    # Linking
    
    # TODO: it crashes after a couple of seconds if monoR.frameEvent.link(script.inputs['event']) is used instead
    # Same if monoR.out.link(manip.inputImage) below is changed to monoL.out.link(manip.inputImage)
    monoL.frameEvent.link(script.inputs['event'])
    monoL.out.link(script.inputs['frameL'])
    monoR.out.link(script.inputs['frameR'])
    
    # script.outputs['floodL'].link(xoutFloodL.input)
    script.outputs['floodR'].link(xoutFloodR.input)

    Thanks,
    Jaka