• OAK4
  • Trying to run inference on a feed of images not from the camera node

Hello, I am trying to run inference on an OAK 4 S with a custom YOLO model, feeding it images that are loaded locally. In the code below I just alternate randomly between two frames, but ideally this would be a stream of images. I can run the pipeline using the benchmark node, but it seems to pick up only the first image and ignores the following ones, even though I keep feeding them to the input queue. I also tried using XLinkIn and XLinkOut, but I could not get them to work, and I am lacking documentation for these two nodes in the new version of the API. Here is the code that I created.

import depthai as dai
import numpy as np
import time
import cv2
import random

device = dai.Device()
modelPath = "/home/asker/oak4/depthai-core/examples/python/.depthai_cached_models/bk_gh_od_02_12_24_640_best_objects.rvc4.tar.xz"
modelArchive = dai.NNArchive(modelPath)

inputSize = modelArchive.getInputSize()
type = modelArchive.getConfig().model.inputs[0].preprocessing.daiType
if type:
    try:
        frameType = dai.ImgFrame.Type.BGR888i
    except AttributeError:
        type = None

testImg = cv2.imread("/home/asker/oak4/depthai-core/test.jpg")
print(inputSize)
testImg = cv2.resize(testImg, (inputSize[0], inputSize[1]))
# Construct the input (white) image for benchmarking
img = np.ones((inputSize[1], inputSize[0], 3), np.uint8) * 255
inputFrame = dai.ImgFrame()
inputFrame.setCvFrame(testImg, frameType)

with dai.Pipeline(device) as p:
    benchmarkOut = p.create(dai.node.BenchmarkOut)
    benchmarkOut.setRunOnHost(False)  # The node can run on host or on device
    benchmarkOut.setFps(-1)  # As fast as possible

    neuralNetwork = p.create(dai.node.DetectionNetwork).build(benchmarkOut.out, modelArchive)
    # labelMap = neuralNetwork.getClasses()

    benchmarkIn = p.create(dai.node.BenchmarkIn)
    benchmarkIn.setRunOnHost(False)  # The node can run on host or on device
    benchmarkIn.sendReportEveryNMessages(100)
    benchmarkIn.logReportsAsWarnings(False)
    neuralNetwork.out.link(benchmarkIn.input)

    outputQueue = benchmarkIn.report.createOutputQueue()
    inputQueue = benchmarkOut.input.createInputQueue()

    qRgb = neuralNetwork.passthrough.createOutputQueue()
    qDet = neuralNetwork.out.createOutputQueue()

    p.start()

    frame = None
    detections = []
    startTime = time.monotonic()
    counter = 0
    color2 = (255, 255, 255)

    # nn data, being the bounding box locations, are in <0..1> range - they need to be normalized with frame width/height
    def frameNorm(frame, bbox):
        normVals = np.full(len(bbox), frame.shape[0])
        normVals[::2] = frame.shape[1]
        return (np.clip(np.array(bbox), 0, 1) * normVals).astype(int)

    def displayFrame(name, frame):
        color = (255, 0, 0)
        for detection in detections:
            bbox = frameNorm(
                frame,
                (detection.xmin, detection.ymin, detection.xmax, detection.ymax),
            )
            print(f"{bbox=}")
            cv2.putText(
                frame,
                "class",
                (bbox[0] + 10, bbox[1] + 20),
                cv2.FONT_HERSHEY_TRIPLEX,
                0.5,
                255,
            )
            cv2.putText(
                frame,
                f"{int(detection.confidence * 100)}%",
                (bbox[0] + 10, bbox[1] + 40),
                cv2.FONT_HERSHEY_TRIPLEX,
                0.5,
                255,
            )
            cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color, 2)
        # Show the frame
        cv2.imshow(name, frame)

    while p.isRunning():
        # Alternate randomly between the test image and the white image
        if random.choice([True, False]):
            inputFrame.setCvFrame(testImg, frameType)
        else:
            inputFrame.setCvFrame(img, frameType)
        inputQueue.send(inputFrame)
        # benchmarkReport = outputQueue.get()
        # time.sleep(0.01)
        inRgb: dai.ImgFrame = qRgb.get()
        inDet: dai.ImgDetections = qDet.get()
        if inRgb is not None:
            frame = inRgb.getCvFrame()
            cv2.putText(
                frame,
                "NN fps: {:.2f}".format(counter / (time.monotonic() - startTime)),
                (2, frame.shape[0] - 4),
                cv2.FONT_HERSHEY_TRIPLEX,
                0.4,
                color2,
            )

        if inDet is not None:
            detections = inDet.detections
            print(inDet.detections)
            counter += 1

        if frame is not None:
            displayFrame("rgb", frame)
            print("FPS: {:.2f}".format(counter / (time.monotonic() - startTime)))
        if cv2.waitKey(1) == ord("q"):
            p.stop()
            break
        # assert isinstance(benchmarkReport, dai.BenchmarkReport)
        # print(f"FPS is {benchmarkReport.fps}")

The question is: what is the best way to feed images to a neural network using the new pipeline? Should I use XLink, or is there a new node that can handle this? If it is XLink, can you provide some guidance on how to use it and how to define the pipeline (queues, links, etc.)?

Thanks in advance.

    MohamedAsker
    Benchmark node currently does not support that, but you can directly send the images in without it.
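    For reference, a minimal sketch of what sending the images in directly can look like (the model and image paths here are placeholders, and it uses the same node-level input queue that gets worked out further down in this thread):

    import cv2
    import depthai as dai

    device = dai.Device()
    modelArchive = dai.NNArchive("model.rvc4.tar.xz")  # placeholder path
    w, h = modelArchive.getInputSize()

    with dai.Pipeline(device) as pipeline:
        # No camera, benchmark, or XLink node: the network input is fed straight from the host
        nn = pipeline.create(dai.node.DetectionNetwork)
        nn.setNNArchive(modelArchive)
        inputQueue = nn.input.createInputQueue()
        detQueue = nn.out.createOutputQueue()
        pipeline.start()

        img = cv2.resize(cv2.imread("test.jpg"), (w, h))  # placeholder image
        while pipeline.isRunning():
            frame = dai.ImgFrame()
            frame.setCvFrame(img, dai.ImgFrame.Type.BGR888i)  # interleaved BGR, as used elsewhere in this thread
            inputQueue.send(frame)
            detections = detQueue.get().detections

    In the v3 API the queues are created directly on a node's inputs and outputs, so no XLinkIn/XLinkOut nodes are needed for this.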

    Thanks for your reply. I now have a script that runs, but I had to initialize an XLinkIn node and connect it to the neural network to be able to build it. The problem is that I am running a YOLOv6n 640 model and it only reaches 53 FPS, while it was running at more than 200 FPS during benchmarking. What could be the reason for such a huge drop?

    Here is the script that I have now:

    #!/usr/bin/env python3

    import cv2
    import depthai as dai
    import numpy as np
    import time
    import random
    from depthai_nodes import ParsingNeuralNetwork

    device = dai.Device()
    modelPath = "./.depthai_cached_models/bk_gh_od_02_12_24_640_best_objects.rvc4.tar.xz"modelArchive = dai.NNArchive(modelPath)
    inputSize = modelArchive.getInputSize()

    type = modelArchive.getConfig().model.inputs[0].preprocessing.daiType

    if type: try: frameType = dai.ImgFrame.Type.BGR888i except AttributeError: type = None
    testImg = cv2.imread("/home/asker/oak4/depthai-core/test.jpg")
    testImg2 = cv2.imread("/home/asker/oak4/depthai-core/test2.jpg")
    testImg = cv2.resize(testImg, (inputSize[0], inputSize[1]))
    testImg2 = cv2.resize(testImg2, (inputSize[0], inputSize[1]))

    with dai.Pipeline(device) as pipeline:
        xLinkIn = pipeline.create(dai.node.XLinkIn)

        neuralNetwork = pipeline.create(ParsingNeuralNetwork).build(
            xLinkIn.out,
            modelArchive,
        )

        qRgb = neuralNetwork.input.createInputQueue(blocking=False, maxSize=8)
        qDet = neuralNetwork.out.createOutputQueue()

        pipeline.start()

        frame = None
        detections = []
        startTime = time.time()
        counter = 0
        color2 = (255, 255, 255)

        inputFrame = dai.ImgFrame()
        if random.choice([True, False]):
            inputFrame.setCvFrame(testImg, frameType)
        else:
            inputFrame.setCvFrame(testImg2, frameType)

        while pipeline.isRunning():
            qRgb.send(inputFrame)

            inDet: dai.ImgDetections = qDet.get()
            if inDet is not None:
                detections = inDet.detections
                counter += 1

            if time.time() - startTime > 1:
                print("FPS: {:.2f}".format(counter / (time.time() - startTime)))
                counter = 0
                startTime = time.time()

            if cv2.waitKey(1) == ord("q"):
                pipeline.stop()
                break

      MohamedAsker here is the generated pipeline, in case it helps:

      {"connections":[{"node1Id":7,"node1Output":"out","node1OutputGroup":"","node2Id":2,"node2Input":"in","node2InputGroup":""}

      ,{"node1Id":4,"node1Output":"out","node1OutputGroup":"","node2Id":8,"node2Input":"in","node2InputGroup":""}

      ,{"node1Id":2,"node1Output":"out","node1OutputGroup":"","node2Id":4,"node2Input":"in","node2InputGroup":""}

      ,{"node1Id":0,"node1Output":"out","node1OutputGroup":"","node2Id":2,"node2Input":"in","node2InputGroup":""}

      ],"globalProperties":{"calibData":null,"cameraTuningBlobSize":null,"cameraTuningBlobUri":"","leonCssFrequencyHz":700000000.0,"leonMssFrequencyHz":700000000.0,"pipelineName":null,"pipelineVersion":null,"sippBufferSize":18432,"sippDmaBufferSize":16384,"xlinkChunkSize":-1}

      ,"nodes":[[8,{"alias":"","id":8,"ioInfo":[[["","in"],{"blocking":true,"group":"","id":8,"name":"in","queueSize":3,"type":3,"waitForMessage":false}

      ]],"logLevel":3,"name":"XLinkOut","parentId":-1,"properties":[185,3,136,0,0,128,191,189,9,95,95,120,95,52,95,111,117,116,0]}

      ],[7,{"alias":"","id":7,"ioInfo":[[["","out"],{"blocking":false,"group":"","id":7,"name":"out","queueSize":8,"type":0,"waitForMessage":false}

      ]],"logLevel":3,"name":"XLinkIn","parentId":-1,"properties":[185,3,189,9,95,95,120,95,50,95,95,105,110,130,0,0,80,0,8]}

      ],[4,{"alias":"","id":4,"ioInfo":[[["","out"],{"blocking":false,"group":"","id":6,"name":"out","queueSize":8,"type":0,"waitForMessage":false}

      ],[["","imageIn"],{"blocking":true,"group":"","id":5,"name":"imageIn","queueSize":5,"type":3,"waitForMessage":true}

      ],[["","in"],{"blocking":true,"group":"","id":4,"name":"in","queueSize":5,"type":3,"waitForMessage":true}

      ]],"logLevel":3,"name":"DetectionParser","parentId":-1,"properties":[185,3,8,187,0,185,8,0,136,0,0,0,63,8,4,186,0,187,0,186,0,136,0,0,0,63]}

      ],[2,{"alias":"","id":2,"ioInfo":[[["","passthrough"],{"blocking":false,"group":"","id":3,"name":"passthrough","queueSize":8,"type":0,"waitForMessage":false}

      ],[["","out"],{"blocking":false,"group":"","id":2,"name":"out","queueSize":8,"type":0,"waitForMessage":false}

      ],[["","in"],{"blocking":true,"group":"","id":1,"name":"in","queueSize":3,"type":3,"waitForMessage":true}

      ]],"logLevel":3,"name":"NeuralNetwork","parentId":-1,"properties":[185,10,1,190,189,0,189,13,97,115,115,101,116,58,95,95,109,111,100,101,108,8,0,0,0,189,0,187,0]}

      ],[0,{"alias":"","id":0,"ioInfo":[[["","out"],{"blocking":false,"group":"","id":0,"name":"out","queueSize":8,"type":0,"waitForMessage":false}

      ]],"logLevel":3,"name":"XLinkIn","parentId":-1,"properties":[185,3,189,0,130,0,0,80,0,8]}

      ]]}

        MohamedAsker I also tried not using an XLinkIn node; instead of using the build function, I set the NNArchive like this:

        neuralNetwork = pipeline.create(ParsingNeuralNetwork)
        neuralNetwork.setNNArchive(modelArchive)
        qRgb = neuralNetwork.input.createInputQueue()
        qDet = neuralNetwork.out.createOutputQueue()

        and I am still getting 53 FPS. Is this the expected behaviour, or is there something that could make it better? I am also getting this warning before inference starts:

        [3260625470] [192.168.178.22] [1742473775.004] [DetectionParser(3)] [warning] Did not get the input image sizes from the imageIn input. Defaulting to 416 x 416

          MohamedAsker
          My guess is that it's a combination of bandwidth (640*640*3*8*53 bits per second) and the cv2.resize, which takes a lot of time. The benchmark node works on the device and only sends the first frame, so it can run much faster.
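          To put a number on the bandwidth part (my arithmetic, assuming uncompressed BGR frames sent to the device over its Ethernet link):

          # 640 x 640 BGR frame, 3 bytes per pixel, streamed at the observed 53 FPS
          bytes_per_frame = 640 * 640 * 3             # 1,228,800 bytes (~1.2 MB per frame)
          bits_per_second = bytes_per_frame * 8 * 53  # 521,011,200 bits/s (~521 Mbit/s)

          That is more than half of a 1 Gbit/s link before any protocol overhead, so the host-to-device transfer alone can cap the achievable FPS.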

          MohamedAsker [3260625470] [192.168.178.22] [1742473775.004] [DetectionParser(3)] [warning] Did not get the input image sizes from the imageIn input. Defaulting to 416 x 416

          You can ignore it. It will be removed in newer releases.

          Thanks
          Jaka