Hello, I am running some preliminary experiments with an OAK 4 S camera and a custom YOLOv6n model with an input size of 640 × 640. I am feeding a static image — so, in this case, I am not using the camera feed as the input to the neural network. Everything seems to run at roughly 88 FPS, but I get this warning at the beginning of each run:
[3260625470] [192.168.178.22] [1742226352.711] [DetectionParser(4)] [warning] Did not get the input image sizes from the imageIn input. Defaulting to 416 x 416
I am not sure where I should set this, or whether it affects the inference results at all.
Here is the code I used:
import depthai as dai
import numpy as np
import time
import cv2
from depthai_nodes import ParsingNeuralNetwork
# --- Device, model archive, and static test frame ---------------------------
device = dai.Device()
modelPath = "./depthai-core/examples/python/.depthai_cached_models/bk_gh_od_02_12_24_640_best_objects.rvc4.tar.xz"
modelArchive = dai.NNArchive(modelPath)
inputSize = modelArchive.getInputSize()

# Pick the ImgFrame type for the NN input.
# NOTE(review): the original shadowed the builtin `type` and, when the archive
# declared no preprocessing type (or BGR888i was missing), never assigned
# `frameType` at all — setCvFrame() below would then raise NameError.
frameType = None
daiType = modelArchive.getConfig().model.inputs[0].preprocessing.daiType
if daiType:
    try:
        # cv2 images are interleaved BGR, so BGR888i matches testImg's layout.
        frameType = dai.ImgFrame.Type.BGR888i
    except AttributeError:
        frameType = None
if frameType is None:
    # Fail loudly with a clear message instead of the original NameError.
    raise RuntimeError(
        "Could not determine an ImgFrame type for the model input "
        "(no preprocessing daiType in the NN archive, or this depthai build "
        "lacks ImgFrame.Type.BGR888i)"
    )

testImg = cv2.imread("/home/asker/oak4/depthai-core/test.jpg")
if testImg is None:
    # cv2.imread silently returns None on a missing/unreadable file.
    raise FileNotFoundError("Could not read /home/asker/oak4/depthai-core/test.jpg")
print(inputSize)
testImg = cv2.resize(testImg, (inputSize[0], inputSize[1]))

# Prototype input frame (the run loop below builds a fresh one per iteration).
inputFrame = dai.ImgFrame()
inputFrame.setWidth(inputSize[0])
inputFrame.setHeight(inputSize[1])
inputFrame.setCvFrame(testImg, frameType)
print(inputFrame)
with dai.Pipeline(device) as p:
    # BenchmarkOut replays the frames it receives back into the pipeline as
    # fast as possible (fps=-1), so the NN is fed at its maximum rate.
    benchmarkOut = p.create(dai.node.BenchmarkOut)
    benchmarkOut.setRunOnHost(False)
    benchmarkOut.setFps(-1)

    # NOTE(review): the DetectionParser warning ("Did not get the input image
    # sizes from the imageIn input. Defaulting to 416 x 416") presumably means
    # the parser never sees an image on its imageIn port when fed this way —
    # verify against depthai-nodes docs whether this affects the normalized
    # detection coordinates for a 640x640 model.
    neuralNetwork = p.create(ParsingNeuralNetwork).build(
        benchmarkOut.out,
        modelArchive,
    )

    # BenchmarkIn counts NN output messages on-device and reports every 100.
    benchmarkIn = p.create(dai.node.BenchmarkIn)
    benchmarkIn.setRunOnHost(False)
    benchmarkIn.sendReportEveryNMessages(100)
    benchmarkIn.logReportsAsWarnings(False)
    neuralNetwork.out.link(benchmarkIn.input)

    outputQueue = benchmarkIn.report.createOutputQueue()
    inputQueue = benchmarkOut.input.createInputQueue()
    qRgb = neuralNetwork.passthrough.createOutputQueue()
    # NOTE(review): the original created a second, never-read queue on
    # neuralNetwork.out (`parser_output_queue`) in addition to qDet; the
    # unread duplicate only buffered messages, so it was dropped.
    qDet = neuralNetwork.out.createOutputQueue()

    p.start()

    frame = None
    detections = []
    # Use one monotonic clock throughout: the original seeded startTime with
    # time.time() but computed the on-frame FPS with time.monotonic(), which
    # mixes two unrelated epochs and prints a meaningless number.
    startTime = time.monotonic()
    counter = 0
    color2 = (255, 255, 255)

    def frameNorm(frame, bbox):
        """Map a normalized [0, 1] (xmin, ymin, xmax, ymax) bbox to integer
        pixel coordinates of `frame` (x values scaled by width, y by height)."""
        normVals = np.full(len(bbox), frame.shape[0])
        normVals[::2] = frame.shape[1]  # even indices are x coords -> width
        return (np.clip(np.array(bbox), 0, 1) * normVals).astype(int)

    def displayFrame(name, frame):
        """Draw the current `detections` (label, confidence, box) onto `frame`
        and show it in an OpenCV window called `name`."""
        color = (255, 0, 0)
        for detection in detections:
            bbox = frameNorm(
                frame,
                (detection.xmin, detection.ymin, detection.xmax, detection.ymax),
            )
            # print(f"{bbox=}")
            cv2.putText(frame, "class", (bbox[0] + 10, bbox[1] + 20),
                        cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.putText(frame, f"{int(detection.confidence * 100)}%",
                        (bbox[0] + 10, bbox[1] + 40),
                        cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color, 2)
        cv2.imshow(name, frame)

    while p.isRunning():
        # Re-send the same static image every iteration; BenchmarkOut then
        # replays it to the NN at full speed.
        inputFrame = dai.ImgFrame()
        inputFrame.setWidth(inputSize[0])
        inputFrame.setHeight(inputSize[1])
        inputFrame.setCvFrame(testImg, frameType)
        inputQueue.send(inputFrame)

        # benchmarkReport = outputQueue.get()  # time.sleep(0.01)
        # print(f"{benchmarkReport=}")
        inRgb: dai.ImgFrame = qRgb.get()
        inDet: dai.ImgDetections = qDet.get()

        if inRgb is not None:
            frame = inRgb.getCvFrame()
            cv2.putText(
                frame,
                "NN fps: {:.2f}".format(counter / (time.monotonic() - startTime)),
                (2, frame.shape[0] - 4),
                cv2.FONT_HERSHEY_TRIPLEX,
                0.4,
                color2,
            )

        if inDet is not None:
            detections = inDet.detections
            counter += 1

        if frame is not None:
            displayFrame("rgb", frame)

        # Print and reset the FPS counter every 10 seconds.
        if time.monotonic() - startTime > 10:
            print("FPS: {:.2f}".format(counter / (time.monotonic() - startTime)))
            counter = 0
            startTime = time.monotonic()

        if cv2.waitKey(1) == ord("q"):
            p.stop()
            break

    # assert isinstance(benchmarkReport, dai.BenchmarkReport)
    # print(f"FPS is {benchmarkReport.fps}")