• DepthAI-v2
  • Control the FPS of video inferencing on OAK-D using YOLOv8 nano

I have trained a person detection model using YOLOv8 nano on 5k images.
The script has two modes: when the video argument is given, it takes a video path and runs inference on that file; otherwise it runs live inference from the camera.
In two of the videos the people are close by, so playback runs smoothly at a good frame rate (the queue from getInputQueue has blocking set to True).
In another video the people are a bit farther away, so detection takes longer and the frame rate drops; playback looks slower than real time (again with blocking True).
Is there any way I can control the FPS of the video?
Any comments would be helpful.

```python
import json
import threading
from datetime import datetime
from pathlib import Path

import blobconverter
import cv2
import depthai

# Note: globals such as sizeX, sizeY, video, model, isYoloModel, isCustomModel,
# parentDir and video_source, and helpers such as to_planar, check_overlap and
# insert_data, are defined elsewhere in the script.


def init_pipeline():
    pipeline = depthai.Pipeline()

    # Resize incoming frames to the network input size, letterboxing to keep aspect ratio
    manip1 = pipeline.createImageManip()
    manip1.initialConfig.setResizeThumbnail(sizeX, sizeY)
    manip1.initialConfig.setKeepAspectRatio(True)
    manip1.setMaxOutputFrameSize(sizeX * sizeY * 3)
    manip1.inputImage.setBlocking(False)

    if not video:
        cam_rgb = pipeline.createColorCamera()
        cam_rgb.setResolution(depthai.ColorCameraProperties.SensorResolution.THE_4_K)
        cam_rgb.setPreviewSize(640, 480)
        cam_rgb.setInterleaved(False)

        xout_rgb = pipeline.createXLinkOut()
        xout_rgb.setStreamName("rgb")
        cam_rgb.preview.link(xout_rgb.input)
        cam_rgb.preview.link(manip1.inputImage)
    else:
        # In video mode, frames are fed in from the host over XLink
        xinFrame = pipeline.createXLinkIn()
        xinFrame.setStreamName("inFrame")
        xinFrame.setMaxDataSize(1920 * 1280 * 3)
        xinFrame.out.link(manip1.inputImage)

        xout_manip = pipeline.createXLinkOut()
        xout_manip.setStreamName("manip")
        xout_manip.input.setBlocking(False)
        manip1.out.link(xout_manip.input)

    # Next, we want a neural network that will produce the detections
    print("Model: " + model + ", Yolo: " + str(isYoloModel) + ", OpenVINO: " + str(not isCustomModel))

    if not isYoloModel:
        detection_nn = pipeline.createMobileNetDetectionNetwork()
    else:
        detection_nn = pipeline.createYoloDetectionNetwork()

        with open('../../data/280epoch5000segimg.json') as f:
            config = json.load(f)

        # Extract the YOLO decoding parameters from the JSON
        metadata = config['nn_config']['NN_specific_metadata']
        num_classes = metadata['classes']
        coordinates = metadata['coordinates']
        anchors = metadata['anchors']
        anchor_masks = metadata['anchor_masks']
        iou_threshold = metadata['iou_threshold']

        # Apply them to the on-device decoder
        detection_nn.setNumClasses(num_classes)
        detection_nn.setCoordinateSize(coordinates)
        detection_nn.setAnchors(anchors)
        detection_nn.setAnchorMasks(anchor_masks)
        detection_nn.setIouThreshold(iou_threshold)

    # Blob is the neural network file, compiled for MyriadX. It contains both the definition and weights of the model.
    # We're using the blobconverter tool to retrieve the MobileNetSSD blob automatically from the OpenVINO Model Zoo.
    # detection_nn.setBlobPath(blobconverter.from_zoo(name='mobilenet-ssd', shaves=6))
    # Next, we filter out the detections that are below a confidence threshold. Confidence can be anywhere between <0..1>.
    detection_nn.setConfidenceThreshold(0.5)
    detection_nn.setNumPoolFrames(4)
    detection_nn.input.setBlocking(False)
    detection_nn.setNumInferenceThreads(2)
    # Link the resized frames to the neural network input, so that it can produce detections
    manip1.out.link(detection_nn.input)

    if isCustomModel:
        nnPath = str((parentDir / Path('../../data/' + model)).resolve().absolute())
        detection_nn.setBlobPath(nnPath)
        print("Custom model: " + nnPath + ", size: " + str(sizeX) + "x" + str(sizeY))
    else:
        detection_nn.setBlobPath(blobconverter.from_zoo(name='mobilenet-ssd', shaves=6))
        print("Model from OpenVINO Zoo, size: " + str(sizeX) + "x" + str(sizeY))

    xout_nn = pipeline.createXLinkOut()
    xout_nn.setStreamName("nn")
    detection_nn.out.link(xout_nn.input)

    return pipeline


def detect_and_count():
    global outputFrame, lock, zones_current_count, listeners, loop

    pipeline = init_pipeline()

    with depthai.Device(pipeline) as device:
        q_nn = device.getOutputQueue("nn")
        # q_manip = device.getOutputQueue(name="manip", maxSize=4, blocking=False)
        # q_nn = device.getOutputQueue(name="nn", maxSize=4, blocking=False)

        frame = None
        detections = []

        timestamp = datetime.utcnow()
        zone_data = []

        if video:
            q_rgb = device.getOutputQueue("manip")
            q_in = device.getInputQueue(name="inFrame", maxSize=30, blocking=True)
            videoPath = str((parentDir / Path('../../data/' + video_source)).resolve().absolute())
            cap = cv2.VideoCapture(videoPath)
            inputFrameShape = (sizeX, sizeY)
        else:
            q_rgb = device.getOutputQueue("rgb")

        # Loop over frames from the video stream
        while True:
            if video:
                if not cap.isOpened():
                    print("Video over")
                    break

                read_correctly, frame = cap.read()
                if not read_correctly:
                    break
                if frame is not None:
                    # Wrap the host frame in an ImgFrame and send it to the device
                    img = depthai.ImgFrame()
                    img.setType(depthai.ImgFrame.Type.BGR888p)
                    img.setData(to_planar(frame, inputFrameShape))
                    img.setWidth(inputFrameShape[0])
                    img.setHeight(inputFrameShape[1])
                    q_in.send(img)

            in_rgb = q_rgb.tryGet()
            in_nn = q_nn.tryGet()

            if in_nn is not None:
                detections = in_nn.detections

            if in_rgb is not None:
                frame = in_rgb.getCvFrame()

                zone_data += check_overlap(frame, detections)

                # Once per second, push the accumulated zone data to the database on a worker thread
                now = datetime.utcnow()
                if now.second != timestamp.second:
                    t = threading.Thread(target=insert_data, args=(zone_data,))
                    t.daemon = True
                    t.start()
                    zone_data = []
                timestamp = now

                with lock:
                    outputFrame = frame.copy()

            # At any time, you can press "q" and exit the main loop, therefore exiting the program itself
            if cv2.waitKey(1) == ord('q'):
                break
```

This is the pipeline and the detection code. Live inferencing runs at this size:

`cam_rgb.setPreviewSize(640, 480)`

Is there any way I can make video inferencing do the same?

Hi krishnashravan
You can set the video size like this:

```python
read_correctly, frame = cap.read()
if not read_correctly:
    break
if frame is not None:
    # Resize the frame to the desired size
    frame_resized = cv2.resize(frame, (640, 480))  # Resize to your preview size
    img = depthai.ImgFrame()
    img.setType(depthai.ImgFrame.Type.BGR888p)
    img.setData(to_planar(frame_resized, (640, 480)))  # Pass the resized frame

    img.setWidth(640)  # Set the width to the desired size
    img.setHeight(480)  # Set the height to the desired size
    q_in.send(img)
```
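
One note on this approach: since the pipeline's `manip1` node already resizes frames with `setResizeThumbnail(sizeX, sizeY)`, resizing on the host mainly reduces the amount of data pushed over XLink per frame, and it keeps each frame comfortably under the `setMaxDataSize(1920*1280*3)` limit set on the XLinkIn node.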

As far as FPS settings go, you can't really change the FPS in the middle of a run. I'd suggest lowering the resolution, or setting the blocking behaviour to False, if you wish to have faster-running streams.
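
If the problem is instead that the file plays back faster or slower than real time, you can pace the host-side send loop to the video's native frame rate. Below is a minimal sketch, assuming the `cap` and `q_in` objects from the loop above; note that this only throttles how quickly frames are sent, it cannot make the network infer any faster:

```python
import time

# Ask the container for its native frame rate; some files report 0, so fall back to 30
fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
frame_interval = 1.0 / fps

next_deadline = time.monotonic()
while cap.isOpened():
    read_correctly, frame = cap.read()
    if not read_correctly:
        break
    # ... build the ImgFrame and q_in.send(img) exactly as in your loop ...
    next_deadline += frame_interval
    delay = next_deadline - time.monotonic()
    if delay > 0:
        time.sleep(delay)  # never send frames faster than the source FPS
```

For the live-camera path, the capture rate can be capped when the pipeline is built with `cam_rgb.setFps(...)`; that setting doesn't apply to frames fed in from a file.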

Thanks,
Jaka