We wrote a script that saves the RGB data in AVI format and stores the depth maps as NumPy arrays. In the end, the video and the depth maps contain the same number of frames, but in every recording the depth lags a couple of frames behind the RGB during playback.
When I manually compare and match the frames (let's say the depth lags 10 frames behind) and delete the first 10 depth frames, the rest is completely in sync. The size of this offset varies between 0 and roughly 10 frames per recording, and it also differs per camera when recording with multiple cameras.
Any idea what could be the cause of this?
Our code looks as follows:
import argparse
from pathlib import Path
import depthai as dai
import cv2
import numpy as np
from loguru import logger
import time
import contextlib
import threading
from utils import create_luxonis_pipeline, verify_directories
def worker(devInfo, stack, dic):
    """Open one camera and register its output queues in ``dic``.

    The device is entered into ``stack`` so that it is closed when the
    caller's ``ExitStack`` unwinds. Queues are stored under the keys
    ``"rgb-<mxid>"`` and, only when the module-level ``args.record_depth``
    is exactly ``True``, ``"dep-<mxid>"``.

    Args:
        devInfo: Device-info object for the camera; must provide ``getMxId()``.
        stack: ``contextlib.ExitStack`` that takes ownership of the device.
        dic: Dictionary (shared with the caller) that receives the queues.
    """
    # Third positional argument is passed as False, exactly as before
    # (presumably the USB2-mode switch -- confirm against the depthai API).
    device: dai.Device = stack.enter_context(
        dai.Device(dai.OpenVINO.Version.VERSION_2021_4, devInfo, False)
    )
    camera_id = devInfo.getMxId()

    # Both queues hold a single frame and never block the device.
    dic["rgb-" + camera_id] = device.getOutputQueue(name="rgb", maxSize=1, blocking=False)

    if args.record_depth is not True:
        return
    dic["dep-" + camera_id] = device.getOutputQueue(name="depth", maxSize=1, blocking=False)
def print_time(start, end):
    """Log how much recording time lies between ``start`` and ``end``.

    Args:
        start: Timestamp in seconds at which recording began.
        end: Timestamp in seconds to measure up to.
    """
    # Round to whole seconds first, then split into minutes and seconds.
    elapsed = round(end - start, 0)
    whole_minutes, leftover_seconds = divmod(elapsed, 60)
    logger.info(
        f"time elapsed while recording: {int(whole_minutes)} minutes and {int(leftover_seconds)} seconds."
    )
def record_videos(args):
# Record RGB video (and depth frames) from every connected camera until the
# user presses "q". Reads args.n_cams, args.save_path, args.folder_name,
# args.output_fps, args.print_every, args.record_depth and args.show_depth.
# NOTE(review): this paste has lost its indentation and several statements;
# the gaps are flagged below and must be restored from the real file.
# find all the available cameras and setup the device streams
device_infos = dai.Device.getAllAvailableDevices()
# Keep polling until exactly args.n_cams devices are reported.
while len(device_infos) != int(args.n_cams):
# NOTE(review): the call that opens this message (presumably a logger call)
# and the 2-second wait the message mentions are not visible here.
f"Could find {len(device_infos)} cameras, which is not the requested amount "
f"({args.n_cams}). Trying again in {2} seconds")
device_infos = dai.Device.getAllAvailableDevices()
mxIds = []
# NOTE(review): the loop body is missing; presumably it collects each
# device's MxId into mxIds -- confirm.
for info in device_infos:
# The ExitStack is handed to worker(), which enters each device into it.
with contextlib.ExitStack() as stack:
# queues is filled by worker() with "rgb-<mxid>" / "dep-<mxid>" entries.
queues = {}
threads = []
for mxId in mxIds:
time.sleep(1) # Currently required due to XLink race issues
_, devMxID = dai.Device.getDeviceByMxId(mxId=mxId)
# NOTE(review): no thread.start() or threads.append(thread) is visible in
# this paste, so as shown the join loop below has nothing to wait for.
thread = threading.Thread(target=worker, args=(devMxID, stack, queues))
for t in threads:
t.join() # Wait for all threads to finish
# create separate save folders for each camera
for mxid in mxIds:
mxid_path = Path(args.save_path).joinpath(args.folder_name).joinpath(mxid)
Path(mxid_path).mkdir(parents=True, exist_ok=True)
# Create output writers;
# One XVID writer per rgb queue, written to <save_path>/<folder_name>/<mxid>/<name>.avi
# with a hard-coded frame size of (1280, 800).
# NOTE(review): the closing brace of this dict comprehension is missing.
fourcc = cv2.VideoWriter_fourcc(*"XVID")
video_writers = {
f"{name}": cv2.VideoWriter(f"{args.save_path}/{args.folder_name}/{name.split('-')[1]}/{name}.avi",
fourcc, args.output_fps, (1280, 800)) for name, _ in queues.items() if "rgb" in name
# Per-depth-queue frame counter, starting at 0.
frame_idx_dict = {}
for key, value in queues.items():
if "dep" in key:
frame_idx_dict[key] = 0
time_start = time.time()
# Elapsed-seconds threshold for the next progress message.
time_check = args.print_every
logger.info("Started recording video! Press the q key to quit.")
while True:
# do print time check
if args.print_every != 0:
end_time = time.time()
if (end_time-time_start) >= time_check:
print_time(time_start, end_time)
time_check += args.print_every
# NOTE(review): each queue is read independently; nothing here compares
# sequence numbers or timestamps, so the rgb and depth frames taken in one
# iteration are not guaranteed to come from the same capture. With the
# maxSize=1, non-blocking queues created in worker() this looks like a
# plausible source of the reported rgb/depth offset -- confirm, and consider
# pairing frames on their sequence number.
input_images = {name: queue.get().getCvFrame() for name, queue in queues.items()}
# Frames are only handled when there are two streams per camera.
# NOTE(review): worker() adds a depth queue only when args.record_depth is
# True, so without depth this condition would never hold -- confirm.
if len(input_images.keys()) == 2 * int(args.n_cams):
for name in input_images.keys():
if "rgb" in name:
cv2.imshow(name, input_images[name])
# pass
if "dep" in name:
if args.show_depth is True:
cv2.imshow(name, input_images[name])
# Count every depth frame seen for this queue.
frame_idx_dict[name] += 1
# pass
# NOTE(review): the body of this branch (presumably the code that saves the
# depth array) is missing from the paste.
if args.record_depth:
# "q" ends the recording; the final elapsed time is logged.
# NOTE(review): no break / writer release is visible after this point.
if cv2.waitKey(1) == ord('q'):
time_end = time.time()
print_time(time_start, time_end)
def str_to_bool(value):
    """Convert a command-line token into a real ``bool``.

    ``argparse`` with ``type=bool`` calls ``bool()`` on the raw string, so
    every non-empty value -- including ``"False"`` and ``"0"`` -- becomes
    ``True``. This converter interprets the text instead.

    Args:
        value: The raw argument; a ``bool`` is returned unchanged.

    Returns:
        ``True`` or ``False``.

    Raises:
        argparse.ArgumentTypeError: If the text is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    token = str(value).strip().lower()
    if token in {"true", "t", "yes", "y", "1"}:
        return True
    if token in {"false", "f", "no", "n", "0"}:
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")


def build_arg_parser():
    """Build the command-line parser for the recording script.

    Returns:
        An ``argparse.ArgumentParser`` with all recording and pipeline options.
    """
    parser = argparse.ArgumentParser(description="Capture and save videos from multiple cameras.")
    parser.add_argument("--save_path", type=str, required=True, help="Absolute path to store the videos")
    parser.add_argument("--n_cams", type=int, default=1, help="Number of cameras to record with. default is 1")
    parser.add_argument("--folder_name", type=str, required=False, help="How to name the newly created folder with data, default is timestamp")  # assert if folder name exists
    # Booleans are parsed explicitly so that "--record_depth False" really disables depth.
    parser.add_argument("--record_depth", type=str_to_bool, default=True, help="Store depth maps as numpy arrays, default is True")
    parser.add_argument("--show_depth", type=str_to_bool, default=False, help="Show the depth maps while streaming, default is False")
    parser.add_argument("--output_fps", type=int, default=15, help="frames per second for which the video is formatted, default is 15")
    parser.add_argument("--print_every", type=int, default=0, help="Print every N seconds how much time has been recorded, default is 0, which means no printing")
    ## pipeline settings here
    parser.add_argument("--input_fps", type=int, default=15, help="(maximum) frames per second the camera can output, default is 15")
    return parser


if __name__ == "__main__":
    parser = build_arg_parser()
    # ``args`` stays a module-level name: worker() reads it as a global.
    args = parser.parse_args()
    args.save_path, args.folder_name = verify_directories(args.save_path, args.folder_name)
    # NOTE(review): no call to record_videos(args) is visible after this point
    # in the pasted source -- confirm the recording entry point is invoked.