I am having a fairly strange issue when recording data with the oak-d-cm4-poe. I have a simple pipeline that reads video at 25fps from the RGB camera and passes it through a very lightweight classification model, then records the model result and writes each frame to a file using the h265 video encoder.
This pipeline seems to record smoothly (about 0.2% frame drops), however after ~23 minutes of continuous recording I hit a weird issue where the recording skips for about 2 seconds. This is not a frame drop - the sequence number sent by the device remains continuous, however it simply waits for 2 seconds before recording the next frame. This leaves a corrupted video file behind - I'm not exactly sure how/why the video is corrupted, but I think it may have to do with a lack of keyframes that should be there.
The video file still plays fine and looks ok to the naked eye, however when I read through it with opencv I get the errors shown below - which causes my automated processing of the video file (a key step done after recording is completed) to fail. I have not had very much success identifying the specific cause of these errors or how to handle them appropriately, or even how to identify them in an automated fashion during processing (they don't throw an error that would be caught in a try/except as far as I'm aware, it's treated more as a warning, but eventually the whole process just stops reading from the file - long before the file actually ends).
[hevc @ 0x7f91bd1a0e00] Could not find ref with POC 36
[hevc @ 0x7f91bd1c2400] Duplicate POC in a sequence: 42.
Does anyone have any idea of why this strange 2 second "pause" is happening and/or how I could attempt to correct it? We need to record for 5 hours continuously with this pipeline, so having these "pauses" is not good.
Alternatively, if this issue cannot be solved - is there at least a way to prevent the video file from being corrupted?
Here is my pipeline code:
import depthai as dai
from datetime import datetime
import argparse
from csv import DictWriter
# Command-line options: output paths, model blob, recording duration and frame rate.
# Sample command:
#   python /home/pi/jack/record_with_nn.py -wv /home/pi/storage/2023-08-11/oak3--2023-08-11--08_50_01.h265 -wt /home/pi/storage/2023-08-11/oak3--2023-08-11--08_50_01.csv -mf /home/pi/jack/regnetx_002_new.blob -t 18600 -f 25
parser = argparse.ArgumentParser()
parser.add_argument(
    '-wv', '--write_video',
    type=str,
    default="video_nn.h265",
    help="Path to the video write file",
)
parser.add_argument(
    '-wt', '--write_timestamp',
    type=str,
    default="timestamps_nn.csv",
    help="Path to the timestamp write file",
)
parser.add_argument(
    '-mf', '--model_file',
    type=str,
    default="regnetx_002.blob",
    help="Path to the model file",
)
parser.add_argument(
    '-t', '--recording_time',
    type=int,
    default=20,
    help="recording time in seconds",
)
parser.add_argument(
    '-f', '--fps',
    type=int,
    default=25,
    help="fps",
)
args = parser.parse_args()

# The frame rate is reused by the camera, the encoder preset and the host queue sizes.
fps = args.fps
# Build the device pipeline: the colour camera feeds both the H.265 encoder
# (full-resolution video output) and the neural network (preview output);
# each result is sent to the host over its own XLink stream.
pipeline = dai.Pipeline()

# Nodes
camRgb = pipeline.create(dai.node.ColorCamera)
videoEnc = pipeline.create(dai.node.VideoEncoder)
xout = pipeline.create(dai.node.XLinkOut)
nn = pipeline.createNeuralNetwork()
nn_xout = pipeline.create(dai.node.XLinkOut)

# Colour camera: 4K sensor mode at the requested frame rate, plus a
# 512x512 non-interleaved RGB preview that is linked to the network below.
camRgb.setBoardSocket(dai.CameraBoardSocket.RGB)
camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_4_K)
camRgb.setFps(fps)
camRgb.setPreviewSize(512, 512)
camRgb.setInterleaved(False)
camRgb.setColorOrder(dai.ColorCameraProperties.ColorOrder.RGB)
# Fixed focus. Focus settings: 0..255 [far..near]. 120-130 = infinity
camRgb.initialControl.setManualFocus(130)

# Encoder branch: camera video -> H.265 (main profile) -> "h265" stream
videoEnc.setDefaultProfilePreset(fps, dai.VideoEncoderProperties.Profile.H265_MAIN)
camRgb.video.link(videoEnc.input)
xout.setStreamName('h265')
videoEnc.bitstream.link(xout.input)

# Inference branch: camera preview -> network blob -> "nn_out" stream
nn.setBlobPath(args.model_file)
camRgb.preview.link(nn.input)
nn_xout.setStreamName("nn_out")
nn.out.link(nn_xout.input)
# Column order of the per-frame CSV log. The names are the keys of the row
# dict that write_csv_entry() hands to the DictWriter.
CSV_FIELD_NAMES = [
    'total_time_elapsed', 'timestamp',
    'total_frame_count', 'device_sequence_number',
    'nn_score_0', 'nn_score_1', 'nn_pred',
]
def write_csv_entry(
    timestamp_writer: DictWriter,
    now: datetime,
    start_time: datetime,
    frame_count_local: int,
    device_sequence_number: int,
    nn_score_0: float,
    nn_score_1: float,
    nn_pred: int
):
    """Write one per-frame row to the timestamp CSV.

    Args:
        timestamp_writer: DictWriter the row is written to.
        now: Host time at which the frame was handled.
        start_time: Host time at which recording started.
        frame_count_local: Host-side count of frames handled so far.
        device_sequence_number: Sequence number reported by the device.
        nn_score_0: Network output score at index 0.
        nn_score_1: Network output score at index 1.
        nn_pred: Predicted class index.
    """
    row = {
        # Elapsed time is written as the timedelta itself (now - start_time).
        'total_time_elapsed': now - start_time,
        'timestamp': now.isoformat(),
        'total_frame_count': frame_count_local,
        'device_sequence_number': device_sequence_number,
        'nn_score_0': nn_score_0,
        'nn_score_1': nn_score_1,
        'nn_pred': nn_pred,
    }
    timestamp_writer.writerow(row)
# Connect to device and start pipeline, then record until the requested
# duration has elapsed or the user presses Ctrl+C.
with dai.Device(pipeline) as device:
    # Host-side queues buffer roughly 10 s of packets and block instead of
    # discarding when full. With a short non-blocking queue, a host stall
    # longer than the queue depth loses already-encoded packets: the device
    # sequence number jumps, later H.265 frames lose their reference frames
    # (decoder errors such as "Could not find ref with POC"), and the
    # arrival-order pairing of video packets with NN results shifts.
    queue_size = fps * 10
    q = device.getOutputQueue(name="h265", maxSize=queue_size, blocking=True)
    nn_queue = device.getOutputQueue(name='nn_out', maxSize=queue_size, blocking=True)

    start_time = datetime.now()
    now = start_time

    # The .h265 file is a raw stream file (not playable yet).
    # newline='' is what the csv module requires for files it writes to.
    with open(args.write_video, 'wb') as videoFile, \
            open(args.write_timestamp, 'w', newline='') as timestamp_file:
        timestamp_writer: DictWriter = DictWriter(timestamp_file, fieldnames=CSV_FIELD_NAMES)
        timestamp_writer.writeheader()
        print("Press Ctrl+C to stop encoding...")
        try:
            frame_count = 0
            while (now - start_time).total_seconds() < args.recording_time:
                # Both calls wait until a packet is available. Packets are
                # paired purely by arrival order, so neither queue may drop.
                h265Packet = q.get()
                nn_packet = nn_queue.get()

                # Append the encoded packet to the raw stream file.
                h265Packet.getData().tofile(videoFile)

                # Predicted class is 0 only when score 0 is strictly larger.
                preds = nn_packet.getLayerFp16('probs')
                nn_pred = 0 if preds[0] > preds[1] else 1

                now = datetime.now()
                write_csv_entry(
                    timestamp_writer=timestamp_writer,
                    start_time=start_time,
                    now=now,
                    device_sequence_number=h265Packet.getSequenceNum(),
                    frame_count_local=frame_count,
                    nn_score_0=preds[0],
                    nn_score_1=preds[1],
                    nn_pred=nn_pred
                )
                frame_count += 1
        except KeyboardInterrupt:
            # Ctrl+C is the documented way to stop early; the enclosing
            # `with` blocks close both files and the device.
            pass
Here is a snippet from the timestamp file generated by the above code that helped me diagnose the issue. You can see where the "time elapsed" (total time from recording start) changes from 23:12.7 to 23:14.9, however the sequence number remains continuous:
|total_time_elapsed|timestamp |total_frame_count|device_sequence_number|nn_score_0 |nn_score_1 |nn_pred|
|------------------|-----------------------|-----------------|----------------------|------------|------------|-------|
|0:23:12.504644 |2023-08-11T09:13:17.197|34909 |34934 |1.849609375 |-2.33984375 |0 |
|0:23:12.546059 |2023-08-11T09:13:17.238|34910 |34935 |1.8486328125|-2.326171875|0 |
|0:23:12.585118 |2023-08-11T09:13:17.277|34911 |34936 |1.802734375 |-2.259765625|0 |
|0:23:12.626668 |2023-08-11T09:13:17.319|34912 |34937 |1.880859375 |-2.390625 |0 |
|0:23:12.666367 |2023-08-11T09:13:17.358|34913 |34938 |1.74609375 |-2.23046875 |0 |
|0:23:12.705958 |2023-08-11T09:13:17.398|34914 |34939 |1.931640625 |-2.390625 |0 |
|0:23:12.745240 |2023-08-11T09:13:17.437|34915 |34940 |1.8935546875|-2.384765625|0 |
|0:23:14.968688 |2023-08-11T09:13:19.661|34916 |34941 |1.7548828125|-2.240234375|0 |
|0:23:14.970050 |2023-08-11T09:13:19.662|34917 |34971 |1.8447265625|-2.330078125|0 |
|0:23:14.971231 |2023-08-11T09:13:19.663|34918 |34972 |1.8759765625|-2.36328125 |0 |
|0:23:14.972430 |2023-08-11T09:13:19.664|34919 |34973 |1.9599609375|-2.46484375 |0 |
|0:23:14.973611 |2023-08-11T09:13:19.665|34920 |34974 |1.85546875 |-2.353515625|0 |
|0:23:14.974788 |2023-08-11T09:13:19.667|34921 |34975 |1.8740234375|-2.353515625|0 |
|0:23:14.975948 |2023-08-11T09:13:19.668|34922 |34976 |1.876953125 |-2.361328125|0 |
|0:23:14.977793 |2023-08-11T09:13:19.670|34923 |34977 |1.8369140625|-2.314453125|0 |
|0:23:14.979458 |2023-08-11T09:13:19.671|34924 |34978 |1.857421875 |-2.326171875|0 |
|0:23:14.981418 |2023-08-11T09:13:19.673|34925 |34979 |1.85546875 |-2.333984375|0 |