Hello,
I am trying to write a script that can output, and save to file, the points from the human-pose-estimation-0001 CNN, along with the confidence values.
However, I am struggling to work out the data structure returned by the CNN and how to extract the required data points.
Can anyone give me some help to achieve this, or point me in the right direction?
Many thanks.
Here is the code I have been testing so far.
from depthai_sdk import OakCamera, RecordType
import cv2
import numpy as np
import depthai as dai
from depthai import NNData
from depthai_sdk.classes import Detections
from depthai_sdk.classes import TwoStagePacket
from depthai_sdk.visualize.configs import TextPosition
# This is the method I have been testing to get the data points.
def decode(nn_data: NNData):
    """Extract pose keypoints and confidences from human-pose-estimation-0001.

    The network exposes two output layers (OpenPose-style):
      * 'Mconv7_stage2_L1' -- Part Affinity Fields, shape (1, 38, 32, 57)
      * 'Mconv7_stage2_L2' -- keypoint heatmaps,    shape (1, 19, 32, 57)
        (18 body-part channels + 1 background channel)

    ``getFirstLayerFp16()`` returns whichever layer is listed first (the
    PAF layer here), so the heatmap layer is requested by name instead.
    Each body-part channel is a 32x57 heatmap: the location of its maximum
    is the keypoint position and the value there is its confidence.

    Returns a ``Detections`` wrapper around ``nn_data`` so the SDK
    visualizer pipeline keeps working unchanged.
    """
    # Heatmaps, reshaped to (channel, height, width); drop the batch dim of 1.
    heatmaps = np.array(nn_data.getLayerFp16('Mconv7_stage2_L2')).reshape((19, 32, 57))

    dets = Detections(nn_data)

    for part in range(18):  # channel 18 is the background map -- skip it
        hm = heatmaps[part]
        # argmax over the flattened map, converted back to (row, col).
        y, x = np.unravel_index(int(hm.argmax()), hm.shape)
        conf = float(hm[y, x])
        if conf > 0.5:  # NOTE(review): confidence threshold -- tune as needed
            # Normalise coordinates to [0, 1] so they are resolution-independent.
            print(part, x / hm.shape[1], y / hm.shape[0], conf)
    return dets
with OakCamera() as oak:
    # Cameras: one colour stream plus a stereo pair, all H.265-encoded at 10 FPS.
    color = oak.create_camera('color', resolution='1080P', fps=10, encode='H265')
    left = oak.create_camera('left', resolution='800p', fps=10, encode='H265')
    right = oak.create_camera('right', resolution='800p', fps=10, encode='H265')
    stereo = oak.create_stereo(name="Stero", left=left, right=right, fps=10, encode='H265')

    # Pose-estimation network fed by the colour camera; decode() parses its output.
    nn = oak.create_nn('human-pose-estimation-0001', color, decode_fn=decode)

    # Record every (encoded) stream to ./data/, time-synchronised within 50 ms.
    streams = [
        color.out.encoded,
        left.out.encoded,
        right.out.encoded,
        stereo.out.depth,
        stereo.out.disparity,
    ]
    recorder = oak.record(streams, './data/', RecordType.VIDEO)
    recorder.configure_syncing(enable_sync=True, threshold_ms=50)

    # Display the annotated colour stream and run until interrupted.
    oak.visualize([nn], scale=2/3, fps=True)
    oak.start(blocking=True)