Hello,
I am trying to write a script that can output, and save to file, the points from the human-pose-estimation-0001 CNN, along with the confidence values.
However, I am struggling to work out the data structure returned by the CNN and how to extract the required data points.
Can anyone give me some help to achieve this, or point me in the right direction?
Many thanks.
Here is the code I have been testing so far.
from depthai_sdk import OakCamera, RecordType
import cv2
import numpy as np
import depthai as dai
from depthai import NNData
from depthai_sdk.classes import Detections
from depthai_sdk.classes import TwoStagePacket
from depthai_sdk.visualize.configs import TextPosition
# This is the method I have been testing to get the data points.
def decode(nn_data: NNData):
    """Extract pose keypoints and confidences from human-pose-estimation-0001.

    The network exposes two output layers (OpenPose-style):
      * 'Mconv7_stage2_L1' -- Part Affinity Fields, shape (1, 38, 32, 57)
      * 'Mconv7_stage2_L2' -- keypoint heatmaps,    shape (1, 19, 32, 57)
        (18 body-part channels + 1 background channel)

    ``getFirstLayerFp16()`` returns whichever layer is listed first (the
    PAF layer here), so the heatmap layer is requested by name instead.
    Each body-part channel is a 32x57 heatmap: the location of its maximum
    is the keypoint position and the value there is its confidence.

    Returns a ``Detections`` wrapper around ``nn_data`` so the SDK
    visualizer pipeline keeps working unchanged.
    """
    # Heatmaps, reshaped to (channel, height, width); drop the batch dim of 1.
    heatmaps = np.array(nn_data.getLayerFp16('Mconv7_stage2_L2')).reshape((19, 32, 57))

    dets = Detections(nn_data)

    for part in range(18):  # channel 18 is the background map -- skip it
        hm = heatmaps[part]
        # argmax over the flattened map, converted back to (row, col).
        y, x = np.unravel_index(int(hm.argmax()), hm.shape)
        conf = float(hm[y, x])
        if conf > 0.5:  # NOTE(review): confidence threshold -- tune as needed
            # Normalise coordinates to [0, 1] so they are resolution-independent.
            print(part, x / hm.shape[1], y / hm.shape[0], conf)
    return dets
with OakCamera() as oak:
    # Cameras: one colour stream plus a stereo pair, all H.265-encoded at 10 FPS.
    color = oak.create_camera('color', resolution='1080P', fps=10, encode='H265')
    left = oak.create_camera('left', resolution='800p', fps=10, encode='H265')
    right = oak.create_camera('right', resolution='800p', fps=10, encode='H265')
    stereo = oak.create_stereo(name="Stero", left=left, right=right, fps=10, encode='H265')

    # Pose-estimation network fed by the colour camera; decode() parses its output.
    nn = oak.create_nn('human-pose-estimation-0001', color, decode_fn=decode)

    # Record every (encoded) stream to ./data/, time-synchronised within 50 ms.
    streams = [
        color.out.encoded,
        left.out.encoded,
        right.out.encoded,
        stereo.out.depth,
        stereo.out.disparity,
    ]
    recorder = oak.record(streams, './data/', RecordType.VIDEO)
    recorder.configure_syncing(enable_sync=True, threshold_ms=50)

    # Display the annotated colour stream and run until interrupted.
    oak.visualize([nn], scale=2/3, fps=True)
    oak.start(blocking=True)