I'm trying to get something to a point where some middle school students can experiment with it, and I'd be able to hand it off except the data isn't making sense. Plus, I'd need to correlate the detection points with their interpretation, e.g., elbow, shoulder, etc.
The thing that has me 100% stuck right now is getting near-zero values for xmax and ymax. If I can fix that, at least I can show it to them tomorrow.
I'll post my Python code, a portion of my shell output, and a screen shot. If there's anything else I can provide, or provide differently, please let me know.
Thanks!
# The starting point for this was:
# https://github.com/luxonis/depthai-tutorials/blob/master/1-hello-world/hello_world.py
# Description is at https://docs.luxonis.com/projects/api/en/latest/tutorials/hello_world
# first, import all necessary modules
from pathlib import Path
import blobconverter
import cv2
import depthai
import numpy as np
import inspect
# Pipeline tells DepthAI what operations to perform when running - you define all of the resources used and flows here
pipeline = depthai.Pipeline()
print("pipeline =", pipeline)
# First, we want the Color camera as the output
cam_rgb = pipeline.createColorCamera()
cam_rgb.setPreviewSize(456, 256) # the preview frame size, available as 'preview' output of the node # 0001
#cam_rgb.setPreviewSize(448, 448) # the preview frame size, available as 'preview' output of the node # 0007
cam_rgb.setInterleaved(False)
#cam_rgb.setFps(1)
# Next, we want a neural network that will produce the detections
detection_nn = pipeline.createMobileNetDetectionNetwork()
# Blob is the Neural Network file, compiled for MyriadX. It contains both the definition and weights of the model
# We're using a blobconverter tool to retreive the MobileNetSSD blob automatically from OpenVINO Model Zoo
detection_nn.setBlobPath(blobconverter.from_zoo(name='human-pose-estimation-0001', shaves=6))
# Next, we filter out the detections that are below a confidence threshold. Confidence can be anywhere between <0..1>
detection_nn.setConfidenceThreshold(0.5)
# Next, we link the camera 'preview' output to the neural network detection input, so that it can produce detections
cam_rgb.preview.link(detection_nn.input)
# XLinkOut is a "way out" from the device. Any data you want to transfer to host need to be send via XLink
xout_rgb = pipeline.createXLinkOut()
# For the rgb camera output, we want the XLink stream to be named "rgb"
xout_rgb.setStreamName("rgb")
# Linking camera preview to XLink input, so that the frames will be sent to host
cam_rgb.preview.link(xout_rgb.input)
# The same XLinkOut mechanism will be used to receive nn results
xout_nn = pipeline.createXLinkOut()
xout_nn.setStreamName("nn")
detection_nn.out.link(xout_nn.input)
# Pipeline is now finished, and we need to find an available device to run our pipeline
# we are using context manager here that will dispose the device after we stop using it
with depthai.Device(pipeline) as device:
# From this point, the Device will be in "running" mode and will start sending data via XLink
# To consume the device results, we get two output queues from the device, with stream names we assigned earlier
q_rgb = device.getOutputQueue("rgb")
q_nn = device.getOutputQueue("nn")
# Here, some of the default values are defined. Frame will be an image from "rgb" stream, detections will contain nn results
frame = None
detections = []
# Since the detections returned by nn have values from <0..1> range, they need to be multiplied by frame width/height to
# receive the actual position of the bounding box on the image
def frameNorm(frame, bbox):
normVals = np.full(len(bbox), frame.shape[0])
normVals[::2] = frame.shape[1]
return (np.clip(np.array(bbox), 0, 1) * normVals).astype(int)
# Main host-side application loop
while True:
# we try to fetch the data from nn/rgb queues. tryGet will return either the data packet or None if there isn't any
in_rgb = q_rgb.tryGet()
in_nn = q_nn.tryGet()
if in_rgb is not None:
# If the packet from RGB camera is present, we're retrieving the frame in OpenCV format using getCvFrame
frame = in_rgb.getCvFrame()
if frame is not None and in_nn is not None:
# when data from nn is received, we take the detections array that contains mobilenet-ssd results
detections = in_nn.detections
if len(detections) > 0:
#print("detections=", detections)
for detection in detections:
label = detection.label
confidence = detection.confidence
xmin = detection.xmin
ymin = detection.ymin
xmax = detection.xmax
ymax = detection.ymax
#print(detection)
print("label =", label, "; confidence =", confidence, "; xmin =", xmin, "; ymin =", ymin, "; xmax =", xmax, "; ymax =", ymax)
bbox = frameNorm(frame, (detection.xmin, detection.ymin, detection.xmax, detection.ymax))
cv2.line(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (255, 0, 0), 2)
# After the drawing is finished, we show the frame on the screen
cv2.imshow("preview", frame)
# at any time, you can press "q" and exit the main loop, therefore exiting the program itself
if cv2.waitKey(1) == ord('q'):
break
label = 0 ; confidence = 0.74169921875 ; xmin = 0.7783203125 ; ymin = 0.246826171875 ; xmax = 0.00054931640625 ; ymax = 0.00054931640625
label = 0 ; confidence = 0.60107421875 ; xmin = 0.42529296875 ; ymin = 0.105712890625 ; xmax = 0.03570556640625 ; ymax = 0.0181884765625
label = 0 ; confidence = 0.6875 ; xmin = 0.4169921875 ; ymin = 0.0377197265625 ; xmax = 0.00335693359375 ; ymax = 0.0025634765625
label = 0 ; confidence = 0.84716796875 ; xmin = 0.046630859375 ; ymin = 0.0 ; xmax = 6.103515625e-05 ; ymax = 0.0006103515625
label = 0 ; confidence = 0.837890625 ; xmin = 0.92333984375 ; ymin = 0.05877685546875 ; xmax = 6.103515625e-05 ; ymax = 6.103515625e-05
label = 0 ; confidence = 0.52783203125 ; xmin = 0.10552978515625 ; ymin = 0.00970458984375 ; xmax = 0.01348876953125 ; ymax = 0.00103759765625
label = 0 ; confidence = 0.9228515625 ; xmin = 0.92919921875 ; ymin = 0.91064453125 ; xmax = 0.8486328125 ; ymax = 0.6025390625
label = 0 ; confidence = 0.64892578125 ; xmin = 0.6376953125 ; ymin = 0.1900634765625 ; xmax = 0.01153564453125 ; ymax = 0.000244140625
label = 0 ; confidence = 0.91943359375 ; xmin = 0.6884765625 ; ymin = 0.02935791015625 ; xmax = 0.000244140625 ; ymax = 0.000244140625
label = 0 ; confidence = 0.58447265625 ; xmin = -6.103515625e-05 ; ymin = -6.103515625e-05 ; xmax = -6.103515625e-05 ; ymax = -6.103515625e-05
label = 0 ; confidence = 0.97265625 ; xmin = 0.57958984375 ; ymin = -6.103515625e-05 ; xmax = -6.103515625e-05 ; ymax = -6.103515625e-05
label = 0 ; confidence = 0.64794921875 ; xmin = 0.41455078125 ; ymin = 0.00909423828125 ; xmax = 0.00018310546875 ; ymax = 6.103515625e-05
label = 0 ; confidence = 0.52392578125 ; xmin = 0.390380859375 ; ymin = 0.00244140625 ; xmax = 0.0081787109375 ; ymax = -0.00048828125
label = 0 ; confidence = 0.8427734375 ; xmin = 0.06243896484375 ; ymin = 0.00213623046875 ; xmax = 0.003173828125 ; ymax = 0.0030517578125
label = 0 ; confidence = 0.93603515625 ; xmin = 0.77978515625 ; ymin = 0.02655029296875 ; xmax = -0.00146484375 ; ymax = 0.00042724609375
label = 0 ; confidence = 0.56884765625 ; xmin = 0.07354736328125 ; ymin = 0.00042724609375 ; xmax = -0.01214599609375 ; ymax = -0.01409912109375
label = 0 ; confidence = 0.8115234375 ; xmin = 0.464111328125 ; ymin = 0.0447998046875 ; xmax = 0.00042724609375 ; ymax = -0.00469970703125
label = 0 ; confidence = 0.619140625 ; xmin = 0.76171875 ; ymin = 0.308349609375 ; xmax = 0.01397705078125 ; ymax = 0.00042724609375
label = 0 ; confidence = 0.50341796875 ; xmin = 0.703125 ; ymin = 0.154296875 ; xmax = 0.0057373046875 ; ymax = 0.00653076171875
label = 0 ; confidence = 0.91357421875 ; xmin = 0.369873046875 ; ymin = 0.00054931640625 ; xmax = 0.00054931640625 ; ymax = 0.0015869140625
label = 0 ; confidence = 0.64599609375 ; xmin = 0.06475830078125 ; ymin = 0.00054931640625 ; xmax = 0.00054931640625 ; ymax = 0.0006103515625
label = 0 ; confidence = 0.79052734375 ; xmin = 0.767578125 ; ymin = 0.193603515625 ; xmax = 0.00054931640625 ; ymax = 0.00054931640625
label = 0 ; confidence = 0.6181640625 ; xmin = 0.424560546875 ; ymin = 0.09307861328125 ; xmax = 0.0433349609375 ; ymax = 0.0252685546875
label = 0 ; confidence = 0.681640625 ; xmin = 0.384521484375 ; ymin = 0.025390625 ; xmax = 0.00335693359375 ; ymax = 0.00225830078125
label = 0 ; confidence = 0.8330078125 ; xmin = 0.042236328125 ; ymin = -6.103515625e-05 ; xmax = 6.103515625e-05 ; ymax = 0.000732421875
label = 0 ; confidence = 0.8486328125 ; xmin = 0.9111328125 ; ymin = 0.0548095703125 ; xmax = 6.103515625e-05 ; ymax = 6.103515625e-05
label = 0 ; confidence = 0.5390625 ; xmin = 0.08209228515625 ; ymin = 0.00091552734375 ; xmax = 0.010986328125 ; ymax = 0.00189208984375
label = 0 ; confidence = 0.9208984375 ; xmin = 0.931640625 ; ymin = 0.9267578125 ; xmax = 0.85888671875 ; ymax = 0.603515625
label = 0 ; confidence = 0.63818359375 ; xmin = 0.611328125 ; ymin = 0.1787109375 ; xmax = 0.012451171875 ; ymax = 0.000244140625
label = 0 ; confidence = 0.91064453125 ; xmin = 0.6923828125 ; ymin = 0.03057861328125 ; xmax = 0.000244140625 ; ymax = 0.000244140625
