Deep Sort with Spatial Mono
As mentioned, you would also need to pass the grayscale frames to the DeepSort embedder. Note, however, that those models are usually trained on color images, so performance might be slightly worse.
I think the error stems from passing a grayscale image to a neural network that expects a color image (the single-channel input is too small for the expected three-channel tensor). This means the grayscale image would likely have to be turned into a three-channel grayscale RGB frame by ImageManip. I'm unsure how that can be done in the SDK itself (cc @jakaskerl on this).
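For reference, here is how the conversion could look in the raw depthai API; a minimal sketch, assuming an ImageManip node placed between the mono output and the network input, with setFrameType replicating the single GRAY8 plane into three BGR planes:

import depthai as dai

pipeline = dai.Pipeline()

# Mono (grayscale) camera as the source
mono = pipeline.create(dai.node.MonoCamera)
mono.setBoardSocket(dai.CameraBoardSocket.LEFT)

# ImageManip converts the GRAY8 frame into a 3-channel planar BGR frame
# that a color-trained network can consume
manip = pipeline.create(dai.node.ImageManip)
manip.initialConfig.setFrameType(dai.RawImgFrame.Type.BGR888p)
mono.out.link(manip.inputImage)
# manip.out would then be linked to the NN's input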
Hi jsiic
with OakCamera() as oak:
    left = oak.create_camera("left")
    right = oak.create_camera("right")
    stereo = oak.create_stereo(left=left, right=right, fps=15)

    yolo = oak.create_nn('yolov6nr3_coco_640x352', left, spatial=stereo)

    embedder = oak.create_nn('mobilenetv2_imagenet_embedder_224x224', input=yolo)
    # Convert the grayscale crops to 3-channel BGR for the color-trained embedder
    embedder.image_manip.setFrameType(dai.RawImgFrame.Type.BGR888p)
    embedder.config_spatial(
        bb_scale_factor=0.5,    # Scale bounding box before averaging the depth in that ROI
        lower_threshold=300,    # Discard depth points below 30cm
        upper_threshold=10000,  # Discard depth points above 10m
        # Average depth points before calculating X and Y spatial coordinates:
        calc_algo=dai.SpatialLocationCalculatorAlgorithm.AVERAGE
    )

    oak.visualize(embedder, fps=True)
    # oak.show_graph()
    oak.start(blocking=True)
Thanks,
Jaka
So it seems to be working!
But now I am having trouble wrapping my head around how to print out the data. I would like it to be in a format something like:
Tracking ID: X-depth value, Y-depth value, Z-depth value
I'm not sure how to do this in the callback function. The original code in the SDK example is the following:
def cb(packet: TwoStagePacket):
    detections = packet.img_detections.detections
    vis = packet.visualizer

    # Update the tracker
    object_tracks = tracker.iter(detections, packet.nnData, (640, 640))

    for track in object_tracks:
        if not track.is_confirmed() or \
                track.time_since_update > 1 or \
                track.detection_id >= len(detections) or \
                track.detection_id < 0:
            continue

        det = packet.detections[track.detection_id]
        vis.add_text(f'ID: {track.track_id}', bbox=(*det.top_left, *det.bottom_right), position=TextPosition.MID)

    frame = vis.draw(packet.frame)
    cv2.imshow('DeepSort tracker', frame)
I was thinking that I should be able to print the spatial data by having something like this in the for loop:
print(packet.spatials.detections[track.detection_id].x)
but I am getting the error:
File "C:...\OakD\depthai-experiments\gen2-deepsort-tracking\jae_deepsort_spatial_mono.py", line 26, in cb
print(packet.spatials.detections[track.detection_id].x)
^^^^^^^^^^^^^^^
AttributeError: 'TwoStagePacket' object has no attribute 'spatials'
I am sort of at a loss here again....
Hi jsiic
def cb(packet: TwoStagePacket):
    detections = packet.img_detections.detections
    vis = packet.visualizer

    # Update the tracker
    object_tracks = tracker.iter(detections, packet.nnData, (640, 640))

    for track in object_tracks:
        if not track.is_confirmed() or \
                track.time_since_update > 1 or \
                track.detection_id >= len(detections) or \
                track.detection_id < 0:
            continue

        det = packet.detections[track.detection_id]
        # The spatial coordinates live on the raw detection objects in
        # packet.img_detections.detections, not on the packet itself
        spatials = detections[track.detection_id].spatialCoordinates
        print(f'ID: {track.track_id}, Class: {det.label}, BBox: {det.top_left}, {det.bottom_right}, Spatials: {spatials.x}, {spatials.y}, {spatials.z}')
        vis.add_text(f'ID: {track.track_id}', bbox=(*det.top_left, *det.bottom_right), position=TextPosition.MID)

    frame = vis.draw(packet.frame)
    cv2.imshow('DeepSort tracker', frame)
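A quick note on why this works: since the detection network was created with spatial=stereo, packet.img_detections is a dai.SpatialImgDetections message, so each raw detection in packet.img_detections.detections carries a spatialCoordinates field (a dai.Point3f with .x/.y/.z in millimeters). packet.detections, by contrast, holds the SDK's normalized wrappers used for drawing, and the packet itself has no spatials attribute, hence the earlier AttributeError:

spatials = detections[track.detection_id].spatialCoordinates  # dai.Point3f, values in mm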
Thanks,
Jaka
jakaskerl Hi Jaka, I am getting errors with labels and spatials:
File "C:....\depthai-experiments\gen2-deepsort-tracking\jae_deepsort_spatial_mono.py", line 24, in cb
print(f'ID: {track.track_id}, Class: {det.label}, BBox: {det.top_left}, {det.bottom_right}, Spatials: {spatials.x}, {spatials.y}, {spatials.z}')
^^^^^^^^^
AttributeError: 'Detection' object has no attribute 'label'
Sentry is attempting to send 2 pending error messages
I tried running without the label and got the following error for spatials:
File "C:\...OakD\depthai-experiments\gen2-deepsort-tracking\jae_deepsort_spatial_mono.py", line 24, in cb
print(f'ID: {track.track_id}, BBox: {det.top_left}, {det.bottom_right}, Spatials: {spatials.x}, {spatials.y}, {spatials.z}')
^^^^^^^^
NameError: name 'spatials' is not defined
Sentry is attempting to send 2 pending error messages
I also tried det.spatials.x and detections.spatials.x, but neither works...
jakaskerl Actually, the spatials x, y, z are working; Class: {det.label} is not. Here is the full code!
import cv2
from depthai_sdk import OakCamera
import depthai as dai
from depthai_sdk.classes.packets import TwoStagePacket
from depthai_sdk.visualize.configs import TextPosition
from deep_sort_realtime.deepsort_tracker import DeepSort

# Embeddings come from the on-device embedder NN, so no host-side embedder is needed
tracker = DeepSort(max_age=1000, nn_budget=None, embedder=None, nms_max_overlap=1.0, max_cosine_distance=0.2)


def cb(packet: TwoStagePacket):
    detections = packet.img_detections.detections
    vis = packet.visualizer

    # Update the tracker
    object_tracks = tracker.iter(detections, packet.nnData, (640, 640))

    for track in object_tracks:
        if not track.is_confirmed() or \
                track.time_since_update > 1 or \
                track.detection_id >= len(detections) or \
                track.detection_id < 0:
            continue

        det = packet.detections[track.detection_id]
        spatials = detections[track.detection_id].spatialCoordinates
        print(f'ID: {track.track_id}, Class: {det.label}, '
              f'BBox: {det.top_left}, {det.bottom_right}, '
              f'Spatials: {spatials.x}, {spatials.y}, {spatials.z}')
        vis.add_text(f'ID: {track.track_id}', bbox=(*det.top_left, *det.bottom_right), position=TextPosition.MID)

    frame = vis.draw(packet.frame)
    cv2.imshow('DeepSort tracker', frame)


with OakCamera() as oak:
    left = oak.create_camera("left")
    right = oak.create_camera("right")
    stereo = oak.create_stereo(left=left, right=right, fps=15)

    yolo = oak.create_nn('yolov6nr3_coco_640x352', left, spatial=stereo)

    embedder = oak.create_nn('mobilenetv2_imagenet_embedder_224x224', input=yolo)
    # Convert the grayscale crops to 3-channel BGR for the color-trained embedder
    embedder.image_manip.setFrameType(dai.RawImgFrame.Type.BGR888p)
    embedder.config_spatial(
        bb_scale_factor=0.5,    # Scale bounding box before averaging the depth in that ROI
        lower_threshold=300,    # Discard depth points below 30cm
        upper_threshold=10000,  # Discard depth points above 10m
        # Average depth points before calculating X and Y spatial coordinates:
        calc_algo=dai.SpatialLocationCalculatorAlgorithm.AVERAGE
    )

    oak.visualize(embedder, fps=True, callback=cb)
    # oak.show_graph()
    oak.start(blocking=True)
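As for the remaining Class: {det.label} error: the wrappers in packet.detections don't expose a label attribute. Two alternatives that should work in the print above; the label_str name is an assumption worth verifying against your depthai_sdk version:

# Option 1: raw integer class id from the underlying dai.ImgDetection
label_id = detections[track.detection_id].label

# Option 2 (assumption: the SDK's detection wrapper stores the decoded
# class name as `label_str`; verify against your depthai_sdk version)
label_name = det.label_str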