Hi there.
I'm trying to implement the "RGB & TinyYolo with spatial data" example using yolov8n_coco_640x352.blob (for 16:9), combined with the "Rotated Spatial Detections" example (the sensor is mounted upside down). I'm implementing it in TouchDesigner. So far everything works great: low latency, nice FPS, and the accuracy is also great. The only problem is that my "outBoundingBoxDepthMapping" output remains empty, so I'm not even sure the network is actually using the spatial data. I've tried to find what's wrong with the code, but all my linking seems correct to me. Could someone have a look?
Thanks in advance for your help!
RGB & TinyYolo with spatial data example: https://oak-api.readthedocs.io/en/stable/samples/SpatialDetection/spatial_tiny_yolo/#rgb-tinyyolo-with-spatial-data
Rotated Spatial Detections example: https://docs.luxonis.com/software/depthai/examples/rotated_spatial_detections/
My code:
model_url = 'https://artifacts.luxonis.com/artifactory/luxonis-depthai-data-local/network/yolov8n_coco_640x352.blob'
def createPipeline(oakDeviceOp):
    # Path to blob
    retDict = op.TDResources.FileDownloader.Download(url=model_url, clear=False)
    nnPath = retDict['path']
    rgbfps = 30.0
    # Create pipeline
    pipeline = dai.Pipeline()
    # models
    detectionNetwork = pipeline.create(dai.node.YoloSpatialDetectionNetwork)
    objectTracker = pipeline.create(dai.node.ObjectTracker)
    # sources
    camRgb = pipeline.create(dai.node.ColorCamera)
    monoLeft = pipeline.create(dai.node.MonoCamera)
    monoRight = pipeline.create(dai.node.MonoCamera)
    stereo = pipeline.create(dai.node.StereoDepth)
    # outputs
    outRgb = pipeline.create(dai.node.XLinkOut)
    outRgb.setStreamName("color")
    outTracklets = pipeline.create(dai.node.XLinkOut)
    outTracklets.setStreamName("tracklets")
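    # boundingBoxDepthMapping carries the SpatialLocationCalculatorConfig ROIs the
    # network used on the depth frame (this is the stream that stays empty for me)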
    outBoundingBoxDepthMapping = pipeline.create(dai.node.XLinkOut)
    outBoundingBoxDepthMapping.setStreamName("boundingBoxDepthMapping")
    outDepth = pipeline.create(dai.node.XLinkOut)
    outDepth.setStreamName("depth")
    # CamRGB Properties
    camRgb.setPreviewSize(640, 352)
    camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P)
    camRgb.setInterleaved(False)
    camRgb.setColorOrder(dai.ColorCameraProperties.ColorOrder.BGR)
    camRgb.setFps(rgbfps)
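    # the sensor is mounted upside down, so rotate the RGB stream 180 degrees
    # on-device; the preview fed to the NN is then upright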
    camRgb.setImageOrientation(dai.CameraImageOrientation.ROTATE_180_DEG)
    # Stereo Properties
    monoLeft.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
    monoLeft.setBoardSocket(dai.CameraBoardSocket.CAM_B)
    monoLeft.setCamera("left")
    monoRight.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
    monoRight.setBoardSocket(dai.CameraBoardSocket.CAM_C)
    monoRight.setCamera("right")
    # setting node configs
    stereo.setDefaultProfilePreset(dai.node.StereoDepth.PresetMode.HIGH_DENSITY)
    # Align depth map to the perspective of RGB camera, on which inference is done
    stereo.setDepthAlign(dai.CameraBoardSocket.CAM_A)
    stereo.setSubpixel(True)
    stereo.setOutputSize(monoLeft.getResolutionWidth(), monoLeft.getResolutionHeight())
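    # Since the RGB stream is rotated 180 degrees, the aligned depth has to be
    # rotated the same way before it reaches the spatial network; the Rotated
    # Spatial Detections example does this with an ImageManip applying a
    # vertical + horizontal flip to the RAW16 depth frames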
    rotate_stereo_manip = pipeline.createImageManip()
    rotate_stereo_manip.initialConfig.setVerticalFlip(True)
    rotate_stereo_manip.initialConfig.setHorizontalFlip(True)
    rotate_stereo_manip.setFrameType(dai.ImgFrame.Type.RAW16)
    stereo.depth.link(rotate_stereo_manip.inputImage)
    detectionNetwork.setBlobPath(nnPath)
    #detectionNetwork.setNumInferenceThreads(2)
    detectionNetwork.setConfidenceThreshold(0.6)
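    # non-blocking input with a queue size of 1 drops stale frames, which keeps latency low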
    detectionNetwork.input.setBlocking(False)
    detectionNetwork.input.setQueueSize(1)
    detectionNetwork.setBoundingBoxScaleFactor(0.5)
    detectionNetwork.setDepthLowerThreshold(600) # 60 cm
    detectionNetwork.setDepthUpperThreshold(10000) # 10 m
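    # note: yolov8n_coco is an 80-class COCO model, so I'm not sure 1 is right
    # here; it might need to be 80 for the on-device decoding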
    detectionNetwork.setNumClasses(1)
    detectionNetwork.setCoordinateSize(4)
    #detectionNetwork.setAnchors([10, 14, 23, 27, 37, 58, 81, 82, 135, 169, 344, 319])
    #detectionNetwork.setAnchorMasks({"side26": [1, 2, 3], "side13": [3, 4, 5]})
    detectionNetwork.setIouThreshold(0.5)
    # LINKING
    syncNN = True
    rgbOutput = True
    depthOutput = True
    camRgb.preview.link(detectionNetwork.input)
    monoLeft.out.link(stereo.left)
    monoRight.out.link(stereo.right)
    detectionNetwork.boundingBoxMapping.link(outBoundingBoxDepthMapping.input)
    #stereo.depth.link(detectionNetwork.inputDepth)
    rotate_stereo_manip.out.link(detectionNetwork.inputDepth)
    if depthOutput:
        detectionNetwork.passthroughDepth.link(outDepth.input)
    # outRgb must have exactly one upstream link; linking both the NN passthrough
    # and the tracker passthrough to it (as I first did) is a conflict
    if rgbOutput:
        objectTracker.passthroughTrackerFrame.link(outRgb.input)
    elif syncNN:
        detectionNetwork.passthrough.link(outRgb.input)
    else:
        camRgb.preview.link(outRgb.input)
    # Tracker settings
    label_index = int(op('null_tracked_label_index')[0, 0].val)
    objectTracker.setDetectionLabelsToTrack([label_index])
    # possible tracking types: ZERO_TERM_COLOR_HISTOGRAM, ZERO_TERM_IMAGELESS, SHORT_TERM_IMAGELESS, SHORT_TERM_KCF
    objectTracker.setTrackerType(dai.TrackerType.SHORT_TERM_IMAGELESS)
    # ID assignment policy, possible options: SMALLEST_ID (reuse the smallest free ID), UNIQUE_ID (always assign a new ID)
    objectTracker.setTrackerIdAssignmentPolicy(dai.TrackerIdAssignmentPolicy.UNIQUE_ID)
    # Tracker linking
    detectionNetwork.out.link(objectTracker.inputDetections)
    detectionNetwork.passthrough.link(objectTracker.inputDetectionFrame)
    detectionNetwork.passthrough.link(objectTracker.inputTrackerFrame)
    # the "tracklets" stream is fed by the tracker only; also linking
    # detectionNetwork.out to it would give that XLinkOut two sources
    objectTracker.out.link(outTracklets.input)
    return pipeline
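In case it helps: outside of TouchDesigner I check the stream with a minimal host-side script like the one below. It's just a quick sketch; I'm assuming, based on how the spatial_tiny_yolo sample reads its queue, that boundingBoxDepthMapping only delivers a message on frames that actually contain detections, so I poll it non-blocking:

import depthai as dai

# createPipeline() is the function above, with the TouchDesigner-specific
# op() lookups replaced by constants so it runs in a plain Python script
pipeline = createPipeline(None)

with dai.Device(pipeline) as device:
    trackletsQueue = device.getOutputQueue("tracklets", maxSize=4, blocking=False)
    bbMappingQueue = device.getOutputQueue("boundingBoxDepthMapping", maxSize=4, blocking=False)
    while True:
        tracklets = trackletsQueue.get()     # blocks until the next tracker message
        bbMapping = bbMappingQueue.tryGet()  # non-blocking; None when no detections this frame
        if bbMapping is not None:
            roiDatas = bbMapping.getConfigData()
            print(f"{len(roiDatas)} depth ROI(s) used for spatial averaging")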