Hello! We are having issues with ImageManip (linked to an OpenCV stream) not sending data to the provided example spatial detection network. The spatial detection network does not seem to accept the node as input. Normally the RGB camera would send a preview, but we resorted to setting ImageManip's resize to 300x300 and sending that output to the spatial detection network. Unfortunately, the program seems to hang on retrieving the NN data from the OAK.
MRE attached.
PS: there is no custom stereo code in this file; we are just trying to get the RGB image from our OpenCV input (the left half of the camera's frame is the RGB). Also, xinRgb was renamed to xinFrame.
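For clarity, this is the topology we are attempting (names match the MRE below):

OpenCV VideoCapture (left half of the side-by-side frame) -> XLinkIn "inFrame" -> ImageManip (resize to 300x300) -> MobileNetSpatialDetectionNetwork -> ObjectTracker -> XLinkOut "tracklets" (and "preview" via the tracker passthrough)
MonoCamera left/right -> StereoDepth -> spatialDetectionNetwork.inputDepth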
#!/usr/bin/env python3
from pathlib import Path
import cv2
import depthai as dai
import numpy as np
import time
import argparse
labelMap = ["background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow",
            "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
nnPathDefault = str((Path(__file__).parent / Path('../models/mobilenet-ssd_openvino_2021.4_5shave.blob')).resolve().absolute())
parser = argparse.ArgumentParser()
parser.add_argument('nnPath', nargs='?', help="Path to mobilenet detection network blob", default=nnPathDefault)
parser.add_argument('-ff', '--full_frame', action="store_true", help="Perform tracking on full RGB frame", default=False)
args = parser.parse_args()
fullFrameTracking = args.full_frame
# Create pipeline
pipeline = dai.Pipeline()
# Define sources and outputs
camRgb = pipeline.create(dai.node.ImageManip)  # name kept from the ColorCamera example; this is now an ImageManip fed from the host
spatialDetectionNetwork = pipeline.create(dai.node.MobileNetSpatialDetectionNetwork)
monoLeft = pipeline.create(dai.node.MonoCamera)
monoRight = pipeline.create(dai.node.MonoCamera)
stereo = pipeline.create(dai.node.StereoDepth)
objectTracker = pipeline.create(dai.node.ObjectTracker)
xoutFrame = pipeline.create(dai.node.XLinkOut)
xinFrame = pipeline.create(dai.node.XLinkIn)
trackerOut = pipeline.create(dai.node.XLinkOut)
xinFrame.setStreamName("inFrame")
xoutFrame.setStreamName("preview")
trackerOut.setStreamName("tracklets")
# Properties
camRgb.initialConfig.setResize(300, 300)
# camRgb.setPreviewSize(300, 300)
# camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P)
# camRgb.setInterleaved(False)
camRgb.initialConfig.setResizeThumbnail(300, 300)
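# NOTE: both setResize and setResizeThumbnail are applied to the same initialConfig;
# as we understand the API, setResizeThumbnail letterboxes to 300x300 (keeps aspect
# ratio and pads the borders) and effectively takes precedence here.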
monoLeft.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
monoLeft.setBoardSocket(dai.CameraBoardSocket.LEFT)
monoRight.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
monoRight.setBoardSocket(dai.CameraBoardSocket.RIGHT)
# setting node configs
stereo.setDefaultProfilePreset(dai.node.StereoDepth.PresetMode.HIGH_DENSITY)
# stereo.setEmptyCalibration()
# Align depth map to the perspective of RGB camera, on which inference is done
# stereo.setDepthAlign(dai.CameraBoardSocket.RGB)
# stereo.setLeftRightCheck(False)
stereo.setOutputSize(monoLeft.getResolutionWidth(), monoLeft.getResolutionHeight())
spatialDetectionNetwork.setBlobPath(args.nnPath)
spatialDetectionNetwork.setConfidenceThreshold(0.5)
spatialDetectionNetwork.input.setBlocking(False)
spatialDetectionNetwork.setBoundingBoxScaleFactor(0.5)
spatialDetectionNetwork.setDepthLowerThreshold(100)
spatialDetectionNetwork.setDepthUpperThreshold(5000)
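# Depth thresholds are in millimetres: depth pixels closer than 10 cm or farther
# than 5 m are ignored when computing spatial coordinates for a detection.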
objectTracker.setDetectionLabelsToTrack([15]) # track only person
# possible tracking types: ZERO_TERM_COLOR_HISTOGRAM, ZERO_TERM_IMAGELESS, SHORT_TERM_IMAGELESS, SHORT_TERM_KCF
objectTracker.setTrackerType(dai.TrackerType.ZERO_TERM_COLOR_HISTOGRAM)
# take the smallest ID when new object is tracked, possible options: SMALLEST_ID, UNIQUE_ID
objectTracker.setTrackerIdAssignmentPolicy(dai.TrackerIdAssignmentPolicy.SMALLEST_ID)
# Linking
monoLeft.out.link(stereo.left)
monoRight.out.link(stereo.right)
xinFrame.out.link(camRgb.inputImage)
camRgb.out.link(spatialDetectionNetwork.input)
# camRgb.out.link(xoutFrame.input)
# camRgb.out.link(objectTracker.inputTrackerFrame)
objectTracker.passthroughTrackerFrame.link(xoutFrame.input)  # show the frame the tracker actually ran on
objectTracker.out.link(trackerOut.input)
# Choose which frames the tracker consumes
if fullFrameTracking:
    # NOTE: leftover from the ColorCamera example; camRgb is an ImageManip here,
    # which has no setPreviewKeepAspectRatio or .video output, so this branch would fail.
    camRgb.setPreviewKeepAspectRatio(False)
    camRgb.video.link(objectTracker.inputTrackerFrame)
    objectTracker.inputTrackerFrame.setBlocking(False)
    # do not block the pipeline if it's too slow on full frame
    objectTracker.inputTrackerFrame.setQueueSize(2)
else:
    spatialDetectionNetwork.passthrough.link(objectTracker.inputTrackerFrame)
spatialDetectionNetwork.passthrough.link(objectTracker.inputDetectionFrame)
spatialDetectionNetwork.out.link(objectTracker.inputDetections)
stereo.depth.link(spatialDetectionNetwork.inputDepth)  # link the stereo depth output to the spatial detection network; a custom depth source could be linked to inputDepth instead
def to_planar(arr: np.ndarray, shape: tuple) -> np.ndarray:
    return cv2.resize(arr, shape).transpose(2, 0, 1).flatten()
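# e.g. to_planar(left, (300, 300)) returns a flat uint8 array of 3*300*300 values in
# planar (channel-first) order, matching the BGR888p frame type set on the ImgFrame below.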
# Connect to device and start pipeline
with dai.Device(pipeline) as device:
    device.setLogLevel(dai.LogLevel.WARN)
    device.setLogOutputLevel(dai.LogLevel.WARN)
    # Probe capture indices until we find the 2560x720 side-by-side stereo stream
    for i in range(-1, 10):
        cap = cv2.VideoCapture(i)
        if cap.get(cv2.CAP_PROP_FRAME_WIDTH) == 2560.0 and cap.get(cv2.CAP_PROP_FRAME_HEIGHT) == 720.0:
            # cap.release()
            # cap = cv2.VideoCapture(i)
            # print name of camera
            break
        else:
            cap.release()
    qIn = device.getInputQueue(name="inFrame")
    preview = device.getOutputQueue("preview", 4, False)
    tracklets = device.getOutputQueue("tracklets", 4, False)
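    # maxSize=4, blocking=False on these host-side queues: at most four packets are
    # kept and the oldest are dropped when full, rather than stalling the device.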
    startTime = time.monotonic()
    counter = 0
    fps = 0
    color = (255, 255, 255)
    while True:
        ret, frame_stereo = cap.read()
        if not ret:
            print("Can't receive frame (stream end?). Exiting ...")
            break
        h, w = frame_stereo.shape[:2]
        left = frame_stereo[:, :w//2]  # left half of the side-by-side frame is the RGB view
        # rgb = cv2.cvtColor(left, cv2.COLOR_BGR2RGB)
        # rgb = left
        # cv2.imshow('frame', left)
        # if cv2.waitKey(1) == ord('q'):
        #     break
        rgbImg = dai.ImgFrame()
        rgbImg.setType(dai.ImgFrame.Type.BGR888p)
        rgbImg.setData(to_planar(left, (300, 300)))
        rgbImg.setTimestamp(time.monotonic())
        rgbImg.setWidth(300)
        rgbImg.setHeight(300)
        qIn.send(rgbImg)
print("BRUH")
imgFrame = preview.get()
# continue
print("BRUH2")
track = tracklets.get()
print("BRUH3")
        counter += 1
        current_time = time.monotonic()
        if (current_time - startTime) > 1:
            fps = counter / (current_time - startTime)
            counter = 0
            startTime = current_time
        frame = imgFrame.getCvFrame()
        trackletsData = track.tracklets
        for t in trackletsData:
            roi = t.roi.denormalize(frame.shape[1], frame.shape[0])
            x1 = int(roi.topLeft().x)
            y1 = int(roi.topLeft().y)
            x2 = int(roi.bottomRight().x)
            y2 = int(roi.bottomRight().y)
            try:
                label = labelMap[t.label]
            except IndexError:
                label = t.label
            cv2.putText(frame, str(label), (x1 + 10, y1 + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.putText(frame, f"ID: {t.id}", (x1 + 10, y1 + 35), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.putText(frame, t.status.name, (x1 + 10, y1 + 50), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 1)  # was passing a font constant as the thickness
            cv2.putText(frame, f"X: {int(t.spatialCoordinates.x)} mm", (x1 + 10, y1 + 65), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.putText(frame, f"Y: {int(t.spatialCoordinates.y)} mm", (x1 + 10, y1 + 80), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.putText(frame, f"Z: {int(t.spatialCoordinates.z)} mm", (x1 + 10, y1 + 95), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
        cv2.putText(frame, "NN fps: {:.2f}".format(fps), (2, frame.shape[0] - 4), cv2.FONT_HERSHEY_TRIPLEX, 0.4, color)
        cv2.imshow("tracker", frame)
        if cv2.waitKey(1) == ord('q'):
            break
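In case it helps narrow things down, the fallback we are considering (sketch only, untested) is to drop ImageManip from the chain and feed the host-resized frames straight into the network, since the host loop already produces 300x300 BGR888p planar data:

# Sketch, not part of the MRE above: bypass ImageManip to isolate the hang.
# xinFrame.out.link(spatialDetectionNetwork.input)  # would replace the xinFrame -> camRgb -> NN links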