I have tried this code to deploy my blob file but it did not work, the camera frame froze. Though, my custom model worked perfectly when I deployed it with the depthai_demo.py code.
import cv2
import depthai as dai
import time
import blobconverter
FRAME_SIZE = (640, 360)
DET_INPUT_SIZE = (320, 320)
blob_path = "/home/trungtran/catkin_ws/src/task_exe/src/best.blob"
pipeline = dai.Pipeline()
# Define a source - color camera
cam = pipeline.createColorCamera()
cam.setPreviewSize(FRAME_SIZE[0], FRAME_SIZE[1])
cam.setInterleaved(False)
cam.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P)
cam.setBoardSocket(dai.CameraBoardSocket.RGB)
mono_left = pipeline.createMonoCamera()
mono_left.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
mono_left.setBoardSocket(dai.CameraBoardSocket.LEFT)
mono_right = pipeline.createMonoCamera()
mono_right.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
mono_right.setBoardSocket(dai.CameraBoardSocket.RIGHT)
stereo = pipeline.createStereoDepth()
mono_left.out.link(stereo.left)
mono_right.out.link(stereo.right)
# Define a neural network model
tomato_spac_det_nn = pipeline.createYoloSpatialDetectionNetwork()
tomato_spac_det_nn.setConfidenceThreshold(0.5)
tomato_spac_det_nn.setBlobPath(blob_path)
tomato_spac_det_nn.setDepthLowerThreshold(100)
tomato_spac_det_nn.setDepthUpperThreshold(5000)
tomato_det_manip = pipeline.createImageManip()
tomato_det_manip.initialConfig.setResize(DET_INPUT_SIZE[0], DET_INPUT_SIZE[1])
tomato_det_manip.setMaxOutputFrameSize(1244160)
tomato_det_manip.setWaitForConfigInput(True)
tomato_det_manip.initialConfig.setKeepAspectRatio(False)
cam.preview.link(face_det_manip.inputImage)
tomato_det_manip.out.link(tomato_spac_det_nn.input)
stereo.depth.link(tomato_spac_det_nn.inputDepth)
# Define the link between the color camera and the neural network
x_preview_out = pipeline.createXLinkOut()
x_preview_out.setStreamName("preview")
cam.preview.link(x_preview_out.input)
# rgb.preview.link(rgb_out.input)
# Create an output for the object detection results
det_out = pipeline.createXLinkOut()
det_out.setStreamName('det_out')
face_spac_det_nn.out.link(det_out.input)
def display_info(frame, bbox, coordinates, status, status_color, fps):
# Display bounding box
cv2.rectangle(frame, bbox, status_color[status], 2)
# Display coordinates
if coordinates is not None:
coord_x, coord_y, coord_z = coordinates
cv2.putText(frame, f"X: {int(coord_x)} mm", (bbox[0] + 10, bbox[1] + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
cv2.putText(frame, f"Y: {int(coord_y)} mm", (bbox[0] + 10, bbox[1] + 35), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
cv2.putText(frame, f"Z: {int(coord_z)} mm", (bbox[0] + 10, bbox[1] + 50), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
# Create background for showing details
cv2.rectangle(frame, (5, 5, 175, 100), (50, 0, 0), -1)
# Display authentication status on the frame
cv2.putText(frame, status, (20, 40), cv2.FONT_HERSHEY_SIMPLEX, 0.5, status_color[status])
# Display instructions on the frame
cv2.putText(frame, f'FPS: {fps:.2f}', (20, 80), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255))
# Frame count
frame_count = 0
# Placeholder fps value
fps = 0
# Used to record the time when we processed last frames
prev_frame_time = 0
# Used to record the time at which we processed current frames
new_frame_time = 0
# Set status colors
status_color = {
'Tomato Detected': (0, 255, 0),
'No Tomato Detected': (0, 0, 255)
}
# Create a DepthAI device and start the pipeline
with dai.Device(pipeline) as device:
# Output queue will be used to get the right camera frames from the outputs defined above
q_cam = device.getOutputQueue(name="preview", maxSize=4, blocking=False)
# Output queue will be used to get nn data from the video frames.
q_det = device.getOutputQueue(name="det_out")
# # Output queue will be used to get nn data from the video frames.
# q_bbox_depth_mapping = device.getOutputQueue(name="bbox_depth_mapping_out", maxSize=4, blocking=False)
while True:
# Get right camera frame
in_cam = q_cam.get()
frame = in_cam.getCvFrame()
bbox = None
coordinates = None
inDet = q_det.tryGet()
if inDet is not None:
detections = inDet.detections
# if face detected
if len(detections) != 0:
detection = detections[0]
# Correct bounding box
xmin = max(0, detection.xmin)
ymin = max(0, detection.ymin)
xmax = min(detection.xmax, 1)
ymax = min(detection.ymax, 1)
# Calculate coordinates
x = int(xmin*FRAME_SIZE[0])
y = int(ymin*FRAME_SIZE[1])
w = int(xmax*FRAME_SIZE[0]-xmin*FRAME_SIZE[0])
h = int(ymax*FRAME_SIZE[1]-ymin*FRAME_SIZE[1])
bbox = (x, y, w, h)
# Get spacial coordinates
coord_x = detection.spatialCoordinates.x
coord_y = detection.spatialCoordinates.y
coord_z = detection.spatialCoordinates.z
coordinates = (coord_x, coord_y, coord_z)
if bbox:
# Tomato detected
status = 'Tomato Detected'
else:
# No tomato detected
status = 'No Tomato Detected'
# Display info on frame
display_info(frame, bbox, coordinates, status, status_color, fps)
# Calculate average fps
if frame_count % 10 == 0:
# Time when we finish processing last 100 frames
new_frame_time = time.time()
# Fps will be number of frame processed in one second
fps = 1 / ((new_frame_time - prev_frame_time)/10)
prev_frame_time = new_frame_time
# Capture the key pressed
key_pressed = cv2.waitKey(1) & 0xff
# Stop the program if Esc key was pressed
if key_pressed == 27:
break
# Display the final frame
cv2.imshow("Face Cam", frame)
# Increment frame count
# frame_count += 1
cv2.destroyAllWindows()