import cv2
import depthai as dai
from calc import HostSpatialsCalc
import numpy as np
import math
import socket
import json
from ultralytics import YOLO
# Create YOLOv8 segmentation model (weights are downloaded on first use)
model = YOLO('yolov8n-seg.pt')
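# The `output` list assembled in the main loop is intended to be transferred
# off-host (hence the socket/json imports). A minimal sketch of one way to do
# that, assuming a line-delimited JSON TCP listener; the function name and
# address are illustrative, not part of the original pipeline:
def send_detections(address, detections):
    """Send one frame's detections as a single JSON line to a TCP listener."""
    with socket.create_connection(address, timeout=1.0) as sock:
        sock.sendall((json.dumps(detections) + "\n").encode("utf-8"))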
# Create a DepthAI pipeline
pipeline = dai.Pipeline()
# Define source and output
camRgb = pipeline.create(dai.node.ColorCamera)
xoutVideo = pipeline.create(dai.node.XLinkOut)
xoutVideo.setStreamName("video")
# Define source and output for stereo cam
monoLeft = pipeline.create(dai.node.MonoCamera)
monoRight = pipeline.create(dai.node.MonoCamera)
stereo = pipeline.create(dai.node.StereoDepth)
# Properties
camRgb.setBoardSocket(dai.CameraBoardSocket.CAM_A)
camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P)
camRgb.setVideoSize(640, 480)
xoutVideo.input.setBlocking(False)
xoutVideo.input.setQueueSize(1)
# Properties for depth map
monoLeft.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
monoLeft.setBoardSocket(dai.CameraBoardSocket.CAM_B)
monoRight.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
monoRight.setBoardSocket(dai.CameraBoardSocket.CAM_C)
stereo.initialConfig.setConfidenceThreshold(255)  # 255 = no confidence filtering, keep all depth pixels
stereo.setLeftRightCheck(True)  # better handling of occlusions
stereo.setSubpixel(False)
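# Note: the 640x480 RGB video and the 400P depth stream differ in size and
# field of view, so mapping boxes between them (done below by normalizing and
# denormalizing) is approximate. For exact pixel correspondence, depth could
# be aligned to the color camera (an optional refinement, not enabled here):
# stereo.setDepthAlign(dai.CameraBoardSocket.CAM_A)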
# Linking
camRgb.video.link(xoutVideo.input)
# Linking for depth map
monoLeft.out.link(stereo.left)
monoRight.out.link(stereo.right)
xoutDepth = pipeline.create(dai.node.XLinkOut)
xoutDepth.setStreamName("depth")
stereo.depth.link(xoutDepth.input)
xoutDisp = pipeline.create(dai.node.XLinkOut)
xoutDisp.setStreamName("disp")
stereo.disparity.link(xoutDisp.input)
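# Resulting topology: CAM_A (color) -> "video"; CAM_B + CAM_C (mono) ->
# StereoDepth -> "depth" (raw depth) and "disp" (disparity) XLink streams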
# Connect to device and start pipeline
with dai.Device(pipeline) as device:
    video = device.getOutputQueue(name="video", maxSize=1, blocking=False)
    # Output queues for the depth/disparity streams defined above; small
    # non-blocking buffers keep depth roughly in sync with the (slower)
    # YOLO-annotated video instead of building up a backlog
    depth = device.getOutputQueue(name="depth", maxSize=4, blocking=False)
    disp = device.getOutputQueue(name="disp", maxSize=4, blocking=False)  # drained but unused; kept for optional disparity display
    hostSpatials = HostSpatialsCalc(device)
    delta = 5
    hostSpatials.setDeltaRoi(delta)  # only used when a point, not a rect, is passed to calc_spatials
color = (255, 255, 255)
while True:
videoIn = video.get()
# Get BGR frame from NV12 encoded video frame to show with OpenCV
frame = videoIn.getCvFrame()
# Perform YOLOv8 inference on the frame
results = model(frame)
# Get annotated frame with YOLOv8 predictions
annotated_frame = results[0].plot()
        # Detection data to be transferred off-host
        result = results[0]
        output = []
        # DEPTH FRAME: read the message once and reuse it for both the raw
        # frame and the host-side spatials calculation below
        depthData = depth.get()
        depthFrame = depthData.getFrame()
        depth_downscaled = depthFrame[::4]  # subsample rows for cheap percentile stats
if np.all(depth_downscaled == 0):
min_depth = 0 # Set a default minimum depth value when all elements are zero
else:
min_depth = np.percentile(depth_downscaled[depth_downscaled != 0], 1)
max_depth = np.percentile(depth_downscaled, 99)
depthFrameColor = np.interp(depthFrame, (min_depth, max_depth), (0, 255)).astype(np.uint8)
depthFrameColor = cv2.applyColorMap(depthFrameColor, cv2.COLORMAP_HOT)
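        # The colorized frame is for visualization only; the spatials below
        # are computed from the raw depth values in depthData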
        # RGB frame dimensions, used to normalize the YOLO boxes below
        height = frame.shape[0]
        width = frame.shape[1]
        for box in result.boxes:
            # Pixel coordinates of the bounding box in the RGB frame
            x1, y1, x2, y2 = [round(x) for x in box.xyxy[0].tolist()]
            # Object class name
            class_id = int(box.cls[0].item())
            label = result.names[class_id]
            output.append([x1, y1, x2, y2, label])
            center_box_x = int((x1 + x2) / 2)
            center_box_y = int((y1 + y2) / 2)
            # Map the RGB-frame bounding box onto the depth frame: normalize
            # the pixel coordinates by the RGB frame size, then denormalize
            # with the depth frame size
            roi_rect = dai.Rect(dai.Point2f(x1 / width, y1 / height),
                                dai.Point2f(x2 / width, y2 / height))
            roi_rect = roi_rect.denormalize(depthFrameColor.shape[1], depthFrameColor.shape[0])
            topLeft = roi_rect.topLeft()
            bottomRight = roi_rect.bottomRight()
            xmin = int(topLeft.x)
            ymin = int(topLeft.y)
            xmax = int(bottomRight.x)
            ymax = int(bottomRight.y)
cv2.rectangle(depthFrameColor, (xmin, ymin), (xmax, ymax), color, 1)
            cv2.circle(annotated_frame, (center_box_x, center_box_y), 10, (0, 255, 255), -1)  # yellow dot at box center
            # Spatial coordinates computed on the host from the raw depth
            # data over the depth-frame ROI
            spatials, centroid = hostSpatials.calc_spatials(depthData, (xmin, ymin, xmax, ymax))
            cv2.putText(annotated_frame,
                        "Z: " + ("{:.1f}m".format(spatials['z'] / 1000) if not math.isnan(spatials['z']) else "--"),
                        (x1 + 10, y1 + 50), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
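        print(output)
        # With a listener running, the frame's detections could be shipped
        # out here as well (hypothetical address, see send_detections above):
        # send_detections(("127.0.0.1", 5005), output)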
# Display the annotated frame
cv2.imshow("YOLOv8 Inference", annotated_frame)
        # Display the colorized depth frame with the detection ROIs drawn on it
        cv2.imshow("depth", depthFrameColor)
if cv2.waitKey(1) == ord('q'):
break
cv2.destroyAllWindows()
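# Usage: run with an OAK stereo camera attached; press 'q' in a window to
# quit. To receive detections off-host, pair the (hypothetical)
# send_detections sketch above with any line-delimited JSON listener.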