import cv2
import depthai as dai
import numpy as np
def getFrame(queue):
# Get frame from queue
frame = queue.get()
# Convert frame to OpenCV format and return
return frame.getCvFrame()
# Start defining a pipeline
pipeline = dai.Pipeline()
print("depthai pipline defined")
# Set up RGB camera
rgb = pipeline.createColorCamera()
print("RGB pipeline created")
rgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P)
print("resolution set")
rgb.setIspScale(1920//1056, 1080//1056)
print("downscaling complete")
# Load the YOLO model
nn = pipeline.createNeuralNetwork()
print("neural net pipeline created")
nn.setBlobPath("../Yolov8CamColor/NNBlob/yolov6l_openvino_2022.1_6shave.blob")
print("found blob path")
# Link the RGB camera to the neural network
rgb.video.link(nn.input)
print("neural net linked to RGB")
# Create an output queue for the neural network
nnOut = pipeline.createXLinkOut()
nnOut.setStreamName("nn")
nn.out.link(nnOut.input)
print("completed output queue for nn")
# Create an output queue for the RGB camera
xoutRgb = pipeline.createXLinkOut()
xoutRgb.setStreamName("rgb")
rgb.video.link(xoutRgb.input)
print("completed output queue for RGB camera")
# Pipeline is defined
with dai.Device(pipeline) as device:
# Output queues will be used to get the rgb frames and nn data from the outputs defined above
rgbQueue = device.getOutputQueue(name="rgb", maxSize=1, blocking=False)
nnQueue = device.getOutputQueue(name="nn", maxSize=1, blocking=False)
frame_width = 1056
frame_height = 1056
while True:
# Get the RGB frame
print("getting RGB frames")
frame = rgbQueue.get().getCvFrame()
# Get the detections
print("getting detections")
detections = nnQueue.get().getFirstLayerFp16()
# Only process detections if length is greater than 7
if len(detections) > 7:
# Loop over the detections and add them to your images
for i in range(0, len(detections), 7):
class_id = int(detections[i + 1])
confidence = detections[i + 2]
x_min = int(detections[i + 3] * frame_width)
y_min = int(detections[i + 4] * frame_height)
x_max = int(detections[i + 5] * frame_width)
y_max = int(detections[i + 6] * frame_height)
# Print out the size and content of detections for debugging
print(f"Size of detections: {len(detections)}")
print(f"Content of detections: {detections}")
# Loop over the detections and add them to images
for i in range(0, len(detections), 7):
class_id = int(detections[i + 1])
confidence = detections[i + 2]
x_min = int(detections[i + 3] * frame_width)
y_min = int(detections[i + 4] * frame_height)
x_max = int(detections[i + 5] * frame_width)
y_max = int(detections[i + 6] * frame_height)
# Draw a rectangle around the detected object and add a label
cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)
cv2.putText(frame, str(class_id), (x_min, y_min - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (36,255,12), 2)
# Display the frame
cv2.imshow("RGB", frame)
# Check for keyboard input
key = cv2.waitKey(1)
if key == ord('q'):
# Quit when q is pressed
break
This is what I am attempting to run, I have the camera set to use RGB with pipeline.createcolorcamera() and pass it to the NN. I did use the tool you recommended however I am getting the error [1944301041B4771300] [1.4.1] [3.173] [NeuralNetwork(1)] [warning] Input image (1920x1080) does not match NN (1056x1056). I also attempted to resize the image but this has not fixed it either.