Hey, here is the code I'm using. It runs a YOLO-tiny model on an OAK-1 camera for barcode detection. Hope this helps.
from pathlib import Path
import datetime
import sys
import cv2
import depthai as dai
import numpy as np
import time
labelMap = ["barcode"]
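# Single-class model: label index 0 maps to "barcode"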
syncNN = True
# Create pipeline
pipeline = dai.Pipeline()
# Define sources and outputs
camRgb = pipeline.create(dai.node.ColorCamera)
detectionNetwork = pipeline.create(dai.node.YoloDetectionNetwork)
xoutRgb = pipeline.create(dai.node.XLinkOut)
nnOut = pipeline.create(dai.node.XLinkOut)
xoutRgb.setStreamName("rgb")
nnOut.setStreamName("nn")
# Properties
camRgb.setPreviewSize(320, 320)
camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_4_K)
camRgb.setInterleaved(False)
camRgb.setColorOrder(dai.ColorCameraProperties.ColorOrder.BGR)
camRgb.setFps(40)
# Network specific settings
detectionNetwork.setConfidenceThreshold(0.5)
detectionNetwork.setNumClasses(1)
detectionNetwork.setCoordinateSize(4)
detectionNetwork.setAnchors(
    [23.09375, 6.93359375, 16.28125, 33.5625, 35.5, 24.859375, 71.25, 17.328125, 39.59375, 32.125, 52.25, 36.125,
     70.875, 33.5625, 65.5, 48.9375, 88.5625, 63.75])
detectionNetwork.setAnchorMasks({"side40": [0, 1, 2], "side20": [3, 4, 5], "side10": [6, 7, 8]})
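# The mask names correspond to the YOLO output grid sizes for a 320x320 input:
# 320/8 = 40, 320/16 = 20 and 320/32 = 10, i.e. strides 8, 16 and 32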
detectionNetwork.setBlobPath('model/barcode.blob')
detectionNetwork.setIouThreshold(0.5)
detectionNetwork.setNumInferenceThreads(2)
detectionNetwork.input.setBlocking(False)
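# Non-blocking input: the NN skips frames it cannot keep up with instead of
# stalling the camera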
camRgb.setVideoSize(960, 960)
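# NOTE: the 960x960 video output is never linked below; only the 320x320
# preview feeds both the NN and the display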
# Linking
camRgb.preview.link(detectionNetwork.input)
if syncNN:
    detectionNetwork.passthrough.link(xoutRgb.input)
else:
    camRgb.preview.link(xoutRgb.input)
detectionNetwork.out.link(nnOut.input)
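# With syncNN the displayed frames are the NN passthrough, so the drawn boxes
# always match the exact frame they were computed on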
# Connect to device and start pipeline
with dai.Device(pipeline) as device:
    # Non-blocking output queues for the rgb frames and nn data: when they
    # fill up, the oldest packets are dropped so the host never stalls the device
    qRgb = device.getOutputQueue(name="rgb", maxSize=250, blocking=False)
    qDet = device.getOutputQueue(name="nn", maxSize=250, blocking=False)

    frame = None
    detections = []
    startTime = time.monotonic()
    counter = 0
    count = 0  # detections drawn since the last saved snapshot
    color2 = (255, 255, 255)
    # Placeholders for values that live elsewhere in my project; adjust for your setup
    deviceName = "oak1"
    pathC = Path(".")
    dtypesmall = "-small"
    # The NN returns bounding boxes normalized to <0..1>; scale them to pixel
    # coordinates with the frame width/height
    def frameNorm(frame, bbox):
        normVals = np.full(len(bbox), frame.shape[0])
        normVals[::2] = frame.shape[1]
        return (np.clip(np.array(bbox), 0, 1) * normVals).astype(int)
    def displayFrame(name, frame):
        color = (255, 0, 0)
        global count
        for detection in detections:
            bbox = frameNorm(frame, (detection.xmin, detection.ymin, detection.xmax, detection.ymax))
            cv2.putText(frame, labelMap[detection.label], (bbox[0] + 10, bbox[1] + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.putText(frame, f"{int(detection.confidence * 100)}%", (bbox[0] + 10, bbox[1] + 40), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color, 2)
            count += 1
            if count == 30:
                # Save a snapshot of the annotated frame, then restart the counter
                a = deviceName + '-' + datetime.datetime.utcnow().strftime('%Y%m%d%H%M%S%f')
                cv2.imwrite(str(pathC) + "/data/small/" + a + dtypesmall + '.jpg', frame)
                count = 0
        # Show the frame
        cv2.imshow(name, frame)
    while True:
        if syncNN:
            # Blocking get() keeps frames and detections in lockstep
            inRgb = qRgb.get()
            inDet = qDet.get()
        else:
            inRgb = qRgb.tryGet()
            inDet = qDet.tryGet()

        if inRgb is not None:
            frame = inRgb.getCvFrame()
            cv2.putText(frame, "NN fps: {:.2f}".format(counter / (time.monotonic() - startTime)),
                        (2, frame.shape[0] - 4), cv2.FONT_HERSHEY_TRIPLEX, 0.4, color2)

        if inDet is not None:
            detections = inDet.detections
            counter += 1

        if frame is not None:
            displayFrame("rgb", frame)

        if cv2.waitKey(1) == ord('q'):
            break
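By the way, if you want to save just the barcode region instead of the full frame, something like this minimal sketch works (cropDetection and the margin parameter are my own names, not part of the pipeline above):

def cropDetection(frame, detection, margin=10):
    # Scale the normalized bbox to pixel coordinates with frameNorm, pad it
    # a little, and clamp to the frame borders before slicing
    bbox = frameNorm(frame, (detection.xmin, detection.ymin, detection.xmax, detection.ymax))
    x1 = max(bbox[0] - margin, 0)
    y1 = max(bbox[1] - margin, 0)
    x2 = min(bbox[2] + margin, frame.shape[1])
    y2 = min(bbox[3] + margin, frame.shape[0])
    return frame[y1:y2, x1:x2]

You could call it inside the detection loop and pass the result to cv2.imwrite instead of the full frame.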