Hey, I am trying to run detections only on an area that the user defines by drawing a rectangle. Right now I run inference on the full image, and if the center of a detected object falls inside the user-defined rectangle, I display it.
I don't think this is the best way. I want to run inference only on the given area, not on the full frame.
Is that possible?
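From skimming the DepthAI docs I suspect an ImageManip node between the camera and the NN could do the cropping on-device, with the crop updated at runtime through an ImageManipConfig message, but I'm not sure I have the API right. This is a rough, untested sketch of what I mean (the "manipCfg" stream name and the division by the 640x640 preview size are my own guesses):

# Hypothetical: crop on-device before inference (untested sketch)
manip = pipeline.create(dai.node.ImageManip)
manip.initialConfig.setResize(640, 640)       # NN still expects 640x640 input
manip.setMaxOutputFrameSize(640 * 640 * 3)
camRgb.preview.link(manip.inputImage)
manip.out.link(nn.input)

xinCfg = pipeline.create(dai.node.XLinkIn)    # host -> device crop updates
xinCfg.setStreamName("manipCfg")
xinCfg.out.link(manip.inputConfig)

# Host side, whenever the region changes:
# (x0, y0) / (x1, y1) = region corners in preview pixels
cfg = dai.ImageManipConfig()
cfg.setCropRect(x0 / 640, y0 / 640, x1 / 640, y1 / 640)  # normalized 0..1
cfg.setResize(640, 640)
device.getInputQueue("manipCfg").send(cfg)

Is that the right direction, or is there a better way?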
Here is my current code:
import cv2
import numpy as np
import depthai as dai
# path to .blob model
nnPath = "Models/chess-yolov5n-blob/last_openvino_2022.1_6shave.blob"
# labels
labelMap = ['black-bishop', 'black-king', 'black-knight', 'black-pawn',
            'black-queen', 'black-rook', 'white-bishop', 'white-king', 'white-knight',
            'white-pawn', 'white-queen', 'white-rook']
previewSize = (640, 640)
# Create pipeline
pipeline = dai.Pipeline()
# Define source and outputs
camRgb = pipeline.create(dai.node.ColorCamera)
camRgb.setPreviewSize(*previewSize)
camRgb.setInterleaved(False)
# Define a neural network that will make predictions based on the source frames
nn = pipeline.create(dai.node.YoloDetectionNetwork)
nn.setConfidenceThreshold(0.8)
nn.setIouThreshold(0.9)
nn.setBlobPath(nnPath)
nn.setNumInferenceThreads(2)
nn.input.setBlocking(False)
nn.setAnchors([10, 14, 23, 27, 37, 58, 81, 82, 135, 169, 344, 319])
nn.setAnchorMasks({
    'side80': [0, 1, 2],
    'side40': [3, 4, 5],
    'side20': [6, 7, 8]
})
nn.setNumClasses(12)
nn.setCoordinateSize(4)
# Linking
camRgb.preview.link(nn.input)
xoutRgb = pipeline.create(dai.node.XLinkOut)
xoutRgb.setStreamName("rgb")
camRgb.preview.link(xoutRgb.input)
nnOut = pipeline.create(dai.node.XLinkOut)
nnOut.setStreamName("nn")
nn.out.link(nnOut.input)
# Keyboard-controlled rectangle that defines the region of interest
class InferenceRegion:
    step = 10             # pixels moved per key press
    position = (0, 0)     # top-left corner
    size = (100, 100)     # width, height
    maxDims = previewSize[0], previewSize[1]

    def grow(self, x=0, y=0):
        self.size = (
            max(1, self.size[0] + x),
            max(1, self.size[1] + y)
        )

    def move(self, x=0, y=0):
        self.position = (
            max(0, self.position[0] + x),
            max(0, self.position[1] + y)
        )

    def endPosition(self):
        # Bottom-right corner, clamped to the frame
        return (
            min(self.position[0] + self.size[0], self.maxDims[0]),
            min(self.position[1] + self.size[1], self.maxDims[1]),
        )
# Connect to device and start pipeline
# Connect to device and start pipeline
with dai.Device(pipeline) as device:
    qRgb = device.getOutputQueue(name="rgb", maxSize=4, blocking=False)
    qDet = device.getOutputQueue(name="nn", maxSize=4, blocking=False)
    frame = None
    detections = []
    region = InferenceRegion()

    def frameNorm(frame, bbox):
        # Detections are normalized to [0..1]; scale them to pixel coordinates
        normVals = np.full(len(bbox), frame.shape[0])
        normVals[::2] = frame.shape[1]
        return (np.clip(np.array(bbox), 0, 1) * normVals).astype(int)

    def displayFrame(name, frame):
        for detection in detections:
            bbox = frameNorm(frame, (detection.xmin, detection.ymin, detection.xmax, detection.ymax))
            center_x = (bbox[0] + bbox[2]) // 2
            center_y = (bbox[1] + bbox[3]) // 2
            # Only draw detections whose center lies within the region
            if (region.position[0] <= center_x <= region.endPosition()[0] and
                    region.position[1] <= center_y <= region.endPosition()[1]):
                cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (255, 0, 0), 2)
                cv2.putText(frame, labelMap[detection.label], (bbox[0] + 10, bbox[1] + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
                cv2.putText(frame, f"{int(detection.confidence * 100)}%", (bbox[0] + 10, bbox[1] + 40), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
        # Draw the user-defined region itself
        cv2.rectangle(frame, region.position, region.endPosition(), (0, 255, 0), 2)
        cv2.imshow(name, frame)

    while True:
        inRgb = qRgb.tryGet()
        inDet = qDet.tryGet()
        if inRgb is not None:
            frame = inRgb.getCvFrame()
        if inDet is not None:
            detections = inDet.detections
        if frame is not None:
            displayFrame("rgb", frame)
        # WASD moves the region, +/- resizes it, q quits
        key = cv2.waitKey(1)
        if key == ord('w'):
            region.move(y=-region.step)
        elif key == ord('s'):
            region.move(y=region.step)
        elif key == ord('a'):
            region.move(x=-region.step)
        elif key == ord('d'):
            region.move(x=region.step)
        elif key == ord('+'):
            region.grow(x=10, y=10)
            region.step = region.step + 1
        elif key == ord('-'):
            region.grow(x=-10, y=-10)
            region.step = max(region.step - 1, 1)
        elif key == ord('q'):
            break

cv2.destroyAllWindows()
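One thing I'm unsure about with the crop approach: I assume the detections would then be normalized to the cropped region rather than to the full frame, so I'd have to map them back before drawing. This helper is what I'd try (my own hypothetical code, not from the DepthAI API):

def denormToFrame(detection, region):
    # Map a bbox normalized to the cropped region back to full-frame pixels
    x0, y0 = region.position
    x1, y1 = region.endPosition()
    w, h = x1 - x0, y1 - y0
    return (
        int(x0 + detection.xmin * w), int(y0 + detection.ymin * h),
        int(x0 + detection.xmax * w), int(y0 + detection.ymax * h),
    )

Does that sound right, or does DepthAI handle the remapping for me?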