I have a similar use case to the Gen2 License Plates Recognition example, so I have been attempting to get a version of it working using my own yolov5 weights for both detection and OCR. I am new to object oriented programming (I took a few C classes in the late 90s but besides that I have not done much coding) but I am powering through (I tend to find a destination and work my way there by throwing myself into the deep end, with mixed results, but that's besides the point).
I have re-written the example as much as I could, but am unable to figure out how certain sections work due to a combination my inexperience and lack of any comments in the code and use of confusing (to me) variable names. I know this isn't the place for 'general programing questions' but since the entire extent of my programming knowledge at this point is 'depthai' then I have no idea what is 'general' and what is 'depthai'. So please excuse me for that.
My questions:
- What is the difference between using MobileDetectionNetwork and NeuralNetwork in the original code?
- How are the detected plates being passed to the OCR network?
- How do I get the results to appear on my screen (if I try to draw a frame it pops up a grey box and crashes)
- Anything else I am doing wrong?
Thanks
import depthai as dai
import threading
import time
from pathlib import Path
import numpy as np
from depthai_sdk.fps import FPSHandler
blobPath = str((Path('c:/users/user/code/models/best_openvino_2022.1_6shave.blob')).resolve().absolute())
ocrBlobPath = str((Path('c:/users/user/code/models/ocr_openvino_2022.1_6shave.blob')).resolve().absolute())
#variables
fps = FPSHandler()
running = True
plateDetections = []
ocrPlates = []
frameDetSeq = 0
frameSeqMap = {}
licLastSeq = 0
labelMap = ["license plate"]
ocrLabelMap = ["0",
"1",
"2",
"3",
"4",
"5",
"6",
"7",
"8",
"9",
"A",
"B",
"C",
"D",
"E",
"F",
"G",
"H",
"I",
"J",
"K",
"L",
"M",
"N",
"O",
"P",
"Q",
"R",
"S",
"T",
"U",
"V",
"W",
"X",
"Y",
"Z"
]
shape = 416
shaves = 6
p = dai.Pipeline()
#create piplines
cam = p.create(dai.node.ColorCamera)
camOut = p.create(dai.node.XLinkOut)
detNn = p.create(dai.node.YoloDetectionNetwork)
detNnOut = p.create(dai.node.XLinkOut)
ocrNn = p.create(dai.node.YoloDetectionNetwork)
ocrNnOut = p.create(dai.node.XLinkOut)
ocrNnPass = p.create(dai.node.XLinkOut)
ocrNnIn = p.create(dai.node.XLinkIn)
#set params
cam.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P)
cam.setInterleaved(False)
cam.setColorOrder(dai.ColorCameraProperties.ColorOrder.BGR)
cam.setPreviewSize(shape, shape)
#Yolo params
detNn.setBlobPath(blobPath)
detNn.input.setBlocking(False)
detNn.setNumInferenceThreads(2)
ocrNn.setBlobPath(ocrBlobPath)
ocrNn.input.setBlocking(False)
ocrNn.setNumInferenceThreads(2)
detNn.setNumClasses(1)
detNn.setConfidenceThreshold(0.5)
detNn.setCoordinateSize(4)
detNn.setIouThreshold(0.5)
detNn.setAnchors([
4.703125,
3.501953125,
7.5703125,
5.8203125,
13.46875,
9.1953125,
27.671875,
16.53125,
61.4375,
31.984375,
111.625,
62.09375,
184.125,
83.8125,
251.75,
129.25,
344.5,
185.875
])
detNn.setAnchorMasks({
"side52": [
0,
1,
2
],
"side26": [
3,
4,
5
],
"side13": [
6,
7,
8
]})
ocrNn.setNumClasses(36)
ocrNn.setConfidenceThreshold(0.5)
ocrNn.setCoordinateSize(4)
ocrNn.setIouThreshold(0.5)
ocrNn.setAnchors(
[0.0,
13.0,
16.0,
30.0,
33.0,
23.0,
30.0,
61.0,
62.0,
45.0,
59.0,
119.0,
116.0,
90.0,
156.0,
198.0,
373.0,
326.0
])
ocrNn.setAnchorMasks({
"side52": [
0,
1,
2
],
"side26": [
3,
4,
5
],
"side13": [
6,
7,
8
]})
#create links
cam.preview.link(detNn.input)
cam.preview.link(camOut.input)
detNn.out.link(detNnOut.input)
ocrNn.out.link(ocrNnOut.input)
ocrNn.passthrough.link(ocrNnPass.input)
ocrNnIn.out.link(ocrNn.input)
#name streams
camOut.setStreamName("cam")
detNnOut.setStreamName("det")
ocrNnOut.setStreamName("ocr")
ocrNnPass.setStreamName("pass")
ocrNnIn.setStreamName("in")
#define functions
def frameNorm(frame, bbox):
return (np.clip(np.array(bbox), 0, 1) * np.array([*frame.shape[:2], *frame.shape[:2]])[::-1]).astype(int)
def toPlanar(arr: np.ndarray, shape: tuple) -> list:
return cv2.resize(arr, shape).transpose(2, 0, 1).flatten()
def detPlate(qDet, qOcr):
global plateDetections, licLastSeq
color = (255, 0, 0)
while running:
try:
inDet = qDet.get()
dets = inDet.detections
frame = frameSeqMap.get(inDet.getSequenceNum(), None)
if frame is None:
continue
plateDetections = [detection for detection in dets]
for detection in plateDetections:
bbox = frameNorm(frame, (detection.xmin, detection.ymin, detection.xmax, detection.ymax))
cropFrame = frame[bbox[1]:bbox[3], bbox[0]:bbox[2]]
tStamp = time.monotonic
plate = dai.ImgFrame()
plate.setTimestamp(tStamp)
plate.setType(dai.RawImgFrame.Type.BGR888p)
plate.setData(toPlanar(cropFrame, (94,24)))
plate.setWidth(94)
plate.setHeight(24)
qOcr.send(plate)
fps.tick('lic')
except RuntimeError:
continue
def ocrPlate(qOcr, qPass):
global ocrResults
while running:
try:
ocrData = qOcr.get().getFirstLayerInt32()
ocrFrame = qPass.get().getCvFrame()
except RuntimeError:
continue
ocrPlate = ""
for idx in ocrData:
if idx == -1:
break
ocrPlate += ocrLabelMap[int(idx)]
ocrResults = [(cv2.resize(ocrFrame, (200, 64)), ocrPlate)] + ocrResults[:9]
fps.tick('ocr')
with dai.Device(p) as device:
frame = None
#plateDetections = []
#startTime = time.monotonic()
#counter = 0
#color2 = (255, 255, 255)
#setup queues
qCam = device.getOutputQueue(name="cam", maxSize=1, blocking=False)
qDet = device.getOutputQueue(name="det", maxSize=1, blocking=False)
#qIn = device.getOutputQueue(name="in", maxSize=1, blocking=False)
qPass = device.getOutputQueue(name="pass", maxSize=1, blocking=False)
qOcr = device.getOutputQueue(name="ocr", maxSize=1, blocking=False)
#give queues to functions
detThread = threading.Thread(target=detPlate, args=(qDet, qOcr))
detThread.start()
ocrThread = threading.Thread(target=ocrPlate, args=(qOcr, qPass))
ocrThread.start()
#functions
def shouldRun():
return True
def getFrame():
global frameDetSeq
inCam = qCam.get()
frame = inCam.getCvFrame()
frameSeqMap[inCam.getSequenceNum()] = frame
return True, frame
#execute
try:
while shouldRun():
readCorrectly, frame = getFrame()
if not readCorrectly:
break
for mapKey in list(filter(lambda item: item <= min(licLastSeq), frameSeqMap.keys())):
del frameSeqMap[mapKey]
fps.nextIter()
except KeyboardInterrupt:
pass
running = False
detThread.join()
ocrThread.join()
print("FPS: {:.2f}".format(fps.fps()))`