I am trying to use a YOLO model for OCR. It has a label for each character. I am passing a detection into the model and would like the OCR model to detect each character, append that character's label to a string, and return the string once all characters have been detected. I can't figure out how to parse the label from the NN once the packet has been passed. How can I do this?
(FYI: your in-browser editor incorrectly implements the 'code' block function. It inserts one backtick, but three are required.)
Thanks.
from depthai_sdk.oak_camera import OakCamera
from depthai_sdk.visualize.configs import BboxStyle, TextPosition
from depthai_sdk.visualize import *
import time
def ocr(packet):
    """Join the labels of every detection in *packet* into a single string.

    Args:
        packet: Object exposing a ``detections`` iterable whose items each
            carry a ``label`` attribute (one label per recognized character).

    Returns:
        The labels concatenated in the order the packet lists its detections,
        or an empty string when the packet holds no detections.
    """
    # NOTE(review): characters are joined in packet order, which is not
    # necessarily left-to-right reading order -- sort the detections by their
    # bounding-box x coordinate first if that matters (confirm the bbox
    # attribute names for the installed SDK version).
    # NOTE(review): this callback is registered on a two-stage NN; confirm that
    # `packet.detections` carries the second-stage (character) labels rather
    # than the first-stage ones.
    # NOTE(review): the caller of this callback presumably ignores the return
    # value -- store or print the string if it is needed elsewhere.
    return "".join(str(detection.label) for detection in packet.detections)
# Two-stage pipeline: `det` runs on the color camera and `rec` takes `det`
# as its input; the `ocr` callback is registered on `rec`.
with OakCamera() as oak:
    color = oak.create_camera('color')

    # First stage: single-class YOLO detector with tracking enabled.
    det = oak.create_nn('c:/users/user/code/models/best.blob', color, nn_type='Yolo', tracker=True)
    # Second stage: YOLO model fed by the first stage's output.
    rec = oak.create_nn('c:/users/user/code/models/ocr.json', input=det, nn_type='Yolo')

    det.config_yolo(
        num_classes=1,
        coordinate_size=4,
        anchors=[
            4.703125, 3.501953125, 7.5703125, 5.8203125, 13.46875, 9.1953125,
            27.671875, 16.53125, 61.4375, 31.984375,
            111.625, 62.09375, 184.125, 83.8125, 251.75, 129.25, 344.5, 185.875,
        ],
        masks={"side52": [0, 1, 2], "side26": [3, 4, 5], "side13": [6, 7, 8]},
        iou_threshold=0.5,
        conf_threshold=0.5,
    )
    # 36 classes -- presumably digits 0-9 plus letters A-Z; confirm against
    # the model's label map.
    rec.config_yolo(
        num_classes=36,
        coordinate_size=4,
        anchors=[
            10.0, 13.0, 16.0, 30.0, 33.0, 23.0,
            30.0, 61.0, 62.0, 45.0, 59.0, 119.0,
            116.0, 90.0, 156.0, 198.0, 373.0, 326.0,
        ],
        conf_threshold=0.5,
        iou_threshold=0.5,
        masks={"side52": [0, 1, 2], "side26": [3, 4, 5], "side13": [6, 7, 8]},
    )
    rec.config_multistage_nn(debug=True)
    det.config_nn(resize_mode='crop')

    # Optional debug view of the first-stage passthrough frames:
    # oak.visualize(det.out.passthrough)
    oak.visualize(rec.out.twostage_crops, scale=2.0)

    visualizer = oak.visualize(rec.out.main, fps=True)
    visualizer.detections(
        color=(0, 255, 0),
        thickness=2,
        bbox_style=BboxStyle.RECTANGLE,
        label_position=TextPosition.MID,
    ).text(
        font_color=(255, 255, 0),
        auto_scale=True,
    ).tracking(
        line_thickness=5,
    )

    # Hand every packet produced by the second-stage NN to `ocr`.
    oak.callback(rec, callback=ocr)
    oak.start(blocking=True)