Sharing the code for anyone else who needs it.
main.py
import threading

import image_processing as IP

if __name__ == "__main__":
    camera_type = "rgb"
    roi = IP.ROI(1700, 1520)
    thread = threading.Thread(target=IP.process_images, args=(camera_type, roi))
    thread.start()
    thread.join()
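main.py just launches the processing in a worker thread: camera_type is only used to label the OpenCV windows, and the ROI (1700, 1520) is the top-left corner of the 1024x1024 crop that image_processing.py takes out of the full 12 MP ISP frame.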
image_processing.py
import cv2
import numpy as np
import depthai as dai
import east
import blobconverter
class ROI:
    """Region of interest: (x, y) is the top-left corner of a fixed 1024x1024 crop."""
    def __init__(self, x, y):
        self.x = x
        self.y = y
        self.width = 1024
        self.height = 1024
class HostSeqSync:
    """Buffers messages on the host and retrieves them by sequence number."""
    def __init__(self):
        self.imgFrames = []

    def add_msg(self, msg):
        self.imgFrames.append(msg)

    def get_msg(self, target_seq):
        # Drop everything older than the requested sequence number,
        # then return the matching (now oldest) frame
        for i, imgFrame in enumerate(self.imgFrames):
            if target_seq == imgFrame.getSequenceNum():
                self.imgFrames = self.imgFrames[i:]
                break
        return self.imgFrames[0]
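# Example: after add_msg() of frames with sequence numbers 5, 6 and 7,
# get_msg(6) discards frame 5 and returns frame 6 (frames 6 and 7 stay buffered).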
def process_images(camera_type, roi):
    pipeline = dai.Pipeline()
    version = "2022.1"
    pipeline.setOpenVINOVersion(version=dai.OpenVINO.Version.VERSION_2022_1)

    colorCam = pipeline.create(dai.node.ColorCamera)
    colorCam.setPreviewSize(256, 256)
    #colorCam.setVideoSize(2048, 2048)  # 4 times larger on both axes
    colorCam.setResolution(dai.ColorCameraProperties.SensorResolution.THE_12_MP)
    colorCam.setInterleaved(False)
    colorCam.setBoardSocket(dai.CameraBoardSocket.RGB)
    colorCam.setFps(10)
    # MODIFICATION -> creation of the video ImageManip
    manip_video = pipeline.create(dai.node.ImageManip)
    RrVideo = dai.RotatedRect()
    # Convert the ROI's top-left corner into the rectangle's center
    RrVideo.center.x, RrVideo.center.y = int(roi.x + (roi.width / 2)), int(roi.y + (roi.height / 2))
    RrVideo.size.width, RrVideo.size.height = 1024, 1024
    manip_video.initialConfig.setCropRotatedRect(RrVideo, False)
    manip_video.setResize(1024, 1024)
    manip_video.setMaxOutputFrameSize(1024 * 1024 * 3)
    manip_video.setFrameType(dai.RawImgFrame.Type.BGR888p)
    colorCam.isp.link(manip_video.inputImage)
    # MODIFICATION -> creation of the preview ImageManip
    manip_preview = pipeline.create(dai.node.ImageManip)
    manip_preview.setResize(256, 256)
    manip_preview.setMaxOutputFrameSize(int(256 * 256 * 3))
    manip_preview.setFrameType(dai.RawImgFrame.Type.BGR888p)
    manip_video.out.link(manip_preview.inputImage)
    controlIn = pipeline.create(dai.node.XLinkIn)
    controlIn.setStreamName('control')
    controlIn.out.link(colorCam.inputControl)

    cam_xout = pipeline.create(dai.node.XLinkOut)
    cam_xout.setStreamName('video')
    manip_video.out.link(cam_xout.input)
    # ---------------------------------------
    # 1st stage NN - text-detection
    # ---------------------------------------
    nn = pipeline.create(dai.node.NeuralNetwork)
    nn.setBlobPath(blobconverter.from_zoo(name="east_text_detection_256x256", zoo_type="depthai", shaves=6, version=version))
    manip_preview.out.link(nn.input)

    nn_xout = pipeline.create(dai.node.XLinkOut)
    nn_xout.setStreamName('detections')
    nn.out.link(nn_xout.input)
    # ---------------------------------------
    # 2nd stage NN - text-recognition-0012
    # ---------------------------------------
    manip = pipeline.create(dai.node.ImageManip)
    manip.setWaitForConfigInput(True)

    manip_img = pipeline.create(dai.node.XLinkIn)
    manip_img.setStreamName('manip_img')
    manip_img.out.link(manip.inputImage)

    manip_cfg = pipeline.create(dai.node.XLinkIn)
    manip_cfg.setStreamName('manip_cfg')
    manip_cfg.out.link(manip.inputConfig)

    manip_xout = pipeline.create(dai.node.XLinkOut)
    manip_xout.setStreamName('manip_out')

    nn2 = pipeline.create(dai.node.NeuralNetwork)
    nn2.setBlobPath(blobconverter.from_zoo(name="text-recognition-0012", shaves=6, version=version))
    nn2.setNumInferenceThreads(2)
    manip.out.link(nn2.input)
    manip.out.link(manip_xout.input)

    nn2_xout = pipeline.create(dai.node.XLinkOut)
    nn2_xout.setStreamName("recognitions")
    nn2.out.link(nn2_xout.input)
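    # Pipeline graph built above, for reference:
    #   colorCam.isp -> manip_video (1024x1024 ROI crop) -> cam_xout ("video")
    #   manip_video.out -> manip_preview (256x256) -> nn (EAST) -> nn_xout ("detections")
    #   manip_img / manip_cfg (from host) -> manip -> nn2 (text-recognition-0012) -> nn2_xout ("recognitions")
    #   manip.out also goes to manip_xout ("manip_out") so the host can display each crop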
    def to_tensor_result(packet):
        return {
            name: np.array(packet.getLayerFp16(name))
            for name in [tensor.name for tensor in packet.getRaw().tensors]
        }

    def to_planar(frame):
        # Interleaved HWC -> planar CHW, as dai.ImgFrame expects
        return frame.transpose(2, 0, 1).flatten()
    with dai.Device(pipeline) as device:
        q_vid = device.getOutputQueue("video", 4, blocking=False)
        #q_prev = device.getOutputQueue("preview", 4, blocking=False)
        #q1 = device.getOutputQueue("crop", 4, blocking=False)
        q_det = device.getOutputQueue("detections", 4, blocking=False)
        q_rec = device.getOutputQueue("recognitions", 4, blocking=True)
        q_manip_img = device.getInputQueue("manip_img")
        q_manip_cfg = device.getInputQueue("manip_cfg")
        q_manip_out = device.getOutputQueue("manip_out", 4, blocking=False)
        controlQueue = device.getInputQueue('control')

        frame = None
        cropped_stacked = None
        rotated_rectangles = []
        rec_pushed = 0
        rec_received = 0
        host_sync = HostSeqSync()
        class CTCCodec(object):
            """Convert between text-label and text-index"""
            def __init__(self, characters):
                dict_character = list(characters)
                self.dict = {}
                for i, char in enumerate(dict_character):
                    self.dict[char] = i + 1
                self.characters = dict_character

            def decode(self, preds):
                """Convert text-index into text-label."""
                texts = []
                index = 0
                preds = preds.astype(np.float16)
                preds_index = np.argmax(preds, 2)
                preds_index = preds_index.transpose(1, 0)
                preds_index_reshape = preds_index.reshape(-1)
                preds_sizes = np.array([preds_index.shape[1]] * preds_index.shape[0])
                for l in preds_sizes:
                    t = preds_index_reshape[index:index + l]
                    if t.shape[0] == 0:
                        continue
                    char_list = []
                    for i in range(l):
                        # Collapse repeated characters and drop the CTC blank ('#')
                        if not (i > 0 and t[i - 1] == t[i]):
                            if self.characters[t[i]] != '#':
                                char_list.append(self.characters[t[i]])
                    text = ''.join(char_list)
                    texts.append(text)
                    index += l
                return texts
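        # Worked example of the greedy CTC decode above: with the character set
        # '0123456789abcdefghijklmnopqrstuvwxyz#', a per-timestep argmax sequence
        # [0, 0, 10, 36, 1] collapses the repeated 0s, maps 10 -> 'a', drops the
        # blank (36 -> '#') and keeps 1 -> '1', yielding the text "0a1".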
        characters = '0123456789abcdefghijklmnopqrstuvwxyz#'
        codec = CTCCodec(characters)

        ctrl = dai.CameraControl()
        ctrl.setAutoFocusMode(dai.CameraControl.AutoFocusMode.CONTINUOUS_VIDEO)
        ctrl.setAutoFocusTrigger()
        controlQueue.send(ctrl)
        while True:
            vid_in = q_vid.tryGet()
            #prev_in = q_prev.tryGet()
            if vid_in is not None:
                host_sync.add_msg(vid_in)
            #if prev_in is not None:
            #    host_sync.add_msg(prev_in)

            # Drain all recognition results that arrived since the last pass
            while True:
                #framepreview = host_sync.get_msg(prev_in.getSequenceNum()).getCvFrame().copy()
                #cv2.imshow("preview", framepreview)
                in_rec = q_rec.tryGet()
                if in_rec is None:
                    break
                rec_data = np.array(in_rec.getFirstLayerFp16()).reshape(30, 1, 37)
                decoded_text = codec.decode(rec_data)[0]
                pos = rotated_rectangles[rec_received]
                print("{:2}: {:20}".format(rec_received, decoded_text),
                      "center({:3},{:3}) size({:3},{:3}) angle{:5.1f} deg".format(
                          int(pos[0][0]), int(pos[0][1]), pos[1][0], pos[1][1], pos[2]))
                if cropped_stacked is not None:
                    cv2.putText(cropped_stacked, decoded_text,
                                (120 + 10, 32 * rec_received + 24),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
                    cv2.imshow(f"cropped_stacked {camera_type}", cropped_stacked)
                rec_received += 1

            if cv2.waitKey(1) == ord('q'):
                break
            if rec_received >= rec_pushed:
                in_det = q_det.tryGet()
                if in_det is not None:
                    frame = host_sync.get_msg(in_det.getSequenceNum()).getCvFrame().copy()
                    scores, geom1, geom2 = to_tensor_result(in_det).values()
                    scores = np.reshape(scores, (1, 1, 64, 64))
                    geom1 = np.reshape(geom1, (1, 4, 64, 64))
                    geom2 = np.reshape(geom2, (1, 1, 64, 64))
                    bboxes, confs, angles = east.decode_predictions(scores, geom1, geom2)
                    boxes, angles = east.non_max_suppression(np.array(bboxes), probs=confs, angles=np.array(angles))
                    rotated_rectangles = [
                        east.get_cv_rotated_rect(bbox, angle * -1)
                        for (bbox, angle) in zip(boxes, angles)
                    ]
                    rec_received = 0
                    rec_pushed = len(rotated_rectangles)
                    if rec_pushed:
                        print("====== Pushing for recognition, count:", rec_pushed)
                    cropped_stacked = None
                    for idx, rotated_rect in enumerate(rotated_rectangles):
                        # Detection ran on the 256x256 preview; scale back up to the 1024x1024 frame
                        rotated_rect[0][0] = rotated_rect[0][0] * 4
                        rotated_rect[0][1] = rotated_rect[0][1] * 4
                        rotated_rect[1][0] = rotated_rect[1][0] * 4
                        rotated_rect[1][1] = rotated_rect[1][1] * 4
                        points = cv2.boxPoints(rotated_rect).astype(np.int32)
                        print(rotated_rect)
                        cv2.polylines(frame, [points], isClosed=True, color=(255, 0, 0), thickness=1, lineType=cv2.LINE_8)

                        rr = dai.RotatedRect()
                        rr.center.x = rotated_rect[0][0]
                        rr.center.y = rotated_rect[0][1]
                        rr.size.width = rotated_rect[1][0]
                        rr.size.height = rotated_rect[1][1]
                        rr.angle = rotated_rect[2]
                        cfg = dai.ImageManipConfig()
                        cfg.setCropRotatedRect(rr, False)
                        cfg.setResize(120, 32)
                        if idx == 0:
                            # Send the full frame only once; later crops reuse it
                            h, w, c = frame.shape
                            imgFrame = dai.ImgFrame()
                            imgFrame.setData(to_planar(frame))
                            imgFrame.setType(dai.ImgFrame.Type.BGR888p)
                            imgFrame.setWidth(w)
                            imgFrame.setHeight(h)
                            q_manip_img.send(imgFrame)
                        else:
                            cfg.setReusePreviousImage(True)
                        q_manip_cfg.send(cfg)

                        transformed = q_manip_out.get().getCvFrame()
                        # Placeholder strip on the right where the recognized text is drawn later
                        rec_placeholder_img = np.zeros((32, 200, 3), np.uint8)
                        transformed = np.hstack((transformed, rec_placeholder_img))
                        if cropped_stacked is None:
                            cropped_stacked = transformed
                        else:
                            cropped_stacked = np.vstack((cropped_stacked, transformed))
                    if cropped_stacked is not None:
                        cv2.imshow(f"cropped_stacked {camera_type}", cropped_stacked)
            if frame is not None:
                cv2.imshow(f'frame {camera_type}', frame)

            key = cv2.waitKey(1)
            if key == ord('q'):
                break
            elif key == ord('t'):
                print("Autofocus trigger (and disable continuous)")
                ctrl = dai.CameraControl()
                ctrl.setAutoFocusMode(dai.CameraControl.AutoFocusMode.AUTO)
                ctrl.setAutoFocusTrigger()
                controlQueue.send(ctrl)
Don't forget east.py, which is provided in the GitHub repo.
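If you don't have east.py at hand, the interface the code above relies on looks roughly like this (signatures inferred from the calls, not copied from the actual file):

# east.py - expected interface (stubs only; use the real implementation from the repo)
def decode_predictions(scores, geometry1, geometry2):
    """Turn the EAST score map (1, 1, 64, 64) and geometry maps (1, 4, 64, 64) /
    (1, 1, 64, 64) into parallel lists of candidate boxes, confidences and angles."""
    ...

def non_max_suppression(boxes, probs, angles):
    """Filter overlapping candidates; return the surviving boxes and their angles."""
    ...

def get_cv_rotated_rect(bbox, angle):
    """Convert a box plus angle into OpenCV's ((cx, cy), (w, h), angle) rotated-rect format."""
    ...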