Hi @yishu_corpex
I removed the Sync node since it was causing issues; I didn't change anything else, but the RGB and NN outputs still appear to be in sync:
import cv2
import depthai as dai
import numpy as np
import argparse
import json
import time
import sys
from pathlib import Path
import blobconverter
from datetime import timedelta
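# Helper to colorize a disparity frame for display (currently unused; it reads the max disparity from the global `stereo` node defined further down)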
def getDisparityFrame(frame, cvColorMap):
maxDisp = stereo.initialConfig.getMaxDisparity()
disp = (frame * (255.0 / maxDisp)).astype(np.uint8)
disp = cv2.applyColorMap(disp, cvColorMap)
return disp
# parse arguments
parser = argparse.ArgumentParser()
parser.add_argument("-m", "--model", help="Provide model name or model path for inference",
default='models/best_ckpt_openvino_2022.1_6shave.blob', type=str)
parser.add_argument("-c", "--config", help="Provide config path for inference",
default='models/best_ckpt.json', type=str)
args = parser.parse_args()
# parse config
configPath = Path(args.config)
if not configPath.exists():
raise ValueError("Path {} does not exist!".format(configPath))
with configPath.open() as f:
config = json.load(f)
nnConfig = config.get("nn_config", {})
# parse input shape
if "input_size" in nnConfig:
W, H = tuple(map(int, nnConfig.get("input_size").split('x')))
# extract metadata
metadata = nnConfig.get("NN_specific_metadata", {})
classes = metadata.get("classes", {})
coordinates = metadata.get("coordinates", {})
anchors = metadata.get("anchors", {})
anchorMasks = metadata.get("anchor_masks", {})
iouThreshold = metadata.get("iou_threshold", {})
confidenceThreshold = metadata.get("confidence_threshold", {})
#print(metadata)
# parse labels
nnMappings = config.get("mappings", {})
labels = nnMappings.get("labels", {})
#print("Labels: ", labels)
# get model path
nnPath = args.model
if not Path(nnPath).exists():
print("No blob found at {}. Looking into DepthAI model zoo.".format(nnPath))
nnPath = str(blobconverter.from_zoo(args.model, shaves = 6, zoo_type = "depthai", use_cache=True))
# sync outputs (flag kept for reference; no longer used now that the Sync node is removed)
syncNN = True
# Depth and Disparity
outRectified = False  # Output and display rectified streams
lrcheck = True  # Better handling for occlusions
extended = False  # Closer-in minimum depth; disparity range is doubled
subpixel = True  # Better accuracy for longer distances; fractional disparity with 32 levels
depth = True  # Display depth frames
swap_left_right = False  # Swap left and right frames
alpha = None
resolution = dai.MonoCameraProperties.SensorResolution.THE_400_P  # THE_800_P
median = dai.StereoDepthProperties.MedianFilter.KERNEL_7x7
# Create pipeline
pipeline = dai.Pipeline()
# Define sources and outputs
camRgb = pipeline.create(dai.node.ColorCamera)
spatialdetectionNetwork = pipeline.create(dai.node.YoloSpatialDetectionNetwork)
#xoutRgb = pipeline.create(dai.node.XLinkOut)
nnOut = pipeline.create(dai.node.XLinkOut)
# By Yishu
xoutISP = pipeline.create(dai.node.XLinkOut)
manip = pipeline.create(dai.node.ImageManip)
xoutManip = pipeline.create(dai.node.XLinkOut)
# Depth and Disparity
camLeft = pipeline.create(dai.node.MonoCamera)
camRight = pipeline.create(dai.node.MonoCamera)
stereo = pipeline.create(dai.node.StereoDepth)
xoutLeft = pipeline.create(dai.node.XLinkOut)
xoutRight = pipeline.create(dai.node.XLinkOut)
xoutDepth = pipeline.create(dai.node.XLinkOut)
xoutRectifLeft = pipeline.create(dai.node.XLinkOut)
xoutRectifRight = pipeline.create(dai.node.XLinkOut)
# Sync node
#xoutRgb.setStreamName("rgb")
xoutISP.setStreamName("ISP")
nnOut.setStreamName("nn")
xoutManip.setStreamName("Manip")
xoutLeft.setStreamName("left")
xoutRight.setStreamName("right")
xoutDepth.setStreamName("depth")
xoutRectifLeft.setStreamName("rectifiedLeft")
xoutRectifRight.setStreamName("rectifiedRight")
# Properties
camRgb.setPreviewSize(W, H)
camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P) #THE_1080_P
camRgb.setInterleaved(False)
camRgb.setColorOrder(dai.ColorCameraProperties.ColorOrder.BGR)  # BGR rather than RGB, in accordance with model training; experimentally gave fewer false positives
camRgb.setFps(30) #40
camRgb.setIspNumFramesPool(10)
# By Yishu
manip.initialConfig.setKeepAspectRatio(False)  # True
manip.initialConfig.setResize(W, H)
# Keep the frame in BGR to match the color order the model was trained on - Yishu
manip.initialConfig.setFrameType(dai.ImgFrame.Type.BGR888p)  # dai.ImgFrame.Type.RGB888p
# setMaxOutputFrameSize to avoid image bigger than max frame size error - Yishu
manip.setMaxOutputFrameSize(1228800)
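# 1228800 bytes = 640 * 640 * 3, i.e. one BGR888p frame at the default 640x640 input size (adjust if input_size in the JSON config differs)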
# By Yishu
nnOut.input.setBlocking(False)
xoutISP.input.setBlocking(False)
xoutManip.input.setBlocking(False)
# By Yishu
nnOut.input.setQueueSize(10)
xoutISP.input.setQueueSize(10)
xoutManip.input.setQueueSize(10)
spatialdetectionNetwork.input.setQueueSize(10)
# Network specific settings
spatialdetectionNetwork.setConfidenceThreshold(confidenceThreshold)
spatialdetectionNetwork.setNumClasses(classes)
spatialdetectionNetwork.setCoordinateSize(coordinates)
spatialdetectionNetwork.setAnchors(anchors)
spatialdetectionNetwork.setAnchorMasks(anchorMasks)
spatialdetectionNetwork.setIouThreshold(iouThreshold)
spatialdetectionNetwork.setBlobPath(nnPath)
spatialdetectionNetwork.setNumInferenceThreads(2)
spatialdetectionNetwork.input.setBlocking(False)
# SpatialYolo specific parameters
spatialdetectionNetwork.setBoundingBoxScaleFactor(0.5)
spatialdetectionNetwork.setDepthLowerThreshold(100)
spatialdetectionNetwork.setDepthUpperThreshold(5000)
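# Thresholds are in millimetres: depth values outside 100 mm - 5000 mm are ignored when computing spatial coordinates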
# Depth and Disparity Properties
if swap_left_right:
camLeft.setCamera("right")
camRight.setCamera("left")
else:
camLeft.setCamera("left")
camRight.setCamera("right")
for monoCam in (camLeft, camRight): # Common config
monoCam.setResolution(resolution)
monoCam.setFps(2)
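# Note: the mono cameras run at 2 FPS while the RGB camera runs at 30 FPS, so depth updates far less often than color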
stereo.setDefaultProfilePreset(dai.node.StereoDepth.PresetMode.HIGH_DENSITY)
stereo.initialConfig.setMedianFilter(median) # KERNEL_7x7 default
stereo.setRectifyEdgeFillColor(0) # Black, to better see the cutout
stereo.setLeftRightCheck(lrcheck)
stereo.setExtendedDisparity(extended)
stereo.setSubpixel(subpixel)
if alpha is not None:
stereo.setAlphaScaling(alpha)
stereoConfig = stereo.initialConfig.get()  # renamed so it doesn't shadow the JSON `config` loaded above
stereoConfig.postProcessing.brightnessFilter.minBrightness = 0
stereo.initialConfig.set(stereoConfig)
# Alignment with RGB - Ahmad
stereo.setDepthAlign(dai.CameraBoardSocket.RGB)
# Full RGB sensor resolution is 4056x3040
stereo.setOutputSize(1248, 936)
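# 1248x936 keeps the sensor's 4:3 aspect ratio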
# Linking
camRgb.video.link(manip.inputImage)
manip.out.link(spatialdetectionNetwork.input)
# By Yishu
manip.out.link(xoutManip.input)
# Linking stereo depth as an input to the spatial detection network
stereo.depth.link(spatialdetectionNetwork.inputDepth)
spatialdetectionNetwork.out.link(nnOut.input)
camRgb.isp.link(xoutISP.input)
# Depth and Disparity Linking
camLeft.out.link(stereo.left)
camRight.out.link(stereo.right)
stereo.syncedLeft.link(xoutLeft.input)
stereo.syncedRight.link(xoutRight.input)
if depth:
stereo.depth.link(xoutDepth.input)
if outRectified:
stereo.rectifiedLeft.link(xoutRectifLeft.input)
stereo.rectifiedRight.link(xoutRectifRight.input)
streams = ["left", "right", "ISP", "nn"]  # Change by Ahmad (informational only; the output queues are created explicitly below)
if outRectified:
streams.extend(["rectifiedLeft", "rectifiedRight"])
if depth:
streams.append("depth")
cvColorMap = cv2.applyColorMap(np.arange(256, dtype=np.uint8), cv2.COLORMAP_JET)
cvColorMap[0] = [0, 0, 0]
# TakeUp camera (a static IP can be passed to dai.DeviceInfo() to select a specific device)
device_info = dai.DeviceInfo()
try:
# Connect to device and start pipeline
with dai.Device(pipeline, device_info) as device:
# Output queues will be used to get the rgb frames and nn data from the outputs defined above
qDet = device.getOutputQueue("nn", 4, blocking=False)
qISP = device.getOutputQueue("ISP", 4, blocking=False)
qManip = device.getOutputQueue("Manip", 4, blocking=False)
qDepth = device.getOutputQueue("depth", 1, blocking=False)
qLeft = device.getOutputQueue("left", 1, blocking=False)
qRight = device.getOutputQueue("right", 1, blocking=False)
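        # maxSize=1 keeps only the newest depth/left/right message; .get() below still blocks until one arrives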
frame = None
m_frame = None
Depthframe = None
LeftFrame = None
RightFrame = None
detections = []
startTime = time.monotonic()
counter = 0
color2 = (255, 255, 255)
# nn data, being the bounding box locations, are in <0..1> range - they need to be normalized with frame width/height
def frameNorm(frame, bbox):
normVals = np.full(len(bbox), frame.shape[0])
normVals[::2] = frame.shape[1]
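            # normVals ends up as [width, height, width, height], so x coords scale by width and y coords by height; the prints below are debug output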
print(np.clip(np.array(bbox), 0, 1))
print(normVals)
print((np.clip(np.array(bbox), 0, 1) * normVals).astype(int))
return (np.clip(np.array(bbox), 0, 1) * normVals).astype(int)
def displayFrame(name, frame, detections, i):
color_spool = (255, 0, 0)
color_person = (0, 255, 0)
color = ''
            color_blank_img = (255, 255, 255)  # unused
            blank_image = np.zeros((3040, 4056, 1), np.uint8)  # unused; note this allocates a 12 MP buffer on every call
            text_output_path = "D:\\Cameras_Live\\TakeUp\\" + "Label{}".format(i) + ".txt"
#print("Detections: ", [[d.label, d.confidence *100, d.xmin, d.ymin, d.xmax, d.ymax] for d in detections])
#text_file = open(text_output_path, 'w')
for detection in detections:
bbox = frameNorm(frame, (detection.xmin, detection.ymin, detection.xmax, detection.ymax))
if labels[detection.label] == 'person':
color = color_person
else:
color = color_spool
cv2.putText(frame, labels[detection.label], (bbox[0] + 10, bbox[1] + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, (255,255,255))
cv2.putText(frame, f"{int(detection.confidence * 100)}%", (bbox[0] + 10, bbox[1] + 40), cv2.FONT_HERSHEY_TRIPLEX, 0.5, (255,255,255))
cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color, 2)
            cv2.imwrite("E:\\Cameras_Live\\Test_writing_imgs\\Image{}.bmp".format(i), frame)
cv2.namedWindow(name, cv2.WINDOW_NORMAL)
cv2.imshow(name, frame)
print("show complete")
        j = 1
        ### Create the folder that will contain the captures
        folder_name = "D:\\Cameras_Live\\TakeUp"
        path = Path(folder_name)
        path.mkdir(parents=True, exist_ok=True)  # actually create it if it doesn't exist
        syncframe = None  # leftover from the removed Sync node
        sync_detections = None  # leftover from the removed Sync node
        start_time = time.time()  # reference point for the stage-1 timing below
while True:
            # Delay to match DA reading fps
            #time.sleep(4)
            # Per-stage timing; `time` is already imported at the top of the script.
            # Stage 1 measures the time since the previous iteration finished.
            end_time = time.time()
            print("1 - Time taken: {:.6f} seconds".format(end_time - start_time))
            start_time = end_time
# By Yishu
inISP = qISP.get()
end_time = time.time()
print("2: ", inISP, " - Time taken: {:.6f} seconds".format(end_time - start_time))
start_time = end_time
inManip = qManip.get()
end_time = time.time()
print("3: ", inManip, " - Time taken: {:.6f} seconds".format(end_time - start_time))
start_time = end_time
inDet = qDet.get()
end_time = time.time()
print("4: ", inDet, " - Time taken: {:.6f} seconds".format(end_time - start_time))
start_time = end_time
inLeft = qLeft.get()
end_time = time.time()
print("5 - Time taken: {:.6f} seconds".format(end_time - start_time))
start_time = end_time
inRight = qRight.get()
end_time = time.time()
print("6 - Time taken: {:.6f} seconds".format(end_time - start_time))
start_time = end_time
inDepth = qDepth.get()
end_time = time.time()
print("7 - Time taken: {:.6f} seconds".format(end_time - start_time))
start_time = end_time
displayFrame("Color", inISP.getCvFrame(), inDet.detections, j)
end_time = time.time()
print("8 - Time taken: {:.6f} seconds".format(end_time - start_time))
if inManip is not None:
m_frame = inManip.getCvFrame()
print("9")
if inLeft is not None:
LeftFrame = inLeft.getCvFrame()
#cv2.imwrite("D:\\Cameras_Live\\TakeUp\\Left{}.bmp".format(j),LeftFrame)
print("10")
if inRight is not None:
RightFrame = inRight.getCvFrame()
#cv2.imwrite("D:\\Cameras_Live\\TakeUp\\Right{}.bmp".format(j),RightFrame)
print("11")
if inDepth is not None:
Depthframe = inDepth.getCvFrame()
Depthframe = Depthframe.astype(np.uint16)
#cv2.imwrite("D:\\Cameras_Live\\TakeUp\\Depth{}.png".format(j),Depthframe)
print("12")
if cv2.waitKey(1) == ord('q'):
break
j=j+1
'''if j==5:
j=1'''
except Exception as e:
    print(f"An error occurred: {e}")
    sys.exit(1)  # non-zero exit code to signal failure
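By the way, if you want to double-check that the color frames and detections really belong to the same capture without the Sync node, you can compare sequence numbers on the host. A minimal sketch (my addition, assuming the same queue names as in the script; getSequenceNum() is available on both ImgFrame and ImgDetections messages):

# Host-side sync sanity check (sketch, not part of the script above)
inISP = qISP.get()
inDet = qDet.get()
ispSeq = inISP.getSequenceNum()
detSeq = inDet.getSequenceNum()
if ispSeq != detSeq:
    print("Out of sync: ISP seq {} vs NN seq {}".format(ispSeq, detSeq))

Both streams originate from the same ColorCamera, so matching sequence numbers mean the detection packet was produced from that exact frame.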