Hi erik, here is my MRE.
https://filetransfer.io/data-package/oCBNoI32#link
I've opened an issue on GitHub: https://github.com/luxonis/depthai/issues/787
Thank you,
Pierre
Hi pierreia,
The code looks good to me (normalization of bounding boxes); I think the problem is with the model and/or decoding (so anchors/masks). I would also check for incorrect color order and/or incorrect channel layout (both mentioned here).
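For example, on a ColorCamera node those two checks look roughly like this (a sketch, not from Pierre's code; camRgb is a hypothetical node name):

camRgb = pipeline.create(dai.node.ColorCamera)
camRgb.setColorOrder(dai.ColorCameraProperties.ColorOrder.BGR)  # color order: BGR vs RGB
camRgb.setInterleaved(False)  # planar (CHW) layout, which NN nodes expect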
Thanks, Erik
We tried with the latest MobileNet "from_zoo" and a YOLO model, and we got the same results, so I'm not sure it comes from the model itself.
Regarding the channel layout: since we put the frame through an OpenCV manipulation before passing it to the model, the frame should be in BGR channel layout, which is what we set in the ImgFrame.
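For reference, a sketch of that conversion: OpenCV frames are interleaved (HWC) BGR, while BGR888p expects planar (CHW) data, hence the transpose before sending (variable names are illustrative):

planar = cv2.resize(frame, (300, 300)).transpose(2, 0, 1).flatten()  # HWC -> CHW
img = dai.ImgFrame()
img.setType(dai.ImgFrame.Type.BGR888p)  # planar BGR, matching the data above
img.setData(planar)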
Update: it seems to be working with YOLO tiny, though it is very slow.
Hi erik, you can download the code and the MobileNet model here: https://filetransfer.io/data-package/oCBNoI32#link
Hi pierreia,
The only problem in the code above is that it's not synced. You could either sync frames and detections with host-side syncing, or just use the passthrough frame like I did below:
#!/usr/bin/env python3
from pathlib import Path
import sys
import cv2
import depthai as dai
import numpy as np
from time import monotonic
import blobconverter

# Get argument first
nnPath = 'mobilenet-ssd_openvino_2021.4_8shave.blob'
videoPath = 'traffic_5mn.mp4'
if len(sys.argv) > 2:
    nnPath = sys.argv[1]
    videoPath = sys.argv[2]

if not Path(nnPath).exists() or not Path(videoPath).exists():
    raise FileNotFoundError(f'Required file/s not found, please run "{sys.executable} install_requirements.py"')

# MobilenetSSD label texts
labelMap = ["background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow",
            "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]

# Create pipeline
pipeline = dai.Pipeline()

# Define sources and outputs
nn = pipeline.create(dai.node.MobileNetDetectionNetwork)

xinFrame = pipeline.create(dai.node.XLinkIn)
xinFrame.setStreamName("inFrame")
xinFrame.out.link(nn.input)

# Properties
nn.setConfidenceThreshold(0.5)
nn.setBlobPath(nnPath)
nn.setNumInferenceThreads(2)
nn.input.setBlocking(True)

# Linking
nnOut = pipeline.create(dai.node.XLinkOut)
nnOut.setStreamName("nn")
nn.out.link(nnOut.input)

nnPass = pipeline.create(dai.node.XLinkOut)
nnPass.setStreamName("pass")
nn.passthrough.link(nnPass.input)

# Connect to device and start pipeline
with dai.Device(pipeline) as device:
    # Input queue will be used to send video frames to the device.
    qIn = device.getInputQueue(name="inFrame")
    # Output queue will be used to get nn data from the video frames.
    qDet = device.getOutputQueue(name="nn", maxSize=6, blocking=True)
    qPass = device.getOutputQueue("pass")

    frame = None
    detections = []

    # nn data, being the bounding box locations, are in <0..1> range - they need to be normalized with frame width/height
    def frameNorm(frame, bbox):
        normVals = np.full(len(bbox), frame.shape[0])
        normVals[::2] = frame.shape[1]
        return (np.clip(np.array(bbox), 0, 1) * normVals).astype(int)

    def to_planar(arr: np.ndarray, shape: tuple) -> np.ndarray:
        return cv2.resize(arr, shape).transpose(2, 0, 1).flatten()

    def displayFrame(name, frame):
        for detection in detections:
            bbox = frameNorm(frame, (detection.xmin, detection.ymin, detection.xmax, detection.ymax))
            cv2.putText(frame, labelMap[detection.label], (bbox[0] + 10, bbox[1] + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.putText(frame, f"{int(detection.confidence * 100)}%", (bbox[0] + 10, bbox[1] + 40), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (255, 0, 0), 2)
        # Show the frame
        cv2.imshow(name, frame)

    cap = cv2.VideoCapture(videoPath)
    while cap.isOpened():
        read_correctly, frame = cap.read()
        if not read_correctly:
            break

        img = dai.ImgFrame()
        resized = to_planar(frame, (300, 300))
        img.setTimestamp(monotonic())
        img.setType(dai.RawImgFrame.Type.BGR888p)
        img.setSize(300, 300)
        img.setData(resized)
        qIn.send(img)

        inDet = qDet.tryGet()
        if inDet is not None:
            detections = inDet.detections

        frame = qPass.get().getCvFrame()
        displayFrame("passthrough", frame)

        if cv2.waitKey(1) == ord('q'):
            break
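If you'd rather draw on the original frames instead of the passthrough, the host-side alternative is to buffer messages by sequence number and only display a frame once its detections have arrived. A minimal sketch (not from this thread; it assumes the qPass/qDet queues and displayFrame from the code above):

msgs = {}  # seqNum -> {'frame': ImgFrame, 'det': ImgDetections}

def add_msg(name, msg):
    seq = msg.getSequenceNum()
    msgs.setdefault(seq, {})[name] = msg
    if len(msgs[seq]) == 2:  # frame and detections both arrived for this seqNum
        return msgs.pop(seq)
    return None

# Inside the main loop, instead of the blocking qPass.get():
for name, q in (('frame', qPass), ('det', qDet)):
    msg = q.tryGet()
    if msg is not None:
        synced = add_msg(name, msg)
        if synced is not None:
            detections = synced['det'].detections
            displayFrame("synced", synced['frame'].getCvFrame())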
erik
I'm trying live inferencing and video inferencing with the YOLOv8 nano.
Pipeline:
def init_pipeline():
    pipeline = depthai.Pipeline()
    cam_rgb = pipeline.createColorCamera()
    detection_nn = pipeline.createYoloDetectionNetwork()
    cam_rgb.setResolution(
        depthai.ColorCameraProperties.SensorResolution.THE_4_K)
    cam_rgb.setPreviewSize(640, 640)
    cam_rgb.setInterleaved(True)
    xout_rgb = pipeline.createXLinkOut()
    xout_rgb.setStreamName("rgb")
    cam_rgb.preview.link(xout_rgb.input)
    cam_rgb.setPreviewKeepAspectRatio(False)

    manip1 = pipeline.createImageManip()
    manip1.setMaxOutputFrameSize(1244160)
    manip1.initialConfig.setResize(sizeX, sizeY)
    cam_rgb.preview.link(manip1.inputImage)
    manip1.initialConfig.setFrameType(depthai.ImgFrame.Type.BGR888p)
    manip1.inputImage.setBlocking(True)

    # Video input: frames are sent from the host through an XLinkIn node
    if args.videoPath is not None:
        xinFrame = pipeline.create(depthai.node.XLinkIn)
        xinFrame.setStreamName("inFrame")
        xinFrame.out.link(manip1.inputImage)
        xinFrame.setMaxDataSize(1920*1080*3)
        nnPass = pipeline.create(depthai.node.XLinkOut)
        nnPass.setStreamName("pass")
        detection_nn.passthrough.link(xout_rgb.input)
    else:
        xinFrame = None

    # Extract the values from the JSON
    num_classes = config['nn_config']['NN_specific_metadata']['classes']
    coordinates = config['nn_config']['NN_specific_metadata']['coordinates']
    anchors = config['nn_config']['NN_specific_metadata']['anchors']
    anchor_masks = config['nn_config']['NN_specific_metadata']['anchor_masks']
    iou_threshold = config['nn_config']['NN_specific_metadata']['iou_threshold']

    # Set the values
    detection_nn.setNumClasses(num_classes)
    detection_nn.setCoordinateSize(coordinates)
    detection_nn.setAnchors(anchors)
    detection_nn.setAnchorMasks(anchor_masks)
    detection_nn.setIouThreshold(iou_threshold)
    detection_nn.setConfidenceThreshold(0.5)
    # detection_nn.setNumInferenceThreads(2)
    detection_nn.input.setBlocking(True)

    # Blob is the Neural Network file, compiled for MyriadX. It contains both the definition and weights of the model
    # We're using the blobconverter tool to retrieve the MobileNetSSD blob automatically from the OpenVINO Model Zoo
    # detection_nn.setBlobPath(blobconverter.from_zoo(name='mobilenet-ssd', shaves=6))
    # Next, we filter out the detections that are below a confidence threshold. Confidence can be anywhere between <0..1>
    # Next, we link the camera 'preview' output to the neural network detection input, so that it can produce detections
    manip1.out.link(detection_nn.input)

    if customModel is True:
        nnPath = str(
            (parentDir / Path('../../data/' + model)).resolve().absolute())
        # print(nnPath)
        detection_nn.setBlobPath(nnPath)
        print("Custom Model" + nnPath + "Size: " +
              str(sizeX) + "x" + str(sizeY))
    else:
        detection_nn.setBlobPath(blobconverter.from_zoo(
            name='person-detection-0106', shaves=6))
        print("Model from OpenVINO Zoo" + "Size: " +
              str(sizeX) + "x" + str(sizeY))

    xout_nn = pipeline.createXLinkOut()
    xout_nn.setStreamName("nn")
    detection_nn.out.link(xout_nn.input)
    return pipeline
def detect_and_count():
    global outputFrame, lock, zones_current_count, listeners, loop
    pipeline = init_pipeline()
    inputFrameShape = (sizeX, sizeY)

    with depthai.Device(pipeline) as device:
        q_rgb = device.getOutputQueue("rgb")
        q_nn = device.getOutputQueue("nn")
        qPass = device.getOutputQueue("pass")
        # q_manip = device.getInputQueue("")
        baseTs = time.monotonic()
        simulatedFps = 30
        frame = None
        detections = []
        timestamp = datetime.utcnow()
        zone_data = []

        def to_planar(arr: np.ndarray, shape: tuple) -> np.ndarray:
            return cv2.resize(arr, shape).transpose(2, 0, 1).flatten()

        if args.videoPath is not None:
            videoPath = str(
                (parentDir / Path('../../data/' + video_source)).resolve().absolute())
            cap = cv2.VideoCapture(videoPath, cv2.CAP_FFMPEG)

        # loop over frames from the video stream
        while True:
            if args.videoPath is not None:
                read_correctly, frame = cap.read()
                if not read_correctly:
                    break

            if args.videoPath is not None:
                q_vid = device.getInputQueue(name="inFrame")
                img = depthai.ImgFrame()
                img.setType(depthai.RawImgFrame.Type.BGR888p)
                img.setData(to_planar(frame, inputFrameShape))
                img.setTimestamp(baseTs)
                baseTs += 1/simulatedFps
                img.setWidth(inputFrameShape[0])
                img.setHeight(inputFrameShape[1])
                q_vid.send(img)
                # in_vid = q_vid.tryGet()
                print("hello", timestamp)

            if args.videoPath is not None:
                print("video")
                frame = qPass.get().getCvFrame()

            in_rgb = q_rgb.tryGet()
            in_nn = q_nn.tryGet()

            if in_rgb is not None and args.videoPath is None:
                print("live")
                frame = in_rgb.getCvFrame()

            if in_nn is not None:
                print("detect")
                detections = in_nn.detections
                zone_data += check_overlap(frame, detections)
                print("done", timestamp)

            now = datetime.utcnow()
            if now.second != timestamp.second:
                t = threading.Thread(
                    target=insert_data, args=(zone_data, ))
                t.daemon = True
                t.start()
                zone_data = []
                timestamp = now

            with lock:
                outputFrame = frame.copy()
            print("finish")

            if args.videoPath is not None:
                ret, frame = cap.read()
                if not ret:
                    print("video over", timestamp)
                    cap.release()
                    break

            # at any time, you can press "q" and exit the main loop, therefore exiting the program itself
            if cv2.waitKey(1) == ord('q'):
                break


parser = argparse.ArgumentParser()
parser.add_argument('-v', '--videoPath',
                    help="Path to video frame", default=None)
args = parser.parse_args()
video_source = args.videoPath
What's happening is that both live and video inferencing run at the same time, and it stops after 30 seconds.
Any take on what I'm doing wrong?
krishnashravan Please provide a minimal repro example.
erik
Hi erik, I have attached the files here.
The test4 file gives an error where the video keeps changing the size of the preview.
The test5 file gives an error where the video shifts to live inferencing in between frames.
The common issue I found is that both stop working after 30 seconds.
Only live inferencing works fine; video inferencing stops after 30 seconds.
Let me know if you need anything else.
This isn't reproducible.
python .\test4.py
Traceback (most recent call last):
File "D:\Downloads\yolov8-testing-pt-files-New%20folder\test4.py", line 175, in <module>
parser = argparse.ArgumentParser()
NameError: name 'argparse' is not defined
Again, this is not reproducible. Did you even try running test4.py/test5.py?
Sorry erik, but I fixed it.
@Unknown perhaps look at
https://docs-old.luxonis.com/projects/sdk/en/latest/features/replaying/
The SDK also has good support for YOLO models.
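A minimal sketch of what that replay feature looks like with the SDK (assuming depthai_sdk is installed; the video path and model name are illustrative):

from depthai_sdk import OakCamera

# Replay a recording instead of the live camera feed
with OakCamera(replay='video.mp4') as oak:
    color = oak.create_camera('color')
    nn = oak.create_nn('yolov8n_coco_640x352', color)  # model name illustrative
    oak.visualize(nn.out.main, fps=True)
    oak.start(blocking=True)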