For completeness, here are the (slightly adapted) scripts.
Converting the model:
from pathlib import Path

import torch
from torch import nn
import blobconverter

class Model(nn.Module):
    def forward(self, img1, img2):
        # Average each input tensor over the channel dimension
        mean1 = torch.mean(img1, dim=0)
        mean2 = torch.mean(img2, dim=0)
        # Absolute difference between the two mean tensors (sqrt of the square == abs)
        diff = torch.sqrt(torch.pow(torch.sub(mean1, mean2), 2)).float()
        print(diff.shape)
        threshold = 30
        # Create a binary mask where differences are higher than the threshold
        mask = torch.where(diff > threshold, torch.tensor(1.0), torch.tensor(0.0))
        print(mask.shape)
        # Count the number of moving pixels
        movingPx = torch.sum(mask)
        print(movingPx)
        # Return the moving-pixel count together with the total pixel count.
        # Note: torch.tensor((movingPx, ...)) would convert the traced value to a
        # Python scalar and bake it into the ONNX graph as a constant, so use
        # torch.stack to keep movingPx as a live graph output.
        totalPx = torch.tensor(float(diff.shape[0] * diff.shape[1]))
        return torch.stack((movingPx, totalPx))
# Define the expected input shape (dummy input)
shape = (3, 720, 720)  # 3 channels, 720x720 resolution
X = torch.ones(shape, dtype=torch.float32)  # Dummy input tensor filled with ones

# Create a directory to save the ONNX file if it doesn't exist
path = Path("out/")
path.mkdir(parents=True, exist_ok=True)

# Define the path for the ONNX file
onnx_file = "out/diff_05_threshold_30.onnx"
print(f"Writing to {onnx_file}")

# Export the PyTorch model to ONNX format
torch.onnx.export(
    Model(),                       # Instantiate the model
    (X, X),                        # Dummy inputs for the model (two frames)
    onnx_file,                     # Path to save the ONNX file
    opset_version=12,              # ONNX operator set version
    do_constant_folding=True,      # Perform constant folding optimization
    input_names=['img1', 'img2'],  # Names for the input nodes in the ONNX graph
    output_names=['movingRatio'],  # Name for the output node in the ONNX graph
)
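
# Optional sanity check of the exported graph structure
# (editor's sketch, not part of the original script; assumes the
# standalone 'onnx' package is installed):
import onnx
onnx.checker.check_model(onnx.load(onnx_file))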
# No need for onnx-simplifier here

# Use blobconverter to convert the ONNX model to blob format
blobconverter.from_onnx(
    model=onnx_file,         # Path to the ONNX model file
    data_type="FP16",        # Data type for the blob (FP16)
    shaves=6,                # Number of SHAVE cores to compile the blob for
    use_cache=False,         # Disable caching of intermediate files
    output_dir="../models",  # Directory to save the generated blob file
    optimizer_params=[],     # Additional model-optimizer parameters (empty here)
)
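Before running the blob on the device, the exported ONNX can be verified on the host. A minimal sketch, assuming onnxruntime is installed; the random frames and the expected [movingPx, totalPx] output layout follow from the export above:

import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession("out/diff_05_threshold_30.onnx",
                            providers=["CPUExecutionProvider"])
img1 = (np.random.rand(3, 720, 720) * 255).astype(np.float32)
img2 = (np.random.rand(3, 720, 720) * 255).astype(np.float32)
# Expect two values: the moving-pixel count and 518400 (= 720 * 720)
print(sess.run(None, {"img1": img1, "img2": img2}))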
Code for OAK:
import numpy as np
import cv2
import depthai as dai
import time

# Create DepthAI pipeline
p = dai.Pipeline()

# Set the OpenVINO version (must match the version the blob was compiled for,
# here 2022.1 as encoded in the blob filename)
p.setOpenVINOVersion(dai.OpenVINO.VERSION_2022_1)

# Create a color camera node
camRgb = p.create(dai.node.ColorCamera)
camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P)
#camRgb.setFps(60)
camRgb.setVideoSize(720, 720)    # Set video size
camRgb.setPreviewSize(720, 720)  # Set preview size
camRgb.setInterleaved(False)     # Disable interleaving (planar output)

# NN
nn = p.create(dai.node.NeuralNetwork)
nn.setBlobPath("./models/diff_05_threshold_30_openvino_2022.1_6shave.blob")
# Create a script node
pass_data_script = p.create(dai.node.Script)
# Configure the script node to send consecutive frame pairs to the NN
pass_data_script.setScript("""
# Initialize the 'old' variable with the first frame
old = node.io['in'].get()
# Loop to continuously process frames
while True:
    # Get the current frame
    frame = node.io['in'].get()
    # Send the previous frame ('old') to the first NN input
    node.io['img1'].send(old)
    # Send the current frame to the second NN input
    node.io['img2'].send(frame)
    # Update the 'old' variable with the current frame for the next iteration
    old = frame
""")

# Link the script outputs to the neural network inputs 'img1', 'img2'
pass_data_script.outputs['img1'].link(nn.inputs['img1'])
pass_data_script.outputs['img2'].link(nn.inputs['img2'])
# Link the camera output to the script input
camRgb.preview.link(pass_data_script.inputs['in'])
# Send nn output to the host
nn_xout = p.create(dai.node.XLinkOut)
nn_xout.setStreamName("nn")
nn.out.link(nn_xout.input)
# Send image to host
rgb_xout = p.create(dai.node.XLinkOut)
rgb_xout.setStreamName("rgb")
camRgb.video.link(rgb_xout.input)
# Pipeline is defined, now we can connect to the device
with dai.Device(p) as device:
    # Set debugging level
    #device.setLogLevel(dai.LogLevel.DEBUG)
    #device.setLogOutputLevel(dai.LogLevel.DEBUG)

    # Get output queues for the neural network and the camera
    qNn = device.getOutputQueue(name="nn", maxSize=1, blocking=False)
    qCam = device.getOutputQueue(name="rgb", maxSize=2, blocking=False)

    timestamp = time.monotonic()
    cnt = 0
    # Main processing loop
    while cnt < 10:
        cnt += 1
        nnData = qNn.get()
        #print(nnData.getAllLayerNames())
        # 'movingRatio' is the output layer of the neural network
        movingRatio = np.array(nnData.getLayerFp16('movingRatio'))
        print(movingRatio)
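        # Editor's sketch (assumes the [movingPx, totalPx] output layout
        # from the export script above) to get the actual moving-pixel ratio:
        moving_px, total_px = movingRatio
        print("moving ratio:", moving_px / total_px)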
        '''
        # Get cv frame from queue
        frame = qCam.get().getCvFrame()
        # Get and display the color frame from the camera
        cv2.imshow("Color", frame)
        # Break loop on 'q' or ESC key press
        key = cv2.waitKey(1)
        if key == ord('q') or key == 27:
            break
        cv2.destroyWindow("Color")
        #cv2.destroyAllWindows()
        '''
print('Done!')
I seem to misunderstand how the conversion of the "neural network" works.