If I subtract 2 StereoDepth frames from each other how to output in OpenCV
Thanks for the reply.
I got images like this with the diff process above:
AdamPolak This is the "final" version for doing a diff between two depth-map images:
And I added time_diff code:
timestamp = dai.Clock.now()
with dai.Device(p) as device:
    …
    time_diff = depthDiff.getTimestamp() - timestamp
    print('time_diff = ', time_diff)
    timestamp = depthDiff.getTimestamp()
But the output is always 0.0.
I'm confused now.
AdamPolak
Thank you Adam.
The Python code is as follows:
import numpy as np
import cv2
import depthai as dai
resolution = (1280, 800) # (width, height); 24 FPS (without visualization)
lrcheck = False # Better handling for occlusions
extended = False # Closer-in minimum depth, disparity range is doubled
subpixel = True # Better accuracy for longer distance, fractional disparity 32-levels
p = dai.Pipeline()
# Configure Mono Camera Properties
left = p.createMonoCamera()
left.setResolution(dai.MonoCameraProperties.SensorResolution.THE_800_P)
left.setBoardSocket(dai.CameraBoardSocket.LEFT)
right = p.createMonoCamera()
right.setResolution(dai.MonoCameraProperties.SensorResolution.THE_800_P)
right.setBoardSocket(dai.CameraBoardSocket.RIGHT)
stereo = p.createStereoDepth()
left.out.link(stereo.left)
right.out.link(stereo.right)
# Set stereo depth options
stereo.setDefaultProfilePreset(dai.node.StereoDepth.PresetMode.HIGH_DENSITY)
config = stereo.initialConfig.get()
config.postProcessing.speckleFilter.enable = False
# config.postProcessing.speckleFilter.speckleRange = 60
config.postProcessing.temporalFilter.enable = False
config.postProcessing.spatialFilter.enable = False
# config.postProcessing.spatialFilter.holeFillingRadius = 2
# config.postProcessing.spatialFilter.numIterations = 1
config.postProcessing.thresholdFilter.minRange = 1000 # mm
config.postProcessing.thresholdFilter.maxRange = 10000 # mm
config.censusTransform.enableMeanMode = True
# these 2 parameters should be fine-tuned
config.costMatching.linearEquationParameters.alpha = 0
config.costMatching.linearEquationParameters.beta = 2
stereo.initialConfig.set(config)
stereo.setLeftRightCheck(lrcheck)
stereo.setExtendedDisparity(extended)
stereo.setSubpixel(subpixel)
# stereo.setDepthAlign(dai.CameraBoardSocket.RGB)
stereo.setRectifyEdgeFillColor(0) # Black, to better see the cutout
# Depth -> Depth Diff
nn = p.createNeuralNetwork()
nn.setBlobPath("diff_images_simplified_openvino_2022.1_4shave.blob")
script = p.create(dai.node.Script)
stereo.disparity.link(script.inputs['in'])
timestamp = dai.Clock.now()
print("ts1 = ", timestamp)
script.setScript("""
# Hold the previous frame and emit (previous, current) pairs
old = node.io['in'].get()
while True:
    frame = node.io['in'].get()
    node.io['img1'].send(old)
    node.io['img2'].send(frame)
    old = frame
""")
script.outputs['img1'].link(nn.inputs['input2'])
script.outputs['img2'].link(nn.inputs['input1'])
# stereo.disparity.link(nn.inputs["input1"])
depthDiffOut = p.createXLinkOut()
depthDiffOut.setStreamName("depth_diff")
nn.out.link(depthDiffOut.input)
with dai.Device(p) as device:
    qDepthDiff = device.getOutputQueue(name="depth_diff", maxSize=4, blocking=False)
    while True:
        depthDiff = qDepthDiff.get()
        print("ts0 = ", timestamp)
        time_diff = depthDiff.getTimestamp() - timestamp
        print('time_diff = ', time_diff)
        timestamp = depthDiff.getTimestamp()
        print("ts2 = ", timestamp)
        # Reshape the flat FP16 output into a (height, width) image
        floatVector = depthDiff.getFirstLayerFp16()
        diff = np.array(floatVector).reshape(resolution[1], resolution[0])
        colorize = cv2.normalize(diff, None, 255, 0, cv2.NORM_INF, cv2.CV_8UC1)
        colorize = cv2.applyColorMap(colorize, cv2.COLORMAP_JET)
        cv2.imshow("Diff", colorize)
        if cv2.waitKey(1) == ord('q'):
            break
AdamPolak
And the model code is:
from pathlib import Path
import torch
from torch import nn
import blobconverter
import onnx
from onnxsim import simplify
import sys
class DiffImgs(nn.Module):
    def forward(self, img1, img2):
        # Reassemble FP16 depth values from the interleaved U8 input bytes
        img1DepthFP16 = 256.0 * img1[:, :, :, 1::2] + img1[:, :, :, ::2]
        img2DepthFP16 = 256.0 * img2[:, :, :, 1::2] + img2[:, :, :, ::2]
        # Create binary masks for each image
        # A pixel in the mask is 1 if the corresponding pixel in the image is 0, otherwise it's 0
        img1Mask = (img1DepthFP16 == 0)
        img2Mask = (img2DepthFP16 == 0)
        # If a pixel is 0 in either image, set the corresponding pixel in both images to 0
        img1DepthFP16 = img1DepthFP16 * (~img1Mask & ~img2Mask)
        img2DepthFP16 = img2DepthFP16 * (~img1Mask & ~img2Mask)
        # Compute the difference between the two images
        diff = torch.sub(img1DepthFP16, img2DepthFP16)
        return diff
# Instantiate the model
model = DiffImgs()
# Create dummy input for the ONNX export
input1 = torch.randn(1, 1, 800, 1280 * 2, dtype=torch.float16)
input2 = torch.randn(1, 1, 800, 1280 * 2, dtype=torch.float16)
onnx_file = "diff_images.onnx"
# Export the model
torch.onnx.export(model,                     # model being run
                  (input1, input2),          # model input (or a tuple for multiple inputs)
                  onnx_file,                 # where to save the model (can be a file or file-like object)
                  opset_version=12,          # the ONNX version to export the model to
                  do_constant_folding=True,  # whether to execute constant folding for optimization
                  input_names=['input1', 'input2'],  # the model's input names
                  output_names=['output'])           # the model's output names
# Simplify the model
onnx_model = onnx.load(onnx_file)
onnx_simplified, check = simplify(onnx_model)
onnx.save(onnx_simplified, "diff_images_simplified.onnx")
# Use blobconverter to convert onnx -> IR -> blob
blobconverter.from_onnx(
    model="diff_images_simplified.onnx",
    data_type="FP16",
    shaves=4,
    use_cache=False,
    output_dir="../",
    optimizer_params=[],
    compile_params=['-ip U8'],
)
I am not sure how the information is being represented, but it looks like HH:MM:SS.
Because the depth frames usually come in at about 9 per second, the difference would be around 0.1 seconds, so Python is rounding it down to 0 seconds.
With things like this you usually want to represent time as milliseconds since some reference point, so you want to capture or display the time in a different format.
Try running getTimestamp().time_since_epoch (this should display the time in milliseconds since it started).
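For example, a minimal sketch of printing the frame-to-frame interval in milliseconds, assuming (as in the DepthAI Python API) that getTimestamp() returns a datetime.timedelta, and reusing the qDepthDiff queue from the pipeline above:
# Sketch: express the gap between consecutive outputs in milliseconds explicitly,
# so sub-second differences are obvious in the printout.
prev_ts = None
while True:
    depthDiff = qDepthDiff.get()
    ts = depthDiff.getTimestamp()  # datetime.timedelta in the Python API
    if prev_ts is not None:
        time_diff_ms = (ts - prev_ts).total_seconds() * 1000.0
        print(f"time_diff = {time_diff_ms:.1f} ms")
    prev_ts = ts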
Normalization on a depth-diff frame is a bit weird; it doesn't scale very well. Normalizing with different values might highlight the differences better. If you change the frame so that anything that is not 0 becomes 1 and then display that, all pixels that are the same will be black and all pixels that differ will be white.
It will be a lot noisier, but it will highlight the differences in a louder way.
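A minimal sketch of that idea, assuming diff is the (height, width) float array reshaped from getFirstLayerFp16() in the code above:
# Make every non-zero diff pixel white and every zero pixel black.
# Noisier than the normalized view, but changes stand out immediately.
mask = (np.abs(diff) > 0).astype(np.uint8) * 255
cv2.imshow("Diff mask", mask)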
- depth values go into diff model at: (0..65535) depth data in millimeters
- they are calculated from 96 disparity steps
- so you can only have 96 different depth readings
- the readings at long distances "jump" a lot (it is not linear; see the rough calculation after this list)
- so from step 1 to step 2 it might be 10 millimeters, but from step 90 to step 91 it might be 10,000 millimeters
- so at far-away distances (around 7 meters) the depth reading may have landed on step 90, and then, even though nothing has changed, the next reading lands on step 91
- this means even though nothing changed, there might be a diff reading of 1000 millimeters
- when you wave your hand, the diff between the distance from the camera to whatever was behind your hand and the distance to your hand might be small, maybe 1,000 millimeters
- So the depth_diff has a range of values of 0 (because either it didn't have a confident reading or it is the exact same reading), or 1,000 (because of your hand movement), or 10,000 (because a far wall changed a step reading even though nothing changed)
- So when you run
colorize = cv2.normalize(diff, None, 255, 0, cv2.NORM_INF, cv2.CV_8UC1)
you are asking OpenCV to rescale the values in the depth_diff (normalize) so that the smallest value in the depth_diff (0) becomes 0 and the largest value in the depth_diff (10,000) becomes 255.
- From the depth_diff you sent, I am guessing that the top right of the image is far-away walls, and the bottom left of the image is also far away
- The largest values are "white" (the far-away things)
- So your small depth_diff values (like your hand moving) get squashed to near black (very dark grey), with a value of about 25, only 10% of the largest value (which will be white)
- if you set up your depth camera so that there was a large poster only a few meters away from it, and then waved your hand in front of THAT, the depth_diff values from your hand moving would be larger relative to all the readings, so they would be more clear
- try changing to
colorize = cv2.normalize(diff, None, 1, 0, cv2.NORM_INF, cv2.CV_8UC1)
and see what happens - it should make pixels only white or black: all 0 values (no difference or an unconfident reading) will be black, and ANY depth_diff values that are not 0 will be white
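To make the step nonlinearity concrete, here is a rough, hypothetical calculation using the standard stereo relation depth = focal_length_px * baseline / disparity. The baseline and focal length below are assumed round numbers, not the calibration of any particular camera, and the disparity values count from the camera's point of view (large disparity = close, small disparity = far):
# Assumed example values, not real calibration data
focal_px = 800.0      # horizontal focal length in pixels
baseline_m = 0.075    # distance between the two mono cameras in meters

def depth_m(disparity):
    # Standard pinhole stereo relation
    return focal_px * baseline_m / disparity

# Close range: large disparity, one step changes depth by a few millimeters
print(depth_m(90) - depth_m(91))   # ~0.007 m, i.e. ~7 mm per step
# Far range: small disparity, one step changes depth by meters
print(depth_m(5) - depth_m(6))     # ~2.0 m, i.e. ~2,000 mm per step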