jakaskerl
Yes I did use the image aligned node within my script. I used the tof-rgb alignment script that you provided, I added a key function that will allow me to capture and save the tof-rgb depth as a ply file. There are other ways that I can approach this however, I simply wanted to see how the rgb would look.
import numpy as np
import cv2
import depthai as dai
import time
from datetime import timedelta
# This example is intended to run unchanged on an OAK-D-SR-PoE camera
FPS = 30.0
RGB_SOCKET = dai.CameraBoardSocket.CAM_C
TOF_SOCKET = dai.CameraBoardSocket.CAM_A
ALIGN_SOCKET = RGB_SOCKET
# Define intrinsic parameters
RGB_INTRINSICS = np.array([
[494.3519287109375, 0.0, 321.8478088378906],
[0.0, 499.4835205078125, 258.3044128417969],
[0.0, 0.0, 1.0]
])
TOF_INTRINSICS = np.array([
[842.6837768554688, 0.0, 673.1340942382812],
[0.0, 851.867431640625, 412.4818115234375],
[0.0, 0.0, 1.0]
])
# Define camera resolutions
RGB_WIDTH, RGB_HEIGHT = 640, 480
TOF_WIDTH, TOF_HEIGHT = 1280, 800
class FPSCounter:
def __init__(self):
self.frameTimes = []
def tick(self):
# record the current time for the FPS calculation
now = time.time()
self.frameTimes.append(now)
self.frameTimes = self.frameTimes[-100:]
def getFps(self):
if len(self.frameTimes) <= 1:
return 0
# Calculate the FPS based on the recorded frame times
return (len(self.frameTimes) - 1) / (self.frameTimes[-1] - self.frameTimes[0])
def save_ply(rgb_image, depth_image, filename):
# Get dimensions of the RGB image
height, width, _ = rgb_image.shape
# Save as PLY file
with open(filename, 'w') as ply_file:
#write ply header
ply_file.write('ply\\n')
ply_file.write('format ascii 1.0\\n')
ply_file.write(f'element vertex {height \* width}\\n')
ply_file.write('property float x\\n')
ply_file.write('property float y\\n')
ply_file.write('property float z\\n')
ply_file.write('property uchar red\\n')
ply_file.write('property uchar green\\n')
ply_file.write('property uchar blue\\n')
ply_file.write('end_header\\n')
# Convert depth image to point cloud using ToF intrinsics
fx = TOF_INTRINSICS[0, 0]
fy = TOF_INTRINSICS[1, 1]
cx = TOF_INTRINSICS[0, 2]
cy = TOF_INTRINSICS[1, 2]
for v in range(height):
for u in range(width):
z = depth_image[v, u] # Depth value
if z > 0: # Ignore zero
# calculate 3d coordinates
x = (u - cx) \* z / fx
y = (v - cy) \* z / fy
r, g, b = rgb_image[v, u] # RGB color
#write the vertex data to PLY file
ply_file.write(f'{x} {y} {z} {r} {g} {b}\\n')
pipeline = dai.Pipeline()
# Define sources and outputs
camRgb = pipeline.create(dai.node.ColorCamera)
tof = pipeline.create(dai.node.ToF)
camTof = pipeline.create(dai.node.Camera)
sync = pipeline.create(dai.node.Sync)
align = pipeline.create(dai.node.ImageAlign)
out = pipeline.create(dai.node.XLinkOut)
# ToF settings
camTof.setFps(FPS)
camTof.setImageOrientation(dai.CameraImageOrientation.ROTATE_180_DEG)
camTof.setBoardSocket(TOF_SOCKET)
# rgb settings
camRgb.setBoardSocket(RGB_SOCKET)
camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_800_P)
camRgb.setFps(FPS)
camRgb.setIspScale(1, 1)
# defines image sizes
depthSize = (TOF_WIDTH, TOF_HEIGHT)
rgbSize = camRgb.getIspSize()
# set output stream name
out.setStreamName("out")
# configure synchronization threshold
sync.setSyncThreshold(timedelta(seconds=0.5 / FPS))
# Linking
camRgb.isp.link(sync.inputs["rgb"]) #link RGB camera output to sync node
camTof.raw.link(tof.input) #link TOF camera raw output to TOF node
tof.depth.link(align.input) #link TOF depth output to align node
align.outputAligned.link(sync.inputs["depth_aligned"]) #link aligned depth to sync node
sync.inputs["rgb"].setBlocking(False) #set rgb input as non-blocking
camRgb.isp.link(align.inputAlignTo) #link sync output to XLinkOut node
sync.out.link(out.input)
def colorizeDepth(frameDepth):
invalidMask = frameDepth == 0
try:
minDepth = np.percentile(frameDepth[frameDepth != 0], 3)
maxDepth = np.percentile(frameDepth[frameDepth != 0], 95)
logDepth = np.log(frameDepth, where=frameDepth != 0)
logMinDepth = np.log(minDepth)
logMaxDepth = np.log(maxDepth)
np.nan_to_num(logDepth, copy=False, nan=logMinDepth)
logDepth = np.clip(logDepth, logMinDepth, logMaxDepth)
depthFrameColor = np.interp(logDepth, (logMinDepth, logMaxDepth), (0, 255))
depthFrameColor = np.nan_to_num(depthFrameColor)
depthFrameColor = depthFrameColor.astype(np.uint8)
depthFrameColor = cv2.applyColorMap(depthFrameColor, cv2.COLORMAP_JET)
depthFrameColor[invalidMask] = 0
except IndexError:
depthFrameColor = np.zeros((frameDepth.shape[0], frameDepth.shape[1], 3), dtype=np.uint8)
except Exception as e:
raise e
return depthFrameColor
rgbWeight = 0.4
depthWeight = 0.6
def updateBlendWeights(percentRgb):
global depthWeight
global rgbWeight
rgbWeight = float(percentRgb) / 100.0
depthWeight = 1.0 - rgbWeight
# Connect to device and start pipeline
remapping = True
save_ply_flag = False
with dai.Device(pipeline) as device:
queue = device.getOutputQueue("out", 8, False)
rgbDepthWindowName = "rgb-depth"
cv2.namedWindow(rgbDepthWindowName)
cv2.createTrackbar(
"RGB Weight %",
rgbDepthWindowName,
int(rgbWeight \* 100),
100,
updateBlendWeights,
)
try:
calibData = device.readCalibration2()
M1 = np.array(calibData.getCameraIntrinsics(ALIGN_SOCKET, \*depthSize))
D1 = np.array(calibData.getDistortionCoefficients(ALIGN_SOCKET))
M2 = np.array(calibData.getCameraIntrinsics(RGB_SOCKET, \*rgbSize))
D2 = np.array(calibData.getDistortionCoefficients(RGB_SOCKET))
# Use the predefined intrinsics if available, otherwise use the calibration data
if TOF_INTRINSICS is not None:
M1 = TOF_INTRINSICS
if RGB_INTRINSICS is not None:
M2 = RGB_INTRINSICS
try:
T = np.array(calibData.getCameraTranslationVector(ALIGN_SOCKET, RGB_SOCKET, False)) \* 10
except RuntimeError:
T = np.array([0.0, 0.0, 0.001])
try:
R = np.array(calibData.getCameraExtrinsics(ALIGN_SOCKET, RGB_SOCKET, False))[0:3, 0:3]
except RuntimeError:
R = np.eye(3)
TARGET_MATRIX = M1
lensPosition = calibData.getLensPosition(RGB_SOCKET)
except:
raise
fpsCounter = FPSCounter()
while True:
messageGroup = queue.get()
fpsCounter.tick()
assert isinstance(messageGroup, dai.MessageGroup)
frameRgb = messageGroup["rgb"]
assert isinstance(frameRgb, dai.ImgFrame)
frameDepth = messageGroup["depth_aligned"]
assert isinstance(frameDepth, dai.ImgFrame)
sizeRgb = frameRgb.getData().size
sizeDepth = frameDepth.getData().size
if frameDepth is not None:
rgbFrame = frameRgb.getCvFrame()
alignedDepthColorized = colorizeDepth(frameDepth.getFrame())
cv2.putText(
alignedDepthColorized,
f"FPS: {fpsCounter.getFps():.2f}",
(10, 30),
cv2.FONT_HERSHEY_SIMPLEX,
1,
(255, 255, 255),
2,
)
cv2.imshow("depth", alignedDepthColorized)
key = cv2.waitKey(1)
if key == ord("m"):
remapping = not remapping
print(f"Remap turned {'ON' if remapping else 'OFF'}.")
elif key == ord('s'):
save_ply_flag = True
if remapping:
mapX, mapY = cv2.initUndistortRectifyMap(
M2, D2, None, M2, rgbSize, cv2.CV_32FC1
)
rgbFrame = cv2.remap(rgbFrame, mapX, mapY, cv2.INTER_LINEAR)
blended = cv2.addWeighted(
rgbFrame, rgbWeight, alignedDepthColorized, depthWeight, 0
)
cv2.imshow(rgbDepthWindowName, blended)
if save_ply_flag:
rgb_frame = cv2.cvtColor(rgbFrame, cv2.COLOR_BGR2RGB)
save_ply(rgb_frame, frameDepth.getFrame(), 'george_rgb1.ply')
print("PLY file saved as 'george_rgb.ply'")
save_ply_flag = False
if key == ord("q"):
break
cv2.destroyAllWindows()
I ran this script :https://docs.luxonis.com/software/depthai/examples/calibration_reader/
Here are the results from it, however if its extrinsic. Then I may have to recalibrate the camera with the method you recommended to me :
RGB Camera Default intrinsics...
[[494.3519287109375, 0.0, 321.8478088378906], [0.0, 499.4835205078125, 258.3044128417969], [0.0, 0.0, 1.0]]
640
480
RGB Camera Default intrinsics...
[[494.3519287109375, 0.0, 321.8478088378906], [0.0, 499.4835205078125, 258.3044128417969], [0.0, 0.0, 1.0]]
640
480
RGB Camera resized intrinsics... 3840 x 2160
[[2.96611157e+03 0.00000000e+00 1.93108691e+03]
[0.00000000e+00 2.99690112e+03 1.18982642e+03]
[0.00000000e+00 0.00000000e+00 1.00000000e+00]]
RGB Camera resized intrinsics... 4056 x 3040
[[3.13295532e+03 0.00000000e+00 2.03971057e+03]
[0.00000000e+00 3.16547681e+03 1.63600427e+03]
[0.00000000e+00 0.00000000e+00 1.00000000e+00]]
LEFT Camera Default intrinsics...
[[842.6837768554688, 0.0, 673.1340942382812], [0.0, 851.867431640625, 412.4818115234375], [0.0, 0.0, 1.0]]
1280
800
LEFT Camera resized intrinsics... 1280 x 720
[[842.68377686 0. 673.13409424]
[ 0. 851.86743164 372.48181152]
[ 0. 0. 1. ]]
RIGHT Camera resized intrinsics... 1280 x 720
[[836.24615479 0. 656.42828369]
[ 0. 845.62658691 399.05911255]
[ 0. 0. 1. ]]
LEFT Distortion Coefficients...
k1: -9.116254806518555
k2: 262.5550842285156
p1: 0.007134947460144758
p2: -0.0009857615223154426
k3: 1347.274169921875
k4: -9.195904731750488
k5: 260.98687744140625
k6: 1308.9786376953125
s1: 0.0
s2: 0.0
s3: 0.0
s4: 0.0
Ï„x: 0.0
Ï„y: 0.0
RIGHT Distortion Coefficients...
k1: -5.861973762512207
k2: 5.3061065673828125
p1: 0.005871884059160948
p2: 0.00142634566873312
k3: 88.46317291259766
k4: -6.072614669799805
k5: 7.037742614746094
k6: 83.25321960449219
s1: 0.0
s2: 0.0
s3: 0.0
s4: 0.0
Ï„x: 0.0
Ï„y: 0.0
RGB FOV 71.86000061035156, Mono FOV 71.86000061035156
LEFT Camera stereo rectification matrix...
[[ 9.94211665e-01 8.86633717e-03 -1.81476751e+01]
[-4.90145546e-03 9.94452611e-01 2.86947005e+01]
[ 2.85166444e-06 4.52051103e-06 9.96386328e-01]]
RIGHT Camera stereo rectification matrix...
[[ 1.00186534e+00 8.93177200e-03 -6.82440986e+00]
[-4.93918803e-03 1.00179181e+00 -7.21055399e-01]
[ 2.87361723e-06 4.55387305e-06 9.96286100e-01]]
Transformation matrix of where left Camera is W.R.T right Camera's optical center
[[ 9.99597728e-01 -7.52320397e-04 -2.83513945e-02 -3.98795390e+00]
[ 5.31902653e-04 9.99969602e-01 -7.78123224e-03 -2.61692833e-02]
[ 2.83563845e-02 7.76302209e-03 9.99567747e-01 -1.03633568e-01]
[ 0.00000000e+00 0.00000000e+00 0.00000000e+00 1.00000000e+00]]
Transformation matrix of where left Camera is W.R.T RGB Camera's optical center
[[ 9.99843597e-01 -8.23507272e-03 1.56521089e-02 -7.51402760e+00]
[ 8.25367495e-03 9.99965310e-01 -1.12427305e-03 -1.49354547e-01]
[-1.56423096e-02 1.25328498e-03 9.99876857e-01 4.59043831e-01]
[ 0.00000000e+00 0.00000000e+00 0.00000000e+00 1.00000000e+00]]