SaiyamShah

  • a day ago
  • Code to collect point clouds:
    import cv2

    import numpy as np

    import open3d as o3d

    from pathlib import Path

    # Camera intrinsics (replace with your OAK-D Lite parameters)
    fx = 450.9061279296875
    fy = 450.810546875
    cx = 322.48406982421875
    cy = 248.485595703125

    # Baseline (distance between stereo cameras, in meters)
    baseline = 0.075  # Example value, adjust based on your camera

    # Paths to RGB and disparity images
    rgb_folder = Path('data/rgb')
    disparity_folder = Path('data/disparity')
    output_folder = Path('data/pointclouds')
    output_folder.mkdir(parents=True, exist_ok=True)

    # Get sorted image lists
    rgb_images = sorted(rgb_folder.glob('*.png'), key=lambda x: int(x.stem))
    disparity_images = sorted(disparity_folder.glob('*.png'), key=lambda x: int(x.stem))

    # Function to convert disparity to depth
    def disparity_to_depth(disparity_map):
        # Disparity values are scaled to 0-255; rescale them
        disparity_map = disparity_map.astype(np.float32)
        disparity_map[disparity_map == 0] = 0.1  # Avoid division by zero
        depth_map = (fx * baseline) / disparity_map
        return depth_map

    # Process each image pair
    for rgb_path, disp_path in zip(rgb_images, disparity_images):
        # Load images
        rgb_image = cv2.cvtColor(cv2.imread(str(rgb_path)), cv2.COLOR_BGR2RGB)
        disparity_image = cv2.imread(str(disp_path), cv2.IMREAD_GRAYSCALE)

        # Resize disparity to match RGB size
        disparity_image = cv2.resize(disparity_image, (rgb_image.shape[1], rgb_image.shape[0]))

        # Convert disparity to depth
        depth_map = disparity_to_depth(disparity_image)

        # Create point cloud
        h, w = depth_map.shape
        i, j = np.meshgrid(np.arange(w), np.arange(h))
        z = depth_map
        x = (i - cx) * z / fx   # Removed negation to correct mirror inversion
        y = -(j - cy) * z / fy  # Retained negation to keep proper vertical orientation

        # Stack into Nx3 array
        points = np.stack((x, y, z), axis=-1).reshape(-1, 3)

        # Get colors
        colors = rgb_image.reshape(-1, 3) / 255.0  # Normalize to [0, 1]

        # Remove points with invalid depth
        mask = (z > 0).reshape(-1)
        points = points[mask]
        colors = colors[mask]

        # Create Open3D point cloud
        pcd = o3d.geometry.PointCloud()
        pcd.points = o3d.utility.Vector3dVector(points)
        pcd.colors = o3d.utility.Vector3dVector(colors)

        # Save point cloud
        output_path = output_folder / f"{rgb_path.stem}.pcd"
        o3d.io.write_point_cloud(str(output_path), pcd)

        print(f"Saved point cloud: {output_path}")

    Code that combines point clouds:
    import open3d as o3d

    from pathlib import Path

    import numpy as np

    import json

    import cv2

    import time

    # Load rvec and tvec data
    pose_file = Path('data/pose_data.json')
    with open(pose_file) as f:
        pose_data = json.load(f)

    # Path to RGB and disparity images
    rgb_folder = Path('data/rgb')
    disparity_folder = Path('data/disparity')

    # Camera parameters (replace with your actual values)
    baseline = 0.075       # in meters (example value)
    focal_length = 450.0   # in pixels (example value)

    # Function to convert disparity to depth
    def disparity_to_depth(disparity, baseline, focal_length):
        disparity = np.asarray(disparity, dtype=np.float32)
        disparity[disparity == 0] = 0.1  # Avoid division by zero
        depth = (baseline * focal_length) / disparity
        return o3d.geometry.Image(depth.astype(np.uint16))

    # Resize depth image to match RGB size
    def resize_depth_image(depth, target_size=(300, 300)):
        depth_resized = cv2.resize(np.asarray(depth), target_size, interpolation=cv2.INTER_NEAREST)
        return o3d.geometry.Image(depth_resized.astype(np.uint16))

    # Load point clouds from RGB and disparity images
    def load_point_clouds(voxel_size=0.05):
        pcds = []
        for i in range(len(pose_data)):
            color_path = rgb_folder / f"{i + 1}.png"
            disparity_path = disparity_folder / f"{i + 1}.png"

            color = o3d.io.read_image(str(color_path))
            disparity = cv2.imread(str(disparity_path), cv2.IMREAD_UNCHANGED)

            if disparity is None or color is None or str(i + 1) not in pose_data:
                print(f"Skipping frame {i} due to missing data or pose information.")
                continue

            depth = disparity_to_depth(disparity, baseline, focal_length)
            depth_resized = resize_depth_image(depth)

            rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(
                color, depth_resized, depth_trunc=4.0, convert_rgb_to_intensity=False
            )

            intrinsic = o3d.camera.PinholeCameraIntrinsic(
                300, 300, 450.0, 450.0, 150.0, 150.0
            )

            pcd = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd_image, intrinsic)
            pcd_down = pcd.voxel_down_sample(voxel_size=voxel_size)
            pcd_down.estimate_normals(search_param=o3d.geometry.KDTreeSearchParamHybrid(radius=0.05, max_nn=30))
            pcds.append(pcd_down)
        return pcds

    # Feature extraction
    def compute_fpfh(pcd, voxel_size):
        radius_normal = voxel_size * 2
        pcd.estimate_normals(o3d.geometry.KDTreeSearchParamHybrid(radius=radius_normal, max_nn=30))

        radius_feature = voxel_size * 5
        fpfh = o3d.pipelines.registration.compute_fpfh_feature(
            pcd, o3d.geometry.KDTreeSearchParamHybrid(radius=radius_feature, max_nn=100)
        )
        return fpfh

    # Fast global registration
    def execute_fast_global_registration(source_down, target_down, source_fpfh, target_fpfh, voxel_size):
        distance_threshold = voxel_size * 0.5
        print(f":: Apply fast global registration with distance threshold {distance_threshold:.3f}")
        result = o3d.pipelines.registration.registration_fgr_based_on_feature_matching(
            source_down, target_down, source_fpfh, target_fpfh,
            o3d.pipelines.registration.FastGlobalRegistrationOption(
                maximum_correspondence_distance=distance_threshold)
        )
        return result

    if __name__ == "__main__":
        voxel_size = 0.05  # means 5cm for the dataset
        pcds_down = load_point_clouds(voxel_size)

        if len(pcds_down) < 2:
            print("Not enough point clouds for registration.")
            exit(0)

        source_down, target_down = pcds_down[0], pcds_down[1]
        source_fpfh = compute_fpfh(source_down, voxel_size)
        target_fpfh = compute_fpfh(target_down, voxel_size)

        start = time.time()
        result_fast = execute_fast_global_registration(source_down, target_down, source_fpfh, target_fpfh, voxel_size)
        print(f"Fast global registration took {time.time() - start:.3f} sec.\n")

        o3d.visualization.draw_geometries([source_down.paint_uniform_color([1, 0, 0]), target_down.paint_uniform_color([0, 1, 0])], window_name='Initial Point Clouds')
        o3d.visualization.draw_geometries([source_down.transform(result_fast.transformation), target_down], window_name='Fast Global Registration Result')
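    Note: pose_data (the rvec/tvec pairs) is loaded above but not used yet. A minimal sketch of turning each entry into a 4x4 transform for pre-aligning clouds before registration, assuming each entry stores 'rvec' and 'tvec' as 3-element lists (key names and layout are assumptions):

    def pose_to_transform(rvec, tvec):
        # Convert a Rodrigues rotation vector + translation into a 4x4 homogeneous matrix
        R, _ = cv2.Rodrigues(np.asarray(rvec, dtype=np.float64))
        T = np.eye(4)
        T[:3, :3] = R
        T[:3, 3] = np.asarray(tvec, dtype=np.float64).ravel()
        return T

    # Example (commented out): relative transform between two frames; the exact
    # composition/inverse depends on whether rvec/tvec are world-to-camera or camera-to-world
    # T_rel = np.linalg.inv(pose_to_transform(rvec_1, tvec_1)) @ pose_to_transform(rvec_2, tvec_2)
    # pcds_down[1].transform(T_rel)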

    • Hi Luxonis Community,

      I’m currently working on a project involving the OAK-D Lite camera, and I’m facing challenges while trying to implement real-time RGB-D SLAM and point cloud merging.

      Here’s my setup:

      • Device: OAK-D Lite

      • Software Stack: DepthAI + Open3D + Python

      • Process:

        • Capturing RGB and depth frames at 5 FPS.

        • Converting frames to point clouds.

        • Using ICP (Iterative Closest Point) to align and merge consecutive point clouds (the per-pair refinement step is sketched just below).
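        A simplified version of that refinement (point-to-plane variant shown here; the helper name is just for illustration, and the threshold / initial transform are exactly the parameters I keep adjusting):

        import numpy as np
        import open3d as o3d

        def refine_pair(source, target, threshold=0.05, trans_init=np.eye(4)):
            # Pairwise ICP between two consecutive (downsampled) clouds.
            # Point-to-plane needs normals, which I estimate right after downsampling.
            reg = o3d.pipelines.registration.registration_icp(
                source, target, threshold, trans_init,
                o3d.pipelines.registration.TransformationEstimationPointToPlane())
            return reg.transformation, reg.fitness, reg.inlier_rmse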

      Issues I’m Encountering:

      1. Point Cloud Merging Accuracy:
        The merged point clouds often drift or misalign after several frames, even after applying ICP and downsampling.

      2. Real-Time Performance:
        Processing in real-time is challenging due to computational load, and I’m looking for ways to optimize or parallelize the pipeline.
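        One direction I'm considering (not implemented yet) is decoupling capture from point-cloud processing with a queue and a worker thread, roughly like this (grab_rgbd_frame and process_frame are placeholders for the DepthAI grab and the cloud-building / ICP step):

        import queue
        import threading

        frame_queue = queue.Queue(maxsize=10)

        def capture_loop():
            # Producer: grab RGB + depth frames and enqueue them, dropping
            # frames when the queue is full so capture never blocks.
            while True:
                frame_pair = grab_rgbd_frame()  # placeholder for the DepthAI grab
                try:
                    frame_queue.put_nowait(frame_pair)
                except queue.Full:
                    pass  # drop the frame instead of stalling capture

        def processing_loop():
            # Consumer: build the point cloud and run registration off the capture thread.
            while True:
                rgb, depth = frame_queue.get()
                process_frame(rgb, depth)  # placeholder for cloud creation + ICP

        threading.Thread(target=capture_loop, daemon=True).start()
        processing_loop()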

      What I’ve Tried So Far:

      • Using intrinsic calibration data from the OAK-D Lite for aligning RGB and depth frames.

      • Adjusting ICP parameters (thresholds, initial transformations).

      • Applying voxel downsampling and outlier removal techniques (a short snippet is shown just below).
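      Roughly what that cleanup looks like (helper name and parameter values are just what I'm currently experimenting with):

      import open3d as o3d

      def clean_cloud(pcd, voxel_size=0.05):
          # Downsample, then drop statistical outliers before registration
          pcd_down = pcd.voxel_down_sample(voxel_size=voxel_size)
          pcd_clean, _ = pcd_down.remove_statistical_outlier(nb_neighbors=20, std_ratio=2.0)
          return pcd_clean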

      Looking for Guidance On:

      • Best practices for accurate point cloud merging.

      • Approaches or libraries that could help implement real-time SLAM using the OAK-D Lite.

      • Any firmware or DepthAI configurations that might help improve performance.

      If anyone has tackled a similar challenge or has suggestions, I’d love to hear your thoughts. Thanks in advance for your help!