SaiyamShah

  • a day ago
  • Code to collect point clouds:
    import cv2

    import numpy as np

    import open3d as o3d

    from pathlib import Path

    # Camera intrinsics (replace with your OAK-D Lite parameters)
    fx = 450.9061279296875
    fy = 450.810546875
    cx = 322.48406982421875
    cy = 248.485595703125

    # Baseline (distance between stereo cameras, in meters)
    baseline = 0.075  # Example value, adjust based on your camera

    # Paths to RGB and disparity images
    rgb_folder = Path('data/rgb')
    disparity_folder = Path('data/disparity')
    output_folder = Path('data/pointclouds')
    output_folder.mkdir(parents=True, exist_ok=True)

    # Get sorted image lists
    rgb_images = sorted(rgb_folder.glob('*.png'), key=lambda x: int(x.stem))
    disparity_images = sorted(disparity_folder.glob('*.png'), key=lambda x: int(x.stem))

    # Function to convert disparity to depth
    def disparity_to_depth(disparity_map):
        # Disparity values are scaled to 0-255; rescale them
        disparity_map = disparity_map.astype(np.float32)
        disparity_map[disparity_map == 0] = 0.1  # Avoid division by zero
        depth_map = (fx * baseline) / disparity_map
        return depth_map

    # Process each image pair
    for rgb_path, disp_path in zip(rgb_images, disparity_images):
        # Load images
        rgb_image = cv2.cvtColor(cv2.imread(str(rgb_path)), cv2.COLOR_BGR2RGB)
        disparity_image = cv2.imread(str(disp_path), cv2.IMREAD_GRAYSCALE)

        # Resize disparity to match RGB size
        disparity_image = cv2.resize(disparity_image, (rgb_image.shape[1], rgb_image.shape[0]))

        # Convert disparity to depth
        depth_map = disparity_to_depth(disparity_image)

        # Create point cloud
        h, w = depth_map.shape
        i, j = np.meshgrid(np.arange(w), np.arange(h))
        z = depth_map
        x = (i - cx) * z / fx   # Removed negation to correct mirror inversion
        y = -(j - cy) * z / fy  # Retained negation to keep proper vertical orientation

        # Stack into Nx3 array
        points = np.stack((x, y, z), axis=-1).reshape(-1, 3)

        # Get colors
        colors = rgb_image.reshape(-1, 3) / 255.0  # Normalize to [0, 1]

        # Remove points with invalid depth
        mask = (z > 0).reshape(-1)
        points = points[mask]
        colors = colors[mask]

        # Create Open3D point cloud
        pcd = o3d.geometry.PointCloud()
        pcd.points = o3d.utility.Vector3dVector(points)
        pcd.colors = o3d.utility.Vector3dVector(colors)

        # Save point cloud
        output_path = output_folder / f"{rgb_path.stem}.pcd"
        o3d.io.write_point_cloud(str(output_path), pcd)

        print(f"Saved point cloud: {output_path}")

    Code that combines point clouds:
    import open3d as o3d

    from pathlib import Path

    import numpy as np

    import json

    import cv2

    import time

    # Load rvec and tvec data
    pose_file = Path('data/pose_data.json')
    with open(pose_file) as f:
        pose_data = json.load(f)

    # Path to RGB and disparity images
    rgb_folder = Path('data/rgb')
    disparity_folder = Path('data/disparity')

    # Camera parameters (replace with your actual values)
    baseline = 0.075       # in meters (example value)
    focal_length = 450.0   # in pixels (example value)

    # Function to convert disparity to depth
    def disparity_to_depth(disparity, baseline, focal_length):
        disparity = np.asarray(disparity, dtype=np.float32)
        disparity[disparity == 0] = 0.1  # Avoid division by zero
        depth = (baseline * focal_length) / disparity
        return o3d.geometry.Image(depth.astype(np.uint16))

    # Resize depth image to match RGB size
    def resize_depth_image(depth, target_size=(300, 300)):
        depth_resized = cv2.resize(np.asarray(depth), target_size, interpolation=cv2.INTER_NEAREST)
        return o3d.geometry.Image(depth_resized.astype(np.uint16))

    # Load point clouds from RGB and disparity images
    def load_point_clouds(voxel_size=0.05):
        pcds = []
        for i in range(len(pose_data)):
            color_path = rgb_folder / f"{i + 1}.png"
            disparity_path = disparity_folder / f"{i + 1}.png"

            color = o3d.io.read_image(str(color_path))
            disparity = cv2.imread(str(disparity_path), cv2.IMREAD_UNCHANGED)

            if disparity is None or color is None or str(i + 1) not in pose_data:
                print(f"Skipping frame {i} due to missing data or pose information.")
                continue

            depth = disparity_to_depth(disparity, baseline, focal_length)
            depth_resized = resize_depth_image(depth)

            rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(
                color, depth_resized, depth_trunc=4.0, convert_rgb_to_intensity=False
            )

            intrinsic = o3d.camera.PinholeCameraIntrinsic(
                300, 300, 450.0, 450.0, 150.0, 150.0
            )

            pcd = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd_image, intrinsic)
            pcd_down = pcd.voxel_down_sample(voxel_size=voxel_size)
            pcd_down.estimate_normals(search_param=o3d.geometry.KDTreeSearchParamHybrid(radius=0.05, max_nn=30))
            pcds.append(pcd_down)
        return pcds

    # Feature extraction
    def compute_fpfh(pcd, voxel_size):
        radius_normal = voxel_size * 2
        pcd.estimate_normals(o3d.geometry.KDTreeSearchParamHybrid(radius=radius_normal, max_nn=30))

        radius_feature = voxel_size * 5
        fpfh = o3d.pipelines.registration.compute_fpfh_feature(
            pcd, o3d.geometry.KDTreeSearchParamHybrid(radius=radius_feature, max_nn=100)
        )
        return fpfh

    # Fast global registration
    def execute_fast_global_registration(source_down, target_down, source_fpfh, target_fpfh, voxel_size):
        distance_threshold = voxel_size * 0.5
        print(f":: Apply fast global registration with distance threshold {distance_threshold:.3f}")
        result = o3d.pipelines.registration.registration_fgr_based_on_feature_matching(
            source_down, target_down, source_fpfh, target_fpfh,
            o3d.pipelines.registration.FastGlobalRegistrationOption(
                maximum_correspondence_distance=distance_threshold)
        )
        return result

    if __name__ == "__main__":
        voxel_size = 0.05  # means 5cm for the dataset
        pcds_down = load_point_clouds(voxel_size)

        if len(pcds_down) < 2:
            print("Not enough point clouds for registration.")
            exit(0)

        source_down, target_down = pcds_down[0], pcds_down[1]
        source_fpfh = compute_fpfh(source_down, voxel_size)
        target_fpfh = compute_fpfh(target_down, voxel_size)

        start = time.time()
        result_fast = execute_fast_global_registration(source_down, target_down, source_fpfh, target_fpfh, voxel_size)
        print(f"Fast global registration took {time.time() - start:.3f} sec.\n")

        o3d.visualization.draw_geometries([source_down.paint_uniform_color([1, 0, 0]), target_down.paint_uniform_color([0, 1, 0])], window_name='Initial Point Clouds')
        o3d.visualization.draw_geometries([source_down.transform(result_fast.transformation), target_down], window_name='Fast Global Registration Result')
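    Note: pose_data (the rvec/tvec pairs) is loaded above but not used yet. A minimal sketch of turning each entry into a 4x4 transform for pre-aligning clouds before registration, assuming each entry stores 'rvec' and 'tvec' as 3-element lists (key names and layout are assumptions):

    def pose_to_transform(rvec, tvec):
        # Convert a Rodrigues rotation vector + translation into a 4x4 homogeneous matrix
        R, _ = cv2.Rodrigues(np.asarray(rvec, dtype=np.float64))
        T = np.eye(4)
        T[:3, :3] = R
        T[:3, 3] = np.asarray(tvec, dtype=np.float64).ravel()
        return T

    # Example (commented out): relative transform between two frames; the exact
    # composition/inverse depends on whether rvec/tvec are world-to-camera or camera-to-world
    # T_rel = np.linalg.inv(pose_to_transform(rvec_1, tvec_1)) @ pose_to_transform(rvec_2, tvec_2)
    # pcds_down[1].transform(T_rel)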

    • Hi Luxonis Community,

      I’m currently working on a project involving the OAK-D Lite camera, and I’m facing challenges while trying to implement real-time RGB-D SLAM and point cloud merging.

      Here’s my setup:

      • Device: OAK-D Lite

      • Software Stack: DepthAI + Open3D + Python

      • Process:

        • Capturing RGB and depth frames at 5 FPS.

        • Converting frames to point clouds.

        • Using ICP (Iterative Closest Point) to align and merge consecutive point clouds (the per-pair refinement step is sketched just below).
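        A simplified version of that refinement (point-to-plane variant shown here; the helper name is just for illustration, and the threshold / initial transform are exactly the parameters I keep adjusting):

        import numpy as np
        import open3d as o3d

        def refine_pair(source, target, threshold=0.05, trans_init=np.eye(4)):
            # Pairwise ICP between two consecutive (downsampled) clouds.
            # Point-to-plane needs normals, which I estimate right after downsampling.
            reg = o3d.pipelines.registration.registration_icp(
                source, target, threshold, trans_init,
                o3d.pipelines.registration.TransformationEstimationPointToPlane())
            return reg.transformation, reg.fitness, reg.inlier_rmse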

      Issues I’m Encountering:

      1. Point Cloud Merging Accuracy:
        The merged point clouds often drift or misalign after several frames, even after applying ICP and downsampling.

      2. Real-Time Performance:
        Processing in real-time is challenging due to computational load, and I’m looking for ways to optimize or parallelize the pipeline.
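        One direction I'm considering (not implemented yet) is decoupling capture from point-cloud processing with a queue and a worker thread, roughly like this (grab_rgbd_frame and process_frame are placeholders for the DepthAI grab and the cloud-building / ICP step):

        import queue
        import threading

        frame_queue = queue.Queue(maxsize=10)

        def capture_loop():
            # Producer: grab RGB + depth frames and enqueue them, dropping
            # frames when the queue is full so capture never blocks.
            while True:
                frame_pair = grab_rgbd_frame()  # placeholder for the DepthAI grab
                try:
                    frame_queue.put_nowait(frame_pair)
                except queue.Full:
                    pass  # drop the frame instead of stalling capture

        def processing_loop():
            # Consumer: build the point cloud and run registration off the capture thread.
            while True:
                rgb, depth = frame_queue.get()
                process_frame(rgb, depth)  # placeholder for cloud creation + ICP

        threading.Thread(target=capture_loop, daemon=True).start()
        processing_loop()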

      What I’ve Tried So Far:

      • Using intrinsic calibration data from the OAK-D Lite for aligning RGB and depth frames.

      • Adjusting ICP parameters (thresholds, initial transformations).

      • Applying voxel downsampling and outlier removal techniques (a short snippet is shown just below).
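      Roughly what that cleanup looks like (helper name and parameter values are just what I'm currently experimenting with):

      import open3d as o3d

      def clean_cloud(pcd, voxel_size=0.05):
          # Downsample, then drop statistical outliers before registration
          pcd_down = pcd.voxel_down_sample(voxel_size=voxel_size)
          pcd_clean, _ = pcd_down.remove_statistical_outlier(nb_neighbors=20, std_ratio=2.0)
          return pcd_clean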

      Looking for Guidance On:

      • Best practices for accurate point cloud merging.

      • Approaches or libraries that could help implement real-time SLAM using the OAK-D Lite.

      • Any firmware or DepthAI configurations that might help improve performance.

      If anyone has tackled a similar challenge or has suggestions, I’d love to hear your thoughts. Thanks in advance for your help!