Hi,

I am attempting to create a pipeline that outputs a pointcloud and an image when a HTTP request is sent to a server hosted in the Script node of the camera pipeline. Due to the camera operating on a somewhat unstable network, I am trying to run the camera in 'Standalone' mode.

I get the pipeline up and running properly, but I face the following issues:
1) When a request is sent, the retrieved image is not real-time and has ~5 s of latency. How can this be mitigated so that images are retrieved closer to real-time?
2) The image and point cloud do not seem to be properly synced; I had to remove the Sync node in order to use the Script node. Is there any known way to link them up so that synced point clouds and images are received in the Script node?

The code I used is as follows:

Setup of the camera and pipeline in standalone mode:

import depthai as dai
import time
from datetime import timedelta

def create_pipeline(camera_ip, camera_port, script_path):
    pipeline = dai.Pipeline()
    RGB_SOCKET = dai.CameraBoardSocket.CAM_C
    TOF_SOCKET = dai.CameraBoardSocket.CAM_A

    # Create nodes
    camRgb = pipeline.create(dai.node.ColorCamera)
    tof = pipeline.create(dai.node.ToF)
    camTof = pipeline.create(dai.node.Camera)
    align = pipeline.create(dai.node.ImageAlign)
    pointcloud = pipeline.create(dai.node.PointCloud)
    script = pipeline.create(dai.node.Script)

    # Configure nodes
    camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_800_P)
    camRgb.setBoardSocket(RGB_SOCKET)
    camRgb.setIspScale(1, 2)
    FPS = 1
    camRgb.setFps(FPS)

    # ToF settings
    camTof.setFps(FPS)
    camTof.setImageOrientation(dai.CameraImageOrientation.ROTATE_180_DEG)
    camTof.setBoardSocket(TOF_SOCKET)

    # Higher number => faster processing. 1 shave core can do 30FPS.
    tof.setNumShaves(1)

    # Median filter, kernel size 3x3
    tof.initialConfig.setMedianFilter(dai.MedianFilter.KERNEL_3x3)
    tofConfig = tof.initialConfig.get()
    # Temporal filter averages shuffle/non-shuffle frequencies
    tofConfig.enablePhaseShuffleTemporalFilter = True
    # Phase unwrapping, for longer range
    tofConfig.phaseUnwrappingLevel = 2
    tofConfig.phaseUnwrapErrorThreshold = 500
    tof.initialConfig.set(tofConfig)

    pointcloud.initialConfig.setSparse(False)
    script.setProcessor(dai.ProcessorType.LEON_CSS)

    with open(script_path, "r") as f:
        script_content = f.read()
    script_content = script_content.replace("IP_PLACEHOLDER", camera_ip).replace("PORT_PLACEHOLDER", str(camera_port))
    print(f"Script Content: {script_content}")
    script.setScript(script_content)

    # Linking
    print("Linking nodes...")
    camRgb.isp.link(script.inputs["rgb"])
    camRgb.isp.link(align.inputAlignTo)
    camTof.raw.link(tof.input)
    tof.depth.link(align.input)
    align.outputAligned.link(pointcloud.inputDepth)
    pointcloud.outputPointCloud.link(script.inputs["pcl"])
    script.outputs['out'].link(camRgb.inputControl)
    print("Nodes linked")
    return pipeline

if __name__ == "__main__":
    camera_ip = '192.168.100.100'
    camera_port = '8080'
    script_path = "./http_server_script.py"

    pipeline = create_pipeline(camera_ip, camera_port, script_path)
    devices = dai.DeviceBootloader.getAllAvailableDevices()
    print(devices)
    for device in devices:
        if device.name == camera_ip:
            bootloader = dai.DeviceBootloader(device)
            progress = lambda p: print(f'Flashing progress: {p*100:.1f}%')
            bootloader.flash(progress, pipeline)

HTTP script for Script node:
from http.server import BaseHTTPRequestHandler
import socketserver
import socket
import fcntl
import struct

ctrl = CameraControl()  # Assuming CameraControl can handle point clouds
ctrl.setCaptureStill(True)

def get_ip_address(ifname):
    s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    return socket.inet_ntoa(fcntl.ioctl(
        s.fileno(), -1071617759,  # SIOCGIFADDR
        struct.pack('256s', ifname[:15].encode())
    )[20:24])

class HTTPHandler(BaseHTTPRequestHandler):
    def do_GET(self):
        if self.path == '/':
            self.send_response(200)
            self.end_headers()
            self.wfile.write(b'<h1>[DepthAI] Hello, world!</h1><p>Click <a href="img">here</a> for an image</p><p>Click <a href="pointcloud">here</a> for a pointcloud</p>')
        elif self.path == '/img':
            # Capture the image from the camera
            node.io['out'].send(ctrl)
            image = node.io['rgb'].get()
            self.send_response(200)
            self.send_header('Content-Type', 'image/jpeg')
            self.send_header('Content-Length', str(len(image.getData())))
            self.end_headers()
            print(f"Image size: {len(image.getData())}")
            self.wfile.write(image.getData())
        elif self.path == '/pointcloud':
            # Capture the point cloud data
            node.io['out'].send(ctrl)
            pointcloud = node.io['pcl'].get()
            self.send_response(200)
            self.send_header('Content-Type', 'application/octet-stream')  # Binary file
            self.send_header('Content-Length', str(len(pointcloud.getData())))
            self.end_headers()
            self.wfile.write(pointcloud.getData())
        else:
            self.send_response(404)
            self.end_headers()
            self.wfile.write(b'Url not found...')

with socketserver.TCPServer(("IP_PLACEHOLDER", PORT_PLACEHOLDER), HTTPHandler) as httpd:
    node.warn(f"Serving at {get_ip_address('re0')}:{PORT_PLACEHOLDER}")
    httpd.serve_forever()

  • gherodes
    Try on the latest develop branch.
    git checkout develop
    git pull --recurse-submodules
    python3 examples/install_requirements.py

    Then reflash the device. LMK if it fixes the issue.

    Thanks,
    Jaka

    gherodes When a request is sent, the retrieved image is not real-time and has ~5 s of latency. How can this be mitigated so that images are retrieved closer to real-time?

    Make sure you clear the queues constantly or set the queue sizes to 1 to avoid reading old frames. Each node input has a queue size and a blocking toggle, e.g.:

    script.inputs["rgb"].setBlocking(False)
    script.inputs["rgb"].setQueueSize(1)

    Sync them based on timestamps. If they differ too much, make sure you discard the frames.
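
    A rough sketch of what that sync could look like inside the Script node (untested here; it assumes the rgb/pcl messages expose getTimestamp() on-device the same way they do host-side, and the 50 ms tolerance is just an example value):

    # Minimal sketch: pair the newest rgb/pcl messages by timestamp
    def get_synced_pair():
        frame = node.io['rgb'].get()
        pcl = node.io['pcl'].get()
        # Advance whichever stream is older until the pair is close enough
        while abs((frame.getTimestamp() - pcl.getTimestamp()).total_seconds()) > 0.05:
            if frame.getTimestamp() < pcl.getTimestamp():
                frame = node.io['rgb'].get()
            else:
                pcl = node.io['pcl'].get()
        return frame, pcl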

    Thanks,
    Jaka

    Hi Jaka,
    Thanks for the fast reply. I applied your suggestions and they seem to have both synced up the frame and point cloud, and retrieval is now relatively real-time.

    If I wanted to add logging using the SystemLogger node, should I just link it up to the Script Node identically to the previous nodes?

    Thanks,
    Georg

      Hi again Jaka,

      I have encountered a new issue with the pipeline above after implementing the suggested fixes:

      The camera appears to be restarting exactly every 6 min 55 s. I confirmed this by querying the HTTP server for an image/point-cloud pair at a 1 s interval and checking the timestamps of the images.
      Initially I suspected an issue with the PoE power delivery, so I tested with multiple PoE switches and injectors (90 W Dahua PoE switch, 240 W Planet Industrial PoE switch, 24 W Ubiquiti PoE injector) and saw the same behaviour on all of them. I further tested this on two separate OAK-D Short Range PoE ToF cameras in separate systems (separate switches/injectors/etc.) to rule out faults in any of the hardware.

      Finally, I suspect this is a RAM/memory issue. With the pipeline FPS set to 10, the crashes occurred approximately every 7 minutes; after lowering the FPS to 1, the pipeline has so far not crashed for approximately 40 minutes. It appears that some sort of cache or buffer is building up and not being cleared.

      Do you have any suggestions on how to clear any buffers that might be building up in this pipeline in standalone mode?
      I am implementing some more logging functionality and hope to be able to provide more logs/RAM usage shortly.

      Kind regards,
      Georg

        gherodes

        gherodes If I wanted to add logging using the SystemLogger node, should I just link it up to the Script Node identically to the previous nodes?

        Not sure if needed. The logger should have timestamped data. You probably don't need to perfectly sync the logs to the frames.
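
        For reference, if you do decide to pull the stats into the Script node, a minimal sketch of the host-side linking could look like this (the input name "syslog" and the 1 Hz rate are just example choices, not something from your code):

        # Sketch: forward SystemLogger output into the Script node
        sysLog = pipeline.create(dai.node.SystemLogger)
        sysLog.setRate(1)  # one SystemInformation message per second
        sysLog.out.link(script.inputs["syslog"])
        script.inputs["syslog"].setBlocking(False)
        script.inputs["syslog"].setQueueSize(1)

        and on the device side something like:

        # Sketch: read the stats without blocking and log DDR usage
        info = node.io['syslog'].tryGet()
        if info is not None:
            node.warn(f"DDR used: {info.ddrMemoryUsage.used / (1024 * 1024):.1f} MiB")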

        gherodes Do you have any suggestions on how to clear any buffers that might be building up in this pipeline in standalone mode?

        Can you create an MRE (minimal reproducible example) for both sides, so I can check locally with a tested PoE switch? You can mail the code if you want.

        Hi Jakaskerl,

        For the tests I ran the two scripts above on the camera (with the suggestions from your first comment applied to both the RGB and PCL inputs of the Script node). It runs on an OAK-D SR ToF camera with bootloader version 0.0.28 and depthai-python version 2.28.0.

        To retrieve images from the camera, the script below is used.

        import requests
        import cv2
        import numpy as np
        import open3d as o3d
        import time

        def yuv420_to_bgr(yuv_bytes, width, height):
            # Size of Y, U, and V planes
            y_size = width * height
            uv_size = (width // 2) * (height // 2)

            # Split the YUV data into Y, U, and V planes
            y_plane = np.frombuffer(yuv_bytes[0:y_size], dtype=np.uint8).reshape(
                (height, width)
            )
            u_plane = np.frombuffer(
                yuv_bytes[y_size : y_size + uv_size], dtype=np.uint8
            ).reshape((height // 2, width // 2))
            v_plane = np.frombuffer(yuv_bytes[y_size + uv_size :], dtype=np.uint8).reshape(
                (height // 2, width // 2)
            )

            # Upsample the U and V planes to the size of the Y plane
            u_plane_up = cv2.resize(u_plane, (width, height), interpolation=cv2.INTER_LINEAR)
            v_plane_up = cv2.resize(v_plane, (width, height), interpolation=cv2.INTER_LINEAR)

            # Stack the Y, U, and V planes into a YUV image
            yuv_img = np.stack((y_plane, u_plane_up, v_plane_up), axis=-1)

            # Convert YUV to BGR using OpenCV
            bgr_img = cv2.cvtColor(yuv_img, cv2.COLOR_YUV2BGR)
            return bgr_img

        def retrieve_image_and_pointcloud(server_url):
            # Define the endpoints for image and pointcloud
            image_url = f"{server_url}/img"
            pointcloud_url = f"{server_url}/pointcloud"
            ping_url = f"{server_url}/ping"

            # Retrieve the image
            # print(f"Requesting image from {image_url}")
            t0 = time.time()
            image_response = requests.get(image_url)
            t1 = time.time()
            print(f"Time to retrieve image: {t1 - t0:.4f} seconds")

            t2 = time.time()
            pointcloud_response = requests.get(pointcloud_url)
            t3 = time.time()
            print(f"Time to retrieve pointcloud: {t3 - t2:.4f} seconds")

            t4 = time.time()
            ping_response = requests.get(ping_url)
            t5 = time.time()
            print(f"Time to ping server: {t5 - t4:.4f} seconds")
            print(f"Total retrieval time: {t5 - t0:.4f} seconds")

            if image_response.status_code == 200:
                height = 400
                width = 640
                yuv_bytes = image_response.content
                # Convert YUV420 to BGR
                bgr_image_array = yuv420_to_bgr(yuv_bytes, width, height)
                cv2.imwrite("image.jpg", bgr_image_array)
            else:
                print(f"Failed to retrieve image. Status code: {image_response.status_code}")

            if pointcloud_response.status_code == 200:
                # Parse the point cloud buffer and colour it with the RGB image
                pcl_buffer = np.frombuffer(pointcloud_response.content, np.float32)
                pcl_np = np.reshape(pcl_buffer, (-1, 3))
                print(f"Point cloud shape: {pcl_np.shape}")
                colors_np = (
                    np.reshape(cv2.cvtColor(bgr_image_array, cv2.COLOR_BGR2RGB), (-1, 3))
                    / 255.0
                )
                # Save the point cloud as a PLY file
                pcl = o3d.geometry.PointCloud()
                pcl.points = o3d.utility.Vector3dVector(pcl_np)
                pcl.colors = o3d.utility.Vector3dVector(colors_np)
                o3d.io.write_point_cloud("pointcloud.ply", pcl)

        if __name__ == "__main__":
            # Example usage
            server_url = "http://192.168.100.100:8080"  # Replace this with the actual server address
            while True:
                t0 = time.time()
                retrieve_image_and_pointcloud(server_url)
                time.sleep(1)
                t1 = time.time()
                print("-----" * 10)

        I hope this helps to reproduce the error. In the meantime I have observed that with the FPS set to 1 the camera restarts after approximately 3 hours, while with the FPS at 3 it restarts after 23 minutes.

        Kind regards,

        Georg

          8 days later

          jakaskerl
          Hi,

          Any luck reproducing the issue? I can send some additional information on my dev environment if required.

          Georg

            gherodes
            I could not reproduce the issue, even at 10 FPS running for 3 h. I ran it in peripheral mode (not standalone). Does the issue only arise in standalone mode?

            Thanks
            Jaka

              jakaskerl
              Yes, it only appears in standalone mode with the scripts provided above. As mentioned in the original post, the network that the camera is running on is quite unstable, so running in peripheral mode causes the host-camera connection to be lost frequently, making peripheral mode unusable for my use case.

              Georg

                gherodes
                Ok. What is the bandwidth of the network? How common are the connection losses? Could they be related to the crashes, i.e. do they coincide?

                Thanks,
                Jaka

                  jakaskerl

                  The network has a bandwidth of 1Gbps.
                  The connection losses are pretty inconsistent (intervals between disconnections vary from 2 min to 30 min) and do not coincide with the crashes in standalone mode. The crashes in standalone mode occur very consistently (to the second): every 2 minutes and 56 seconds at 10 FPS and every 23 minutes at 3 FPS.
                  Georg

                    gherodes

                    This part:

                     class HTTPHandler(BaseHTTPRequestHandler):
                                    def do_GET(self):
                                        if self.path == '/':
                                            self.send_response(200)
                                            self.end_headers()
                                            self.wfile.write(b'<h1>[DepthAI] Hello, world!</h1><p>Click <a href="img">here</a> for an image</p><p>Click <a href="pointcloud">here</a> for a pointcloud</p>')
                                        elif self.path == '/img':
                                            # Capture the image from the camera
                                            node.io['out'].send(ctrl)
                                            image = node.io['rgb'].get()
                                            self.send_response(200)
                                            self.send_header('Content-Type', 'image/jpeg')
                                            self.send_header('Content-Length', str(len(image.getData())))
                                            self.end_headers()
                                            print(f"Image size: {{len(image.getData())}}")
                                            self.wfile.write(image.getData())
                                        elif self.path == '/pointcloud':
                                            # Capture the point cloud data
                                            node.io['out'].send(ctrl)
                                            pointcloud = node.io['pcl'].get()
                                            self.send_response(200)
                                            self.send_header('Content-Type', 'application/octet-stream')  # Binary file
                                            self.send_header('Content-Length', str(len(pointcloud.getData())))
                                            self.end_headers()
                                            self.wfile.write(pointcloud.getData())
                                        else:
                                            self.send_response(404)
                                            self.end_headers()
                                            self.wfile.write(b'Url not found...')

                    is only reading the messages when GET requests are sent. If I don't GET /img or /pointcloud, the messages on the device will not be read, so the queue will fill up. This is because the input queues are not discarding the frames (possibly blocking behaviour on either side).

                    Make sure .get() is called outside of the conditional statements.
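
                    One way to do that in the Script node (a sketch under the assumption that tryGet() behaves as it does host-side and returns None when the queue is empty) is to drain each input and only keep the newest message:

                    # Sketch: consume everything queued and keep only the newest message
                    def get_latest(stream):
                        msg = node.io[stream].get()           # block for at least one message
                        while True:
                            newer = node.io[stream].tryGet()  # non-blocking
                            if newer is None:
                                return msg
                            msg = newer

                    # In the handler, use get_latest('rgb') / get_latest('pcl') instead of bare .get()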

                    Thanks,
                    Jaka

                      jakaskerl

                      Hi Jaka, thanks for the fast reply! Just to check that I understand this correctly, the issue should be fixed if I combine the /img and /pointcloud endpoints and use .get() to retrieve both RGB image and pointcloud every time the camera receives a request?

                      Georg

                        gherodes
                        No need to combine the endpoints.
                        It is only important that you always call .get(), even if no request was sent, in order to clear the queue. Otherwise it will fill up and crash your device.

                        Thanks,
                        Jaka

                          jakaskerl

                          I see, so essentially I should have a function running in a separate thread that constantly does .get() for both images and pointclouds?

                          Georg

                            gherodes
                            Basically, yes. That, or make sure the queues are all set to non-blocking.

                            Thanks,
                            Jaka

                            Hi Jaka,
                            I am attempting to set all queues to non-blocking mode, and while I see some improvement, the camera still crashes.
                            Previously it was crashing every 2 min 56 s at 10 FPS; now it crashes every 6 min 55 s (consistently). I am using the following pipeline structure:

                            # Create nodes
                            camRgb = pipeline.create(dai.node.ColorCamera)
                            tof = pipeline.create(dai.node.ToF)
                            camTof = pipeline.create(dai.node.Camera)
                            align = pipeline.create(dai.node.ImageAlign)
                            pointcloud = pipeline.create(dai.node.PointCloud)
                            script = pipeline.create(dai.node.Script)
                            
                            #Node setup is omitted here
                            
                            # Link nodes
                            camRgb.isp.link(script.inputs["rgb"])
                            camRgb.isp.link(align.inputAlignTo)
                            camTof.raw.link(tof.input)
                            tof.depth.link(align.input)
                            align.outputAligned.link(pointcloud.inputDepth)
                            pointcloud.outputPointCloud.link(script.inputs["pcl"]
                            script.outputs['out'].link(camRgb.inputControl)

                            To set all queues to non-blocking, I went through the input queues of each node in the docs and set them to non-blocking with a queue size of 1 using the following commands:

                            # set nodes to non-blocking
                            align.inputAlignTo.setBlocking(False)
                            align.inputAlignTo.setQueueSize(1)
                                
                            align.input.setBlocking(False)
                            align.input.setQueueSize(1)
                                
                            tof.input.setBlocking(False)
                            tof.input.setQueueSize(1)
                                
                            pointcloud.inputDepth.setBlocking(False)
                            pointcloud.inputDepth.setQueueSize(1)
                                
                            script.inputs["pcl"].setBlocking(False)
                            script.inputs["pcl"].setQueueSize(1)
                                
                            script.inputs["rgb"].setBlocking(False)
                            script.inputs["rgb"].setQueueSize(1)
                                
                            camRgb.inputControl.setBlocking(False)
                            camRgb.inputControl.setQueueSize(1)
                                
                            camTof.inputControl.setBlocking(False)
                            camTof.inputControl.setQueueSize(1)

                            As mentioned, this alleviates the restarting issue a bit, but the camera in standalone mode still crashes very frequently.
                            Are there any other queues in this pipeline that I can set to non-blocking? As far as I can tell from the docs, only the node input queues can be set to non-blocking, and I have set all of them as shown above, yet I am still encountering regular crashes.

                            Kind regards
                            Georg

                              gherodes
                              Okay, it seems like there is an improvement, which means that is indeed the problem.

                              I'll be at the office tomorrow, and will test your script in standalone to see what is going on.

                              Thanks,
                              Jaka

                              4 days later

                              gherodes
                              I have run the script below (not sure if I even changed anything besides the blocking) and, after waiting goodness knows how long, I cannot get it to crash, even after multiple hours. The script is flashed to the device, and your retrieval script is always running.

                              import depthai as dai
                              import time
                              from datetime import timedelta
                              
                              def create_pipeline(camera_ip, camera_port, script_path): 
                                  pipeline = dai.Pipeline()
                                  RGB_SOCKET = dai.CameraBoardSocket.CAM_C 
                                  TOF_SOCKET = dai.CameraBoardSocket.CAM_A
                                  
                                  # Create nodes 
                                  camRgb = pipeline.create(dai.node.ColorCamera) 
                                  tof = pipeline.create(dai.node.ToF) 
                                  camTof = pipeline.create(dai.node.Camera) 
                                  align = pipeline.create(dai.node.ImageAlign) 
                                  pointcloud = pipeline.create(dai.node.PointCloud) 
                                  script = pipeline.create(dai.node.Script)
                              
                                  # Configure nodes 
                                  camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_800_P) 
                                  camRgb.setBoardSocket(RGB_SOCKET) 
                                  camRgb.setIspScale(1,2)
                                  FPS = 10
                                  camRgb.setFps(FPS)
                              
                                  # ToF settings 
                                  camTof.setFps(FPS) 
                                  camTof.setImageOrientation(dai.CameraImageOrientation.ROTATE_180_DEG) 
                                  camTof.setBoardSocket(TOF_SOCKET)
                              
                                  # Higher number => faster processing. 1 shave core can do 30FPS. 
                                  tof.setNumShaves(1)
                              
                                  # Median filter, kernel size 3x3 
                                  tof.initialConfig.setMedianFilter(dai.MedianFilter.KERNEL_3x3)
                                  tofConfig = tof.initialConfig.get()
                              
                                  # Temporal filter averages shuffle/non-shuffle frequencies 
                                  tofConfig.enablePhaseShuffleTemporalFilter = True 
                              
                                  # Phase unwrapping, for longer range. 
                                  tofConfig.phaseUnwrappingLevel = 2 
                                  tofConfig.phaseUnwrapErrorThreshold = 500 
                                  tof.initialConfig.set(tofConfig) 
                              
                                  pointcloud.initialConfig.setSparse(False)
                                  script.setProcessor(dai.ProcessorType.LEON_CSS) 
                              
                                  script.setScript(f"""
                                          from http.server import BaseHTTPRequestHandler
                                          import socketserver
                                          import socket
                                          import fcntl
                                          import struct
                              
                                          ctrl = CameraControl()  # Assuming CameraControl can handle point clouds
                                          ctrl.setCaptureStill(True)
                              
                                          def get_ip_address(ifname):
                                              s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
                                              return socket.inet_ntoa(fcntl.ioctl(
                                                  s.fileno(), -1071617759,  # SIOCGIFADDR
                                                  struct.pack('256s', ifname[:15].encode())
                                              )[20:24])
                              
                                          class HTTPHandler(BaseHTTPRequestHandler):
                                              def do_GET(self):
                                                  if self.path == '/':
                                                      self.send_response(200)
                                                      self.end_headers()
                                                      self.wfile.write(b'<h1>[DepthAI] Hello, world!</h1><p>Click <a href="img">here</a> for an image</p><p>Click <a href="pointcloud">here</a> for a pointcloud</p>')
                                                  elif self.path == '/img':
                                                      # Capture the image from the camera
                                                      node.io['out'].send(ctrl)
                                                      image = node.io['rgb'].get()
                                                      self.send_response(200)
                                                      self.send_header('Content-Type', 'image/jpeg')
                                                      self.send_header('Content-Length', str(len(image.getData())))
                                                      self.end_headers()
                                                      print(f"Image size: {{len(image.getData())}}")
                                                      self.wfile.write(image.getData())
                                                  elif self.path == '/pointcloud':
                                                      # Capture the point cloud data
                                                      node.io['out'].send(ctrl)
                                                      pointcloud = node.io['pcl'].get()
                                                      self.send_response(200)
                                                      self.send_header('Content-Type', 'application/octet-stream')  # Binary file
                                                      self.send_header('Content-Length', str(len(pointcloud.getData())))
                                                      self.end_headers()
                                                      self.wfile.write(pointcloud.getData())
                                                  else:
                                                      self.send_response(404)
                                                      self.end_headers()
                                                      self.wfile.write(b'Url not found...')
                              
                                          with socketserver.TCPServer(("0.0.0.0", {camera_port}), HTTPHandler) as httpd:
                                              print("Server started")
                                              httpd.serve_forever()
                                          """)
                              
                                  # Linking 
                                  print("Linking nodes...") 
                                  camRgb.isp.link(script.inputs["rgb"]) 
                                  script.inputs["rgb"].setBlocking(False)
                                  script.inputs["rgb"].setQueueSize(1)
                                  camRgb.isp.link(align.inputAlignTo) 
                                  camTof.raw.link(tof.input) 
                                  tof.depth.link(align.input) 
                                  align.outputAligned.link(pointcloud.inputDepth) 
                                  pointcloud.outputPointCloud.link(script.inputs["pcl"])
                                  script.outputs['out'].link(camRgb.inputControl) 
                                  print("Nodes linked") 
                                  return pipeline
                              
                              if __name__ == "__main__": 
                                  camera_ip = '10.12.118.118' 
                                  camera_port = '8080' 
                              
                                  pipeline = create_pipeline(camera_ip, camera_port, None) 
                                  ret, device_info = dai.Device.getFirstAvailableDevice()
                              
                                  if not ret:
                                      print("No available devices")
                                      exit(1)
                              
                                  (f, bl) = dai.DeviceBootloader.getFirstAvailableDevice()
                                  bootloader = dai.DeviceBootloader(bl)
                                  progress = lambda p : print(f'Flashing progress: {p*100:.1f}%')
                                  (r, errmsg) = bootloader.flash(progress, pipeline, memory=dai.DeviceBootloader.Memory.FLASH, compress=True)
                                  if r: print("Flash OK")
                                  else: print("Flash ERROR:", errmsg)
                                  # device = dai.Device(pipeline)
                                  # while True:
                                  #     time.sleep(1)
                                  #     pass
                                  #     # retrieve_image_and_pointcloud(server_url)

                              Thanks,
                              Jaka