How to index and modify depth pixels (like OpenCV) inside a DepthAI Script node?

ZZhengkunLi · 30 May

Hi all,

I'm using an OAK-D-Pro-W-PoE camera and working inside a DepthAI Script node to modify the depth image before syncing.

On the host side, I can easily access and process the depth image using OpenCV or NumPy:

depth = frame.getFrame() # as np.ndarray

depth[50:320, 200:400] = 65535 # white ROI border

My goal:

In the Script node, I want to:

Access the 2D pixel values of the depth frame (16UC1),
Modify pixels inside a specific ROI (e.g., rows 50–320, cols 200–400),
Set the ROI boundary pixels to white (65535),
Send the modified frame downstream (e.g., into a Sync node).

What I’ve tried so far:

1. Using `frame.getFrame()`

$$
pythonCopyEditframe = node.io['depth'].get()
depth = frame.getFrame()
depth[50][200] = 65535 # intended access
frame.setFrame(depth)
node.io['out'].send(frame)
$$

Error: 'ImgFrame' object has no attribute 'getFrame'

I now understand that Script nodes do not support getFrame(), only getData().

2. Using `frame.getData()`

$$
pythonCopyEditraw = frame.getData() # flat byte list
$$

Now I’m unsure how to safely:

Interpret this byte array as uint16 (depth pixel values),
Index pixel at (row, col) like OpenCV (e.g., depth[100,150]),
Modify values, then write back with setData().

I tried:

$$
pythonCopyEditval = raw | (raw[i+1] << 8)
$$

but it feels clunky to write full 2D processing this way.

So my question is:

What’s the best practice to access and modify depth pixels inside a Script node like in OpenCV (e.g., depth[row][col] = value) — especially when working with 16-bit depth?

Specifically:

Is there a utility function or a safer idiom to convert getData() to a 2D grid of uint16?
Is looping over large areas (like ROI drawing) practical inside Script?
Are there performance or memory limits I should be aware of?
Would using setData() with re-packed uint16 values be the recommended approach?

Thanks so much in advance!

ZZhengkunLi · 30 May

ZhengkunLi

This is the script:

#!/usr/bin/env python3

import depthai as dai
import numpy as np
import rospy
from sensor_msgs.msg import Image
from cv_bridge import CvBridge
from datetime import timedelta
import threading

bridge = CvBridge()

def create_pipeline():
pipeline = dai.Pipeline()

monoLeft = pipeline.create(dai.node.MonoCamera)
monoRight = pipeline.create(dai.node.MonoCamera)
color = pipeline.create(dai.node.ColorCamera)
stereo = pipeline.create(dai.node.StereoDepth)
script = pipeline.create(dai.node.Script)
sync = pipeline.create(dai.node.Sync)
xout = pipeline.create(dai.node.XLinkOut)
xout.setStreamName("xout")

# Mono cameras config
monoLeft.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
monoLeft.setCamera("left")
monoRight.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
monoRight.setCamera("right")

# Color camera config
color.setBoardSocket(dai.CameraBoardSocket.CAM_A)
color.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P)
color.setIspScale(1, 3)
color.setInterleaved(False)
color.setFps(15)

# Stereo depth config
stereo.setDefaultProfilePreset(dai.node.StereoDepth.PresetMode.DEFAULT)
stereo.setSubpixel(True)
stereo.setDepthAlign(dai.CameraBoardSocket.CAM_A)

config = stereo.initialConfig.get()
config.postProcessing.speckleFilter.enable = False
config.postProcessing.temporalFilter.enable = True
config.postProcessing.spatialFilter.enable = True
config.postProcessing.spatialFilter.holeFillingRadius = 2
config.postProcessing.spatialFilter.numIterations = 1
config.postProcessing.thresholdFilter.minRange = 400
config.postProcessing.thresholdFilter.maxRange = 5000
config.postProcessing.decimationFilter.decimationFactor = 1
stereo.initialConfig.set(config)

# Sync config
sync.setSyncThreshold(timedelta(milliseconds=50))

# Script node logic (no numpy)
script.setScript("""
# Get depth frame
frame = node.io['depth'].get()
node.warn("Got frame: " + str(frame))
                 
depth = frame.getFrame()  # 2D list of uint16

# ROI configuration
r1, r2 = 50, 320
c1, c2 = 200, 400

# Draw vertical borders (left and right)
for r in range(r1, r2):
    depth[r][c1] = 65535
    depth[r][c2 - 1] = 65535

# Draw horizontal borders (top and bottom)
for c in range(c1, c2):
    depth[r1][c] = 65535
    depth[r2 - 1][c] = 65535

# Write modified frame back
frame.setFrame(depth)
node.io['out'].send(frame)

""")

script.inputs["depth"].setBlocking(False)
script.setProcessor(dai.ProcessorType.LEON_CSS)

# Link nodes
monoLeft.out.link(stereo.left)
monoRight.out.link(stereo.right)
stereo.depth.link(script.inputs["depth"])
script.outputs["out"].link(sync.inputs["depth"])
color.isp.link(sync.inputs["isp"])
sync.out.link(xout.input)

return pipeline

def run_camera(name, ip, rgb_topic, depth_topic):
rospy.loginfo(f"[{name}] Starting camera at {ip}")
pipeline = create_pipeline()

try:
    device_info = dai.DeviceInfo(ip)
    with dai.Device(pipeline, device_info) as device:
        device.setIrLaserDotProjectorIntensity(0.8)
        device.setIrFloodLightIntensity(0.5)

        queue = device.getOutputQueue(name="xout", maxSize=2, blocking=False)
        rgb_pub = rospy.Publisher(rgb_topic, Image, queue_size=1)
        depth_pub = rospy.Publisher(depth_topic, Image, queue_size=1)

        while not rospy.is_shutdown():
            msgGrp = queue.get()
            rgb_frame = None
            depth_frame = None

            for name_, msg in msgGrp:
                frame = msg.getCvFrame()
                if name_ == "isp":
                    rgb_frame = frame
                elif name_ == "depth":
                    depth_frame = frame

            timestamp = rospy.Time.now()

            if rgb_frame is not None:
                rgb_msg = bridge.cv2_to_imgmsg(rgb_frame, encoding="bgr8")
                rgb_msg.header.stamp = timestamp
                rgb_pub.publish(rgb_msg)

            if depth_frame is not None:
                depth_msg = bridge.cv2_to_imgmsg(depth_frame, encoding="16UC1")
                depth_msg.header.stamp = timestamp
                depth_pub.publish(depth_msg)

except Exception as e:
    rospy.logerr(f"[{name}] Failed to connect or stream: {e}")

if name == "main":
rospy.init_node("multi_oak_rgb_depth", anonymous=True)

cam_configs = {
    "fl_cam": {
        "ip": "10.0.1.22",
        "rgb_topic": "/fl_cam/rgb/image_raw",
        "depth_topic": "/fl_cam/depth/image_raw"
    },

}

threads = []
for cam_name, config in cam_configs.items():
    t = threading.Thread(target=run_camera, args=(
        cam_name, config["ip"], config["rgb_topic"], config["depth_topic"]))
    t.start()
    threads.append(t)

rospy.spin()
for t in threads:
    t.join()

My goal:

What I’ve tried so far:

1. Using frame.getFrame()

2. Using frame.getData()

So my question is:

1. Using `frame.getFrame()`

2. Using `frame.getData()`