Hi Luxonis Team,
I'm running into memory issues when trying to run my custom models on the OAK-FFC-4P and I'd like to better understand how memory is being allocated. Here is the error from the logs:
[14442C1021F1D5D600] [1.12] [5.607] [NeuralNetwork(0)] [error] Tried to allocate '306438208'B out of '29515775'B available.
[14442C1021F1D5D600] [1.12] [5.609] [NeuralNetwork(0)] [error] Neural network executor '1' out of '2' error: OUT_OF_MEMORY
[14442C1021F1D5D600] [1.12] [6.412] [system] [info] Memory Usage - DDR: 305.24 / 333.39 MiB, CMX: 2.07 / 2.50 MiB, LeonOS Heap: 9.61 / 82.31 MiB, LeonRT Heap: 5.04 / 40.50 MiB / NOC ddr: 49 MB/s
[14442C1021F1D5D600] [1.12] [6.412] [system] [info] Temperatures - Average: 42.35C, CSS: 43.73C, MSS 41.89C, UPA: 41.89C, DSS: 41.89C
[14442C1021F1D5D600] [1.12] [6.412] [system] [info] Cpu Usage - LeonOS 8.54%, LeonRT: 89.76%
For context, I'm sending host-side images to the device for inference. My NN blob is ~9.5 MB, and my input and output images are 768x768x1 float16. Given this, I'm not sure what's taking up all of the memory.
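For reference, here is the back-of-envelope arithmetic behind that expectation. This is just my own estimate of what I thought would need to be resident (blob plus a few frame buffers), not anything taken from the docs; the 306,438,208 B and 29,515,775 B figures are from the error log above.

# My rough estimate of the expected footprint (assumption, not from the docs)
blob_bytes  = 9.5 * 1024 * 1024          # ~9.5 MB compiled blob
frame_bytes = 768 * 768 * 1 * 2          # 768x768x1 float16 -> 1,179,648 B (~1.13 MiB)
pool_bytes  = 2 * frame_bytes            # setNumPoolFrames(2) on the NN node
expected    = blob_bytes + pool_bytes + frame_bytes   # blob + output pool + one input frame
print(f"expected ~{expected / 2**20:.1f} MiB")        # ~12.9 MiB

# What the log actually reports for a single allocation attempt:
requested = 306_438_208                  # bytes, from the OUT_OF_MEMORY error
available = 29_515_775
print(f"requested {requested / 2**20:.1f} MiB vs {available / 2**20:.1f} MiB available")
# requested ~292.2 MiB vs ~28.2 MiB available

So the single allocation the NeuralNetwork node attempts is roughly 20x larger than everything I can account for, which is the part I don't understand.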
Here is a snippet of the script I'm using for host-side inference:
import depthai as dai
import numpy as np


def create_myriadx_nn_pipeline(nn_path):
    pipeline = dai.Pipeline()

    # NeuralNetwork node running the custom blob
    detection_nn = pipeline.create(dai.node.NeuralNetwork)
    detection_nn.setBlobPath(nn_path)
    detection_nn.setNumPoolFrames(2)
    detection_nn.input.setBlocking(False)
    detection_nn.setNumInferenceThreads(1)

    # Host -> device input stream for the image
    img_in = pipeline.create(dai.node.XLinkIn)
    img_in.setMaxDataSize(768 * 768 * 2)  # one 768x768x1 float16 frame
    img_in.setNumFrames(1)
    img_in.setStreamName("img_in")
    img_in.out.link(detection_nn.input)

    # Passthrough of the NN input, for debugging
    xout_rgb = pipeline.create(dai.node.XLinkOut)
    xout_rgb.setStreamName("nn_input")
    xout_rgb.input.setBlocking(False)
    detection_nn.passthrough.link(xout_rgb.input)

    # Device -> host output stream for the NN result
    xout_nn = pipeline.create(dai.node.XLinkOut)
    xout_nn.setStreamName("nn")
    xout_nn.input.setBlocking(False)
    detection_nn.out.link(xout_nn.input)

    device = dai.Device(pipeline)
    device.setLogLevel(dai.LogLevel.DEBUG)
    device.setLogOutputLevel(dai.LogLevel.DEBUG)
    print('Device pipeline created')
    return device
def myriad_run(device, inp):
    # Wrap the host-side image in an NNData message on the layer named "fp16"
    nn_data = dai.NNData()
    nn_data.setLayer("fp16", inp.astype(float).flatten().tolist())

    img_in_q = device.getInputQueue(name="img_in")
    q_nn = device.getOutputQueue(name="nn", maxSize=4, blocking=False)

    img_in_q.send(nn_data)
    in_nn = q_nn.get()

    layers = in_nn.getAllLayers()
    layer1 = in_nn.getLayerFp16(layers[0].name)
    result = np.asarray(layer1, dtype=np.float32).reshape((1, 768, 768, 1))
    return result
img = np.load(r"path\to\inputimg")
nn_path = r"path\to\blob"
device = create_myriadx_nn_pipeline(nn_path)
pred = myriad_run(device, img)