Hello,
I am trying to implement a pipeline that uses the PLACES365 pretrained models for scene classification.
Since the model requires the image to be pre-processed, converted to a tensor, and normalized (the inference code is given here), I decided to do this pre-processing on the host. I will also be using STILL images captured from camRGB. My pipeline looks as follows:
XlinkIn (to get capture command) --> camRGB --> xlinkOut (send frames to host)
Then, pre-processing is done on host, and the tensor is sent back to device
XlinkIn (to get tensors) --> NN --> xlinkOut (send NN predictions to host)
Finally, the results are decoded in the host
The problem is that I get the error below and haven't been able to figure out how to solve it:
[14442C1001AB47D700] [20.463] [NeuralNetwork(4)] [error] Input tensor 'data' (0) exceeds available data range. Data size (0B), tensor offset (0), size (150528B) - skipping inference
It seems to me the issue is either with the pre-processing or with how the tensors are sent back to the device.
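Also, 150528 is exactly 3 x 224 x 224 bytes, which makes me wonder whether the blob expects U8 rather than FP16 input. My current guess is that setData() silently ignores a torch tensor, which would explain the 0B data size. Here is a sketch of the conversion I imagine is needed (tensorToNNData is a name I made up; sending a list of floats assumes an FP16-input blob, which I haven't verified):

import numpy as np
import depthai as dai

def tensorToNNData(tensor, inputName="data"):
    # drop the batch dimension; torchvision's ToTensor already gives
    # planar CHW layout, which should be what the blob expects
    arr = tensor.detach().cpu().numpy().squeeze(0)
    msg = dai.NNData()
    # setLayer() takes a flat Python list, not a torch tensor; as far
    # as I understand, a float list is sent to the device as FP16
    msg.setLayer(inputName, arr.flatten().tolist())
    return msg

and then modifiedImagesQueue.send(tensorToNNData(modifiedFrame2)) in place of the setData() call below, but I am not sure this is right.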
I would be glad if anyone could help me with this.
The code is given below:
import torch
from torch.autograd import Variable as V
import torchvision.models as models
from torchvision import transforms as trn
from torch.nn import functional as F
from PIL import Image
import cv2
import depthai as dai
import numpy as np
# labels path
path = ".............../categories_places365.txt"
with open(path, "r") as file:
    classes = file.read().splitlines()
# nn path
nnPath = ".............../googlenet_places365.blob"
# dummy image
x = cv2.imread("blue.png")
# Create the pipeline
pipeline = dai.Pipeline()
# 1 Create input control node to acquire capture command
xinCaptureCommand = pipeline.create(dai.node.XLinkIn)
xinCaptureCommand.setStreamName("capture")
# 2 Create Camera node and give its properties
camRGB = pipeline.create(dai.node.ColorCamera)
camRGB.setResolution(dai.ColorCameraProperties.SensorResolution.THE_4_K)
camRGB.setStillSize(1080, 1080)
camRGB.setPreviewSize(1080, 1080)
camRGB.setVideoSize(1080, 1080)
# camRGB.setInterleaved(False)
camRGB.setColorOrder(dai.ColorCameraProperties.ColorOrder.RGB)
# 3 Create output node for still images
outStillRGB = pipeline.create(dai.node.XLinkOut)
outStillRGB.setStreamName("rgbStill")
# 4 Create input control node to acquire modified images
inDataToNN = pipeline.create(dai.node.XLinkIn)
inDataToNN.setStreamName("modifiedImages")
# 5 Create NN nodes
nn = pipeline.create(dai.node.NeuralNetwork)
nn.setBlobPath(nnPath)
nn.setNumInferenceThreads(2)
nn.input.setBlocking(False)
# 6 Create output node for predictions
outPredictions = pipeline.create(dai.node.XLinkOut)
outPredictions.setStreamName("predictions")
# Linking
# Link output of xinCaptureCommand to camera input control
xinCaptureCommand.out.link(camRGB.inputControl)
# Link output of camRGB to input of outStillRGB
camRGB.still.link(outStillRGB.input)
# Link output of inDataToNN to nn
inDataToNN.out.link(nn.input)
# Link output of nn to input of outPredictions
nn.out.link(outPredictions.input)
#######################################################################################
# Connect to device and start the pipeline
with dai.Device(pipeline) as device:
    # load the image transformer
    centre_crop = trn.Compose([
        trn.Resize((256, 256)),
        trn.CenterCrop(224),
        trn.ToTensor(),
        trn.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    # Create input queue to device, that receives capture command
    captureInputQueue = device.getInputQueue("capture")
    # Create output queue that will get RGB frame (Output from device, and input to host)
    stillQueue = device.getOutputQueue(name="rgbStill")
    # Create input queue to device, that receives modified frames
    modifiedImagesQueue = device.getInputQueue("modifiedImages")
    # Create output queue that will get predictions (Output from device, and input to host)
    qDet = device.getOutputQueue(name="predictions")
    frame = None
    # show the dummy image so an OpenCV window exists for waitKey()
    cv2.imshow("x", x)
    while True:
        # try to get a frame from the device
        stillFrame = stillQueue.tryGet()
        if stillFrame is not None:
            # B (i) - host gets the frame and modifies it
            frame = stillFrame.getCvFrame()
            # frame = cv2.imdecode(stillFrame.getData(), cv2.IMREAD_UNCHANGED)
            cv2.imshow("frame before modifying it", frame)
            # B (ii) - modify frame (getCvFrame() returns BGR, so convert for PIL)
            modifiedFrame1 = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            modifiedFrame2 = V(centre_crop(modifiedFrame1).unsqueeze(0))
            print("frame modified successfully", modifiedFrame2.shape)
            # C - host sends modified frame to device
            # (this is the line the error seems to point at; setData() may not
            # accept a torch tensor directly - see the sketch above)
            nnMsg = dai.NNData()
            nnMsg.setData(modifiedFrame2)
            modifiedImagesQueue.send(nnMsg)
        # D - host gets predictions from device
        inDet = qDet.tryGet()
        if inDet is not None:
            # the NeuralNetwork node returns raw tensors, so read the
            # logits from the first output layer and apply softmax
            logits = torch.tensor(inDet.getFirstLayerFp16())
            h_x = F.softmax(logits, dim=0)
            probs, idx = h_x.sort(0, True)
            # output the top-5 predictions
            for i in range(0, 5):
                print('{:.3f} -> {}'.format(probs[i], classes[idx[i]]))
        # A - host sends capture command to device
        key = cv2.waitKey(1)
        if key == ord("q"):
            break
        elif key == ord('c'):
            ctrl = dai.CameraControl()
            ctrl.setCaptureStill(True)
            captureInputQueue.send(ctrl)
            # print("captured")
Note that the code related to pre-processing the image and decoding the results is taken from here.
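In case it helps, here is the minimal standalone test I was planning to use to isolate the XlinkIn --> NN path from the camera. It just pushes a dummy all-zeros tensor through the network (stream and layer names match the code above; whether the input should be FP16 or U8 depends on how the blob was compiled, which I am unsure about):

import depthai as dai
import numpy as np

nnPath = ".............../googlenet_places365.blob"

pipeline = dai.Pipeline()
xin = pipeline.create(dai.node.XLinkIn)
xin.setStreamName("modifiedImages")
nn = pipeline.create(dai.node.NeuralNetwork)
nn.setBlobPath(nnPath)
xout = pipeline.create(dai.node.XLinkOut)
xout.setStreamName("predictions")
xin.out.link(nn.input)
nn.out.link(xout.input)

with dai.Device(pipeline) as device:
    qIn = device.getInputQueue("modifiedImages")
    qOut = device.getOutputQueue("predictions")
    # dummy planar 3x224x224 input, matching the 'data' tensor shape
    dummy = np.zeros((3, 224, 224), dtype=np.float32)
    msg = dai.NNData()
    msg.setLayer("data", dummy.flatten().tolist())
    qIn.send(msg)
    # blocks until the device returns a result
    print(len(qOut.get().getFirstLayerFp16()), "output values received")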