• DepthAI
  • Converting running code to standalone mode

Hey guys, how are you doing? I have this code, and I'd like to run it in standalone mode. I have both OAK-1 PoE and OAK-D PoE cameras, and I want to run in standalone mode to get the maximum performance possible. Is there any step-by-step documentation or tutorial on how to convert to standalone mode? I read the official website, but couldn't understand exactly how to do it. I'll leave the code below, hope you can help me. Thanks 🙂

from pathlib import Path
import cv2
import depthai as dai
import time
from environs import Env

env = Env()
env.read_env()

MxID = env('MxID')
# Custom ROI as normalized (xmin, ymin, xmax, ymax) coordinates for the ImageManip crop
custom_roi = (350/640, 250/640, 640/640, 640/640)

# Coordinates of the counting line
line_start = (320, 0)
line_end = (320, 640)

# label map (only person is used)
labelMap = ["person",]

nnPath = str((Path(__file__).parent / Path('model/yolov6n_coco_640x640_openvino_2022.1_6shave.blob')).resolve().absolute())

# Creating pipeline
pipeline = dai.Pipeline()

# Sources and outputs
camRgb = pipeline.create(dai.node.ColorCamera)
detectionNetwork = pipeline.create(dai.node.YoloDetectionNetwork)
objectTracker = pipeline.create(dai.node.ObjectTracker)

xlinkOut   = pipeline.create(dai.node.XLinkOut)
trackerOut = pipeline.create(dai.node.XLinkOut)
fullFrame  = pipeline.create(dai.node.XLinkOut) # XLinkOut node for the full frame

xlinkOut.setStreamName("preview")
trackerOut.setStreamName("tracklets")
fullFrame.setStreamName("full_frame") # To get Full Frame

# Creating Manip node
manip = pipeline.create(dai.node.ImageManip)
# Setting CropRect for the Region of Interest
# manip.initialConfig.setCropRect(*custom_roi)
# Setting Resize for the neural network input size
manip.initialConfig.setResize(640, 640)
# Setting maximum output frame size based on the desired output dimensions
max_output_width = 640
max_output_height = 640
max_output_frame_size = 3 * max_output_width * max_output_height # Assuming 3 channels for BGR image
manip.setMaxOutputFrameSize(max_output_frame_size)

# Properties
if MxID == "14442C10C1AD3FD700":
    camRgb.setImageOrientation(dai.CameraImageOrientation.HORIZONTAL_MIRROR)
camRgb.setPreviewSize(640, 640)
camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P)
camRgb.setInterleaved(False)
camRgb.setColorOrder(dai.ColorCameraProperties.ColorOrder.BGR)
camRgb.setFps(40)

# Network specific settings
detectionNetwork.setConfidenceThreshold(0.5)
detectionNetwork.setNumClasses(80)
detectionNetwork.setCoordinateSize(4)
# detectionNetwork.setAnchors([10, 14, 23, 27, 37, 58, 81, 82, 135, 169, 344, 319]) #for YOLOv4
# detectionNetwork.setAnchorMasks({"side26": [1, 2, 3], "side13": [3, 4, 5]})
detectionNetwork.setAnchors([10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326]) #for YOLOv5
detectionNetwork.setAnchorMasks({"side52": [0,1,2], "side26": [3,4,5], "side13": [6,7,8]})
detectionNetwork.setIouThreshold(0.5)
detectionNetwork.setBlobPath(nnPath)
detectionNetwork.setNumInferenceThreads(2)
detectionNetwork.input.setBlocking(False)

objectTracker.setDetectionLabelsToTrack([0])  # track only person
# possible tracking types: ZERO_TERM_COLOR_HISTOGRAM, ZERO_TERM_IMAGELESS, SHORT_TERM_IMAGELESS, SHORT_TERM_KCF
objectTracker.setTrackerType(dai.TrackerType.ZERO_TERM_COLOR_HISTOGRAM)
# take the smallest ID when new object is tracked, possible options: SMALLEST_ID, UNIQUE_ID
objectTracker.setTrackerIdAssignmentPolicy(dai.TrackerIdAssignmentPolicy.UNIQUE_ID)

#Linking
# Connecting Manip node to ColorCamera
camRgb.preview.link(manip.inputImage)
camRgb.preview.link(fullFrame.input)
# Connecting Manip node to YoloDetectionNetwork
manip.out.link(detectionNetwork.input)
# camRgb.preview.link(detectionNetwork.input)
objectTracker.passthroughTrackerFrame.link(xlinkOut.input)

detectionNetwork.passthrough.link(objectTracker.inputTrackerFrame)

detectionNetwork.passthrough.link(objectTracker.inputDetectionFrame)
detectionNetwork.out.link(objectTracker.inputDetections)
objectTracker.out.link(trackerOut.input)

device = dai.DeviceInfo(MxID)

# Connecting to device and starting pipeline
with dai.Device(pipeline, device) as device:

    preview = device.getOutputQueue("preview", 4, False)
    tracklets = device.getOutputQueue("tracklets", 4, False)
    previewFull = device.getOutputQueue("full_frame", 4, False)

    startTime = time.monotonic()
    counter = 0
    fps = 0
    frame = None

    h_line = 320
    pos = {}
    going_in = 0
    going_out = 0
    obj_counter = [0, 0, 0, 0]  # left, right, up, down

    while(True):
        imgFrame = preview.get()
        track = tracklets.get()
        imgFull = previewFull.get()

        counter+=1
        current_time = time.monotonic()
        if (current_time - startTime) > 1 :
            fps = counter / (current_time - startTime)
            counter = 0
            startTime = current_time

        color = (255, 0, 0)
        text_color = (0, 0, 255)
        rectangle = (111, 147, 26)

        frame = imgFrame.getCvFrame()
        trackletsData = track.tracklets
        # Draw the counting line on the frame
        cv2.line(frame, line_start, line_end, (0, 255, 0), 2)

        for t in trackletsData:
            if t.status.name == "TRACKED":
                roi = t.roi.denormalize(frame.shape[1], frame.shape[0])
                x1 = int(roi.topLeft().x)
                y1 = int(roi.topLeft().y)
                x2 = int(roi.bottomRight().x)
                y2 = int(roi.bottomRight().y)

                # Calculate centroid
                centroid = (int((x2-x1)/2+x1), int((y2-y1)/2+y1))

                # Calculate the buffer zone boundaries
                right_boundary = h_line + 15
                left_boundary = h_line - 15

                try:
                    if not (left_boundary <= centroid[0] <= right_boundary):
                        pos[t.id] = {
                            'previous': pos[t.id]['current'],
                            'current': centroid[0]      }
                    if pos[t.id]['current'] > right_boundary and pos[t.id]['previous'] < right_boundary:

                        obj_counter[1] += 1 #Right side
                        going_in += 1
                    if pos[t.id]['current'] < left_boundary and pos[t.id]['previous'] > left_boundary:
                    
                        obj_counter[0] += 1 #Left side
                        going_out += 1
                except:
                    pos[t.id] = {'current': centroid[0]}

                try:
                    label = labelMap[t.label]
                except:
                    label = t.label

                cv2.putText(frame, str(label), (x1 + 10, y1 + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, text_color)
                cv2.putText(frame, f"ID: {[t.id]}", (x1 + 10, y1 + 45), cv2.FONT_HERSHEY_TRIPLEX, 0.5, text_color)
                cv2.putText(frame, t.status.name, (x1 + 10, y1 + 70), cv2.FONT_HERSHEY_TRIPLEX, 0.5, text_color)
                cv2.rectangle(frame, (x1, y1), (x2, y2), rectangle, 2)
                cv2.circle(frame, (centroid[0], centroid[1]), 4, (255, 255, 255), -1)
    

        cv2.putText(frame, f'Left: {obj_counter[0]}; Right: {obj_counter[1]}', (10, 35), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 0xFF), 2, cv2.LINE_AA)
        cv2.putText(frame, "FPS: {:.2f}".format(fps), (2, frame.shape[0] - 4), cv2.FONT_HERSHEY_TRIPLEX, 0.6, text_color)

        # Displaying full frame
        frameFull = imgFull.getCvFrame()
        cv2.imshow("full_frame", frameFull)

        # Displaying cropped frame with tracked objects
        cv2.imshow("tracker", frame)

        if cv2.waitKey(1) == ord('q'):
            break

    Fakhrullo
    preview = device.getOutputQueue("preview", 4, False)
    tracklets = device.getOutputQueue("tracklets", 4, False)
    previewFull = device.getOutputQueue("full_frame", 4, False)

    These need to go, since in standalone mode you don't have a direct connection to the host. This also means that to view any results, you need another way for the host to see them: you could create an HTTP or MJPEG server on the device, or stream the data over TCP. We have examples in our experiments repo - link to examples.
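    As a rough sketch (not a drop-in replacement): the three XLinkOut nodes could be replaced by a Script node that runs on the camera and serves the tracker results over TCP, so the host connects to the camera's IP instead of reading XLink queues. The port number (5000) and the line-based message format below are arbitrary choices, and this assumes the Tracklets message exposes the same fields (id, roi) inside the Script node as it does on the host.

# Sketch: replace the XLinkOut nodes ("preview", "tracklets", "full_frame") with an
# on-device Script node that streams each tracklet's id and centroid over TCP.
script = pipeline.create(dai.node.Script)
script.setProcessor(dai.ProcessorType.LEON_CSS)  # run the script on the CSS core
objectTracker.out.link(script.inputs['tracklets'])

script.setScript("""
import socket

# Simple TCP server running on the camera itself (port 5000 is an arbitrary choice)
server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
server.bind(("0.0.0.0", 5000))
server.listen(1)
node.warn("Waiting for a TCP client on port 5000")
conn, _ = server.accept()
node.warn("Client connected")

while True:
    tracklets = node.io['tracklets'].get().tracklets
    lines = []
    for t in tracklets:
        roi = t.roi
        cx = (roi.topLeft().x + roi.bottomRight().x) / 2  # normalized centroid x
        cy = (roi.topLeft().y + roi.bottomRight().y) / 2  # normalized centroid y
        lines.append(f"{t.id},{cx:.3f},{cy:.3f}")
    conn.send(("\\n".join(lines) + "\\n").encode())
""")

    On the host you would then open a TCP connection to the camera's IP on that port and run the same counting/drawing logic there, instead of calling device.getOutputQueue().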

    You can also check the cumulative object counting example, which has code for both the standard and the standalone mode of operation, so you can compare the two.
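    Once the pipeline has no XLink connections left, you flash it to the camera so it runs by itself on every boot. A minimal sketch, assuming the usual DeviceBootloader flashing flow and reusing the MxID from the .env file above:

# Sketch: flash the XLink-free pipeline to the device's flash memory.
import depthai as dai

device_info = dai.DeviceInfo(MxID)  # same MxID as read from the .env file
bootloader = dai.DeviceBootloader(device_info)
progress = lambda p: print(f"Flashing progress: {p * 100:.1f}%")
bootloader.flash(progress, pipeline)

    After flashing, the camera starts this pipeline on its own every time it powers up, with no host application required.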

    Thanks,
    Jaka