Hello erik,
I am providing the MRE with the converted model and the pipeline creation script (please find the YOLOv7 model files in this repo: https://github.com/Alp-1/oakdyolov7). I would appreciate your comments.
Thank you.
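For reference, the blob name indicates it was converted for OpenVINO 2022.1 with 4 shaves. Below is a minimal sketch of how such a conversion is typically done with the blobconverter package; the ONNX filename and exact parameters are my assumptions, not the precise command used for this model.
Blob Conversion Sketch:
import blobconverter

blob_path = blobconverter.from_onnx(
    model="best.onnx",   # assumed ONNX export of the YOLOv7 model
    data_type="FP16",
    shaves=4,            # matches the "_4shave" suffix in the blob name
    version="2022.1",    # matches the OpenVINO version in the blob name
)
print(blob_path)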
Pipeline Creation Script:
import cv2
import depthai as dai
import time
nnBlobPath = "best_openvino_2022.1_4shave.blob"  # converted YOLOv7 blob (see repo above)
labelMap = ["trunk"]  # single-class model
syncNN = True  # True: block on all queues so RGB, depth, and detections stay in sync
# Create pipeline
pipeline = dai.Pipeline()
# Define sources and outputs
camRgb = pipeline.create(dai.node.ColorCamera)
spatialDetectionNetwork = pipeline.create(dai.node.YoloSpatialDetectionNetwork)
monoLeft = pipeline.create(dai.node.MonoCamera)
monoRight = pipeline.create(dai.node.MonoCamera)
stereo = pipeline.create(dai.node.StereoDepth)
nnNetworkOut = pipeline.create(dai.node.XLinkOut)
xoutRgb = pipeline.create(dai.node.XLinkOut)
xoutNN = pipeline.create(dai.node.XLinkOut)
xoutDepth = pipeline.create(dai.node.XLinkOut)
xoutRgb.setStreamName("rgb")
xoutNN.setStreamName("detections")
xoutDepth.setStreamName("depth")
nnNetworkOut.setStreamName("nnNetwork")
# Properties
camRgb.setPreviewSize(640, 640)  # must match the NN input resolution of the blob
camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_12_MP)
camRgb.setInterleaved(False)
camRgb.setColorOrder(dai.ColorCameraProperties.ColorOrder.BGR)
camRgb.setFps(2)
monoLeft.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
monoLeft.setBoardSocket(dai.CameraBoardSocket.LEFT)
monoRight.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
monoRight.setBoardSocket(dai.CameraBoardSocket.RIGHT)
# setting node configs
stereo.setDefaultProfilePreset(dai.node.StereoDepth.PresetMode.HIGH_DENSITY)
# Align depth map to the perspective of RGB camera, on which inference is done
stereo.setDepthAlign(dai.CameraBoardSocket.RGB)
stereo.setOutputSize(monoLeft.getResolutionWidth(), monoLeft.getResolutionHeight())
spatialDetectionNetwork.setBlobPath(nnBlobPath)
spatialDetectionNetwork.setConfidenceThreshold(0.5)
spatialDetectionNetwork.input.setBlocking(False)
spatialDetectionNetwork.setBoundingBoxScaleFactor(0.5)  # shrink the ROI to 50% for depth averaging
spatialDetectionNetwork.setDepthLowerThreshold(100)  # ignore depth closer than 10 cm
spatialDetectionNetwork.setDepthUpperThreshold(5000)  # ignore depth beyond 5 m
# Yolo specific parameters
spatialDetectionNetwork.setNumClasses(len(labelMap))
spatialDetectionNetwork.setCoordinateSize(4)
spatialDetectionNetwork.setAnchors([
    10.0, 13.0, 16.0, 30.0, 33.0, 23.0,       # side80 (stride 8)
    30.0, 61.0, 62.0, 45.0, 59.0, 119.0,      # side40 (stride 16)
    116.0, 90.0, 156.0, 198.0, 373.0, 326.0,  # side20 (stride 32)
])
spatialDetectionNetwork.setAnchorMasks({
    "side80": [0, 1, 2],  # 640/8  = 80
    "side40": [3, 4, 5],  # 640/16 = 40
    "side20": [6, 7, 8],  # 640/32 = 20
})
spatialDetectionNetwork.setIouThreshold(0.5)
# Linking
monoLeft.out.link(stereo.left)
monoRight.out.link(stereo.right)
camRgb.preview.link(spatialDetectionNetwork.input)
if syncNN:
    spatialDetectionNetwork.passthrough.link(xoutRgb.input)
else:
    camRgb.preview.link(xoutRgb.input)
spatialDetectionNetwork.out.link(xoutNN.input)
stereo.depth.link(spatialDetectionNetwork.inputDepth)
spatialDetectionNetwork.passthroughDepth.link(xoutDepth.input)
spatialDetectionNetwork.outNetwork.link(nnNetworkOut.input)
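# Data flow: mono left/right -> StereoDepth -> spatial NN (fed by the RGB preview);
# detections, passthrough RGB/depth, and the raw NN output go to the host over XLink.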
# Connect to device and start pipeline
with dai.Device(pipeline) as device:
    # Output queues will be used to get the rgb frames and the spatial detection data from the outputs defined above
    qRgb = device.getOutputQueue(name="rgb", maxSize=4, blocking=False)
    qDet = device.getOutputQueue(name="detections", maxSize=4, blocking=False)
    qDepth = device.getOutputQueue(name="depth", maxSize=4, blocking=False)
    frame = None
    depthFrame = None  # avoid a NameError if the non-sync branch runs before a depth message arrives
    detections = []
    startTime = time.monotonic()
    counter = 0
    while True:
        if syncNN:
            inRgb = qRgb.get()
            inDet = qDet.get()
            inDepth = qDepth.get()
            frame = inRgb.getCvFrame()
            depthFrame = inDepth.getFrame()
            detections = inDet.detections
        else:
            inRgb = qRgb.tryGet()
            inDet = qDet.tryGet()
            inDepth = qDepth.tryGet()
            if inRgb is not None:
                frame = inRgb.getCvFrame()
            if inDet is not None:
                detections = inDet.detections
            if inDepth is not None:
                depthFrame = inDepth.getFrame()
        if frame is not None:
            height, width = frame.shape[:2]
            # Draw the detections on the frame
            for detection in detections:
                # Detection coordinates are normalized (0..1); scale them to pixels
                x1 = int(detection.xmin * width)
                y1 = int(detection.ymin * height)
                x2 = int(detection.xmax * width)
                y2 = int(detection.ymax * height)
                centerX = (x1 + x2) // 2
                centerY = (y1 + y2) // 2
                label = labelMap[detection.label] if detection.label < len(labelMap) else "unknown"
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
                cv2.circle(frame, (centerX, centerY), 2, (0, 0, 255), 2)
                # Use the on-device spatial coordinates for depth; indexing depthFrame
                # directly would mix up the 640x400 depth and 640x640 preview resolutions
                cv2.putText(frame, f"Depth: {int(detection.spatialCoordinates.z)}mm", (x1, y2 + 20),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
            cv2.imshow("Spatial Detection", frame)
        counter += 1
        if (time.monotonic() - startTime) > 1:
            print(f"FPS: {counter / (time.monotonic() - startTime)}")
            counter = 0
            startTime = time.monotonic()
        # Press 'q' to exit the loop
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
# Close OpenCV windows (the device itself is released by the `with` block)
cv2.destroyAllWindows()