Hi there!

I'd like to maximise the FOV before feeding frames into my YOLO model. At the moment, when I feed a frame in, it just defaults to cropping the centre 640x640, which isn't very useful. When I resize it instead, the image looks like this:

[Image: current letterboxed 640x640 output]

However, I'd like to remove the letterboxing and instead squeeze along the vertical axis (originally horizontal, but vertical now that I've rotated the image 90 degrees), so that it theoretically ends up something like this (a rough mockup done in MS Paint):

[Image: MS Paint mockup of the desired squeezed result]

Basically, what I'm trying to do is squeeze the image vertically rather than letterbox it horizontally.
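For reference, here is the difference between the two behaviours, sketched on the host with plain OpenCV (purely an illustration of what I mean, not DepthAI code):

import cv2
import numpy as np

frame = np.zeros((1080, 1920, 3), dtype=np.uint8)  # stand-in for a 1080p frame

# Letterboxed: keep the aspect ratio, then pad the remainder with black bars
scale = 640 / frame.shape[1]
resized = cv2.resize(frame, (640, int(frame.shape[0] * scale)))  # 640x360
pad_top = (640 - resized.shape[0]) // 2
pad_bottom = 640 - resized.shape[0] - pad_top
letterboxed = cv2.copyMakeBorder(resized, pad_top, pad_bottom, 0, 0,
                                 cv2.BORDER_CONSTANT, value=(0, 0, 0))

# Squeezed: ignore the aspect ratio and stretch straight to 640x640
squeezed = cv2.resize(frame, (640, 640))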

This is my ImageManip config so far.

image_manip = pipeline.create(dai.node.ImageManip)
image_manip.initialConfig.setKeepAspectRatio(False)
image_manip.initialConfig.setResize(640, 640)  # Resize to 640x640
image_manip.initialConfig.setRotationDegrees(90)

Thanks in advance for your help!

    TheOracle
    I guess you can do the resize on the ColorCamera node with setPreviewSize and setPreviewKeepAspectRatio, then use an ImageManip to rotate it. Or perhaps the ColorCamera does the operations in the right order, so the ImageManip isn't needed...
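    Something like this, as a rough sketch (assuming the depthai v2 API; manipRot is just an illustrative name):

    camRgb = pipeline.create(dai.node.ColorCamera)
    camRgb.setPreviewSize(640, 640)                # resize directly on the camera
    camRgb.setPreviewKeepAspectRatio(False)        # squeeze instead of letterboxing

    manipRot = pipeline.create(dai.node.ImageManip)
    manipRot.initialConfig.setRotationDegrees(90)  # rotate the already-squeezed frame
    manipRot.setMaxOutputFrameSize(640 * 640 * 3)  # bytes for a 640x640 BGR frame
    camRgb.preview.link(manipRot.inputImage)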

    Thanks,
    Jaka

      5 days later

      jakaskerl

      I'm sorry, can you please elaborate on that? Can you provide an example (ChatGPT doesn't produce a satisfactory output)?

        TheOracle

        image_manip = pipeline.create(dai.node.ImageManip)
        image_manip.initialConfig.setKeepAspectRatio(False)
        image_manip.initialConfig.setResize(640, 640)  # Resize to 640x640 (squeezed)
        image_manip.setMaxOutputFrameSize(1228800)  # 640 * 640 * 3 bytes
        camRgb.preview.link(image_manip.inputImage)

        manip2 = pipeline.create(dai.node.ImageManip)
        manip2.initialConfig.setRotationDegrees(90)
        manip2.setMaxOutputFrameSize(1228800)  # 640 * 640 * 3 bytes
        image_manip.out.link(manip2.inputImage)

        Thanks,
        Jaka

        Thanks for your reply.

        This is my program so far:

        # Create DepthAI pipeline
        pipeline = dai.Pipeline()
        camRgb = pipeline.create(dai.node.ColorCamera)
        detectionNetwork = pipeline.create(dai.node.YoloDetectionNetwork)
        objectTracker = pipeline.create(dai.node.ObjectTracker)
        #imageManip = pipeline.create(dai.node.ImageManip)  # Node for rotation
        sysLog = pipeline.create(dai.node.SystemLogger)
        # sysLog.setRate(1)  # 1 Hz
        
        linkOut = pipeline.create(dai.node.XLinkOut)
        xlinkOut = pipeline.create(dai.node.XLinkOut)
        trackerOut = pipeline.create(dai.node.XLinkOut)
        
        xlinkOut.setStreamName("preview")
        trackerOut.setStreamName("tracklets")
        linkOut.setStreamName("sysinfo")
        
        # Camera prop
        # camRgb.setVideoSize(1920, 1080)  # Use full frame to allow cropping from the top
        camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P)
        camRgb.setInterleaved(False)
        camRgb.setColorOrder(dai.ColorCameraProperties.ColorOrder.BGR)
        camRgb.setFps(30)
        
        """
        # Configure the ImageManip node to rotate the image 90 degrees clockwise
        imageManip.initialConfig.setRotationDegrees(90)
        # imageManip.initialConfig.setVerticalFlip(True)
        # imageManip.initialConfig.setHorizontalFlip(True)
        imageManip.inputImage.setBlocking(False)
        # imageManip.initialConfig.setCropRect(0, 0, 640, 640)  # Crop top 640x640
        imageManip.initialConfig.setKeepAspectRatio(False)  # Force squeeze instead of letterbox
        imageManip.setMaxOutputFrameSize(640 * 640 * 3)  # Ensure output size is correct
        imageManip.initialConfig.setResize(640, 640)  # Resize to match NN input
        # Linking nodes shebang
        camRgb.preview.link(imageManip.inputImage)  # Send camera preview to ImageManip
        imageManip.out.link(detectionNetwork.input)  # Pass rotated image to detection
        
        """
        # First ImageManip - Resizing
        image_manip = pipeline.create(dai.node.ImageManip)
        image_manip.initialConfig.setKeepAspectRatio(False)
        image_manip.initialConfig.setResize(640, 640)
        image_manip.setMaxOutputFrameSize(640 * 640 * 3)
        
        # Second ImageManip - Rotation
        manip2 = pipeline.create(dai.node.ImageManip)
        manip2.initialConfig.setRotationDegrees(90)
        manip2.setMaxOutputFrameSize(640 * 640 * 3)
        
        # Link the pipeline
        camRgb.preview.link(image_manip.inputImage)  # Camera → Resize
        image_manip.out.link(manip2.inputImage)      # Resize → Rotate
        manip2.out.link(detectionNetwork.input)      # Rotate → Neural Network
        
        detectionNetwork.setAnchorMasks({})
        detectionNetwork.setIouThreshold(0.3)
        
        detectionNetwork.passthrough.link(objectTracker.inputTrackerFrame)
        detectionNetwork.out.link(objectTracker.inputDetections)
        objectTracker.passthroughTrackerFrame.link(xlinkOut.input)
        objectTracker.out.link(trackerOut.input)
        
        sysLog.out.link(linkOut.input)
        
        # Tracker configuration: short-term imageless tracking
        objectTracker.setDetectionLabelsToTrack([0])  # Adjust label index for the target class
        objectTracker.setTrackerType(dai.TrackerType.SHORT_TERM_IMAGELESS)
        objectTracker.setTrackerIdAssignmentPolicy(dai.TrackerIdAssignmentPolicy.UNIQUE_ID)

        This doesn't have any effect on the image at all. Could you verify my implementation, please?

        Could you perhaps also assist me with another program?

        When attempting to run this script (with compiled .blob files for the object detector and embedder), I get this error:

        [2025-04-01 13:23:41.317] [depthai] [error] Callback with id: 1 throwed an exception: AttributeError: 'depthai.NNData' object has no attribute 'detections'
        
        At:
          /home/big-server/.local/lib/python3.10/site-packages/depthai_sdk/oak_outputs/xout/xout_nn.py(320): new_msg
          /home/big-server/.local/lib/python3.10/site-packages/depthai_sdk/oak_device.py(36): new_msg
          /home/big-server/.local/lib/python3.10/site-packages/depthai_sdk/oak_device.py(31): <lambda>

        Is this because there are no detections at that time? If so, how can I solve this?

        Thank you in advance.

          TheOracle

          #!/usr/bin/env python3
          
          import cv2
          import depthai as dai
          
          # Create DepthAI pipeline
          pipeline = dai.Pipeline()
          camRgb = pipeline.create(dai.node.ColorCamera)
          
          
          linkOut = pipeline.create(dai.node.XLinkOut)
          xlinkOut = pipeline.create(dai.node.XLinkOut)
          trackerOut = pipeline.create(dai.node.XLinkOut)
          
          xlinkOut.setStreamName("preview")
          trackerOut.setStreamName("tracklets")
          linkOut.setStreamName("sysinfo")
          
          # Camera prop
          # camRgb.setVideoSize(1920, 1080)  # Use full frame to allow cropping from the top
          camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P)
          # camRgb.setImageOrientation(dai.CameraImageOrientation.ROTATE_180_DEG)  # optional; setImageOrientation needs an orientation argument
          camRgb.setInterleaved(False)
          camRgb.setColorOrder(dai.ColorCameraProperties.ColorOrder.BGR)
          camRgb.setFps(30)
          
          """
          # Configure the ImageManip node to rotate the image 90 degrees clockwise
          imageManip.initialConfig.setRotationDegrees(90)
          # imageManip.initialConfig.setVerticalFlip(True)
          # imageManip.initialConfig.setHorizontalFlip(True)
          imageManip.inputImage.setBlocking(False)
          # imageManip.initialConfig.setCropRect(0, 0, 640, 640)  # Crop top 640x640
          imageManip.initialConfig.setKeepAspectRatio(False)  # Force squeeze instead of letterbox
          imageManip.setMaxOutputFrameSize(640 * 640 * 3)  # Ensure output size is correct
          imageManip.initialConfig.setResize(640, 640)  # Resize to match NN input
          # Linking nodes shebang
          camRgb.preview.link(imageManip.inputImage)  # Send camera preview to ImageManip
          imageManip.out.link(detectionNetwork.input)  # Pass rotated image to detection
          
          """
          # First ImageManip - Resizing
          image_manip = pipeline.create(dai.node.ImageManip)
          image_manip.initialConfig.setKeepAspectRatio(False)
          image_manip.initialConfig.setResize(640, 640)
          image_manip.setMaxOutputFrameSize(640 * 640 * 3)
          
          # Second ImageManip - Rotation
          manip2 = pipeline.create(dai.node.ImageManip)
          manip2.initialConfig.setRotationDegrees(90)
          manip2.setMaxOutputFrameSize(640 * 640 * 3)
          
          # Link the pipeline
          camRgb.preview.link(image_manip.inputImage)  # Camera → Resize
          image_manip.out.link(manip2.inputImage)

          manipRgbOut = pipeline.create(dai.node.XLinkOut)
          manipRgbOut.setStreamName("manip_rgb")
          image_manip.out.link(manipRgbOut.input)  # this link was missing; it feeds the "manip_rgb" queue

          manipLeftOut = pipeline.create(dai.node.XLinkOut)
          manipLeftOut.setStreamName("manip_left")
          manip2.out.link(manipLeftOut.input)
          
          with dai.Device(pipeline) as device:
              qLeft = device.getOutputQueue(name="manip_left", maxSize=8, blocking=False)
              qRgb = device.getOutputQueue(name="manip_rgb", maxSize=8, blocking=False)
          
              while True:
                  inLeft = qLeft.tryGet()
                  if inLeft is not None:
                      cv2.imshow('Left rotated', inLeft.getCvFrame())
          
                  inRgb = qRgb.tryGet()
                  if inRgb is not None:
                      cv2.imshow('Color rotated', inRgb.getCvFrame())
          
                  if cv2.waitKey(1) == ord('q'):
                      break

          This works on my side. Perhaps you are viewing the wrong image on the host?
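          A quick sanity check, if it helps (a hypothetical tweak to the loop above, not something you have to keep): print the frame shape from each queue so you can confirm which stream you are actually displaying.

          inRgb = qRgb.tryGet()
          if inRgb is not None:
              frame = inRgb.getCvFrame()
              # a squeezed 640x640 BGR frame should print (640, 640, 3)
              print("manip_rgb shape:", frame.shape)
              cv2.imshow('Color rotated', frame)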

          Thanks,
          Jaka