Hi,

We're using a camera with the pipeline below. The camera runs fine for around 4 hours. Then it stops producing depth images and features.

```

pipeline = Pipeline()

color = pipeline.createColorCamera()

color.setBoardSocket(CameraBoardSocket.RGB)

color.setFps(10.0)

color.setResolution(ColorCameraProperties.SensorResolution.THE_4_K)

color.setVideoSize(parameters.color_video_width, parameters.color_video_height)

color.setInterleaved(False)

color.initialControl.setSharpness(parameters.sharpness)

color.initialControl.setLumaDenoise(parameters.luma_denoise)

color.initialControl.setChromaDenoise(parameters.chroma_denoise)

color.initialControl.setAutoFocusLensRange(parameters.focus_range_infinity_position, parameters.focus_range_macro_position)

color.initialControl.setAutoExposureLimit(parameters.exposure_limit_us)

# The detection model can't handle 10fps so we drop every 2nd frame and downsample to 5fps

color_downsample_script = pipeline.createScript()

color_downsample_script.setScript(create_downsample_script(DOWNSAMPLE_SCRIPT_STREAM_IN, DOWNSAMPLE_SCRIPT_STREAM_OUT, 2))

color_resize = pipeline.createImageManip()

color_resize.initialConfig.setResize(parameters.color_preview_width, parameters.color_preview_height)

color_resize.initialConfig.setFrameType(ImgFrame.Type.BGR888p)

# Sending high resolution images at 5fps takes too much bandwidth, so we downsample the high resolution frame rate to 1 frame every 4s.

hi_res_downsample_script = pipeline.createScript()

hi_res_downsample_script.setScript(create_downsample_script(DOWNSAMPLE_SCRIPT_STREAM_IN, DOWNSAMPLE_SCRIPT_STREAM_OUT, parameters.high_resolution_image_modulo))

hi_res_encoder = pipeline.createVideoEncoder()

hi_res_encoder.setProfile(VideoEncoderProperties.Profile.MJPEG)

hi_res_encoder.setQuality(parameters.high_resolution_image_quality)

left = pipeline.createMonoCamera()

left.setBoardSocket(CameraBoardSocket.LEFT)

left.setFps(10.0)

left.setResolution(MonoCameraProperties.SensorResolution.THE_400_P)

# left.initialControl.setManualExposure(100, 400)

# left.initialControl.setAutoExposureLock(True)

right = pipeline.createMonoCamera()

right.setBoardSocket(CameraBoardSocket.RIGHT)

right.setFps(10.0)

right.setResolution(MonoCameraProperties.SensorResolution.THE_400_P)

# right.initialControl.setManualExposure(100, 400)

# right.initialControl.setAutoExposureLock(True)

depth = pipeline.createStereoDepth()

# depth.setDepthAlign(CameraBoardSocket.RIGHT)

# depth.setExtendedDisparity(True)

depth.setLeftRightCheck(True)  # LR-check required for depth alignment

depth.initialConfig.setConfidenceThreshold(parameters.depth_confidence_threshold)

depth.initialConfig.setMedianFilter(StereoDepthProperties.MedianFilter.MEDIAN_OFF)

depth.initialConfig.setSubpixel(True)

depth.initialConfig.setBilateralFilterSigma(0)

depth.initialConfig.setSubpixelFractionalBits(5)

depth.initialConfig.setDisparityShift(30)

depth.initialConfig.setDepthUnit(RawStereoDepthConfig.AlgorithmControl.DepthUnit.CUSTOM)

depth_config = depth.initialConfig.get()

depth_config.algorithmControl.customDepthUnitMultiplier = parameters.depth_unit_multiplier

depth.initialConfig.set(depth_config)

depth.enableDistortionCorrection(True)

features = pipeline.createFeatureTracker()

features.setHardwareResources(numShaves=2, numMemorySlices=2)

model_name = parameters.detection_model_name

model_config_path = MODEL_FOLDER / (model_name + '.json')

with open(model_config_path) as fp:

    model_config = json.load(fp)

model_blob_path = MODEL_FOLDER / (model_name + '.blob')

labels = model_config['labels']

coordinate_size = model_config['coordinate_size']

anchors = model_config['anchors']

anchor_masks = model_config['anchor_masks']

iou_threshold = model_config['iou_threshold']

detector_confidence_threshold = model_config['confidence_threshold']

detection = pipeline.createYoloDetectionNetwork()

detection.setBlobPath(model_blob_path)

detection.setAnchors(anchors)

detection.setAnchorMasks(anchor_masks)

detection.setConfidenceThreshold(detector_confidence_threshold)

detection.setNumClasses(len(labels))

detection.setCoordinateSize(coordinate_size)

detection.setIouThreshold(iou_threshold)

detection.setNumInferenceThreads(2)

detection.input.setBlocking(False)

detection.input.setQueueSize(1)

control_in = pipeline.createXLinkIn()

color_out = pipeline.createXLinkOut()

hi_res_out = pipeline.createXLinkOut()

depth_out = pipeline.createXLinkOut()

features_out = pipeline.createXLinkOut()

detection_out = pipeline.createXLinkOut()

control_in.setStreamName(CONTROL_STREAM_NAME)

color_out.setStreamName(COLOR_STREAM_NAME)

hi_res_out.setStreamName(HI_RES_STREAM_NAME)

depth_out.setStreamName(DEPTH_RIGHT_STREAM_NAME)

features_out.setStreamName(FEATURES_STREAM_NAME)

detection_out.setStreamName(DETECTIONS_STREAM_NAME)

# Also tried without these 2 lines but it didnt make a difference.

depth_out.input.setBlocking(False)

depth_out.input.setQueueSize(1)

control_in.out.link(color.inputControl)

color.video.link(color_downsample_script.inputs[DOWNSAMPLE_SCRIPT_STREAM_IN])

color_downsample_script.outputs[DOWNSAMPLE_SCRIPT_STREAM_OUT].link(color_resize.inputImage)

color_downsample_script.outputs[DOWNSAMPLE_SCRIPT_STREAM_OUT].link(hi_res_downsample_script.inputs[DOWNSAMPLE_SCRIPT_STREAM_IN])

color_resize.out.link(color_out.input)

color_resize.out.link(detection.input)

hi_res_downsample_script.outputs[DOWNSAMPLE_SCRIPT_STREAM_OUT].link(hi_res_encoder.input)

hi_res_encoder.bitstream.link(hi_res_out.input)

left.out.link(depth.left)

right.out.link(depth.right)

depth.depth.link(depth_out.input)

depth.rectifiedRight.link(features.inputImage)

features.outputFeatures.link(features_out.input)

detection.out.link(detection_out.input)

```

and

```

device = Device(self.__pipeline, self.__device_info)

color_queue = device.getOutputQueue(name=COLOR_STREAM_NAME, maxSize=1, blocking=False)

hi_res_queue = device.getOutputQueue(name=HI_RES_STREAM_NAME, maxSize=1, blocking=False)

depth_queue = device.getOutputQueue(name=DEPTH_RIGHT_STREAM_NAME, maxSize=1, blocking=False)

features_queue = device.getOutputQueue(name=FEATURES_STREAM_NAME, maxSize=1, blocking=False)

detections_queue = device.getOutputQueue(name=DETECTIONS_STREAM_NAME, maxSize=1, blocking=False)

color_queue.addCallback(self.__get_and_publish_color_image)

hi_res_queue.addCallback(self.__get_and_publish_hi_res_image)

depth_queue.addCallback(self.__get_and_publish_depth_image)

detections_queue.addCallback(self.__get_and_publish_detections)

features_queue.addCallback(self.__get_and_publish_features)

```

We also collected some logs (see attached file). No errors whatsoever. Around line 84215 the camera stops producing depth images and features. The logs then only show a drop in CPU and NOC ddr. Do you have any idea what could be wrong? Thanks in advance!

Having a hard time attaching the complete log file. So here some screen shots:

    WouterOddBot
    Very difficult to know what the exact issue is. My first guess would be that some node in the pipeline runs out of frames in a pool.. This causes the node to stop producing (processing) images.

    Check at which node this stops, then up the setNumFramesPool(num) for that node and nodes before that.

    Thanks,
    Jaka

    Hi @jakaskerl ,
    Ok thanks for the tip. We'll have to find a time where we can run the camera for a long time again to see if it makes a difference. In the mean time, it seems that the camera keeps running for 20h+ in a static environment, but only 4h when it's moving. Does that make sense to you?

      WouterOddBot
      I do - Encoder, detection and features will change the amount of messages or their size when movement is introduced.

      Encoder will lower information transfer when images are static because the information in an image stays the same. When moving, the entropy increases so more info is required.

      Detector will produce more detections when objects are in the scene. This potentially increases the processing required for following nodes since they have to process each detections separately.

      Same goes for features.

      Thanks,
      Jaka

      Ok so I increased numframespool for the left and right camera nodes and the depth node from 3 (default value) to 5, but it didn't make a difference.

      What information do we need to see what breaks the pipeline? And how can I retrieve that information from the camera?

      Apart from that, I don't remember seeing this until perhaps a few months ago. And afaik we didn't make significant code changes to the affected part of the pipeline*. Could it be due to a depthai update? Since it only happens after 4h it feels a bit like a memory leak or something? At least I can check if we forgot to update any firmware.

      *We did make a significant change in the detection part of the pipeline. We used to have the color camera node run at 5fps and the left/right camera nodes at 10fps. But then the color and depth frames aren't closely synced. So now we have the color and left/right nodes run at 10fps but we downsample the color frames to 5fps (we drop every 2nd frame). Could that still make a difference?

        WouterOddBot

        WouterOddBot What information do we need to see what breaks the pipeline? And how can I retrieve that information from the camera?

        It's not currently reported unless you are looking at a crash dump which might give some additional info, but it's difficult to find the cause. We are working on making this logging better.

        WouterOddBot Could it be due to a depthai update? Since it only happens after 4h it feels a bit like a memory leak or something? At least I can check if we forgot to update any firmware.

        It does sound like a memory leak as well, perhaps you can try some older FW where all needed features are still present. Though I would say the change you have made could affect this as well. We have seen such crashes previously as well, but many turned out to be intel's fault (MyriadX memory issues).. Hope that is not the case here.

        Thanks,
        Jaka

        Ok so I made something that we can retrieve crash dumps.

        I suggested it may be a memory leak, but the logs don't show any changes or out-of-memory errors (see images attached). Perhaps it is still something else?

        Tried retrieving a crash dump, but the camera doesn't make a crash dump when it stops working.

        4 days later

        @jakaskerl We suspect that this could relate to how fast the camera moves. Which makes sense according to your entropy story. But we'd still like to get a clue for a solution? Thanks in advance.

          WouterOddBot
          If that is the case and videoEncoder is the problem then you should check the bandwidth that is consumed by the pipeline.. How about you lower the quality to a minimum. That way, even when moving the difference in bitstream size should be negligible - or perhaps use setBitrate() to lock the size.

          Thanks,
          Jaka

            5 days later

            Hi @jakaskerl Can we perhaps schedule a meeting about this? We've been trying to fix this for a few weeks now, but we can't see what's going on. And our robots are relying on your cameras.

            I have a similar problem. I run a pipeline of three cameras (rgb, two monos), 10fps, 720p, sent to a jpeg encoder, and the pipeline will crash in less than 20 minutes. It crashes when there are some movements in the scene. I got crash dumps most of the time and already sent one to the forum, but got not much of a concrete response from Luxonis. Basically all of the dumps mentioned the words: "thread, memory pointer, synchronization". I lowered the pipeline to 6fps; the same thing happened.

            I never expect things like these would happen.
            Please HELP. Luxonis.

            @jkchen46033 could you perhaps send over the crash dumps you are mentioning? We'd like to repro this locally so we can debug it.

              @WouterOddBot could you perhaps provide a minimal repro example, so we can try to repro it locally? Feel free to send it over to erik@luxonis.com and I'll set it up locally & connect a debugger to the camera. Please also mention the depthai version, camera used, host computer/os used (perhaps relevant), cable/powering specifics, etc., just so we can have as close to a 1:1 repro as possible.
              Thanks, Erik

                Hi jakaskerl
                We can lower the quality of the hi-res images, but the camera sends only very few of them. Most of them are dismissed. So they should not take too much bandwidth?

                erik Here is the dump. Let me know if you need additional info. Thanks a lot for your help.

                My pipeline setup this way: poe, 3 lens, 3 jpeg encoder, all 10fps, two monos 720p, rgb 1080p, jpeg to script node, pumping out urls: /mjpeg, /left, /right, all in mjpeg multipart frames.

                This dump is produced today.

                {"crashReports":[{"crashedThreadId":167837697,"errorSource":"RTEMS_FATAL_SOURCE_EXCEPTION","errorSourceInfo":{"assertContext":{"fileName":"","functionName":"","line":0},"errorId":0,"trapContext":{"trapAddress":2204218472,"trapName":"Bad trap","trapNumber":9}},"processor":0,"threadCallstack":[{"callStack":[{"callSite":2200385356,"calledTarget":2200381832,"context":"","framePointer":2211279768},{"callSite":2205098404,"calledTarget":0,"context":"Thread handler","framePointer":2211279864},{"callSite":1881154788,"calledTarget":0,"context":"Thread handler","framePointer":2211279960},{"callSite":2205098556,"calledTarget":1881154720,"context":"","framePointer":2211280080},{"callSite":2204985004,"calledTarget":2205098536,"context":"","framePointer":2211280184},{"callSite":2205931196,"calledTarget":0,"context":"Thread handler","framePointer":2211280288},{"callSite":2204298320,"calledTarget":0,"context":"Thread handler","framePointer":2213073296},{"callSite":2204221036,"calledTarget":2204218436,"context":"","framePointer":2213073592},{"callSite":2204208956,"calledTarget":0,"context":"Thread handler","framePointer":2213073864},{"callSite":2204201828,"calledTarget":0,"context":"Thread handler","framePointer":2213074032},{"callSite":2204165556,"calledTarget":0,"context":"Thread handler","framePointer":2213074272},{"callSite":2204064028,"calledTarget":0,"context":"Thread handler","framePointer":2213074392},{"callSite":2204052024,"calledTarget":2204064100,"context":"","framePointer":2213074488},{"callSite":2204054424,"calledTarget":2204051528,"context":"","framePointer":2213074584},{"callSite":2204064028,"calledTarget":0,"context":"Thread handler","framePointer":2213074680},{"callSite":2204050908,"calledTarget":2204064100,"context":"","framePointer":2213074776},{"callSite":2200546904,"calledTarget":0,"context":"Thread 
handler","framePointer":2213074872},{"callSite":2200707600,"calledTarget":2200546064,"context":"","framePointer":2213074984},{"callSite":2200964480,"calledTarget":0,"context":"Thread handler","framePointer":2213075080},{"callSite":2200973128,"calledTarget":0,"context":"Thread handler","framePointer":2213075176},{"callSite":167837698,"calledTarget":1195661140,"context":"Thread exit","framePointer":0}],"instructionPointer":2200385356,"stackBottom":2212944504,"stackPointer":2211279768,"stackTop":2213075575,"threadId":167837697,"threadName":"CBTH","threadStatus":"READY"},{"callStack":[{"callSite":2205067204,"calledTarget":1881154236,"context":"","framePointer":2213731360},{"callSite":2205067016,"calledTarget":2205067040,"context":"","framePointer":2213731464},{"callSite":2201052240,"calledTarget":2205066912,"context":"","framePointer":2213731584},{"callSite":2205091048,"calledTarget":0,"context":"Thread handler","framePointer":2213731688},{"callSite":1881154668,"calledTarget":0,"context":"Thread handler","framePointer":2213731784},{"callSite":1881154572,"calledTarget":0,"context":"Thread exit","framePointer":2213731880}],"instructionPointer":2205067204,"stackBottom":2213600904,"stackPointer":2213731360,"stackTop":2213731975,"threadId":167837698,"threadName":"TWDG","threadStatus":"WAITING_FOR_EVENT"},{"callStack":[{"callSite":2205067204,"calledTarget":1881154236,"context":"","framePointer":2214125496},{"callSite":2205072104,"calledTarget":2205067040,"context":"","framePointer":2214125600},{"callSite":2205075072,"calledTarget":2205071996,"context":"","framePointer":2214125720},{"callSite":2205091048,"calledTarget":0,"context":"Thread handler","framePointer":2214125824},{"callSite":1881154668,"calledTarget":0,"context":"Thread handler","framePointer":2214125920},{"callSite":1881154572,"calledTarget":0,"context":"Thread 
exit","framePointer":2214126016}],"instructionPointer":2205067204,"stackBottom":2213995040,"stackPointer":2214125496,"stackTop":2214126111,"threadId":167837699,"threadName":"TIME","threadStatus":"WAITING_FOR_SYSTEM_EVENT"},{"callStack":[{"callSite":2205067204,"calledTarget":1881154236,"context":"","framePointer":2214256720},{"callSite":2205072104,"calledTarget":2205067040,"context":"","framePointer":2214256824},{"callSite":2204990304,"calledTarget":2205071996,"context":"","framePointer":2214256944},{"callSite":2205091048,"calledTarget":0,"context":"Thread handler","framePointer":2214257048},{"callSite":1881154668,"calledTarget":0,"context":"Thread handler","framePointer":2214257144},{"callSite":1881154572,"calledTarget":0,"context":"Thread exit","framePointer":2214257240}],"instructionPointer":2205067204,"stackBottom":2214126264,"stackPointer":2214256720,"stackTop":2214257335,"threadId":167837700,"threadName":"IRQS","threadStatus":"WAITING_FOR_SYSTEM_EVENT"},{"callStack":[{"callSite":2205067204,"calledTarget":1881154236,"context":"","framePointer":2214387944},{"callSite":2205067016,"calledTarget":2205067040,"context":"","framePointer":2214388048},{"callSite":2204647304,"calledTarget":2205066912,"context":"","framePointer":2214388168},{"callSite":2205091048,"calledTarget":0,"context":"Thread handler","framePointer":2214388272},{"callSite":1881154668,"calledTarget":0,"context":"Thread handler","framePointer":2214388368},{"callSite":1881154572,"calledTarget":0,"context":"Thread 
exit","framePointer":2214388464}],"instructionPointer":2205067204,"stackBottom":2214257488,"stackPointer":2214387944,"stackTop":2214388559,"threadId":167837701,"threadName":"swi6","threadStatus":"WAITING_FOR_EVENT"},{"callStack":[{"callSite":2203950260,"calledTarget":1881154236,"context":"","framePointer":2214519176},{"callSite":2203893028,"calledTarget":2203950744,"context":"","framePointer":2214519280},{"callSite":2204655088,"calledTarget":2203892796,"context":"","framePointer":2214519376},{"callSite":2205091048,"calledTarget":0,"context":"Thread handler","framePointer":2214519496},{"callSite":1881154668,"calledTarget":0,"context":"Thread handler","framePointer":2214519592},{"callSite":1881154572,"calledTarget":0,"context":"Thread exit","framePointer":2214519688}],"instructionPointer":2203950260,"stackBottom":2214388712,"stackPointer":2214519176,"stackTop":2214519783,"threadId":167837702,"threadName":"config","threadStatus":"WAITING_FOR_BSD_WAKEUP"},{"callStack":[{"callSite":2203950260,"calledTarget":1881154236,"context":"","framePointer":2214650400},{"callSite":2203893028,"calledTarget":2203950744,"context":"","framePointer":2214650504},{"callSite":2203951260,"calledTarget":2203892796,"context":"","framePointer":2214650600},{"callSite":2205091048,"calledTarget":0,"context":"Thread handler","framePointer":2214650720},{"callSite":1881154668,"calledTarget":0,"context":"Thread handler","framePointer":2214650816},{"callSite":1881154572,"calledTarget":0,"context":"Thread 
exit","framePointer":2214650912}],"instructionPointer":2203950260,"stackBottom":2214519936,"stackPointer":2214650400,"stackTop":2214651007,"threadId":167837703,"threadName":"kqueue","threadStatus":"WAITING_FOR_BSD_WAKEUP"},{"callStack":[{"callSite":2205067204,"calledTarget":1881154236,"context":"","framePointer":2214781616},{"callSite":2205067016,"calledTarget":2205067040,"context":"","framePointer":2214781720},{"callSite":2204647304,"calledTarget":2205066912,"context":"","framePointer":2214781840},{"callSite":2205091048,"calledTarget":0,"context":"Thread handler","framePointer":2214781944},{"callSite":1881154668,"calledTarget":0,"context":"Thread handler","framePointer":2214782040},{"callSite":1881154572,"calledTarget":0,"context":"Thread exit","framePointer":2214782136}],"instructionPointer":2205067204,"stackBottom":2214651160,"stackPointer":2214781616,"stackTop":2214782231,"threadId":167837704,"threadName":"swi5","threadStatus":"WAITING_FOR_EVENT"},{"callStack":[{"callSite":2203950260,"calledTarget":1881154236,"context":"","framePointer":2214912848},{"callSite":2203893028,"calledTarget":2203950
                "crash_dump.json" [Incomplete last line] 1 line, 94228 bytes

                • erik replied to this.

                  jkchen46033 could you please upload the json to gdrive and send the link? as your reply is incomplete