Hi @daniqsilva
I'm not really too familiar with decoding, but try this (it looks sensible to me):
import numpy as np
import cv2

def upsample_and_combine(outputs, img_width, img_height, num_classes):
    # Upsample each output to the original image size
    # (num_classes is unused here, kept only for a consistent signature)
    upsampled_outputs = [cv2.resize(output, (img_width, img_height), interpolation=cv2.INTER_NEAREST) for output in outputs]
    # Combine the upsampled outputs
    # This example simply averages the outputs, but other techniques could be applied
    combined_output = np.mean(upsampled_outputs, axis=0)
    return combined_output

def decode_segmentation(output, img_width, img_height, num_classes):
    # Pick the highest-scoring class per pixel
    class_predictions = np.argmax(output, axis=-1)
    segmentation_map = np.zeros((output.shape[0], output.shape[1], 3), dtype=np.uint8)
    # Random colour per class (regenerated on every call; seed NumPy if you need stable colours)
    colors = np.random.randint(0, 256, size=(num_classes, 3), dtype=np.uint8)
    for class_id in range(num_classes):
        segmentation_map[class_predictions == class_id] = colors[class_id]
    # No-op if the map is already at the target size
    segmentation_map_resized = cv2.resize(segmentation_map, (img_width, img_height), interpolation=cv2.INTER_NEAREST)
    return segmentation_map_resized
# Assuming outputs is a list of the three YOLOv5-seg outputs, each an (H, W, C) array
outputs = [output1, output2, output3] # Placeholder for actual model outputs
num_classes = 20 # Example number of classes
img_width, img_height = 1280, 720 # Example dimensions
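# For a quick self-contained test without a model, random arrays of the right shape can stand in
# for the real outputs (the grid sizes below are just assumptions, not the actual YOLOv5-seg shapes):
# outputs = [np.random.rand(h, w, num_classes).astype(np.float32)
#            for (h, w) in [(90, 160), (45, 80), (23, 40)]]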
# Process the outputs
combined_output = upsample_and_combine(outputs, img_width, img_height, num_classes)
segmentation_map = decode_segmentation(combined_output, img_width, img_height, num_classes)
# segmentation_map now holds the final segmentation result
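If you want to sanity-check it visually, a minimal sketch is to blend it over the frame you ran inference on (here original_image is assumed to be that BGR frame at the same 1280x720 size; the blending weights are arbitrary):

# Weighted blend of the original frame and the colour-coded segmentation map
overlay = cv2.addWeighted(original_image, 0.6, segmentation_map, 0.4, 0)
cv2.imwrite("segmentation_overlay.png", overlay)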
Thanks,
Jaka