Simultaneous-Segmented-Depth-Prediction

Sleeping

File size: 4,390 Bytes

import cv2
import torch
import numpy as np
import time
from midas.model_loader import default_models, load_model
import os
import urllib.request
import spaces

# Download URL of every supported MiDaS checkpoint, keyed by model type.
# Each checkpoint lives under <release root>/<release tag>/<model type>.pt.
_MIDAS_RELEASE_ROOT = "https://github.com/isl-org/MiDaS/releases/download"

MODEL_FILE_URL = {
    model_name: f"{_MIDAS_RELEASE_ROOT}/{release_tag}/{model_name}.pt"
    for model_name, release_tag in (
        ("midas_v21_small_256", "v2_1"),
        ("dpt_hybrid_384", "v3"),
        ("dpt_large_384", "v3"),
        ("dpt_swin2_large_384", "v3_1"),
        ("dpt_beit_large_512", "v3_1"),
    )
}

class MonocularDepthEstimator:
    """Monocular depth estimation backed by a lazily loaded MiDaS model.

    Constructing an instance only records the configuration and makes sure the
    checkpoint file exists on disk. The network itself is loaded on first use,
    inside a ``spaces.GPU``-decorated method.
    """

    def __init__(self,
        model_type="midas_v21_small_256",
        model_weights_path="models/",
        optimize=False,
        side_by_side=False,
        height=None,
        square=False,
        grayscale=False):
        """Record the configuration and download the checkpoint if it is missing.

        Args:
            model_type: Key into ``MODEL_FILE_URL``; also the checkpoint's file stem.
            model_weights_path: Directory that holds the ``.pt`` checkpoints.
            optimize: Forwarded to ``load_model``. When true, ``predict`` also
                converts its input to channels-last half precision.
            side_by_side: Stored on the instance; not read by this class.
            height: Forwarded to ``load_model``.
            square: Forwarded to ``load_model``.
            grayscale: Stored on the instance; not read by this class.

        Raises:
            KeyError: The checkpoint is missing and ``model_type`` has no entry
                in ``MODEL_FILE_URL``.
        """
        # Don't initialize any CUDA/GPU stuff here
        self.model_type = model_type
        self.model_weights_path = model_weights_path
        self.is_optimize = optimize
        self.is_square = square
        self.is_grayscale = grayscale
        self.height = height
        self.side_by_side = side_by_side
        self.model = None
        self.transform = None
        self.net_w = None
        self.net_h = None

        print("Initializing parameters...")
        weights_file = self._weights_file()
        if not os.path.exists(weights_file):
            print("Model file not found. Downloading...")
            download_url = MODEL_FILE_URL[model_type]
            # The target directory may not exist on a fresh checkout.
            os.makedirs(os.path.dirname(weights_file) or ".", exist_ok=True)
            # Download to a side file and promote it only once complete, so an
            # interrupted transfer never leaves a truncated checkpoint that
            # would pass the existence check on the next start.
            partial_file = weights_file + ".part"
            try:
                urllib.request.urlretrieve(download_url, partial_file)
                os.replace(partial_file, weights_file)
            finally:
                if os.path.exists(partial_file):
                    os.remove(partial_file)
            print("Model file downloaded successfully.")

    def _weights_file(self):
        """Return the checkpoint path for the configured model type."""
        # os.path.join tolerates a directory given with or without a trailing
        # separator, unlike plain string concatenation.
        return os.path.join(self.model_weights_path, self.model_type + ".pt")

    @spaces.GPU
    def load_model_if_needed(self):
        """Load the network and its input transform unless already loaded."""
        if self.model is not None:
            return

        print("Loading MiDaS model...")
        # NOTE(review): a torch.device is passed instead of the bare string
        # 'cuda'. Upstream MiDaS compares the device against
        # torch.device("cuda") to decide whether to apply the `optimize`
        # half-precision conversion, and a plain string never matches.
        # Confirm the local midas.model_loader behaves the same way.
        self.model, self.transform, self.net_w, self.net_h = load_model(
            torch.device("cuda"),
            self._weights_file(),
            self.model_type,
            self.is_optimize,
            self.height,
            self.is_square
        )
        print("Model loaded successfully")

    @spaces.GPU
    def predict(self, image, target_size):
        """Run the network on one transformed image and resize the result.

        Args:
            image: NumPy array produced by ``self.transform``; a batch axis is
                prepended before it is passed to the model.
            target_size: Two-element sequence that is reversed before being
                passed as ``size`` to ``interpolate`` (``make_prediction``
                supplies ``(width, height)``).

        Returns:
            The bicubically resized prediction as a NumPy array, with all
            singleton dimensions squeezed out.
        """
        self.load_model_if_needed()
        img_tensor = torch.from_numpy(image).to('cuda').unsqueeze(0)

        if self.is_optimize:
            img_tensor = img_tensor.to(memory_format=torch.channels_last)
            img_tensor = img_tensor.half()

        with torch.no_grad():
            prediction = self.model.forward(img_tensor)
            prediction = (
                torch.nn.functional.interpolate(
                    prediction.unsqueeze(1),
                    size=target_size[::-1],
                    mode="bicubic",
                    align_corners=False,
                )
                .squeeze()
                .cpu()
                .numpy()
            )

        return prediction

    def process_prediction(self, depth_map):
        """Normalise a raw depth map and render a colour-mapped version of it.

        Args:
            depth_map: 2-D NumPy array of raw model output.

        Returns:
            ``(normalized_depth, depth_colormap)``, both scaled to ``[0, 1]``.
            The first is the min-max normalised depth. The second is the
            ``cv2.COLORMAP_INFERNO`` rendering of it.
        """
        depth_min = depth_map.min()
        depth_max = depth_map.max()
        depth_range = depth_max - depth_min
        if depth_range > 0:
            normalized_depth = 255 * (depth_map - depth_min) / depth_range
        else:
            # A constant map has zero range; dividing would yield NaNs that
            # turn into garbage when cast to uint8 below.
            normalized_depth = np.zeros_like(depth_map)
        grayscale_depthmap = np.repeat(np.expand_dims(normalized_depth, 2), 3, axis=2)
        depth_colormap = cv2.applyColorMap(np.uint8(grayscale_depthmap), cv2.COLORMAP_INFERNO)
        return normalized_depth/255, depth_colormap/255

    @spaces.GPU
    def make_prediction(self, image):
        """Estimate depth for a single image.

        Args:
            image: H x W x C NumPy image. Its channel axis is reversed before
                inference (presumably BGR from OpenCV -> RGB; verify against
                callers).

        Returns:
            ``(depthmap, depth_colormap)`` as returned by ``process_prediction``.

        Raises:
            Exception: Any failure is printed with its traceback and re-raised.
        """
        try:
            print("Starting depth estimation...")
            image = image.copy()
            original_image_rgb = np.flip(image, 2)
            self.load_model_if_needed()
            image_tranformed = self.transform({"image": original_image_rgb/255})["image"]
            # shape[1::-1] is (width, height); predict() reverses it again.
            pred = self.predict(image_tranformed, target_size=original_image_rgb.shape[1::-1])
            depthmap, depth_colormap = self.process_prediction(pred)
            print("Depth estimation complete")
            return depthmap, depth_colormap
        except Exception as e:
            print(f"Error in make_prediction: {str(e)}")
            import traceback
            print(traceback.format_exc())
            raise


# This guard must live at module level: inside the class body the class name
# is not bound yet, so instantiating it there raises NameError.
if __name__ == "__main__":
    # Smoke test: estimate depth for the first frame of the sample video.
    sample_video = "assets/videos/testvideo2.mp4"
    depth_estimator = MonocularDepthEstimator(model_type="dpt_hybrid_384")
    capture = cv2.VideoCapture(sample_video)
    try:
        frame_ok, frame = capture.read()
    finally:
        capture.release()
    if not frame_ok:
        raise SystemExit(f"Could not read a frame from {sample_video}")
    depthmap, depth_colormap = depth_estimator.make_prediction(frame)
    print(f"Depth map shape: {depthmap.shape}")