# Hugging Face Space: lakshmi082024/sam_object
# Space status: Runtime error
# File size: 3,373 Bytes

import gradio as gr
import torch
import numpy as np
import cv2
from PIL import Image
import pandas as pd
from torchvision.transforms import Compose, Resize, ToTensor, Normalize
from segment_anything import SamPredictor, sam_model_registry
import os

# Load SAM and MiDaS models
def load_models():
    """Load the SAM predictor, the MiDaS depth network and its input transform.

    The SAM checkpoint is read from the working directory, the MiDaS
    ``DPT_Large`` model is fetched through ``torch.hub``, and both models are
    moved to CUDA when it is available (CPU otherwise).

    Returns:
        A ``(predictor, midas, midas_transform)`` tuple: a ``SamPredictor``
        wrapping the SAM model, the MiDaS network in eval mode, and a
        torchvision ``Compose`` that resizes, tensorizes and normalizes a
        PIL image.

    Raises:
        FileNotFoundError: If the SAM checkpoint file is missing from the
            working directory.
    """
    sam_checkpoint = "sam_vit_b_01ec64.pth"
    # The registry key must name the same architecture as the checkpoint:
    # "sam_vit_b_*.pth" holds ViT-B weights, which cannot be loaded into the
    # ViT-H model that the previous "vit_h" key built.
    sam_model_type = "vit_b"
    if not os.path.exists(sam_checkpoint):
        raise FileNotFoundError("Please upload the SAM checkpoint file to the working directory.")

    device = "cuda" if torch.cuda.is_available() else "cpu"
    sam = sam_model_registry[sam_model_type](checkpoint=sam_checkpoint).to(device)
    predictor = SamPredictor(sam)

    midas = torch.hub.load("intel-isl/MiDaS", "DPT_Large")
    midas.eval().to(device)
    # NOTE(review): hand-rolled preprocessing; MiDaS also publishes its own
    # transforms via torch.hub ("transforms") -- confirm this one matches what
    # DPT_Large expects (input size handling in particular).
    midas_transform = Compose([
        Resize(384),
        ToTensor(),
        Normalize(mean=[0.5]*3, std=[0.5]*3)
    ])
    return predictor, midas, midas_transform

# Build the models once at import time; process_image reads these globals.
_loaded_models = load_models()
predictor, midas_model, midas_transform = _loaded_models

# Processing function
def process_image(image_pil):
    """Estimate rough per-object dimensions and volume from a single image.

    The image is segmented with the module-level SAM ``predictor`` (no
    prompts, single mask output) and a depth map is predicted with the
    module-level MiDaS model. For every mask, the bounding-box width/height
    are converted to centimetres using a fixed assumed scene size, the mean
    normalized depth inside the mask is scaled by an assumed maximum depth,
    and the three figures are multiplied into a volume.

    NOTE(review): the scene size (100 x 75 cm) and maximum depth (100 cm) are
    hard-coded assumptions, and the depth map is only min/max-normalized, so
    the numbers are coarse estimates rather than measurements.

    Args:
        image_pil: The uploaded PIL image, or ``None`` when nothing was
            uploaded.

    Returns:
        A ``(image, table)`` tuple: the input image unchanged and a pandas
        DataFrame with one row per segmented object. The table is empty
        (headers only) when no image was given or nothing was segmented.
    """
    columns = [
        "Object",
        "Length (Depth) cm",
        "Breadth (Width) cm",
        "Height cm",
        "Volume cm³",
    ]

    # The button can be clicked before an image is uploaded; return an empty
    # table instead of crashing while unpacking the image shape.
    if image_pil is None:
        return None, pd.DataFrame(columns=columns)

    image_np = np.array(image_pil)
    img_h, img_w = image_np.shape[:2]

    # Real-world reference dimensions (adjust as needed)
    real_image_width_cm = 100
    real_image_height_cm = 75
    assumed_max_depth_cm = 100

    pixel_to_cm_x = real_image_width_cm / img_w
    pixel_to_cm_y = real_image_height_cm / img_h

    # SAM segmentation
    predictor.set_image(image_np)
    masks, _, _ = predictor.predict(multimask_output=False)

    # MiDaS depth estimation, run on whatever device the model lives on
    input_tensor = midas_transform(image_pil).unsqueeze(0).to(next(midas_model.parameters()).device)
    with torch.no_grad():
        depth_prediction = midas_model(input_tensor).squeeze().cpu().numpy()
    # Bring the depth map back to the image resolution so masks index into it
    depth_resized = cv2.resize(depth_prediction, (img_w, img_h))

    # The normalization range covers the whole depth map and is identical for
    # every mask, so compute it once instead of once per object.
    depth_min = np.min(depth_resized)
    depth_range = np.max(depth_resized) - depth_min + 1e-6

    # Object volume computation
    volume_data = []
    for i, mask in enumerate(masks):
        x, y, w, h = cv2.boundingRect(mask.astype(np.uint8))
        width_cm = w * pixel_to_cm_x
        height_cm = h * pixel_to_cm_y

        depth_masked = depth_resized[mask > 0.5]
        if depth_masked.size == 0:
            # Empty mask: nothing to measure for this object
            continue

        normalized_depth = (depth_masked - depth_min) / depth_range
        depth_cm = np.mean(normalized_depth) * assumed_max_depth_cm
        volume_cm3 = round(depth_cm * width_cm * height_cm, 2)

        volume_data.append([
            f"Object #{i+1}",
            round(depth_cm, 2),
            round(width_cm, 2),
            round(height_cm, 2),
            volume_cm3
        ])

    # Always hand the Dataframe output a DataFrame: a bare message string is
    # not tabular data and is not a valid value for that component.
    return image_pil, pd.DataFrame(volume_data, columns=columns)

# Gradio UI: upload an image, press the button, and get the image back
# alongside a table of per-object estimates.
with gr.Blocks() as demo:
    gr.Markdown("# 📦 Volume Estimation using SAM + MiDaS")

    # Top row: input image and the trigger button
    with gr.Row():
        image_input = gr.Image(label="Upload Image", type="pil")
        run_btn = gr.Button("Estimate Volume")

    # Bottom row: echoed image and the results table
    with gr.Row():
        output_image = gr.Image(label="Original Image")
        volume_table = gr.Dataframe(
            headers=[
                "Object",
                "Length (Depth) cm",
                "Breadth (Width) cm",
                "Height cm",
                "Volume cm³",
            ],
        )

    # Wire the button to the processing function
    run_btn.click(
        fn=process_image,
        inputs=image_input,
        outputs=[output_image, volume_table],
    )

demo.launch()