sam_object / app.py
import gradio as gr
import torch
import numpy as np
import cv2
from PIL import Image
import pandas as pd
from torchvision.transforms import Compose, Resize, ToTensor, Normalize
from segment_anything import SamPredictor, sam_model_registry
import os
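# Assumes the SAM ViT-B checkpoint "sam_vit_b_01ec64.pth" (from the official
# segment-anything release) has been uploaded next to this file.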
# Load SAM and MiDaS models
def load_models():
    sam_checkpoint = "sam_vit_b_01ec64.pth"
    if not os.path.exists(sam_checkpoint):
        raise FileNotFoundError("Please upload the SAM checkpoint file to the working directory.")
    device = "cuda" if torch.cuda.is_available() else "cpu"
    # The checkpoint is the ViT-B weights, so the registry key must be "vit_b" (not "vit_h").
    sam = sam_model_registry["vit_b"](checkpoint=sam_checkpoint).to(device)
    predictor = SamPredictor(sam)
    midas = torch.hub.load("intel-isl/MiDaS", "DPT_Large")
    midas.eval().to(device)
    # A fixed 384x384 resize keeps the input shape DPT-friendly (dimensions divisible by 32).
    midas_transform = Compose([
        Resize((384, 384)),
        ToTensor(),
        Normalize(mean=[0.5] * 3, std=[0.5] * 3)
    ])
    return predictor, midas, midas_transform
predictor, midas_model, midas_transform = load_models()
# Processing function
def process_image(image_pil):
    image_pil = image_pil.convert("RGB")  # ensure 3-channel input for SAM and MiDaS
    image_np = np.array(image_pil)
    img_h, img_w = image_np.shape[:2]
    # Real-world reference dimensions (adjust as needed)
    real_image_width_cm = 100
    real_image_height_cm = 75
    assumed_max_depth_cm = 100
    pixel_to_cm_x = real_image_width_cm / img_w
    pixel_to_cm_y = real_image_height_cm / img_h
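    # Illustrative scale (assumed numbers): for a 1000 px wide image and the 100 cm
    # reference width above, pixel_to_cm_x = 100 / 1000 = 0.1 cm per pixel, so a
    # 250 px wide object maps to 250 * 0.1 = 25 cm.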
    # SAM segmentation
    predictor.set_image(image_np)
    # No point/box prompts are given, so SAM returns a single best-guess mask for the image.
    masks, _, _ = predictor.predict(multimask_output=False)
    # MiDaS depth estimation
    input_tensor = midas_transform(image_pil).unsqueeze(0).to(next(midas_model.parameters()).device)
    with torch.no_grad():
        depth_prediction = midas_model(input_tensor).squeeze().cpu().numpy()
    depth_resized = cv2.resize(depth_prediction, (img_w, img_h))
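    # Note: MiDaS predicts relative (not metric) depth, so the per-object depth below is
    # only a rough estimate obtained by scaling the normalized prediction with the
    # assumed_max_depth_cm reference; each object is then treated as a box
    # (width x height x depth) for the volume figure.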
    # Object volume computation
    volume_data = []
    for i, mask in enumerate(masks):
        # Bounding box of the mask in pixels, converted to cm via the reference scale
        x, y, w, h = cv2.boundingRect(mask.astype(np.uint8))
        width_px = w
        height_px = h
        width_cm = width_px * pixel_to_cm_x
        height_cm = height_px * pixel_to_cm_y
        depth_masked = depth_resized[mask > 0.5]
        if depth_masked.size == 0:
            continue
        # Normalize the masked depth against the full depth map's range, then scale by
        # the assumed maximum depth to get an approximate thickness in cm
        normalized_depth = (depth_masked - np.min(depth_resized)) / (np.max(depth_resized) - np.min(depth_resized) + 1e-6)
        depth_cm = np.mean(normalized_depth) * assumed_max_depth_cm
        # Box approximation: volume = depth x width x height
        volume_cm3 = round(depth_cm * width_cm * height_cm, 2)
        volume_data.append([
            f"Object #{i+1}",
            round(depth_cm, 2),
            round(width_cm, 2),
            round(height_cm, 2),
            volume_cm3
        ])
    columns = ["Object", "Length (Depth) cm", "Breadth (Width) cm", "Height cm", "Volume cm³"]
    if not volume_data:
        # Return an empty table; a plain string would not render in the Dataframe output
        return image_pil, pd.DataFrame(columns=columns)
    df = pd.DataFrame(volume_data, columns=columns)
    return image_pil, df
# Gradio Interface
with gr.Blocks() as demo:
    gr.Markdown("# 📦 Volume Estimation using SAM + MiDaS")
    with gr.Row():
        image_input = gr.Image(type="pil", label="Upload Image")
        run_btn = gr.Button("Estimate Volume")
    with gr.Row():
        output_image = gr.Image(label="Original Image")
        volume_table = gr.Dataframe(headers=["Object", "Length (Depth) cm", "Breadth (Width) cm", "Height cm", "Volume cm³"])
    run_btn.click(fn=process_image, inputs=image_input, outputs=[output_image, volume_table])

demo.launch()