Spaces:

lakshmi082024
/

sam_object

Runtime error

App Files Files Community

sam_object / app.py

lakshmi082024

Update app.py

921336f verified 7 months ago

raw

history blame

3.42 kB

	import gradio as gr
	import torch
	import numpy as np
	import cv2
	from PIL import Image
	import pandas as pd
	from torchvision.transforms import Compose, Resize, ToTensor, Normalize
	from segment_anything import SamPredictor, sam_model_registry
	import os

	# Load SAM and MiDaS models
	def load_models():
	    """Load the SAM predictor and the MiDaS depth model with its preprocessing.

	    The SAM ViT-H checkpoint is looked up in the working directory under the
	    file name taken from the official download URL; when it is missing, it is
	    downloaded from that URL first.

	    Returns:
	        A ``(predictor, midas, midas_transform)`` tuple: a ``SamPredictor``
	        wrapping the ViT-H SAM model, the MiDaS ``DPT_Large`` model in eval
	        mode, and the torchvision transform applied to images before they are
	        passed to MiDaS. Both models are moved to CUDA when available,
	        otherwise they stay on the CPU.

	    Raises:
	        FileNotFoundError: If the checkpoint is not present locally and the
	            download fails.
	    """
	    sam_checkpoint_url = "https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth"
	    # os.path.exists() can never succeed on a URL, so the existence check and
	    # the model loader both work on a local file named after the URL's last
	    # path component.
	    sam_checkpoint = os.path.basename(sam_checkpoint_url)
	    if not os.path.exists(sam_checkpoint):
	        try:
	            torch.hub.download_url_to_file(sam_checkpoint_url, sam_checkpoint)
	        except OSError as err:
	            # urllib's URLError/HTTPError are OSError subclasses; keep the
	            # exception type and message callers already see.
	            raise FileNotFoundError(
	                "Please upload the SAM checkpoint file to the working directory."
	            ) from err

	    device = "cuda" if torch.cuda.is_available() else "cpu"
	    sam = sam_model_registry["vit_h"](checkpoint=sam_checkpoint).to(device)
	    predictor = SamPredictor(sam)

	    midas = torch.hub.load("intel-isl/MiDaS", "DPT_Large")
	    midas.eval().to(device)
	    # NOTE(review): Resize(384) scales the shorter edge only, so the longer
	    # edge depends on the aspect ratio — confirm DPT_Large accepts every
	    # resulting size, or switch to the transforms published with MiDaS.
	    midas_transform = Compose([
	        Resize(384),
	        ToTensor(),
	        Normalize(mean=[0.5] * 3, std=[0.5] * 3),
	    ])
	    return predictor, midas, midas_transform

	# Load the models once at import time; process_image() reads these
	# module-level names on every call instead of reloading the models.
	predictor, midas_model, midas_transform = load_models()

	# Processing function
	def process_image(
	    image_pil,
	    *,
	    real_image_width_cm=100,
	    real_image_height_cm=75,
	    assumed_max_depth_cm=100,
	):
	    """Estimate per-object dimensions and volume from a single image.

	    The image is segmented with the module-level SAM ``predictor`` and a depth
	    map is predicted with ``midas_model``. For every mask, the bounding-box
	    width and height are converted to centimetres with a linear pixel scale,
	    and the mean min-max-normalized depth value inside the mask is scaled by
	    ``assumed_max_depth_cm``. The volume is the product of the three values.

	    Args:
	        image_pil: PIL image supplied by the Gradio input, or ``None`` when no
	            image was uploaded.
	        real_image_width_cm: Real-world width assumed for the full image.
	        real_image_height_cm: Real-world height assumed for the full image.
	        assumed_max_depth_cm: Depth assigned to a normalized depth value of 1.

	    Returns:
	        A ``(image, table)`` tuple: the input image unchanged and a
	        ``pandas.DataFrame`` with one row per segmented object. The table is
	        empty (columns only) when there is no input image or nothing was
	        segmented.
	    """
	    columns = ["Object", "Length (Depth) cm", "Breadth (Width) cm", "Height cm", "Volume cm³"]

	    if image_pil is None:
	        # Gradio passes None when the button is clicked without an upload.
	        return None, pd.DataFrame(columns=columns)

	    # The transform normalizes exactly three channels, so drop alpha and
	    # expand grayscale before handing the image to either model.
	    image_rgb = image_pil.convert("RGB")
	    image_np = np.array(image_rgb)
	    img_h, img_w = image_np.shape[:2]

	    pixel_to_cm_x = real_image_width_cm / img_w
	    pixel_to_cm_y = real_image_height_cm / img_h

	    # SAM segmentation (no prompts are supplied; a single mask is requested).
	    predictor.set_image(image_np)
	    masks, _, _ = predictor.predict(multimask_output=False)

	    # MiDaS depth estimation, run on whichever device the model lives on.
	    device = next(midas_model.parameters()).device
	    input_tensor = midas_transform(image_rgb).unsqueeze(0).to(device)
	    with torch.no_grad():
	        depth_prediction = midas_model(input_tensor).squeeze().cpu().numpy()
	    # Bring the prediction back to the input resolution so that the masks
	    # can index it directly.
	    depth_resized = cv2.resize(depth_prediction, (img_w, img_h))

	    # Global normalization bounds do not depend on the mask: compute once.
	    depth_min = np.min(depth_resized)
	    depth_range = np.max(depth_resized) - depth_min + 1e-6

	    # NOTE(review): MiDaS output is presumably relative inverse depth, so
	    # this "depth" is only a heuristic scale — confirm the intended mapping.
	    volume_data = []
	    for i, mask in enumerate(masks):
	        depth_masked = depth_resized[mask > 0.5]
	        if depth_masked.size == 0:
	            continue

	        _, _, width_px, height_px = cv2.boundingRect(mask.astype(np.uint8))
	        width_cm = width_px * pixel_to_cm_x
	        height_cm = height_px * pixel_to_cm_y

	        normalized_depth = (depth_masked - depth_min) / depth_range
	        # Convert the NumPy scalar to a Python float before rounding.
	        depth_cm = float(np.mean(normalized_depth)) * assumed_max_depth_cm
	        volume_cm3 = round(depth_cm * width_cm * height_cm, 2)

	        volume_data.append([
	            f"Object #{i+1}",
	            round(depth_cm, 2),
	            round(width_cm, 2),
	            round(height_cm, 2),
	            volume_cm3,
	        ])

	    # Always return tabular data: the second output is bound to a Dataframe
	    # component, for which a bare message string is not valid table content.
	    return image_pil, pd.DataFrame(volume_data, columns=columns)

	# Gradio Interface
	# NOTE(review): the nesting below is reconstructed from statement order
	# (two rows, click handler registered inside the Blocks context) — confirm
	# it matches the intended layout.
	with gr.Blocks() as demo:
	    gr.Markdown("# 📦 Volume Estimation using SAM + MiDaS")

	    # First row: image upload next to the button that starts the estimate.
	    with gr.Row():
	        image_input = gr.Image(type="pil", label="Upload Image")
	        run_btn = gr.Button("Estimate Volume")

	    # Second row: the echoed image next to the per-object results table.
	    with gr.Row():
	        output_image = gr.Image(label="Original Image")
	        volume_table = gr.Dataframe(
	            headers=[
	                "Object",
	                "Length (Depth) cm",
	                "Breadth (Width) cm",
	                "Height cm",
	                "Volume cm³",
	            ],
	        )

	    # process_image returns (image, table), matching the two outputs in order.
	    run_btn.click(
	        fn=process_image,
	        inputs=image_input,
	        outputs=[output_image, volume_table],
	    )

	demo.launch()