Spaces:

lakshmi082024
/

Object_Dimension1

Running

App Files Files Community

Object_Dimension1 / app.py

lakshmi082024

Update app.py

cbb6bb4 verified 3 months ago

raw

history blame contribute delete

4.59 kB

	import streamlit as st
	import cv2
	import numpy as np
	import pandas as pd
	from PIL import Image
	import torch
	import timm
	from torchvision.transforms import Compose, Resize, ToTensor, Normalize
	from segment_anything import SamPredictor, sam_model_registry
	import requests
	import os

	# Page setup: browser-tab title and wide layout first, then the on-page heading.
	st.set_page_config(layout="wide", page_title="Volume Estimator")
	st.title("Volume Estimation using SAM Segmentation + MiDaS Depth")

	@st.cache_resource
	def load_models():
	    """Load the SAM predictor and the MiDaS depth model used by the app.

	    The SAM ViT-H checkpoint is downloaded into the working directory the
	    first time it is needed and reused on later calls. The function is
	    wrapped in ``st.cache_resource`` so the (expensive) model construction
	    is shared across Streamlit reruns instead of repeated.

	    Returns:
	        tuple: ``(predictor, midas, midas_transform)`` where ``predictor``
	        is a ``SamPredictor`` around the ViT-H SAM model, ``midas`` is the
	        ``DPT_Large`` MiDaS network switched to eval mode, and
	        ``midas_transform`` is the torchvision preprocessing pipeline
	        (resize to 384, tensor conversion, 0.5/0.5 normalisation).

	    Raises:
	        requests.HTTPError: If the checkpoint server answers with an error
	            status.
	    """
	    # Public Hugging Face URL of the SAM ViT-H checkpoint.
	    checkpoint_url = "https://huggingface.co/HCMUE-Research/SAM-vit-h/resolve/main/sam_vit_h_4b8939.pth"
	    checkpoint_path = "sam_vit_h_4b8939.pth"

	    # Download only if not already present.
	    if not os.path.exists(checkpoint_path):
	        st.info("Downloading SAM model checkpoint...")
	        # Stream to a temporary ".part" file and rename at the end, so an
	        # interrupted or failed download is never mistaken for a complete
	        # checkpoint on the next run, and the file is not held in memory.
	        partial_path = checkpoint_path + ".part"
	        with requests.get(checkpoint_url, stream=True, timeout=60) as response:
	            # Fail loudly instead of saving an HTML error page as the model.
	            response.raise_for_status()
	            with open(partial_path, "wb") as f:
	                for chunk in response.iter_content(chunk_size=1 << 20):
	                    f.write(chunk)
	        os.replace(partial_path, checkpoint_path)

	    device = "cuda" if torch.cuda.is_available() else "cpu"
	    sam = sam_model_registry["vit_h"](checkpoint=checkpoint_path).to(device)
	    predictor = SamPredictor(sam)

	    # Load MiDaS model. It is left on whatever device torch.hub loads it
	    # to; callers must feed it tensors on that same device.
	    midas = torch.hub.load("intel-isl/MiDaS", "DPT_Large")
	    midas.eval()
	    midas_transform = Compose([
	        Resize(384),
	        ToTensor(),
	        # One mean/std entry per RGB channel.
	        Normalize(mean=[0.5] * 3, std=[0.5] * 3),
	    ])
	    return predictor, midas, midas_transform


	# Build the models (load_models is cached, so reruns reuse the same objects).
	predictor, midas_model, midas_transform = load_models()

	# Input source selection
	source_option = st.radio("Select input source", ("Upload Image", "Use Webcam"))

	uploaded_file = None
	image_pil = None  # Stays None until the user supplies or captures an image.

	if source_option == "Upload Image":
	    uploaded_file = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])
	    if uploaded_file is not None:
	        image_pil = Image.open(uploaded_file).convert("RGB")

	elif source_option == "Use Webcam":
	    run_camera = st.checkbox("Start Camera")

	    if run_camera:
	        # The button must be created exactly once per script run: creating
	        # it inside the frame loop would register the same widget again on
	        # every iteration. Clicking it reruns the script with
	        # capture_requested=True, and the first frame read is then kept.
	        capture_requested = st.button("Capture Frame")
	        cap = cv2.VideoCapture(0)
	        stframe = st.empty()
	        try:
	            if not cap.isOpened():
	                st.error("Could not open the webcam.")

	            while cap.isOpened():
	                ret, frame = cap.read()
	                if not ret:
	                    break
	                # OpenCV delivers BGR; Streamlit and PIL expect RGB.
	                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
	                stframe.image(frame_rgb, caption="Live Camera Feed", channels="RGB")

	                if capture_requested:
	                    image_pil = Image.fromarray(frame_rgb)
	                    break
	        finally:
	            # Always free the device, including when a read fails or the
	            # script run is interrupted while the live feed is showing.
	            cap.release()

	# Continue only if an image is available
	if image_pil is not None:
	    image_np = np.array(image_pil)
	    img_h, img_w = image_np.shape[:2]
	    st.image(image_pil, caption="Selected Image", use_container_width=True)

	    # Real-world reference dimensions. These are fixed assumptions: the full
	    # frame is taken to span 100 cm x 75 cm and the depth range 100 cm.
	    real_image_width_cm = 100
	    real_image_height_cm = 75
	    assumed_max_depth_cm = 100

	    pixel_to_cm_x = real_image_width_cm / img_w
	    pixel_to_cm_y = real_image_height_cm / img_h

	    # SAM Segmentation
	    # NOTE(review): no point/box prompts are passed and multimask_output is
	    # False, so this presumably yields a single mask - confirm that is the
	    # intended behaviour for multi-object scenes.
	    predictor.set_image(image_np)
	    masks, _, _ = predictor.predict(multimask_output=False)

	    # MiDaS Depth Estimation
	    # Run the input on whichever device the MiDaS weights live on, so the
	    # forward pass does not fail on a device mismatch.
	    midas_device = next(midas_model.parameters()).device
	    input_tensor = midas_transform(image_pil).unsqueeze(0).to(midas_device)
	    with torch.no_grad():
	        depth_prediction = midas_model(input_tensor).squeeze().cpu().numpy()
	    # Bring the depth map back to the image resolution so masks index it directly.
	    depth_resized = cv2.resize(depth_prediction, (img_w, img_h))

	    # Global depth range, computed once; the epsilon avoids division by
	    # zero on a perfectly flat depth map.
	    depth_min = np.min(depth_resized)
	    depth_span = np.max(depth_resized) - depth_min + 1e-6

	    # Compute object volumes
	    volume_data = []
	    for i, mask in enumerate(masks):
	        depth_masked = depth_resized[mask > 0.5]
	        if depth_masked.size == 0:
	            # Empty mask: nothing to measure.
	            continue

	        # Width/height come from the mask's axis-aligned bounding box.
	        _, _, width_px, height_px = cv2.boundingRect(mask.astype(np.uint8))
	        width_cm = width_px * pixel_to_cm_x
	        height_cm = height_px * pixel_to_cm_y

	        # Mean depth of the object, normalised against the whole frame and
	        # scaled by the assumed maximum depth.
	        # NOTE(review): MiDaS output is a relative prediction, so this is a
	        # rough heuristic rather than a metric depth - confirm.
	        normalized_depth = (depth_masked - depth_min) / depth_span
	        # Plain float keeps the formatted output independent of NumPy scalar types.
	        depth_cm = float(np.mean(normalized_depth)) * assumed_max_depth_cm

	        volume_cm3 = round(depth_cm * width_cm * height_cm, 2)

	        volume_data.append({
	            "Object": f"Object #{i+1}",
	            "Length (Depth)": f"{round(depth_cm, 2)} cm",
	            "Breadth (Width)": f"{round(width_cm, 2)} cm",
	            "Height": f"{round(height_cm, 2)} cm",
	            "Volume": f"{volume_cm3} cm³",
	        })

	    # Display volume results
	    if volume_data:
	        df = pd.DataFrame(volume_data)
	        st.markdown("### Object Dimensions and Volume")
	        st.dataframe(df)

	        csv = df.to_csv(index=False).encode('utf-8')
	        st.download_button("Download Volume Table as CSV", csv, "object_volumes_with_units.csv", "text/csv")
	    else:
	        st.warning("No objects were segmented.")