"""Streamlit app: rough object volume estimation from a single image.

Pipeline, as implemented below:

1. Obtain an RGB image from an upload or from the local webcam.
2. Segment it with a Segment Anything (SAM) predictor.
3. Predict a depth map with the MiDaS ``DPT_Large`` model.
4. For every mask, combine its bounding-box size (scaled to centimetres with
   fixed reference dimensions) with the mean normalised depth inside the mask
   to produce a volume figure, then show the results as a table and CSV.

The centimetre values depend entirely on the hard-coded reference dimensions
further down; they are heuristics, not calibrated measurements.
"""

import os

import altair as alt
import cv2
import numpy as np
import pandas as pd
import streamlit as st
import torch
from PIL import Image
from segment_anything import SamPredictor, sam_model_registry
from torchvision.transforms import Compose, Normalize, Resize, ToTensor

# Default location of the SAM ViT-H checkpoint. Set the SAM_CHECKPOINT
# environment variable to use a different file without editing this script.
DEFAULT_SAM_CHECKPOINT = (
    "C:/Users/Administrator/Desktop/streamlit_tl/models/sam_vit_h_4b8939.pth"
)

# st.session_state keys used to carry webcam frames across script reruns.
_LAST_FRAME_KEY = "webcam_last_frame"
_CAPTURED_FRAME_KEY = "webcam_captured_frame"

# Set Streamlit configuration
st.set_page_config(page_title="Volume Estimator", layout="wide")
st.title("📦 Volume Estimation using SAM Segmentation + MiDaS Depth")


# Load SAM and MiDaS models
@st.cache_resource
def load_models():
    """Load the SAM predictor, the MiDaS model and the MiDaS input transform.

    Decorated with ``st.cache_resource`` so the models are not reloaded on
    every script rerun.

    Returns:
        A ``(predictor, midas, midas_transform)`` tuple: a ``SamPredictor``
        wrapping the ``vit_h`` SAM model (on CUDA when available, else CPU),
        the MiDaS ``DPT_Large`` model in eval mode, and the torchvision
        transform that turns a PIL image into the MiDaS input tensor.
    """
    sam_checkpoint = os.environ.get("SAM_CHECKPOINT", DEFAULT_SAM_CHECKPOINT)
    device = "cuda" if torch.cuda.is_available() else "cpu"
    sam = sam_model_registry["vit_h"](checkpoint=sam_checkpoint).to(device)
    predictor = SamPredictor(sam)

    midas = torch.hub.load("intel-isl/MiDaS", "DPT_Large")
    midas.eval()

    midas_transform = Compose([
        # Resize both edges: Resize(384) would only fix the shorter edge, and
        # an arbitrary aspect ratio can leave the other edge at a size the DPT
        # backbone cannot process. The depth map is resized back to the image
        # size after inference, so the fixed square input is safe here.
        Resize((384, 384)),
        ToTensor(),
        Normalize(mean=[0.5] * 3, std=[0.5] * 3),
    ])
    return predictor, midas, midas_transform


def _stream_webcam_preview():
    """Show a live webcam preview, remembering the most recent frame.

    Every frame is written to ``st.session_state`` so that the rerun triggered
    by the capture button can pick up the last frame that was displayed. The
    loop ends when a frame cannot be read or when Streamlit interrupts the
    script for a rerun; the camera is released in both cases.
    """
    cap = cv2.VideoCapture(0)
    try:
        if not cap.isOpened():
            st.error("🚫 Could not open the webcam.")
            return
        stframe = st.empty()
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            st.session_state[_LAST_FRAME_KEY] = frame_rgb
            stframe.image(frame_rgb, caption="Live Camera Feed", channels="RGB")
    finally:
        # Runs on normal exit, read failure and rerun interruption alike.
        cap.release()


def _estimate_volumes(masks, depth_map, pixel_to_cm_x, pixel_to_cm_y, max_depth_cm):
    """Build one table row of estimated dimensions per non-empty mask.

    Args:
        masks: Iterable of 2-D mask arrays with the same height/width as
            ``depth_map``; values above 0.5 count as "inside the object".
        depth_map: 2-D array of depth predictions for the whole image.
        pixel_to_cm_x: Centimetres represented by one pixel horizontally.
        pixel_to_cm_y: Centimetres represented by one pixel vertically.
        max_depth_cm: Depth in centimetres assigned to a normalised depth of 1.

    Returns:
        A list of dicts (one per mask that selects at least one pixel) with
        the formatted depth, width, height and volume of the object.
    """
    # The normalisation bounds depend only on the full depth map, so compute
    # them once instead of once per mask.
    depth_min = float(np.min(depth_map))
    depth_range = float(np.max(depth_map)) - depth_min + 1e-6

    rows = []
    for i, mask in enumerate(masks):
        depth_masked = depth_map[mask > 0.5]
        if depth_masked.size == 0:
            continue

        _, _, width_px, height_px = cv2.boundingRect(mask.astype(np.uint8))
        width_cm = width_px * pixel_to_cm_x
        height_cm = height_px * pixel_to_cm_y

        # NOTE(review): MiDaS is documented as predicting relative inverse
        # depth; confirm that scaling the min-max normalised mean by
        # max_depth_cm is the intended interpretation.
        normalized_depth = (depth_masked - depth_min) / depth_range
        depth_cm = float(np.mean(normalized_depth)) * max_depth_cm
        volume_cm3 = round(depth_cm * width_cm * height_cm, 2)

        rows.append({
            "Object": f"Object #{i+1}",
            "Length (Depth)": f"{round(depth_cm, 2)} cm",
            "Breadth (Width)": f"{round(width_cm, 2)} cm",
            "Height": f"{round(height_cm, 2)} cm",
            "Volume": f"{volume_cm3} cm³",
        })
    return rows


predictor, midas_model, midas_transform = load_models()

# Input source selection
source_option = st.radio("Select input source", ("Upload Image", "Use Webcam"))

uploaded_file = None
image_pil = None

if source_option == "Upload Image":
    uploaded_file = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])
    if uploaded_file is not None:
        image_pil = Image.open(uploaded_file).convert("RGB")

elif source_option == "Use Webcam":
    run_camera = st.checkbox("Start Camera")
    if not run_camera:
        # Un-ticking the checkbox discards any stored frame, so ticking it
        # again starts a fresh preview (this is how a capture is retaken).
        st.session_state.pop(_CAPTURED_FRAME_KEY, None)
        st.session_state.pop(_LAST_FRAME_KEY, None)
    else:
        # The button is created exactly once per run. Creating it inside the
        # preview loop registers the same widget repeatedly, which Streamlit
        # rejects, and a click is only visible on the *next* run anyway.
        capture_clicked = st.button("📸 Capture Frame")
        if capture_clicked and _LAST_FRAME_KEY in st.session_state:
            st.session_state[_CAPTURED_FRAME_KEY] = st.session_state[_LAST_FRAME_KEY]

        captured_frame = st.session_state.get(_CAPTURED_FRAME_KEY)
        if captured_frame is not None:
            image_pil = Image.fromarray(captured_frame)
        else:
            _stream_webcam_preview()

# Continue processing if we have an image
if image_pil is not None:
    image_np = np.array(image_pil)
    img_h, img_w = image_np.shape[:2]
    st.image(image_pil, caption="Selected Image", use_container_width=True)

    # Real-world reference dimensions: the full image is treated as covering
    # this many centimetres, and a normalised depth of 1 as this deep.
    real_image_width_cm = 100
    real_image_height_cm = 75
    assumed_max_depth_cm = 100

    pixel_to_cm_x = real_image_width_cm / img_w
    pixel_to_cm_y = real_image_height_cm / img_h

    # SAM Segmentation
    # NOTE(review): predict() is called without point or box prompts and with
    # multimask_output=False, so presumably a single mask comes back; confirm
    # whether per-object masks (one row per object) were intended.
    predictor.set_image(image_np)
    masks, _, _ = predictor.predict(multimask_output=False)

    # MiDaS Depth Estimation
    input_tensor = midas_transform(image_pil).unsqueeze(0)
    with torch.no_grad():
        depth_prediction = midas_model(input_tensor).squeeze().cpu().numpy()
    # cv2.resize takes (width, height); bring the depth map back to image size.
    depth_resized = cv2.resize(depth_prediction, (img_w, img_h))

    # Object volume computation
    volume_data = _estimate_volumes(
        masks,
        depth_resized,
        pixel_to_cm_x,
        pixel_to_cm_y,
        assumed_max_depth_cm,
    )

    # Display volume table
    if volume_data:
        df = pd.DataFrame(volume_data)
        st.markdown("### 📊 Object Dimensions and Volume")
        st.dataframe(df)

        csv = df.to_csv(index=False).encode('utf-8')
        st.download_button(
            "📂 Download Volume Table as CSV",
            csv,
            "object_volumes_with_units.csv",
            "text/csv",
        )
    else:
        st.warning("🚫 No objects were segmented.")