import streamlit as st
import cv2
import numpy as np
import pandas as pd
from PIL import Image
import torch
import timm
from torchvision.transforms import Compose, Resize, ToTensor, Normalize
from segment_anything import SamPredictor, sam_model_registry
import requests
import os
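# Pipeline overview: segment objects with SAM, estimate a relative depth map
# with MiDaS, convert pixel extents to centimeters using assumed real-world
# reference dimensions, and report a box-model volume per segmented object.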
# Streamlit configuration
st.set_page_config(page_title="Volume Estimator", layout="wide")
st.title("Volume Estimation using SAM Segmentation + MiDaS Depth")
@st.cache_resource
def load_models():
    # Public SAM ViT-H checkpoint mirrored on Hugging Face
    checkpoint_url = "https://huggingface.co/HCMUE-Research/SAM-vit-h/resolve/main/sam_vit_h_4b8939.pth"
    checkpoint_path = "sam_vit_h_4b8939.pth"

    # Download only if not already present; stream to disk because the
    # checkpoint is large (~2.5 GB) and should not be held in memory at once
    if not os.path.exists(checkpoint_path):
        st.info("Downloading SAM model checkpoint...")
        response = requests.get(checkpoint_url, stream=True)
        response.raise_for_status()
        with open(checkpoint_path, "wb") as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)

    device = "cuda" if torch.cuda.is_available() else "cpu"
    sam = sam_model_registry["vit_h"](checkpoint=checkpoint_path).to(device)
    predictor = SamPredictor(sam)

    # Load MiDaS depth model; DPT expects input dimensions divisible by 32,
    # so resize to a fixed 384x384 rather than only the shorter side
    midas = torch.hub.load("intel-isl/MiDaS", "DPT_Large")
    midas.to(device)
    midas.eval()
    midas_transform = Compose([
        Resize((384, 384)),
        ToTensor(),
        Normalize(mean=[0.5] * 3, std=[0.5] * 3)
    ])
    return predictor, midas, midas_transform
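# load_models() is wrapped in @st.cache_resource above, so the checkpoint
# download and model construction run once per server process rather than
# on every Streamlit rerun triggered by widget interaction.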
predictor, midas_model, midas_transform = load_models()
# Input source selection
source_option = st.radio("Select input source", ("Upload Image", "Use Webcam"))
uploaded_file = None
image_pil = None
if source_option == "Upload Image":
    uploaded_file = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])
    if uploaded_file:
        image_pil = Image.open(uploaded_file).convert("RGB")
elif source_option == "Use Webcam":
    run_camera = st.checkbox("Start Camera")
    if run_camera:
        # Create the button once, before the loop: instantiating the same
        # st.button inside a while loop raises DuplicateWidgetID on its
        # second iteration. Clicking it reruns the script with
        # capture_clicked True, so that run captures its first frame.
        capture_clicked = st.button("Capture Frame")
        cap = cv2.VideoCapture(0)
        stframe = st.empty()
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            stframe.image(frame_rgb, caption="Live Camera Feed", channels="RGB")
            if capture_clicked:
                image_pil = Image.fromarray(frame_rgb)
                break
        cap.release()
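# Note: Streamlit ships a built-in camera widget that avoids the OpenCV
# capture loop entirely; a minimal sketch of that alternative:
#   camera_file = st.camera_input("Take a picture")
#   if camera_file:
#       image_pil = Image.open(camera_file).convert("RGB")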
# Continue only if an image is available
if image_pil is not None:
    image_np = np.array(image_pil)
    img_h, img_w = image_np.shape[:2]
    st.image(image_pil, caption="Selected Image", use_container_width=True)

    # Assumed real-world reference dimensions: the image frame is taken to
    # span a 100 cm x 75 cm area, with an assumed maximum depth of 100 cm
    real_image_width_cm = 100
    real_image_height_cm = 75
    assumed_max_depth_cm = 100
    pixel_to_cm_x = real_image_width_cm / img_w
    pixel_to_cm_y = real_image_height_cm / img_h
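    # Example: for a 640x480 px image these scales are 100 / 640 = 0.15625
    # cm/px horizontally and 75 / 480 = 0.15625 cm/px vertically.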
    # SAM segmentation: prompt with a single foreground point at the image
    # center, since predict() with no prompts yields an untargeted mask
    predictor.set_image(image_np)
    center_point = np.array([[img_w // 2, img_h // 2]])
    masks, _, _ = predictor.predict(
        point_coords=center_point,
        point_labels=np.array([1]),
        multimask_output=False,
    )

    # MiDaS depth estimation on the same device the model was loaded to
    device = next(midas_model.parameters()).device
    input_tensor = midas_transform(image_pil).unsqueeze(0).to(device)
    with torch.no_grad():
        depth_prediction = midas_model(input_tensor).squeeze().cpu().numpy()
    depth_resized = cv2.resize(depth_prediction, (img_w, img_h))
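    # Caveat: MiDaS outputs relative inverse depth (larger = closer), not
    # metric depth; the normalization below maps it onto the assumed
    # 0-100 cm range, so depth_cm is a rough heuristic only.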
    # Compute object volumes with a box model: width and height from the
    # mask's bounding rectangle, depth from the normalized mean depth
    volume_data = []
    for i, mask in enumerate(masks):
        mask_np = mask
        x, y, w, h = cv2.boundingRect(mask_np.astype(np.uint8))
        width_cm = w * pixel_to_cm_x
        height_cm = h * pixel_to_cm_y
        depth_masked = depth_resized[mask_np > 0.5]
        if depth_masked.size == 0:
            continue
        normalized_depth = (depth_masked - np.min(depth_resized)) / (np.max(depth_resized) - np.min(depth_resized) + 1e-6)
        depth_cm = np.mean(normalized_depth) * assumed_max_depth_cm
        volume_cm3 = round(depth_cm * width_cm * height_cm, 2)
        volume_data.append({
            "Object": f"Object #{i + 1}",
            "Length (Depth)": f"{round(depth_cm, 2)} cm",
            "Breadth (Width)": f"{round(width_cm, 2)} cm",
            "Height": f"{round(height_cm, 2)} cm",
            "Volume": f"{volume_cm3} cm³"
        })
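    # The box model above overestimates the volume of irregular shapes. A
    # per-pixel variant (a sketch, under the same scale assumptions) would
    # integrate depth over the mask instead:
    #   pixel_area_cm2 = pixel_to_cm_x * pixel_to_cm_y
    #   volume_cm3 = normalized_depth.sum() * assumed_max_depth_cm * pixel_area_cm2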
    # Display volume results
    if volume_data:
        df = pd.DataFrame(volume_data)
        st.markdown("### Object Dimensions and Volume")
        st.dataframe(df)
        csv = df.to_csv(index=False).encode('utf-8')
        st.download_button("Download Volume Table as CSV", csv, "object_volumes_with_units.csv", "text/csv")
    else:
        st.warning("No objects were segmented.")