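"""Streamlit app that estimates object volumes from a single image by
combining SAM (Segment Anything) segmentation masks with MiDaS monocular
depth estimation. Real-world scale comes from hard-coded scene assumptions,
so reported volumes are rough estimates."""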
import streamlit as st
import cv2
import numpy as np
import pandas as pd
from PIL import Image
import torch
import timm
from torchvision.transforms import Compose, Resize, ToTensor, Normalize
from segment_anything import SamPredictor, sam_model_registry
import requests
import os

# Streamlit configuration
st.set_page_config(page_title="Volume Estimator", layout="wide")
st.title("Volume Estimation using SAM Segmentation + MiDaS Depth")

@st.cache_resource
def load_models():
    # Public SAM ViT-H checkpoint hosted on Hugging Face
    checkpoint_url = "https://huggingface.co/HCMUE-Research/SAM-vit-h/resolve/main/sam_vit_h_4b8939.pth"
    checkpoint_path = "sam_vit_h_4b8939.pth"

    # Download only if not already present; stream to disk instead of
    # buffering the ~2.4 GB checkpoint in memory
    if not os.path.exists(checkpoint_path):
        st.info("Downloading SAM model checkpoint...")
        with requests.get(checkpoint_url, stream=True) as response:
            response.raise_for_status()
            with open(checkpoint_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=1 << 20):
                    f.write(chunk)

    device = "cuda" if torch.cuda.is_available() else "cpu"
    sam = sam_model_registry["vit_h"](checkpoint=checkpoint_path).to(device)
    predictor = SamPredictor(sam)

    # Load MiDaS (DPT_Large) for monocular depth estimation. DPT expects
    # input dimensions divisible by 32, so resize to a fixed 384x384 (a
    # bare Resize(384) keeps the aspect ratio and can violate that constraint)
    midas = torch.hub.load("intel-isl/MiDaS", "DPT_Large").to(device)
    midas.eval()
    midas_transform = Compose([
        Resize((384, 384)),
        ToTensor(),
        Normalize(mean=[0.5]*3, std=[0.5]*3)
    ])
    return predictor, midas, midas_transform


predictor, midas_model, midas_transform = load_models()

# Input source selection
source_option = st.radio("Select input source", ("Upload Image", "Use Webcam"))

uploaded_file = None
image_pil = None

if source_option == "Upload Image":
    uploaded_file = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])
    if uploaded_file:
        image_pil = Image.open(uploaded_file).convert("RGB")

elif source_option == "Use Webcam":
    # Use Streamlit's built-in capture widget. A manual cv2.VideoCapture
    # loop with st.button inside it does not work here: Streamlit re-runs
    # the whole script on every interaction, and re-creating the same
    # button on each loop iteration raises a DuplicateWidgetID error.
    camera_file = st.camera_input("Capture a frame")
    if camera_file:
        image_pil = Image.open(camera_file).convert("RGB")

# Continue only if an image is available
if image_pil:
    image_np = np.array(image_pil)
    img_h, img_w = image_np.shape[:2]
    st.image(image_pil, caption="Selected Image", use_container_width=True)

    # Real-world reference dimensions
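    # These values are scene-specific assumptions: the full frame is taken
    # to span 100 cm x 75 cm, and depth is scaled to an assumed 100 cm
    # maximum. Without a reference object or camera intrinsics, absolute
    # scale cannot be recovered from a single image, so treat the outputs
    # as rough estimates.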
    real_image_width_cm = 100
    real_image_height_cm = 75
    assumed_max_depth_cm = 100

    pixel_to_cm_x = real_image_width_cm / img_w
    pixel_to_cm_y = real_image_height_cm / img_h

    # SAM Segmentation. SAM needs a prompt to return a meaningful mask, so
    # (as a simple assumption) prompt with the image centre as a single
    # foreground point; calling predict() with no prompt runs but yields an
    # essentially arbitrary mask.
    predictor.set_image(image_np)
    center_point = np.array([[img_w // 2, img_h // 2]])
    masks, _, _ = predictor.predict(
        point_coords=center_point,
        point_labels=np.array([1]),
        multimask_output=False,
    )

    # MiDaS depth estimation (run the input on the same device as the model)
    midas_device = next(midas_model.parameters()).device
    input_tensor = midas_transform(image_pil).unsqueeze(0).to(midas_device)
    with torch.no_grad():
        depth_prediction = midas_model(input_tensor).squeeze().cpu().numpy()
    depth_resized = cv2.resize(depth_prediction, (img_w, img_h))

    # Compute object volumes
    volume_data = []
    for i, mask in enumerate(masks):
        mask_np = mask.astype(np.uint8)
        x, y, w, h = cv2.boundingRect(mask_np)

        # Convert the bounding box from pixels to centimetres
        width_cm = w * pixel_to_cm_x
        height_cm = h * pixel_to_cm_y

        depth_masked = depth_resized[mask_np > 0.5]
        if depth_masked.size == 0:
            continue

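        # MiDaS predicts relative, not metric, depth: normalize over the
        # full depth map and scale by the assumed maximum depth. This is a
        # rough proxy for object thickness, not a measured value.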
        normalized_depth = (depth_masked - np.min(depth_resized)) / (np.max(depth_resized) - np.min(depth_resized) + 1e-6)
        depth_cm = np.mean(normalized_depth) * assumed_max_depth_cm

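        # Approximate the object as its axis-aligned bounding box, which
        # overestimates the volume of anything that is not box-shaped.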
        volume_cm3 = round(depth_cm * width_cm * height_cm, 2)

        volume_data.append({
            "Object": f"Object #{i+1}",
            "Length (Depth)": f"{round(depth_cm, 2)} cm",
            "Breadth (Width)": f"{round(width_cm, 2)} cm",
            "Height": f"{round(height_cm, 2)} cm",
            "Volume": f"{volume_cm3} cm³"
        })

    # Display volume results
    if volume_data:
        df = pd.DataFrame(volume_data)
        st.markdown("### Object Dimensions and Volume")
        st.dataframe(df)

        csv = df.to_csv(index=False).encode('utf-8')
        st.download_button("Download Volume Table as CSV", csv, "object_volumes_with_units.csv", "text/csv")
    else:
        st.warning("No objects were segmented.")