# File size: 4,561 Bytes
# 45d3d65
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import streamlit as st
st.set_page_config(page_title="Object Volume Estimator", layout="wide")

import cv2
import torch
import numpy as np
from PIL import Image
import pandas as pd
from ultralytics import YOLO
from torchvision.transforms import Compose, Resize, ToTensor, Normalize

# Load models
def _resize_for_dpt(image, short_side=384, multiple=32):
    """Resize a PIL image so it is a valid input size for the MiDaS DPT model.

    The shorter side is scaled to roughly ``short_side`` while keeping the
    aspect ratio, then both sides are snapped to the nearest multiple of
    ``multiple``. The official MiDaS DPT transform applies the same
    multiple-of-32 constraint; arbitrary sizes can produce mismatched
    feature-map shapes inside the network.

    Args:
        image: PIL image to resize.
        short_side: Target length of the shorter side, in pixels.
        multiple: Both output sides are rounded to a multiple of this value.

    Returns:
        The resized PIL image.
    """
    width, height = image.size
    scale = short_side / min(width, height)
    # Never round down to zero for extremely thin images.
    new_width = max(multiple, round(width * scale / multiple) * multiple)
    new_height = max(multiple, round(height * scale / multiple) * multiple)
    return image.resize((new_width, new_height), Image.BICUBIC)


@st.cache_resource
def load_models():
    """Load the detection and depth models once per Streamlit server process.

    Returns:
        A ``(yolo, midas, transform)`` tuple: the YOLOv8-nano detector, the
        MiDaS ``DPT_Large`` depth network switched to eval mode, and a
        transform that turns a PIL RGB image into a normalized CHW tensor
        (no batch dimension) for the depth network.
    """
    yolo = YOLO("yolov8n.pt")
    midas = torch.hub.load("intel-isl/MiDaS", "DPT_Large")
    midas.eval()
    transform = Compose([
        # Plain Resize(384) leaves the longer side at an arbitrary length;
        # snap both sides to multiples of 32 instead.
        _resize_for_dpt,
        ToTensor(),
        Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ])
    return yolo, midas, transform

yolo_model, midas_model, midas_transform = load_models()

# --- Streamlit App ---
st.title("Object Dimension & Volume Estimator")

# 📸 Image source selection.
# Each label is defined once so the radio options and the branches below
# cannot drift apart.
UPLOAD_OPTION = "📤 Upload Image"
CAMERA_OPTION = "📸 Use Camera"
option = st.radio("Choose Image Source:", (UPLOAD_OPTION, CAMERA_OPTION))

# Mode selection
mode = st.selectbox("Select Image Type:", ["2D RGB Image", "RGB + Depth Image"])

# RGB input image as a PIL image, or None until the user provides one.
image_pil = None

if option == UPLOAD_OPTION:
    uploaded_file = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])
    if uploaded_file:
        image_pil = Image.open(uploaded_file).convert("RGB")

elif option == CAMERA_OPTION:
    camera_input = st.camera_input("Take a picture")
    if camera_input:
        image_pil = Image.open(camera_input).convert("RGB")

# Upload depth image if RGB + Depth selected
depth_map = None
if mode == "RGB + Depth Image" and image_pil is not None:
    depth_file = st.file_uploader(
        "Upload corresponding Depth Map (grayscale image)",
        type=["png", "jpg", "jpeg", "tif", "tiff"],
    )
    if depth_file:
        depth_image = Image.open(depth_file)
        if depth_image.mode == "F" or depth_image.mode.startswith("I"):
            # 16/32-bit integer and float depth maps: converting these to
            # mode "L" clips every value to 0-255, so keep the raw values.
            # float32 is used because cv2.resize accepts it for all of them.
            depth_map = np.asarray(depth_image, dtype=np.float32)
        else:
            # 8-bit or colour-encoded images: collapse to one grey channel.
            depth_map = np.array(depth_image.convert("L"))

# Proceed if we have an image
if image_pil is not None:
    # OpenCV works on BGR arrays; the PIL image is kept for display and MiDaS.
    image_cv = cv2.cvtColor(np.array(image_pil), cv2.COLOR_RGB2BGR)
    img_h, img_w = image_cv.shape[:2]
    st.image(image_pil, caption="Input Image", use_container_width=True)

    # 📏 Depth estimation.
    # Resolved before detection so a missing depth map stops the run without
    # paying for a YOLO forward pass first.
    if mode == "2D RGB Image":
        input_tensor = midas_transform(image_pil).unsqueeze(0)
        with torch.no_grad():
            estimated_depth = midas_model(input_tensor).squeeze().cpu().numpy()
        # Bring the network output back to the input image resolution.
        depth_map_resized = cv2.resize(estimated_depth, (img_w, img_h))
    elif depth_map is not None:
        depth_map_resized = cv2.resize(depth_map, (img_w, img_h))
    else:
        st.error("Please upload a corresponding depth map for 3D images.")
        st.stop()

    # 🔍 YOLO object detection
    results = yolo_model(image_cv)

    # One dict per detected object; becomes the results table / CSV.
    object_data = []

    # 🎯 Process each detection
    for r in results:
        for box in r.boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            # Keep the box inside the frame: a negative index would silently
            # wrap around in NumPy slicing and average the wrong pixels.
            x1, y1 = max(x1, 0), max(y1, 0)
            x2, y2 = min(x2, img_w), min(y2, img_h)
            width = x2 - x1
            height = y2 - y1
            region = depth_map_resized[y1:y2, x1:x2]
            if region.size == 0:
                continue
            # Plain Python float so the volume is not computed in float32.
            depth = float(np.mean(region))
            volume = round(depth * width * height, 2)

            # Draw bounding box & label. cv2.putText can only render its
            # built-in Hershey glyphs, so the overlay uses ASCII "x".
            cv2.rectangle(image_cv, (x1, y1), (x2, y2), (0, 255, 0), 2)
            label = f"LxBxH: {depth:.2f}x{width}x{height} | V: {volume}"
            # Keep the text on the image when the box touches the top edge.
            label_y = max(y1 - 10, 15)
            cv2.putText(image_cv, label, (x1, label_y),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 0), 2)

            # Store data for CSV
            object_data.append({
                "Length": round(depth, 2),
                "Breadth": width,
                "Height": height,
                "Volume": volume,
            })

            # Show object details
            st.markdown("**🧠 Object Detected:**")
            st.write(f"📏 Length × Breadth × Height: {depth:.2f} × {width} × {height}")
            st.write(f"📦 Estimated Volume: {volume} (relative units³)")

    # Show annotated image (convert back to RGB for Streamlit)
    result_img = cv2.cvtColor(image_cv, cv2.COLOR_BGR2RGB)
    st.image(result_img, caption="Detected Dimensions", use_container_width=True)

    # Show table & CSV download if data exists
    if object_data:
        df = pd.DataFrame(object_data)
        st.markdown("### 📋 Detected Objects Table")
        st.dataframe(df)

        csv_bytes = df.to_csv(index=False).encode('utf-8')
        st.download_button(
            label="📥 Download Results as CSV",
            data=csv_bytes,
            file_name='object_dimensions_volume.csv',
            mime='text/csv',
        )
    else:
        st.info("No objects were detected in this image.")