lakshmi082024 committed on
Commit 18d448a · verified · 1 Parent(s): 45d3d65

Update app.py

Files changed (1)
  1. app.py +126 -129
app.py CHANGED
@@ -1,129 +1,126 @@
- import streamlit as st
- st.set_page_config(page_title="Object Volume Estimator", layout="wide")
-
- import cv2
- import torch
- import numpy as np
- from PIL import Image
- import pandas as pd
- from ultralytics import YOLO
- from torchvision.transforms import Compose, Resize, ToTensor, Normalize
-
- # Load models
- @st.cache_resource
- def load_models():
-     yolo = YOLO("yolov8n.pt")
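-     # NOTE: ultralytics fetches the yolov8n.pt weights automatically on first
-     # run if they are not already present locally.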
-     midas = torch.hub.load("intel-isl/MiDaS", "DPT_Large")
-     midas.eval()
-     transform = Compose([
-         Resize(384),
-         ToTensor(),
-         Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
-     ])
-     return yolo, midas, transform
-
- yolo_model, midas_model, midas_transform = load_models()
-
- # --- Streamlit App ---
- st.title("Object Dimension & Volume Estimator")
-
- # 📸 Image Source Selection
- option = st.radio("Choose Image Source:", ("📤 Upload Image", "📸 Use Camera"))
-
- # Mode Selection
- mode = st.selectbox("Select Image Type:", ["2D RGB Image", "RGB + Depth Image"])
-
- image_pil = None
-
- if option == "📤 Upload Image":
-     uploaded_file = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])
-     if uploaded_file:
-         image_pil = Image.open(uploaded_file).convert("RGB")
-
- elif option == "📸 Use Camera":
-     camera_input = st.camera_input("Take a picture")
-     if camera_input:
-         image_pil = Image.open(camera_input).convert("RGB")
-
- # Upload depth image if RGB + Depth selected
- depth_map = None
- if mode == "RGB + Depth Image" and image_pil is not None:
-     depth_file = st.file_uploader("Upload corresponding Depth Map (grayscale image)", type=["png", "jpg", "jpeg", "tiff"])
-     if depth_file:
-         depth_map = np.array(Image.open(depth_file).convert("L"))
-
- # Proceed if we have an image
- if image_pil is not None:
-     # Convert to OpenCV format
-     image_cv = np.array(image_pil)
-     image_cv = cv2.cvtColor(image_cv, cv2.COLOR_RGB2BGR)
-     img_h, img_w = image_cv.shape[:2]
-     st.image(image_pil, caption="Input Image", use_container_width=True)
-
-     # Store detected object data
-     object_data = []
-
-     # 🔍 YOLO Object Detection
-     results = yolo_model(image_cv)
-
-     # 📏 Depth Estimation
-     if mode == "2D RGB Image":
-         input_tensor = midas_transform(image_pil).unsqueeze(0)
-         with torch.no_grad():
-             estimated_depth = midas_model(input_tensor).squeeze().cpu().numpy()
-         depth_map_resized = cv2.resize(estimated_depth, (img_w, img_h))
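-         # NOTE: MiDaS predicts relative (inverse) depth with no metric scale,
-         # which is why the output below is labelled "relative units".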
-     else:
-         if depth_map is not None:
-             depth_map_resized = cv2.resize(depth_map, (img_w, img_h))
-         else:
-             st.error("Please upload a corresponding depth map for 3D images.")
-             st.stop()
-
-     # 🎯 Process each detection
-     for r in results:
-         for box in r.boxes:
-             x1, y1, x2, y2 = map(int, box.xyxy[0])
-             width = x2 - x1
-             height = y2 - y1
-             region = depth_map_resized[y1:y2, x1:x2]
-             if region.size == 0:
-                 continue
-             depth = np.mean(region)
-             volume = round(depth * width * height, 2)
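-             # NOTE: width/height are pixels and depth is a unitless mean over
-             # the box, so this "volume" is only a relative estimate.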
-
-             # Draw bounding box & label
-             cv2.rectangle(image_cv, (x1, y1), (x2, y2), (0, 255, 0), 2)
-             label = f"L×B×H: {depth:.2f}×{width}×{height} | V: {volume}"
-             cv2.putText(image_cv, label, (x1, y1 - 10),
-                         cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 0), 2)
-
-             # Store data for CSV
-             object_data.append({
-                 "Length": round(depth, 2),
-                 "Breadth": int(width),
-                 "Height": int(height),
-                 "Volume": volume
-             })
-
-             # Show object details
-             st.markdown(f"**🧠 Object Detected:**")
-             st.write(f"📏 Length × Breadth × Height: {depth:.2f} × {width} × {height}")
-             st.write(f"📦 Estimated Volume: {volume} (relative units³)")
-
-     # Show annotated image
-     result_img = cv2.cvtColor(image_cv, cv2.COLOR_BGR2RGB)
-     st.image(result_img, caption="Detected Dimensions", use_container_width=True)
-
-     # Show table & CSV download if data exists
-     if object_data:
-         df = pd.DataFrame(object_data)
-         st.markdown("### 📋 Detected Objects Table")
-         st.dataframe(df)
-
-         csv = df.to_csv(index=False).encode('utf-8')
-         st.download_button(
-             label="📥 Download Results as CSV",
-             data=csv,
-             file_name='object_dimensions_volume.csv',
-             mime='text/csv',
-         )
 
+ import streamlit as st
+ import cv2
+ import numpy as np
+ import pandas as pd
+ from PIL import Image
+ import torch
+ from torchvision.transforms import Compose, Resize, ToTensor, Normalize
+ from segment_anything import SamPredictor, sam_model_registry
+
+ # Set Streamlit configuration
+ st.set_page_config(page_title="Volume Estimator", layout="wide")
+ st.title("📦 Volume Estimation using SAM Segmentation + MiDaS Depth")
+
+ # Load SAM and MiDaS models
+ @st.cache_resource
+ def load_models():
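+     # NOTE: machine-specific local path; point this at your own copy of the
+     # ViT-H SAM checkpoint (sam_vit_h_4b8939.pth from the segment-anything release).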
+     sam_checkpoint = "C:/Users/Administrator/Desktop/streamlit_tl/models/sam_vit_h_4b8939.pth"
+     sam = sam_model_registry["vit_h"](checkpoint=sam_checkpoint).to("cuda" if torch.cuda.is_available() else "cpu")
+     predictor = SamPredictor(sam)
+
+     midas = torch.hub.load("intel-isl/MiDaS", "DPT_Large")
+     midas.eval()
+     midas_transform = Compose([
+         Resize(384),
+         ToTensor(),
+         Normalize(mean=[0.5]*3, std=[0.5]*3)
+     ])
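+     # NOTE: this hand-rolled preprocessing only approximates the official MiDaS
+     # pipeline; torch.hub.load("intel-isl/MiDaS", "transforms") exposes the
+     # reference dpt_transform for DPT_Large.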
+     return predictor, midas, midas_transform
+
+ predictor, midas_model, midas_transform = load_models()
+
+ # Input source selection
+ source_option = st.radio("Select input source", ("Upload Image", "Use Webcam"))
+
+ uploaded_file = None
+ image_pil = None
+
+ if source_option == "Upload Image":
+     uploaded_file = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])
+     if uploaded_file:
+         image_pil = Image.open(uploaded_file).convert("RGB")
+
+ elif source_option == "Use Webcam":
+     run_camera = st.checkbox("Start Camera")
+
+     if run_camera:
+         cap = cv2.VideoCapture(0)
+         stframe = st.empty()
+         capture = False
+
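+         # NOTE: Streamlit reruns the whole script on every interaction, so a
+         # st.button inside this capture loop is fragile; st.camera_input is
+         # the more robust pattern for taking a single frame.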
+         while run_camera and cap.isOpened():
+             ret, frame = cap.read()
+             if not ret:
+                 break
+             frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+             stframe.image(frame_rgb, caption="Live Camera Feed", channels="RGB")
+
+             if st.button("📸 Capture Frame"):
+                 image_pil = Image.fromarray(frame_rgb)
+                 run_camera = False
+                 cap.release()
+                 break
+
+ # Continue processing if we have an image
+ if image_pil:
+     image_np = np.array(image_pil)
+     img_h, img_w = image_np.shape[:2]
+     st.image(image_pil, caption="Selected Image", use_container_width=True)
+
+     # Real-world reference dimensions
+     real_image_width_cm = 100
+     real_image_height_cm = 75
+     assumed_max_depth_cm = 100
+
+     pixel_to_cm_x = real_image_width_cm / img_w
+     pixel_to_cm_y = real_image_height_cm / img_h
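+     # NOTE: these factors assume the frame spans exactly 100 cm x 75 cm of the
+     # scene; adjust the reference constants above to match the actual setup.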
+
+     # SAM Segmentation
+     predictor.set_image(image_np)
+     masks, _, _ = predictor.predict(multimask_output=False)
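+     # NOTE: predict() is called without point or box prompts, so SAM returns a
+     # single unprompted mask; SamAutomaticMaskGenerator is the usual API for
+     # prompt-free, multi-object segmentation.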
+
+     # MiDaS Depth Estimation
+     input_tensor = midas_transform(image_pil).unsqueeze(0)
+     with torch.no_grad():
+         depth_prediction = midas_model(input_tensor).squeeze().cpu().numpy()
+     depth_resized = cv2.resize(depth_prediction, (img_w, img_h))
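+     # NOTE: MiDaS outputs relative (inverse) depth with no metric scale; the
+     # normalization below maps it onto the assumed 0-100 cm range heuristically.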
+
+     # Object volume computation
+     volume_data = []
+     for i, mask in enumerate(masks):
+         mask_np = mask
+         x, y, w, h = cv2.boundingRect(mask_np.astype(np.uint8))
+         width_px = w
+         height_px = h
+
+         width_cm = width_px * pixel_to_cm_x
+         height_cm = height_px * pixel_to_cm_y
+
+         depth_masked = depth_resized[mask_np > 0.5]
+
+         if depth_masked.size == 0:
+             continue
+
+         normalized_depth = (depth_masked - np.min(depth_resized)) / (np.max(depth_resized) - np.min(depth_resized) + 1e-6)
+         depth_cm = np.mean(normalized_depth) * assumed_max_depth_cm
+
+         volume_cm3 = round(depth_cm * width_cm * height_cm, 2)
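+         # NOTE: a rough cuboid estimate from the mask's bounding box and mean
+         # relative depth, not a true measured volume.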
+
+         volume_data.append({
+             "Object": f"Object #{i+1}",
+             "Length (Depth)": f"{round(depth_cm, 2)} cm",
+             "Breadth (Width)": f"{round(width_cm, 2)} cm",
+             "Height": f"{round(height_cm, 2)} cm",
+             "Volume": f"{volume_cm3} cm³"
+         })
+
+     # Display volume table
+     if volume_data:
+         df = pd.DataFrame(volume_data)
+         st.markdown("### 📊 Object Dimensions and Volume")
+         st.dataframe(df)
+
+         csv = df.to_csv(index=False).encode('utf-8')
+         st.download_button("📂 Download Volume Table as CSV", csv, "object_volumes_with_units.csv", "text/csv")
+     else:
+         st.warning("🚫 No objects were segmented.")