import cv2
import numpy as np
import pandas as pd
import streamlit as st
import torch
from PIL import Image
from torchvision.transforms import Compose, Resize, ToTensor, Normalize
from ultralytics import YOLO

st.set_page_config(page_title="Object Volume Estimator", layout="wide")


# Load models once and cache them across Streamlit reruns
@st.cache_resource
def load_models():
    yolo = YOLO("yolov8n.pt")
    midas = torch.hub.load("intel-isl/MiDaS", "DPT_Large")
    midas.eval()
    # DPT_Large needs a fixed square input; Resize(384) alone would only fix
    # the shorter side, and the resulting width can break the ViT backbone.
    transform = Compose([
        Resize((384, 384)),
        ToTensor(),
        Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ])
    return yolo, midas, transform


yolo_model, midas_model, midas_transform = load_models()

# --- Streamlit App ---
st.title("Object Dimension & Volume Estimator")

# 📸 Image source selection
option = st.radio("Choose Image Source:", ("📤 Upload Image", "📸 Use Camera"))

# Mode selection
mode = st.selectbox("Select Image Type:", ["2D RGB Image", "RGB + Depth Image"])

image_pil = None
if option == "📤 Upload Image":
    uploaded_file = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])
    if uploaded_file:
        image_pil = Image.open(uploaded_file).convert("RGB")
elif option == "📸 Use Camera":
    camera_input = st.camera_input("Take a picture")
    if camera_input:
        image_pil = Image.open(camera_input).convert("RGB")

# Ask for a depth map only when RGB + Depth mode is selected
depth_map = None
if mode == "RGB + Depth Image" and image_pil is not None:
    depth_file = st.file_uploader(
        "Upload corresponding Depth Map (grayscale image)",
        type=["png", "jpg", "jpeg", "tiff"],
    )
    if depth_file:
        depth_map = np.array(Image.open(depth_file).convert("L"))

# Proceed only if we have an image
if image_pil is not None:
    # Convert to OpenCV's BGR format for drawing
    image_cv = cv2.cvtColor(np.array(image_pil), cv2.COLOR_RGB2BGR)
    img_h, img_w = image_cv.shape[:2]
    st.image(image_pil, caption="Input Image", use_container_width=True)

    # Collected per-object rows for the results table / CSV export
    object_data = []

    # 🔍 YOLO object detection
    results = yolo_model(image_cv)

    # 📏 Depth estimation. MiDaS predicts *relative* inverse depth, so the
    # dimensions and volumes below are in arbitrary relative units.
    if mode == "2D RGB Image":
        input_tensor = midas_transform(image_pil).unsqueeze(0)
        with torch.no_grad():
            estimated_depth = midas_model(input_tensor).squeeze().cpu().numpy()
        depth_map_resized = cv2.resize(estimated_depth, (img_w, img_h))
    else:
        if depth_map is not None:
            depth_map_resized = cv2.resize(depth_map, (img_w, img_h))
        else:
            st.error("Please upload a corresponding depth map for RGB + Depth mode.")
            st.stop()

    # 🎯 Process each detection
    for r in results:
        for box in r.boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            width = x2 - x1
            height = y2 - y1
            region = depth_map_resized[y1:y2, x1:x2]
            if region.size == 0:
                continue
            # Mean depth over the box stands in for the object's third dimension
            depth = float(np.mean(region))
            volume = round(depth * width * height, 2)

            # Draw bounding box & label
            cv2.rectangle(image_cv, (x1, y1), (x2, y2), (0, 255, 0), 2)
            label = f"L×B×H: {depth:.2f}×{width}×{height} | V: {volume}"
            cv2.putText(image_cv, label, (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 0), 2)

            # Store data for the CSV export
            object_data.append({
                "Length": round(depth, 2),
                "Breadth": int(width),
                "Height": int(height),
                "Volume": volume,
            })

            # Show per-object details
            st.markdown("**🧠 Object Detected:**")
            st.write(f"📏 Length × Breadth × Height: {depth:.2f} × {width} × {height}")
            st.write(f"📦 Estimated Volume: {volume} (relative units³)")

    # Show annotated image (convert back to RGB for display)
    result_img = cv2.cvtColor(image_cv, cv2.COLOR_BGR2RGB)
    st.image(result_img, caption="Detected Dimensions", use_container_width=True)

    # Show table & CSV download if any objects were detected
    if object_data:
        df = pd.DataFrame(object_data)
        st.markdown("### 📋 Detected Objects Table")
        st.dataframe(df)
        csv = df.to_csv(index=False).encode("utf-8")
        st.download_button(
            label="📥 Download Results as CSV",
            data=csv,
            file_name="object_dimensions_volume.csv",
            mime="text/csv",
        )
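
# To run the app locally (assuming this file is saved as app.py and the
# dependencies streamlit, ultralytics, torch, torchvision, opencv-python,
# pillow, and pandas are installed):
#
#   streamlit run app.py
#
# On first run, ultralytics downloads yolov8n.pt automatically and torch.hub
# fetches the MiDaS DPT_Large weights, so an internet connection is required.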