adil9858 committed
Commit 71002bf · verified · 1 Parent(s): c4a3c75

Update app.py

Files changed (1):
  1. app.py +76 -241

app.py CHANGED
@@ -1,259 +1,94 @@
  import streamlit as st
- import base64
  from openai import OpenAI
  from PIL import Image
  import io
  import cv2
  import numpy as np

- # Configure app
- st.set_page_config(
-     page_title="AI Vision Assistant",
-     page_icon="🔍",
-     layout="wide",
-     initial_sidebar_state="expanded"
- )
-
- # Custom CSS for futuristic design
- st.markdown("""
- <style>
-     /* Main colors */
-     :root {
-         --primary: #6366f1;
-         --secondary: #10b981;
-         --dark: #1e293b;
-         --light: #f8fafc;
-     }
-
-     /* Main container */
-     .stApp {
-         background: linear-gradient(135deg, #0f172a 0%, #1e293b 100%);
-         color: var(--light);
-     }
-
-     /* Headers */
-     h1, h2, h3, h4, h5, h6 {
-         color: var(--light) !important;
-         font-family: 'Inter', sans-serif;
-     }
-
-     /* Sidebar */
-     [data-testid="stSidebar"] {
-         background: linear-gradient(195deg, #0f172a 0%, #1e40af 100%) !important;
-     }
-
-     /* Buttons */
-     .stButton>button {
-         background: var(--primary) !important;
-         color: white !important;
-         border: none;
-         border-radius: 8px;
-         padding: 10px 24px;
-         font-weight: 500;
-         transition: all 0.3s;
-     }
-
-     .stButton>button:hover {
-         transform: translateY(-2px);
-         box-shadow: 0 4px 12px rgba(99, 102, 241, 0.3);
-     }
-
-     /* File uploader */
-     [data-testid="stFileUploader"] {
-         border: 2px dashed var(--primary) !important;
-         border-radius: 12px !important;
-         padding: 20px !important;
-     }
-
-     /* Markdown output */
-     .markdown-text {
-         background: rgba(30, 41, 59, 0.7) !important;
-         border-radius: 12px;
-         padding: 20px;
-         border-left: 4px solid var(--secondary);
-         animation: fadeIn 0.5s ease-in-out;
-     }
-
-     @keyframes fadeIn {
-         from { opacity: 0; transform: translateY(10px); }
-         to { opacity: 1; transform: translateY(0); }
-     }
-
-     /* Streamlit text input */
-     .stTextInput>div>div>input {
-         background: rgba(15, 23, 42, 0.7) !important;
-         color: white !important;
-         border: 1px solid #334155 !important;
-     }
- </style>
- """, unsafe_allow_html=True)

- # App title and description
- st.title("🔍 Optimus Alpha | Live Vision Assistant")

- # Initialize OpenAI client
- @st.cache_resource
- def get_client():
-     return OpenAI(
-         base_url="https://openrouter.ai/api/v1",
-         api_key='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'  # Replace with your actual key
      )

- # ===== Camera/Upload Selection =====
- input_method = st.radio(
-     "Select input method:",
-     ["Live Camera", "Upload Image"],
-     horizontal=True
- )

- # ===== Camera Section =====
- captured_image = None

- if input_method == "Live Camera":
-     st.subheader("Live Camera Feed")
-     run_camera = st.checkbox("Start Camera", value=False)
-
-     FRAME_WINDOW = st.empty()
-
-     if run_camera:
-         try:
-             cap = cv2.VideoCapture(1)
-             if not cap.isOpened():
-                 st.error("Could not access camera. Please:")
-                 st.markdown("""
-                 - Check camera permissions
-                 - Ensure no other app is using the camera
-                 - Try reconnecting the camera
-                 """)
-                 run_camera = False
-             else:
-                 capture_col, stop_col = st.columns(2)
-                 with capture_col:
-                     capture_button = st.button("📸 Capture Image")
-                 with stop_col:
-                     stop_button = st.button("🛑 Stop Camera")
-
-                 if stop_button:
-                     cap.release()
-                     st.rerun()
-
-                 while run_camera:
-                     ret, frame = cap.read()
-                     if not ret:
-                         st.error("Failed to capture frame")
-                         break
-
-                     frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-                     FRAME_WINDOW.image(frame)
-
-                     if capture_button:
-                         captured_image = frame
-                         cap.release()
-                         st.rerun()
-                         break
-         except Exception as e:
-             st.error(f"Camera error: {str(e)}")
-             run_camera = False

- # ===== Upload Section =====
- else:
-     st.subheader("Upload Image")
-     uploaded_file = st.file_uploader(
-         "Choose an image file",
-         type=["jpg", "jpeg", "png"],
-         label_visibility="collapsed"
-     )
-     if uploaded_file:
-         try:
-             captured_image = Image.open(uploaded_file)
-             st.image(captured_image, caption="Uploaded Image", width=300)
-         except Exception as e:
-             st.error(f"Error loading image: {str(e)}")

- # ===== Image Analysis Section =====
- if captured_image is not None:
-     st.subheader("AI Analysis")
-
-     # Convert to PIL Image if from OpenCV
-     if isinstance(captured_image, np.ndarray):
-         image = Image.fromarray(captured_image)
-     else:
-         image = captured_image
-
-     user_prompt = st.text_input(
-         "Ask about the image:",
-         placeholder="e.g. 'What is in this image?' or 'Explain this diagram'",
-         key="user_prompt"
-     )
-
-     if st.button("Analyze Image", type="primary"):
-         try:
-             # Convert image to base64
-             buffered = io.BytesIO()
-             image.save(buffered, format="JPEG")
-             image_base64 = base64.b64encode(buffered.getvalue()).decode("utf-8")
-
-             # Prepare messages
-             messages = [
-                 {
-                     "role": "system",
-                     "content": """You are an expert vision assistant. Analyze images with:
-                     - Clear, structured responses
-                     - Bullet points for multiple objects
-                     - Concise explanations
-                     - Highlight important findings in bold"""
-                 },
-                 {
-                     "role": "user",
-                     "content": [
-                         {
-                             "type": "text",
-                             "text": user_prompt if user_prompt else "Describe this image in detail"
-                         },
-                         {
-                             "type": "image_url",
-                             "image_url": {
-                                 "url": f"data:image/jpeg;base64,{image_base64}"
-                             }
-                         }
-                     ]
-                 }
-             ]
-
-             # Stream the response
-             response_container = st.empty()
-             full_response = ""
-
-             client = get_client()
-             stream = client.chat.completions.create(
-                 model="openrouter/optimus-alpha",
-                 messages=messages,
-                 stream=True
-             )
-
-             for chunk in stream:
-                 if chunk.choices[0].delta.content is not None:
-                     full_response += chunk.choices[0].delta.content
-                     response_container.markdown(f"""
-                     <div class="markdown-text">
-                     {full_response}
-                     </div>
-                     """, unsafe_allow_html=True)
-
-         except Exception as e:
-             st.error(f"Analysis error: {str(e)}")

- # Sidebar
- with st.sidebar:
-     st.image("https://via.placeholder.com/200", width=200)  # Replace with your logo
-     st.markdown("""
-     *Powered by OpenRouter*
-     """)
-     st.markdown("---")
-     st.markdown("""
-     **Tips:**
-     - For best results, use clear, well-lit images
-     - Ask specific questions for detailed answers
-     """)
-     st.markdown("Made with ❤️ by Koshur AI")

  import streamlit as st
  from openai import OpenAI
  from PIL import Image
  import io
+ import os
+ import uuid
+ from gtts import gTTS
  import cv2
  import numpy as np

+ # --- Configuration ---
+ API_KEY = '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'
+ REFERER_URL = "https://your-site.com"
+ SITE_TITLE = "SightNarrator"

+ client = OpenAI(
+     base_url="https://openrouter.ai/api/v1",
+     api_key=API_KEY
+ )
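Note: as in the previous revision, this commits a live `sk-or-v1-…` key into the repository, where anyone with read access can take it. A minimal sketch of loading the key from the environment instead; the `OPENROUTER_API_KEY` variable name is an assumption, not something this commit defines:

import os
import streamlit as st

# Assumed env var name; export OPENROUTER_API_KEY (or put it in
# .streamlit/secrets.toml and read st.secrets) before launching the app.
API_KEY = os.environ.get("OPENROUTER_API_KEY")
if not API_KEY:
    st.error("OPENROUTER_API_KEY is not set.")
    st.stop()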

+ # --- Helper Functions ---

+ def describe_image(image_url):
+     response = client.chat.completions.create(
+         extra_headers={
+             "HTTP-Referer": REFERER_URL,
+             "X-Title": SITE_TITLE,
+         },
+         model="opengvlab/internvl3-14b:free",
+         messages=[
+             {
+                 "role": "user",
+                 "content": [
+                     {"type": "text", "text": "Describe this image clearly, including objects, scene, and any visible text. Also warn about potential hazards like wet floors, stairs, obstacles."},
+                     {"type": "image_url", "image_url": {"url": image_url}}
+                 ]
+             }
+         ]
      )
+     return response.choices[0].message.content

+ def speak(text, filename=None):
+     if not filename:
+         filename = f"audio_{uuid.uuid4()}.mp3"
+     tts = gTTS(text=text, lang='en')
+     tts.save(filename)
+     return filename

+ def image_to_array(uploaded_image):
+     img = Image.open(uploaded_image)
+     img = img.convert('RGB')  # Ensure 3 channels
+     return np.array(img)

+ def array_to_base64(img_array):
+     _, buffer = cv2.imencode('.jpg', img_array)
+     return "data:image/jpeg;base64," + buffer.tobytes().hex()
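Note: as written, `array_to_base64` returns hex digits (`bytes.hex()`), not base64, so the `data:image/jpeg;base64,…` URI it builds would be rejected by any consumer. `cv2.imencode` also expects BGR channel order, while `image_to_array` returns RGB, so colors would come out swapped. A corrected sketch using the standard `base64` module (this assumes `import base64`, which this commit removed, is restored at the top of the file):

import base64

def array_to_base64(img_array):
    # image_to_array yields RGB; cv2.imencode expects BGR.
    bgr = cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR)
    ok, buffer = cv2.imencode('.jpg', bgr)
    if not ok:
        raise ValueError("JPEG encoding failed")
    return "data:image/jpeg;base64," + base64.b64encode(buffer.tobytes()).decode("utf-8")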

+ # --- Streamlit UI ---

+ st.set_page_config(page_title="AI Visual Assistant for the Blind", layout="centered")
+ st.title("👁️ AI Visual Assistant for the Blind")
+ st.markdown("Use your **camera** to capture the world around you.")
+
+ st.subheader("📸 Take a Picture")
+ camera_image = st.camera_input("Capture a frame from your camera")
+
+ if camera_image is not None:
+     st.image(camera_image, caption="Captured Frame", use_column_width=True)
+
+     with st.spinner("Analyzing the scene..."):
+         # Save temporarily
+         temp_path = f"temp_frame_{uuid.uuid4()}.jpg"
+         pil_img = Image.open(camera_image).convert("RGB")
+         pil_img.save(temp_path)
+
+         # Simulate URL (in production, you'd upload to cloud storage)
+         image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
+
+         description = describe_image(image_url)
+
+         st.subheader("📝 Description")
+         st.write(description)
+
+         st.subheader("🔊 Audio Narration")
+         audio_file = speak(description)
+         audio_bytes = open(audio_file, 'rb').read()
+         st.audio(audio_bytes, format='audio/mp3')
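Note: gTTS can synthesize straight into memory via its `write_to_fp` method, which would avoid the temporary MP3 file and the cleanup step below. A sketch, reusing the app's existing `io` and `gTTS` imports:

def speak_bytes(text):
    buf = io.BytesIO()
    gTTS(text=text, lang='en').write_to_fp(buf)  # never touches disk
    return buf.getvalue()

st.audio accepts raw bytes directly, so the narration block would become st.audio(speak_bytes(description), format='audio/mp3').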

+     # Cleanup
+     os.remove(temp_path)
+     os.remove(audio_file)
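Note: because `image_url` is hardcoded to the Wikipedia boardwalk photo, the model always describes that image rather than the captured frame, and the `image_to_array`/`array_to_base64` helpers are never called. OpenRouter's OpenAI-compatible endpoint accepts base64 data URLs in `image_url`, so the frame could be sent directly with no cloud upload. A sketch, assuming the corrected `array_to_base64` above:

# Inside the `if camera_image is not None:` block, replacing the simulated URL:
camera_image.seek(0)                      # rewind; Image.open already consumed the buffer
img_array = image_to_array(camera_image)  # st.camera_input returns a file-like object
image_url = array_to_base64(img_array)    # "data:image/jpeg;base64,..." URI
description = describe_image(image_url)

This wiring would also make the `temp_path` save-and-delete dance unnecessary.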
+ st.markdown("---")
94
+ st.markdown("*Built with πŸ’‘ using Streamlit, OpenRouter, and gTTS.*")