# Source: Hugging Face Spaces - adil9858/blind_vision
# Space status: Sleeping
# File size: 7,738 bytes

import streamlit as st
import base64
from openai import OpenAI
from PIL import Image
import io
import cv2
import numpy as np

# Configure app
# Page-level settings are gathered in one mapping and unpacked into the call.
# This is the first Streamlit command issued by the script.
_PAGE_SETTINGS = {
    "page_title": "AI Vision Assistant",
    "page_icon": "🔍",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_PAGE_SETTINGS)

# Custom CSS for futuristic design
# The stylesheet lives in a named constant and is injected as raw HTML, which
# is why unsafe_allow_html=True is required for the <style> tag to take effect.
_FUTURISTIC_CSS = """
<style>
    /* Main colors */
    :root {
        --primary: #6366f1;
        --secondary: #10b981;
        --dark: #1e293b;
        --light: #f8fafc;
    }
    
    /* Main container */
    .stApp {
        background: linear-gradient(135deg, #0f172a 0%, #1e293b 100%);
        color: var(--light);
    }
    
    /* Headers */
    h1, h2, h3, h4, h5, h6 {
        color: var(--light) !important;
        font-family: 'Inter', sans-serif;
    }
    
    /* Sidebar */
    [data-testid="stSidebar"] {
        background: linear-gradient(195deg, #0f172a 0%, #1e40af 100%) !important;
    }
    
    /* Buttons */
    .stButton>button {
        background: var(--primary) !important;
        color: white !important;
        border: none;
        border-radius: 8px;
        padding: 10px 24px;
        font-weight: 500;
        transition: all 0.3s;
    }
    
    .stButton>button:hover {
        transform: translateY(-2px);
        box-shadow: 0 4px 12px rgba(99, 102, 241, 0.3);
    }
    
    /* File uploader */
    [data-testid="stFileUploader"] {
        border: 2px dashed var(--primary) !important;
        border-radius: 12px !important;
        padding: 20px !important;
    }
    
    /* Markdown output */
    .markdown-text {
        background: rgba(30, 41, 59, 0.7) !important;
        border-radius: 12px;
        padding: 20px;
        border-left: 4px solid var(--secondary);
        animation: fadeIn 0.5s ease-in-out;
    }
    
    @keyframes fadeIn {
        from { opacity: 0; transform: translateY(10px); }
        to { opacity: 1; transform: translateY(0); }
    }
    
    /* Streamlit text input */
    .stTextInput>div>div>input {
        background: rgba(15, 23, 42, 0.7) !important;
        color: white !important;
        border: 1px solid #334155 !important;
    }
</style>
"""
st.markdown(_FUTURISTIC_CSS, unsafe_allow_html=True)

# App title and description
# Heading rendered at the top of the main page area.
_APP_TITLE = "🔍 Optimus Alpha | Live Vision Assistant"
st.title(_APP_TITLE)

# Initialize OpenAI client
@st.cache_resource
def get_client():
    """Return an OpenAI-compatible client that targets the OpenRouter API.

    The API key is read from the ``OPENROUTER_API_KEY`` environment variable
    rather than being embedded in the source file, so the credential is never
    committed or published together with the code. Any key that was previously
    hard-coded here should be treated as compromised and revoked.

    Returns:
        OpenAI: a client configured with the OpenRouter base URL and the key
        taken from the environment.

    Raises:
        RuntimeError: if ``OPENROUTER_API_KEY`` is unset or blank.
    """
    # Function-scope import keeps this block self-contained without touching
    # the file's top-level import list.
    import os

    api_key = os.environ.get("OPENROUTER_API_KEY", "").strip()
    if not api_key:
        # Fail with an actionable message instead of sending an empty key and
        # surfacing an opaque authentication error from the remote API.
        raise RuntimeError(
            "OPENROUTER_API_KEY is not set. Define it as an environment "
            "variable (or as a secret of the hosting platform) before "
            "running the app."
        )

    return OpenAI(
        base_url="https://openrouter.ai/api/v1",
        api_key=api_key,
    )

# ===== Camera/Upload Selection =====
# The two mutually exclusive image sources offered to the user; the selected
# label is compared against "Live Camera" further down the script.
_INPUT_METHODS = ["Live Camera", "Upload Image"]
input_method = st.radio("Select input method:", _INPUT_METHODS, horizontal=True)

# ===== Camera Section =====
# Session-state key under which the captured RGB frame is kept. A plain
# module-level variable is reset on every script rerun, so the frame has to
# live in st.session_state to still be available after st.rerun().
_CAPTURED_FRAME_KEY = "captured_frame"
# Widget key of the "Start Camera" checkbox, so a callback can untick it.
_RUN_CAMERA_KEY = "run_camera"
# Device index handed to cv2.VideoCapture.
# NOTE(review): index 1 is kept from the original code; it presumably selects
# a secondary camera (OpenCV's usual default device is 0) - confirm.
_CAMERA_INDEX = 1


def _stop_camera():
    """Button callback: untick the "Start Camera" checkbox.

    It is attached through ``on_click`` so the checkbox state is changed
    before the rerun re-creates the widget; the live loop is then not entered
    again.
    """
    st.session_state[_RUN_CAMERA_KEY] = False


# Image handed to the analysis section below; stays None when nothing has
# been captured or uploaded.
captured_image = None

if input_method == "Live Camera":
    st.subheader("Live Camera Feed")
    # Unticked by default (the checkbox default value is False).
    run_camera = st.checkbox("Start Camera", key=_RUN_CAMERA_KEY)

    FRAME_WINDOW = st.empty()

    stored_frame = st.session_state.get(_CAPTURED_FRAME_KEY)
    if stored_frame is not None:
        # A frame was captured on an earlier run: show it instead of
        # restarting the blocking live loop, so the rest of the script
        # (the analysis section) is reached.
        FRAME_WINDOW.image(stored_frame, caption="Captured Image")
        if st.button("🔄 Retake"):
            del st.session_state[_CAPTURED_FRAME_KEY]
            st.rerun()
        captured_image = stored_frame
    elif run_camera:
        cap = None
        frame_captured = False
        try:
            cap = cv2.VideoCapture(_CAMERA_INDEX)
            if not cap.isOpened():
                st.error("Could not access camera. Please:")
                st.markdown("""
                - Check camera permissions
                - Ensure no other app is using the camera
                - Try reconnecting the camera
                """)
                run_camera = False
            else:
                capture_col, stop_col = st.columns(2)
                with capture_col:
                    capture_button = st.button("📸 Capture Image")
                with stop_col:
                    # The callback unticks the checkbox, so the rerun caused
                    # by the click does not enter this branch again.
                    st.button("🛑 Stop Camera", on_click=_stop_camera)

                while run_camera:
                    ret, frame = cap.read()
                    if not ret:
                        st.error("Failed to capture frame")
                        break

                    # OpenCV delivers BGR; convert to RGB for display and PIL.
                    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    FRAME_WINDOW.image(frame)

                    if capture_button:
                        # Persist the frame so it survives the rerun below.
                        st.session_state[_CAPTURED_FRAME_KEY] = frame
                        frame_captured = True
                        break
        except Exception as e:
            st.error(f"Camera error: {str(e)}")
            run_camera = False
        finally:
            # Release the device on every exit path: normal break, read
            # failure, an exception, or the script being interrupted.
            if cap is not None:
                cap.release()

        if frame_captured:
            # Rerun outside the try block; the next run takes the
            # stored-frame branch above.
            st.rerun()

# ===== Upload Section =====
else:
    st.subheader("Upload Image")
    uploaded_file = st.file_uploader(
        "Choose an image file",
        type=["jpg", "jpeg", "png"],
        label_visibility="collapsed"
    )
    if uploaded_file:
        try:
            captured_image = Image.open(uploaded_file)
            st.image(captured_image, caption="Uploaded Image", width=300)
        except Exception as e:
            st.error(f"Error loading image: {str(e)}")

# ===== Image Analysis Section =====
# Runs only when an image is available. The image is JPEG-encoded, embedded
# as a base64 data URL in a chat request, and the streamed reply is rendered
# incrementally in a single placeholder.
if captured_image is not None:
    st.subheader("AI Analysis")

    # Convert to PIL Image if from OpenCV
    if isinstance(captured_image, np.ndarray):
        image = Image.fromarray(captured_image)
    else:
        image = captured_image

    user_prompt = st.text_input(
        "Ask about the image:",
        placeholder="e.g. 'What is in this image?' or 'Explain this diagram'",
        key="user_prompt"
    )

    if st.button("Analyze Image", type="primary"):
        try:
            # Convert image to base64.
            # JPEG cannot store alpha or palette modes (e.g. RGBA / P images
            # from PNG uploads), so normalise to RGB before encoding.
            jpeg_ready = image if image.mode == "RGB" else image.convert("RGB")
            buffered = io.BytesIO()
            jpeg_ready.save(buffered, format="JPEG")
            image_base64 = base64.b64encode(buffered.getvalue()).decode("utf-8")

            # Prepare messages
            messages = [
                {
                    "role": "system",
                    "content": """You are an expert vision assistant. Analyze images with:
- Clear, structured responses
- Bullet points for multiple objects
- Concise explanations
- Highlight important findings in bold"""
                },
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            # Fall back to a generic request when the prompt is empty.
                            "text": user_prompt if user_prompt else "Describe this image in detail"
                        },
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/jpeg;base64,{image_base64}"
                            }
                        }
                    ]
                }
            ]

            # Stream the response
            response_container = st.empty()
            full_response = ""

            client = get_client()
            stream = client.chat.completions.create(
                model="openrouter/optimus-alpha",
                messages=messages,
                stream=True
            )

            for chunk in stream:
                # A streamed chunk may carry an empty choices list; skip it
                # instead of failing with IndexError.
                if not chunk.choices:
                    continue
                piece = chunk.choices[0].delta.content
                if piece is not None:
                    full_response += piece
                    # NOTE(review): model output is rendered with
                    # unsafe_allow_html=True - confirm this is acceptable.
                    response_container.markdown(f"""
                        <div class="markdown-text">
                        {full_response}
                        </div>
                    """, unsafe_allow_html=True)

        except Exception as e:
            st.error(f"Analysis error: {str(e)}")

# Sidebar
# Object notation on st.sidebar is used instead of a `with` block; each call
# places its element in the sidebar in the order written.
_sidebar = st.sidebar
_sidebar.image("https://via.placeholder.com/200", width=200)  # Replace with your logo
_sidebar.markdown("""
    *Powered by OpenRouter*
    """)
_sidebar.markdown("---")
_sidebar.markdown("""
    **Tips:**
    - For best results, use clear, well-lit images
    - Ask specific questions for detailed answers
    """)
_sidebar.markdown("Made with ❤️ by Koshur AI")