File size: 4,958 Bytes
a6c1838
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
import base64
import io
import os

import cv2
import numpy as np
import streamlit as st
from openai import OpenAI
from PIL import Image

# Configure app
st.set_page_config(
    page_title="AI Vision Assistant",
    page_icon="🔍",
    layout="wide",
    initial_sidebar_state="expanded"
)

# Custom CSS (keep your existing CSS here)
st.markdown("""
<style>
    /* Your existing CSS styles */
</style>
""", unsafe_allow_html=True)

# App title and description
st.title("🔍 Optimus Alpha | Live Vision Assistant")

# Initialize OpenAI client (cached so the client and its connection pool are
# created once per Streamlit session, not on every rerun).
@st.cache_resource
def get_client():
    """Return a cached OpenAI client configured for the OpenRouter API.

    The API key is read from the ``OPENROUTER_API_KEY`` environment variable
    rather than being hard-coded: the previous revision committed a live
    secret to source control, which must be treated as compromised and
    rotated.

    Returns:
        OpenAI: client pointed at the OpenRouter-compatible endpoint.

    Raises:
        RuntimeError: if no API key is configured in the environment.
    """
    api_key = os.environ.get("OPENROUTER_API_KEY")
    if not api_key:
        raise RuntimeError(
            "OPENROUTER_API_KEY is not set. Export it as an environment "
            "variable before starting the app."
        )
    return OpenAI(
        base_url="https://openrouter.ai/api/v1",
        api_key=api_key,
    )

# ===== New Live Camera Section =====
st.subheader("Live Camera Feed")
run_camera = st.checkbox("Enable Camera", value=False)

FRAME_WINDOW = st.empty()  # placeholder updated with each video frame
captured_image = None      # RGB ndarray of the frame grabbed via "Capture Image"

if run_camera:
    cap = cv2.VideoCapture(0)  # default system webcam

    capture_button = st.button("Capture Image")
    stop_button = st.button("Stop Camera")

    if stop_button:
        run_camera = False
        cap.release()
        # st.rerun() replaced st.experimental_rerun() in newer Streamlit
        # releases; resolve whichever exists so the app works on either.
        (getattr(st, "rerun", None) or st.experimental_rerun)()

    while run_camera:
        ret, frame = cap.read()
        if not ret:
            st.error("Failed to access camera")
            cap.release()  # fix: release the device on the failure path too
            break

        # OpenCV delivers BGR; convert to RGB for display (and later PIL use).
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        FRAME_WINDOW.image(frame)

        # NOTE(review): capture_button holds the value from the rerun that
        # (re)entered this loop — a click triggers a rerun, so the first
        # iteration after the click captures the frame.
        if capture_button:
            captured_image = frame
            run_camera = False
            cap.release()
            break
else:
    FRAME_WINDOW.info("Camera is currently off")

# ===== Image Processing Section =====
col1, col2 = st.columns([1, 2])

with col1:
    st.subheader("Image Source")

    # A frame captured from the live feed takes priority over a file upload.
    use_captured = captured_image is not None
    if use_captured:
        st.image(captured_image, caption="Captured Image", width=300)

    uploaded_file = st.file_uploader(
        "Or upload an image",
        type=["jpg", "jpeg", "png"],
        disabled=use_captured,
    )

    # Resolve the image the analysis step will use (None when nothing chosen).
    if use_captured:
        image = Image.fromarray(captured_image)
    elif uploaded_file:
        image = Image.open(uploaded_file)
    else:
        image = None

with col2:
    st.subheader("AI Analysis")

    user_prompt = st.text_input(
        "Your question about the image:",
        placeholder="e.g. 'What objects do you see?' or 'Explain this diagram'",
        key="user_prompt"
    )

    if st.button("Analyze", type="primary") and image:
        try:
            # Fix: JPEG cannot store an alpha channel, so PNG uploads in
            # RGBA/LA/P mode crashed on save() — normalize to RGB first.
            if image.mode != "RGB":
                image = image.convert("RGB")

            # Encode the image as base64 JPEG for the data-URL payload.
            buffered = io.BytesIO()
            image.save(buffered, format="JPEG")
            image_base64 = base64.b64encode(buffered.getvalue()).decode("utf-8")

            # System prompt pins the assistant's response format; the user
            # message carries both the question text and the image.
            messages = [
                {
                    "role": "system",
                    "content": """You are a real-time vision assistant. Analyze the current camera feed or uploaded image and:
1. Identify objects, people, text clearly
2. Answer follow-up questions precisely
3. Format responses with bullet points
4. Highlight urgent/important findings"""
                },
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": user_prompt if user_prompt else "Describe what you see in detail"
                        },
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/jpeg;base64,{image_base64}"
                            }
                        }
                    ]
                }
            ]

            # Stream the response token-by-token into a single placeholder.
            response_container = st.empty()
            full_response = ""

            client = get_client()
            stream = client.chat.completions.create(
                model="openrouter/optimus-alpha",
                messages=messages,
                stream=True
            )

            for chunk in stream:
                if chunk.choices[0].delta.content is not None:
                    full_response += chunk.choices[0].delta.content
                    # NOTE(review): model output is rendered with
                    # unsafe_allow_html=True — any HTML the model emits is
                    # injected verbatim; consider escaping or plain markdown.
                    response_container.markdown(f"""
                        <div class="markdown-text">
                        {full_response}
                        </div>
                    """, unsafe_allow_html=True)

        except Exception as e:
            # Surface any failure (encoding, network, API) in the UI.
            st.error(f"Error: {str(e)}")

# Sidebar (keep your existing sidebar)
with st.sidebar:
    # NOTE(review): assumes blob.png exists in the app's working directory —
    # confirm, otherwise st.image raises at startup.
    st.image("blob.png", width=200)
    st.markdown("""
    *Powered by OpenRouter*
    """)
    st.markdown("---")
    st.markdown("Made with ❤️ by Koshur AI")