adil9858 committed
Commit 71002bf · verified · 1 Parent(s): c4a3c75

Update app.py

Files changed (1):
  1. app.py +76 -241

app.py CHANGED
@@ -1,259 +1,94 @@
  import streamlit as st
- import base64
  from openai import OpenAI
  from PIL import Image
  import io
  import cv2
  import numpy as np

- # Configure app
- st.set_page_config(
-     page_title="AI Vision Assistant",
-     page_icon="🔍",
-     layout="wide",
-     initial_sidebar_state="expanded"
- )
-
- # Custom CSS for futuristic design
- st.markdown("""
- <style>
-     /* Main colors */
-     :root {
-         --primary: #6366f1;
-         --secondary: #10b981;
-         --dark: #1e293b;
-         --light: #f8fafc;
-     }
-
-     /* Main container */
-     .stApp {
-         background: linear-gradient(135deg, #0f172a 0%, #1e293b 100%);
-         color: var(--light);
-     }
-
-     /* Headers */
-     h1, h2, h3, h4, h5, h6 {
-         color: var(--light) !important;
-         font-family: 'Inter', sans-serif;
-     }
-
-     /* Sidebar */
-     [data-testid="stSidebar"] {
-         background: linear-gradient(195deg, #0f172a 0%, #1e40af 100%) !important;
-     }
-
-     /* Buttons */
-     .stButton>button {
-         background: var(--primary) !important;
-         color: white !important;
-         border: none;
-         border-radius: 8px;
-         padding: 10px 24px;
-         font-weight: 500;
-         transition: all 0.3s;
-     }
-
-     .stButton>button:hover {
-         transform: translateY(-2px);
-         box-shadow: 0 4px 12px rgba(99, 102, 241, 0.3);
-     }
-
-     /* File uploader */
-     [data-testid="stFileUploader"] {
-         border: 2px dashed var(--primary) !important;
-         border-radius: 12px !important;
-         padding: 20px !important;
-     }
-
-     /* Markdown output */
-     .markdown-text {
-         background: rgba(30, 41, 59, 0.7) !important;
-         border-radius: 12px;
-         padding: 20px;
-         border-left: 4px solid var(--secondary);
-         animation: fadeIn 0.5s ease-in-out;
-     }
-
-     @keyframes fadeIn {
-         from { opacity: 0; transform: translateY(10px); }
-         to { opacity: 1; transform: translateY(0); }
-     }
-
-     /* Streamlit text input */
-     .stTextInput>div>div>input {
-         background: rgba(15, 23, 42, 0.7) !important;
-         color: white !important;
-         border: 1px solid #334155 !important;
-     }
- </style>
- """, unsafe_allow_html=True)

- # App title and description
- st.title("🔍 Optimus Alpha | Live Vision Assistant")

- # Initialize OpenAI client
- @st.cache_resource
- def get_client():
-     return OpenAI(
-         base_url="https://openrouter.ai/api/v1",
-         api_key='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'  # Replace with your actual key
      )

- # ===== Camera/Upload Selection =====
- input_method = st.radio(
-     "Select input method:",
-     ["Live Camera", "Upload Image"],
-     horizontal=True
- )

- # ===== Camera Section =====
- captured_image = None

- if input_method == "Live Camera":
-     st.subheader("Live Camera Feed")
-     run_camera = st.checkbox("Start Camera", value=False)
-
-     FRAME_WINDOW = st.empty()
-
-     if run_camera:
-         try:
-             cap = cv2.VideoCapture(1)
-             if not cap.isOpened():
-                 st.error("Could not access camera. Please:")
-                 st.markdown("""
-                 - Check camera permissions
-                 - Ensure no other app is using the camera
-                 - Try reconnecting the camera
-                 """)
-                 run_camera = False
-             else:
-                 capture_col, stop_col = st.columns(2)
-                 with capture_col:
-                     capture_button = st.button("📸 Capture Image")
-                 with stop_col:
-                     stop_button = st.button("🛑 Stop Camera")
-
-                 if stop_button:
-                     cap.release()
-                     st.rerun()
-
-                 while run_camera:
-                     ret, frame = cap.read()
-                     if not ret:
-                         st.error("Failed to capture frame")
-                         break
-
-                     frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-                     FRAME_WINDOW.image(frame)
-
-                     if capture_button:
-                         captured_image = frame
-                         cap.release()
-                         st.rerun()
-                         break
-         except Exception as e:
-             st.error(f"Camera error: {str(e)}")
-             run_camera = False

- # ===== Upload Section =====
- else:
-     st.subheader("Upload Image")
-     uploaded_file = st.file_uploader(
-         "Choose an image file",
-         type=["jpg", "jpeg", "png"],
-         label_visibility="collapsed"
-     )
-     if uploaded_file:
-         try:
-             captured_image = Image.open(uploaded_file)
-             st.image(captured_image, caption="Uploaded Image", width=300)
-         except Exception as e:
-             st.error(f"Error loading image: {str(e)}")

- # ===== Image Analysis Section =====
- if captured_image is not None:
-     st.subheader("AI Analysis")
-
-     # Convert to PIL Image if from OpenCV
-     if isinstance(captured_image, np.ndarray):
-         image = Image.fromarray(captured_image)
-     else:
-         image = captured_image
-
-     user_prompt = st.text_input(
-         "Ask about the image:",
-         placeholder="e.g. 'What is in this image?' or 'Explain this diagram'",
-         key="user_prompt"
-     )
-
-     if st.button("Analyze Image", type="primary"):
-         try:
-             # Convert image to base64
-             buffered = io.BytesIO()
-             image.save(buffered, format="JPEG")
-             image_base64 = base64.b64encode(buffered.getvalue()).decode("utf-8")
-
-             # Prepare messages
-             messages = [
-                 {
-                     "role": "system",
-                     "content": """You are an expert vision assistant. Analyze images with:
-                     - Clear, structured responses
-                     - Bullet points for multiple objects
-                     - Concise explanations
-                     - Highlight important findings in bold"""
-                 },
-                 {
-                     "role": "user",
-                     "content": [
-                         {
-                             "type": "text",
-                             "text": user_prompt if user_prompt else "Describe this image in detail"
-                         },
-                         {
-                             "type": "image_url",
-                             "image_url": {
-                                 "url": f"data:image/jpeg;base64,{image_base64}"
-                             }
-                         }
-                     ]
-                 }
-             ]
-
-             # Stream the response
-             response_container = st.empty()
-             full_response = ""
-
-             client = get_client()
-             stream = client.chat.completions.create(
-                 model="openrouter/optimus-alpha",
-                 messages=messages,
-                 stream=True
-             )
-
-             for chunk in stream:
-                 if chunk.choices[0].delta.content is not None:
-                     full_response += chunk.choices[0].delta.content
-                     response_container.markdown(f"""
-                     <div class="markdown-text">
-                     {full_response}
-                     </div>
-                     """, unsafe_allow_html=True)
-
-         except Exception as e:
-             st.error(f"Analysis error: {str(e)}")

- # Sidebar
- with st.sidebar:
-     st.image("https://via.placeholder.com/200", width=200)  # Replace with your logo
-     st.markdown("""
-     *Powered by OpenRouter*
-     """)
-     st.markdown("---")
-     st.markdown("""
-     **Tips:**
-     - For best results, use clear, well-lit images
-     - Ask specific questions for detailed answers
-     """)
-     st.markdown("Made with ❤️ by Koshur AI")

  import streamlit as st
  from openai import OpenAI
  from PIL import Image
  import io
+ import os
+ import uuid
+ from gtts import gTTS
  import cv2
  import numpy as np

+ # --- Configuration ---
+ API_KEY = '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'
+ REFERER_URL = "https://your-site.com"
+ SITE_TITLE = "SightNarrator"

+ client = OpenAI(
+     base_url="https://openrouter.ai/api/v1",
+     api_key=API_KEY
+ )
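Note: as in the previous revision, this commits a live `sk-or-v1-…` key into the repository, where anyone with read access can take it. A minimal sketch of loading the key from the environment instead; the `OPENROUTER_API_KEY` variable name is an assumption, not something this commit defines:

import os
import streamlit as st

# Assumed env var name; export OPENROUTER_API_KEY (or put it in
# .streamlit/secrets.toml and read st.secrets) before launching the app.
API_KEY = os.environ.get("OPENROUTER_API_KEY")
if not API_KEY:
    st.error("OPENROUTER_API_KEY is not set.")
    st.stop()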

+ # --- Helper Functions ---

+ def describe_image(image_url):
+     response = client.chat.completions.create(
+         extra_headers={
+             "HTTP-Referer": REFERER_URL,
+             "X-Title": SITE_TITLE,
+         },
+         model="opengvlab/internvl3-14b:free",
+         messages=[
+             {
+                 "role": "user",
+                 "content": [
+                     {"type": "text", "text": "Describe this image clearly, including objects, scene, and any visible text. Also warn about potential hazards like wet floors, stairs, obstacles."},
+                     {"type": "image_url", "image_url": {"url": image_url}}
+                 ]
+             }
+         ]
      )
+     return response.choices[0].message.content

+ def speak(text, filename=None):
+     if not filename:
+         filename = f"audio_{uuid.uuid4()}.mp3"
+     tts = gTTS(text=text, lang='en')
+     tts.save(filename)
+     return filename

+ def image_to_array(uploaded_image):
+     img = Image.open(uploaded_image)
+     img = img.convert('RGB')  # Ensure 3 channels
+     return np.array(img)

+ def array_to_base64(img_array):
+     _, buffer = cv2.imencode('.jpg', img_array)
+     return "data:image/jpeg;base64," + buffer.tobytes().hex()
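Note: as written, `array_to_base64` returns hex digits (`bytes.hex()`), not base64, so the `data:image/jpeg;base64,…` URI it builds would be rejected by any consumer. `cv2.imencode` also expects BGR channel order, while `image_to_array` returns RGB, so colors would come out swapped. A corrected sketch using the standard `base64` module (this assumes `import base64`, which this commit removed, is restored at the top of the file):

import base64

def array_to_base64(img_array):
    # image_to_array yields RGB; cv2.imencode expects BGR.
    bgr = cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR)
    ok, buffer = cv2.imencode('.jpg', bgr)
    if not ok:
        raise ValueError("JPEG encoding failed")
    return "data:image/jpeg;base64," + base64.b64encode(buffer.tobytes()).decode("utf-8")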

+ # --- Streamlit UI ---

+ st.set_page_config(page_title="AI Visual Assistant for the Blind", layout="centered")
+ st.title("👁️ AI Visual Assistant for the Blind")
+ st.markdown("Use your **camera** to capture the world around you.")
+
+ st.subheader("📸 Take a Picture")
+ camera_image = st.camera_input("Capture a frame from your camera")
+
+ if camera_image is not None:
+     st.image(camera_image, caption="Captured Frame", use_column_width=True)
+
+     with st.spinner("Analyzing the scene..."):
+         # Save temporarily
+         temp_path = f"temp_frame_{uuid.uuid4()}.jpg"
+         pil_img = Image.open(camera_image).convert("RGB")
+         pil_img.save(temp_path)
+
+         # Simulate URL (in production, you'd upload to cloud storage)
+         image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
+
+         description = describe_image(image_url)
+
+         st.subheader("📝 Description")
+         st.write(description)
+
+         st.subheader("🔊 Audio Narration")
+         audio_file = speak(description)
+         audio_bytes = open(audio_file, 'rb').read()
+         st.audio(audio_bytes, format='audio/mp3')
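Note: gTTS can synthesize straight into memory via its `write_to_fp` method, which would avoid the temporary MP3 file and the cleanup step below. A sketch, reusing the app's existing `io` and `gTTS` imports:

def speak_bytes(text):
    buf = io.BytesIO()
    gTTS(text=text, lang='en').write_to_fp(buf)  # never touches disk
    return buf.getvalue()

st.audio accepts raw bytes directly, so the narration block would become st.audio(speak_bytes(description), format='audio/mp3').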

+     # Cleanup
+     os.remove(temp_path)
+     os.remove(audio_file)
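Note: because `image_url` is hardcoded to the Wikipedia boardwalk photo, the model always describes that image rather than the captured frame, and the `image_to_array`/`array_to_base64` helpers are never called. OpenRouter's OpenAI-compatible endpoint accepts base64 data URLs in `image_url`, so the frame could be sent directly with no cloud upload. A sketch, assuming the corrected `array_to_base64` above:

# Inside the `if camera_image is not None:` block, replacing the simulated URL:
camera_image.seek(0)                      # rewind; Image.open already consumed the buffer
img_array = image_to_array(camera_image)  # st.camera_input returns a file-like object
image_url = array_to_base64(img_array)    # "data:image/jpeg;base64,..." URI
description = describe_image(image_url)

This wiring would also make the `temp_path` save-and-delete dance unnecessary.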
+ st.markdown("---")
94
+ st.markdown("*Built with πŸ’‘ using Streamlit, OpenRouter, and gTTS.*")