# AI Visual Assistant for the Blind — Streamlit app.
# NOTE(review): this header replaces HuggingFace Spaces page chrome and
# git-blame residue that was scraped into the file and was not valid Python.
import streamlit as st
from openai import OpenAI
from PIL import Image
import io
import os
import uuid
from gtts import gTTS
import cv2
import numpy as np
import base64
# --- Configuration ---
# SECURITY: a live API key was hardcoded here. Prefer the OPENROUTER_API_KEY
# environment variable; the inline fallback is kept only for backward
# compatibility and should be removed once the exposed key is rotated.
API_KEY = os.environ.get(
    "OPENROUTER_API_KEY",
    "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",
)
# OpenAI-compatible client pointed at the OpenRouter gateway.
client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=API_KEY
)
# --- Helper Functions ---
def describe_image(image_bytes):
    """Ask the OpenRouter vision model for a description of a captured frame.

    Args:
        image_bytes: raw JPEG bytes of the image to describe.

    Returns:
        The model's text description (objects, scene, visible text, and
        hazard warnings), taken from the first completion choice.
    """
    encoded = base64.b64encode(image_bytes).decode('utf-8')
    user_content = [
        {
            "type": "text",
            "text": "Describe this image clearly, including objects, scene, and any visible text. Also warn about potential hazards.",
        },
        {
            "type": "image_url",
            "image_url": {"url": f"data:image/jpeg;base64,{encoded}"},
        },
    ]
    reply = client.chat.completions.create(
        model="opengvlab/internvl3-14b:free",
        messages=[{"role": "user", "content": user_content}],
    )
    return reply.choices[0].message.content
def speak(text, filename=None):
    """Synthesize *text* to English speech and save it as an MP3 file.

    Args:
        text: the text to narrate.
        filename: optional output path; a unique ``audio_<uuid>.mp3`` name
            is generated when omitted.

    Returns:
        The path of the MP3 file that was written.
    """
    out_path = filename if filename else f"audio_{uuid.uuid4()}.mp3"
    gTTS(text=text, lang='en').save(out_path)
    return out_path
# --- Streamlit UI ---
# Main page flow: capture a camera frame, describe it with the vision model,
# then narrate the description aloud via a temporary MP3 file.
st.set_page_config(page_title="AI Visual Assistant for the Blind", layout="centered")
st.title("ποΈ AI Visual Assistant for the Blind")
st.markdown("Use your **camera** to capture the world around you.")
st.subheader("πΈ Take a Picture")
camera_image = st.camera_input("Capture a frame from your camera")
if camera_image is not None:
    # use_container_width replaces the deprecated use_column_width parameter.
    st.image(camera_image, caption="Captured Frame", use_container_width=True)
    with st.spinner("Analyzing the scene..."):
        # Read the image bytes directly from the uploaded buffer.
        image_bytes = camera_image.getvalue()
        description = describe_image(image_bytes)
    st.subheader("π Description")
    st.write(description)
    st.subheader("π Audio Narration")
    audio_file = speak(description)
    try:
        # Close the file handle promptly (the original leaked it) and
        # guarantee the temporary MP3 is removed even if st.audio raises.
        with open(audio_file, 'rb') as f:
            audio_bytes = f.read()
        st.audio(audio_bytes, format='audio/mp3')
    finally:
        os.remove(audio_file)
st.markdown("---")
st.markdown("*Built with π‘ using Streamlit, OpenRouter, and gTTS.*")