# blind_vision / app.py
# adil9858's picture
# Update app.py
# 477a0fa verified
import streamlit as st
from openai import OpenAI
from PIL import Image
import io
import os
import uuid
from gtts import gTTS
import cv2
import numpy as np
import base64
# --- Configuration ---
# The OpenRouter key is read from the environment rather than being committed
# to source control. A key that has ever been checked into a repository must
# be treated as leaked and rotated with the provider.
API_KEY = os.environ.get("OPENROUTER_API_KEY")
if not API_KEY:
    # Fail fast with an actionable message instead of a late authentication
    # error on the first model request.
    raise RuntimeError(
        "OPENROUTER_API_KEY is not set. Export it (or add it as a secret in "
        "the hosting environment) before starting the app."
    )

# OpenAI-compatible client pointed at the OpenRouter endpoint.
client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=API_KEY,
)
# --- Helper Functions ---
def describe_image(image_bytes):
    """Request a textual description of an image from the vision model.

    The bytes are base64-encoded and embedded in a ``data:image/jpeg`` URL,
    then sent together with a fixed text prompt as a single user message via
    the module-level ``client``.

    Args:
        image_bytes: Bytes-like image data to describe. The data URL always
            labels it as JPEG.

    Returns:
        The ``content`` of the message in the first choice of the response.
    """
    encoded = base64.b64encode(image_bytes).decode('utf-8')

    # The two parts of the multimodal user message: instructions + picture.
    prompt_part = {
        "type": "text",
        "text": "Describe this image clearly, including objects, scene, and any visible text. Also warn about potential hazards.",
    }
    picture_part = {
        "type": "image_url",
        "image_url": {"url": f"data:image/jpeg;base64,{encoded}"},
    }
    user_message = {"role": "user", "content": [prompt_part, picture_part]}

    completion = client.chat.completions.create(
        model="opengvlab/internvl3-14b:free",
        messages=[user_message],
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
def speak(text, filename=None):
    """Synthesize English speech for ``text`` and save it as an MP3 file.

    Args:
        text: The text handed to gTTS.
        filename: Destination path. When falsy, a name of the form
            ``audio_<uuid4>.mp3`` is generated.

    Returns:
        The path the audio was saved to.
    """
    # Fall back to a unique name so separate calls do not share a file.
    target = filename or f"audio_{uuid.uuid4()}.mp3"
    gTTS(text=text, lang='en').save(target)
    return target
# --- Streamlit UI ---
# Page flow: capture a camera frame, describe it with the vision model, show
# the description, then play it back as synthesized speech.
st.set_page_config(page_title="AI Visual Assistant for the Blind", layout="centered")
st.title("👁️ AI Visual Assistant for the Blind")
st.markdown("Use your **camera** to capture the world around you.")

st.subheader("📸 Take a Picture")
camera_image = st.camera_input("Capture a frame from your camera")

if camera_image is not None:
    # NOTE(review): `use_column_width` may be deprecated in newer Streamlit
    # releases -- confirm against the pinned version before changing it.
    st.image(camera_image, caption="Captured Frame", use_column_width=True)

    with st.spinner("Analyzing the scene..."):
        # Read the image bytes directly from the captured frame.
        image_bytes = camera_image.getvalue()
        description = describe_image(image_bytes)

    st.subheader("📝 Description")
    if not description:
        # There is nothing to show or narrate when the model returns no
        # text, and the speech step cannot synthesize empty input.
        st.warning("No description was returned for this image. Please try again.")
    else:
        st.write(description)

        st.subheader("🔊 Audio Narration")
        audio_file = speak(description)
        try:
            # Close the handle deterministically instead of leaking it.
            with open(audio_file, 'rb') as audio_fp:
                audio_bytes = audio_fp.read()
        finally:
            # Cleanup: remove the temporary MP3 even if reading it failed.
            if os.path.exists(audio_file):
                os.remove(audio_file)
        st.audio(audio_bytes, format='audio/mp3')

st.markdown("---")
st.markdown("*Built with 💡 using Streamlit, OpenRouter, and gTTS.*")