import streamlit as st
from openai import OpenAI
from PIL import Image
import io
import os
import uuid
from gtts import gTTS
import cv2
import numpy as np
import base64

# --- Configuration ---
# SECURITY FIX: an OpenRouter API key was previously hard-coded here. A key
# committed to source is effectively public and must be revoked/rotated.
# Read it from the environment instead: set OPENROUTER_API_KEY before
# launching the app. Falls back to "" so the app still starts (requests
# will fail with an auth error until the key is provided).
API_KEY = os.environ.get("OPENROUTER_API_KEY", "")

# OpenAI-compatible client pointed at the OpenRouter gateway.
client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=API_KEY
)

# --- Helper Functions ---

def describe_image(image_bytes):
    """Ask the vision model to describe a captured image.

    Parameters
    ----------
    image_bytes : bytes
        Raw image data (JPEG assumed by the data-URL below — confirm the
        camera widget's output format matches).

    Returns
    -------
    str
        The model's textual description (objects, scene, visible text,
        hazard warnings).
    """
    # The chat endpoint accepts inline images only as a base64 data URL.
    encoded = base64.b64encode(image_bytes).decode('utf-8')

    user_content = [
        {"type": "text", "text": "Describe this image clearly, including objects, scene, and any visible text. Also warn about potential hazards."},
        {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encoded}"}},
    ]

    completion = client.chat.completions.create(
        model="opengvlab/internvl3-14b:free",
        messages=[{"role": "user", "content": user_content}],
    )
    return completion.choices[0].message.content

def speak(text, filename=None):
    """Synthesize *text* to an MP3 via Google TTS and return the file path.

    When *filename* is omitted, a UUID-based name is generated so that
    concurrent narrations do not overwrite one another. The caller owns
    the file and is responsible for deleting it.
    """
    target = filename if filename else f"audio_{uuid.uuid4()}.mp3"
    gTTS(text=text, lang='en').save(target)
    return target

# --- Streamlit UI ---
# Top-level Streamlit script: capture a camera frame, describe it with the
# vision model, and narrate the description as audio.

st.set_page_config(page_title="AI Visual Assistant for the Blind", layout="centered")
st.title("πŸ‘οΈ AI Visual Assistant for the Blind")
st.markdown("Use your **camera** to capture the world around you.")

st.subheader("πŸ“Έ Take a Picture")
camera_image = st.camera_input("Capture a frame from your camera")

if camera_image is not None:
    st.image(camera_image, caption="Captured Frame", use_column_width=True)

    with st.spinner("Analyzing the scene..."):
        # camera_input returns an uploaded-file-like object; getvalue()
        # yields the raw image bytes directly.
        image_bytes = camera_image.getvalue()

        description = describe_image(image_bytes)

        st.subheader("πŸ“ Description")
        st.write(description)

        st.subheader("πŸ”Š Audio Narration")
        audio_file = speak(description)
        try:
            # BUG FIX: previously `open(audio_file, 'rb').read()` leaked the
            # file handle; a context manager closes it deterministically.
            with open(audio_file, 'rb') as f:
                audio_bytes = f.read()
            st.audio(audio_bytes, format='audio/mp3')
        finally:
            # BUG FIX: cleanup now runs even if reading or playback raises,
            # so temporary MP3 files no longer accumulate on disk.
            os.remove(audio_file)

st.markdown("---")
st.markdown("*Built with πŸ’‘ using Streamlit, OpenRouter, and gTTS.*")