Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -7,6 +7,7 @@ import uuid
|
|
7 |
from gtts import gTTS
|
8 |
import cv2
|
9 |
import numpy as np
|
|
|
10 |
|
11 |
# --- Configuration ---
|
12 |
API_KEY = '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'
|
@@ -18,16 +19,18 @@ client = OpenAI(
|
|
18 |
|
19 |
# --- Helper Functions ---
|
20 |
|
21 |
-
def describe_image(
|
|
|
|
|
|
|
22 |
response = client.chat.completions.create(
|
23 |
-
|
24 |
model="opengvlab/internvl3-14b:free",
|
25 |
messages=[
|
26 |
{
|
27 |
"role": "user",
|
28 |
"content": [
|
29 |
{"type": "text", "text": "Describe this image clearly, including objects, scene, and any visible text. Also warn about potential hazards like wet floors, stairs, obstacles."},
|
30 |
-
{"type": "image_url", "image_url": {"url":
|
31 |
]
|
32 |
}
|
33 |
]
|
@@ -41,15 +44,6 @@ def speak(text, filename=None):
|
|
41 |
tts.save(filename)
|
42 |
return filename
|
43 |
|
44 |
-
def image_to_array(uploaded_image):
    """Decode an uploaded image file into an RGB numpy array.

    Parameters:
        uploaded_image: file-like object (e.g. a Streamlit upload) accepted
            by PIL.Image.open.

    Returns:
        numpy.ndarray of shape (H, W, 3), dtype uint8.
    """
    pil_img = Image.open(uploaded_image).convert('RGB')  # force exactly 3 channels
    return np.array(pil_img)
|
48 |
-
|
49 |
-
def array_to_base64(img_array):
    """Encode an image array as a JPEG base64 data URI.

    Parameters:
        img_array: numpy array accepted by cv2.imencode (H x W x 3, uint8).

    Returns:
        str: "data:image/jpeg;base64,<payload>" usable in image_url fields.

    Raises:
        ValueError: if JPEG encoding fails.
    """
    import base64

    ok, buffer = cv2.imencode('.jpg', img_array)
    if not ok:
        # Original discarded this flag; a failed encode would have produced
        # a confusing downstream error instead of a clear one here.
        raise ValueError("JPEG encoding of image array failed")
    # BUG FIX: the original used buffer.tobytes().hex(), which is a hex dump,
    # not base64 — any consumer of the data URI would reject the payload.
    payload = base64.b64encode(buffer.tobytes()).decode('ascii')
    return "data:image/jpeg;base64," + payload
|
52 |
-
|
53 |
# --- Streamlit UI ---
|
54 |
|
55 |
st.set_page_config(page_title="AI Visual Assistant for the Blind", layout="centered")
|
@@ -63,15 +57,10 @@ if camera_image is not None:
|
|
63 |
st.image(camera_image, caption="Captured Frame", use_column_width=True)
|
64 |
|
65 |
with st.spinner("Analyzing the scene..."):
|
66 |
-
#
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
# Simulate URL (in production, you'd upload to cloud storage)
|
72 |
-
image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
|
73 |
-
|
74 |
-
description = describe_image(image_url)
|
75 |
|
76 |
st.subheader("📝 Description")
|
77 |
st.write(description)
|
@@ -82,7 +71,6 @@ if camera_image is not None:
|
|
82 |
st.audio(audio_bytes, format='audio/mp3')
|
83 |
|
84 |
# Cleanup
|
85 |
-
os.remove(temp_path)
|
86 |
os.remove(audio_file)
|
87 |
|
88 |
st.markdown("---")
|
|
|
7 |
from gtts import gTTS
|
8 |
import cv2
|
9 |
import numpy as np
|
10 |
+
import base64
|
11 |
|
12 |
# --- Configuration ---
|
13 |
API_KEY = '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'
|
|
|
19 |
|
20 |
# --- Helper Functions ---
|
21 |
|
22 |
+
def describe_image(image_bytes):
|
23 |
+
# Convert to base64
|
24 |
+
base64_image = base64.b64encode(image_bytes).decode('utf-8')
|
25 |
+
|
26 |
response = client.chat.completions.create(
|
|
|
27 |
model="opengvlab/internvl3-14b:free",
|
28 |
messages=[
|
29 |
{
|
30 |
"role": "user",
|
31 |
"content": [
|
32 |
{"type": "text", "text": "Describe this image clearly, including objects, scene, and any visible text. Also warn about potential hazards like wet floors, stairs, obstacles."},
|
33 |
+
{"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}}
|
34 |
]
|
35 |
}
|
36 |
]
|
|
|
44 |
tts.save(filename)
|
45 |
return filename
|
46 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
# --- Streamlit UI ---
|
48 |
|
49 |
st.set_page_config(page_title="AI Visual Assistant for the Blind", layout="centered")
|
|
|
57 |
st.image(camera_image, caption="Captured Frame", use_column_width=True)
|
58 |
|
59 |
with st.spinner("Analyzing the scene..."):
|
60 |
+
# Read the image bytes directly
|
61 |
+
image_bytes = camera_image.getvalue()
|
62 |
+
|
63 |
+
description = describe_image(image_bytes)
|
|
|
|
|
|
|
|
|
|
|
64 |
|
65 |
st.subheader("📝 Description")
|
66 |
st.write(description)
|
|
|
71 |
st.audio(audio_bytes, format='audio/mp3')
|
72 |
|
73 |
# Cleanup
|
|
|
74 |
os.remove(audio_file)
|
75 |
|
76 |
st.markdown("---")
|