Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -7,6 +7,7 @@ import uuid
|
|
7 |
from gtts import gTTS
|
8 |
import cv2
|
9 |
import numpy as np
|
|
|
10 |
|
11 |
# --- Configuration ---
|
12 |
API_KEY = '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'
|
@@ -18,16 +19,18 @@ client = OpenAI(
|
|
18 |
|
19 |
# --- Helper Functions ---
|
20 |
|
21 |
-
def describe_image(
|
|
|
|
|
|
|
22 |
response = client.chat.completions.create(
|
23 |
-
|
24 |
model="opengvlab/internvl3-14b:free",
|
25 |
messages=[
|
26 |
{
|
27 |
"role": "user",
|
28 |
"content": [
|
29 |
{"type": "text", "text": "Describe this image clearly, including objects, scene, and any visible text. Also warn about potential hazards like wet floors, stairs, obstacles."},
|
30 |
-
{"type": "image_url", "image_url": {"url":
|
31 |
]
|
32 |
}
|
33 |
]
|
@@ -41,15 +44,6 @@ def speak(text, filename=None):
|
|
41 |
tts.save(filename)
|
42 |
return filename
|
43 |
|
44 |
-
def image_to_array(uploaded_image):
    """Decode an uploaded image file into an RGB numpy array.

    Parameters:
        uploaded_image: file-like object (e.g. a Streamlit upload) accepted
            by PIL.Image.open.

    Returns:
        numpy.ndarray of shape (H, W, 3), dtype uint8.
    """
    pil_img = Image.open(uploaded_image).convert('RGB')  # force exactly 3 channels
    return np.array(pil_img)
|
48 |
-
|
49 |
-
def array_to_base64(img_array):
    """Encode an image array as a JPEG base64 data URI.

    Parameters:
        img_array: numpy array accepted by cv2.imencode (H x W x 3, uint8).

    Returns:
        str: "data:image/jpeg;base64,<payload>" usable in image_url fields.

    Raises:
        ValueError: if JPEG encoding fails.
    """
    import base64

    ok, buffer = cv2.imencode('.jpg', img_array)
    if not ok:
        # Original discarded this flag; a failed encode would have produced
        # a confusing downstream error instead of a clear one here.
        raise ValueError("JPEG encoding of image array failed")
    # BUG FIX: the original used buffer.tobytes().hex(), which is a hex dump,
    # not base64 — any consumer of the data URI would reject the payload.
    payload = base64.b64encode(buffer.tobytes()).decode('ascii')
    return "data:image/jpeg;base64," + payload
|
52 |
-
|
53 |
# --- Streamlit UI ---
|
54 |
|
55 |
st.set_page_config(page_title="AI Visual Assistant for the Blind", layout="centered")
|
@@ -63,15 +57,10 @@ if camera_image is not None:
|
|
63 |
st.image(camera_image, caption="Captured Frame", use_column_width=True)
|
64 |
|
65 |
with st.spinner("Analyzing the scene..."):
|
66 |
-
#
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
# Simulate URL (in production, you'd upload to cloud storage)
|
72 |
-
image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
|
73 |
-
|
74 |
-
description = describe_image(image_url)
|
75 |
|
76 |
st.subheader("📝 Description")
|
77 |
st.write(description)
|
@@ -82,7 +71,6 @@ if camera_image is not None:
|
|
82 |
st.audio(audio_bytes, format='audio/mp3')
|
83 |
|
84 |
# Cleanup
|
85 |
-
os.remove(temp_path)
|
86 |
os.remove(audio_file)
|
87 |
|
88 |
st.markdown("---")
|
|
|
7 |
from gtts import gTTS
|
8 |
import cv2
|
9 |
import numpy as np
|
10 |
+
import base64
|
11 |
|
12 |
# --- Configuration ---
|
13 |
API_KEY = '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'
|
|
|
19 |
|
20 |
# --- Helper Functions ---
|
21 |
|
22 |
+
def describe_image(image_bytes):
|
23 |
+
# Convert to base64
|
24 |
+
base64_image = base64.b64encode(image_bytes).decode('utf-8')
|
25 |
+
|
26 |
response = client.chat.completions.create(
|
|
|
27 |
model="opengvlab/internvl3-14b:free",
|
28 |
messages=[
|
29 |
{
|
30 |
"role": "user",
|
31 |
"content": [
|
32 |
{"type": "text", "text": "Describe this image clearly, including objects, scene, and any visible text. Also warn about potential hazards like wet floors, stairs, obstacles."},
|
33 |
+
{"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}}
|
34 |
]
|
35 |
}
|
36 |
]
|
|
|
44 |
tts.save(filename)
|
45 |
return filename
|
46 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
# --- Streamlit UI ---
|
48 |
|
49 |
st.set_page_config(page_title="AI Visual Assistant for the Blind", layout="centered")
|
|
|
57 |
st.image(camera_image, caption="Captured Frame", use_column_width=True)
|
58 |
|
59 |
with st.spinner("Analyzing the scene..."):
|
60 |
+
# Read the image bytes directly
|
61 |
+
image_bytes = camera_image.getvalue()
|
62 |
+
|
63 |
+
description = describe_image(image_bytes)
|
|
|
|
|
|
|
|
|
|
|
64 |
|
65 |
st.subheader("📝 Description")
|
66 |
st.write(description)
|
|
|
71 |
st.audio(audio_bytes, format='audio/mp3')
|
72 |
|
73 |
# Cleanup
|
|
|
74 |
os.remove(audio_file)
|
75 |
|
76 |
st.markdown("---")
|