adil9858 committed on
Commit
f3c1778
·
verified ·
1 Parent(s): 747adb5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -22
app.py CHANGED
@@ -7,6 +7,7 @@ import uuid
7
  from gtts import gTTS
8
  import cv2
9
  import numpy as np
 
10
 
11
  # --- Configuration ---
12
  API_KEY = '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'
@@ -18,16 +19,18 @@ client = OpenAI(
18
 
19
  # --- Helper Functions ---
20
 
21
- def describe_image(image_url):
 
 
 
22
  response = client.chat.completions.create(
23
-
24
  model="opengvlab/internvl3-14b:free",
25
  messages=[
26
  {
27
  "role": "user",
28
  "content": [
29
  {"type": "text", "text": "Describe this image clearly, including objects, scene, and any visible text. Also warn about potential hazards like wet floors, stairs, obstacles."},
30
- {"type": "image_url", "image_url": {"url": image_url}}
31
  ]
32
  }
33
  ]
@@ -41,15 +44,6 @@ def speak(text, filename=None):
41
  tts.save(filename)
42
  return filename
43
 
44
- def image_to_array(uploaded_image):
45
- img = Image.open(uploaded_image)
46
- img = img.convert('RGB') # Ensure 3 channels
47
- return np.array(img)
48
-
49
- def array_to_base64(img_array):
50
- _, buffer = cv2.imencode('.jpg', img_array)
51
- return "data:image/jpeg;base64," + buffer.tobytes().hex()
52
-
53
  # --- Streamlit UI ---
54
 
55
  st.set_page_config(page_title="AI Visual Assistant for the Blind", layout="centered")
@@ -63,15 +57,10 @@ if camera_image is not None:
63
  st.image(camera_image, caption="Captured Frame", use_column_width=True)
64
 
65
  with st.spinner("Analyzing the scene..."):
66
- # Save temporarily
67
- temp_path = f"temp_frame_{uuid.uuid4()}.jpg"
68
- pil_img = Image.open(camera_image).convert("RGB")
69
- pil_img.save(temp_path)
70
-
71
- # Simulate URL (in production, you'd upload to cloud storage)
72
- image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
73
-
74
- description = describe_image(image_url)
75
 
76
  st.subheader("📝 Description")
77
  st.write(description)
@@ -82,7 +71,6 @@ if camera_image is not None:
82
  st.audio(audio_bytes, format='audio/mp3')
83
 
84
  # Cleanup
85
- os.remove(temp_path)
86
  os.remove(audio_file)
87
 
88
  st.markdown("---")
 
7
  from gtts import gTTS
8
  import cv2
9
  import numpy as np
10
+ import base64
11
 
12
  # --- Configuration ---
13
  API_KEY = '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'
 
19
 
20
  # --- Helper Functions ---
21
 
22
+ def describe_image(image_bytes):
23
+ # Convert to base64
24
+ base64_image = base64.b64encode(image_bytes).decode('utf-8')
25
+
26
  response = client.chat.completions.create(
 
27
  model="opengvlab/internvl3-14b:free",
28
  messages=[
29
  {
30
  "role": "user",
31
  "content": [
32
  {"type": "text", "text": "Describe this image clearly, including objects, scene, and any visible text. Also warn about potential hazards like wet floors, stairs, obstacles."},
33
+ {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}}
34
  ]
35
  }
36
  ]
 
44
  tts.save(filename)
45
  return filename
46
 
 
 
 
 
 
 
 
 
 
47
  # --- Streamlit UI ---
48
 
49
  st.set_page_config(page_title="AI Visual Assistant for the Blind", layout="centered")
 
57
  st.image(camera_image, caption="Captured Frame", use_column_width=True)
58
 
59
  with st.spinner("Analyzing the scene..."):
60
+ # Read the image bytes directly
61
+ image_bytes = camera_image.getvalue()
62
+
63
+ description = describe_image(image_bytes)
 
 
 
 
 
64
 
65
  st.subheader("📝 Description")
66
  st.write(description)
 
71
  st.audio(audio_bytes, format='audio/mp3')
72
 
73
  # Cleanup
 
74
  os.remove(audio_file)
75
 
76
  st.markdown("---")