awacke1 committed (verified)
Commit 8c6351b · 1 Parent(s): 5876c46

Update app.py

Files changed (1):
  1. app.py +73 -19
app.py CHANGED
@@ -1,7 +1,10 @@
 import base64
+from threading import Lock, Thread
+
 import cv2
 import openai
 import streamlit as st
+from cv2 import VideoCapture, imencode
 from dotenv import load_dotenv
 from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
 from langchain.schema.messages import SystemMessage
@@ -14,29 +17,52 @@ from speech_recognition import Microphone, Recognizer, UnknownValueError

 load_dotenv()

+
 class WebcamStream:
     def __init__(self):
-        self.stream = cv2.VideoCapture(index=0)
+        self.stream = VideoCapture(index=0)
         _, self.frame = self.stream.read()
         self.running = False
+        self.lock = Lock()

     def start(self):
+        if self.running:
+            return self
+
         self.running = True
+
+        self.thread = Thread(target=self.update, args=())
+        self.thread.start()
         return self

     def update(self):
         while self.running:
             _, frame = self.stream.read()
-            _, buffer = cv2.imencode(".jpeg", frame)
-            self.frame = base64.b64encode(buffer).decode()

-    def read(self):
-        return self.frame
+            self.lock.acquire()
+            self.frame = frame
+            self.lock.release()
+
+    def read(self, encode=False):
+        self.lock.acquire()
+        frame = self.frame.copy()
+        self.lock.release()
+
+        if encode:
+            _, buffer = imencode(".jpeg", frame)
+            return base64.b64encode(buffer)
+
+        return frame

     def stop(self):
         self.running = False
+        if self.thread.is_alive():
+            self.thread.join()
+
+    def __exit__(self, exc_type, exc_value, exc_traceback):
         self.stream.release()

+
 class Assistant:
     def __init__(self, model):
         self.chain = self._create_inference_chain(model)
@@ -45,15 +71,11 @@ class Assistant:
         if not prompt:
             return

-        print("Prompt:", prompt)
-
         response = self.chain.invoke(
-            {"prompt": prompt, "image_base64": image},
+            {"prompt": prompt, "image_base64": image.decode()},
             config={"configurable": {"session_id": "unused"}},
         ).strip()

-        print("Response:", response)
-
         if response:
             self._tts(response)

@@ -108,15 +130,47 @@ class Assistant:
             history_messages_key="chat_history",
         )

-def audio_callback(recognizer, audio):
-    try:
-        prompt = recognizer.recognize_whisper(audio, model="base", language="english")
-        assistant.answer(prompt, webcam_stream.read())
-
-    except UnknownValueError:
-        print("There was an error processing the audio.")

 def main():
-    st.title("AI Assistant")
+    st.title("AI Assistant with Webcam Stream")
+
+    # Instantiate the webcam stream and start the capture thread
+    webcam_stream = WebcamStream().start()
+
+    # model = ChatGoogleGenerativeAI(model="gemini-1.5-flash-latest")
+    # Uncomment the line above to use Gemini Flash instead of OpenAI's GPT-4o.
+    model = ChatOpenAI(model="gpt-4o")
+    assistant = Assistant(model)
+
+    # UI for webcam feed
+    st.subheader("Webcam Feed")
+
+    def run_webcam():
+        while True:
+            frame = webcam_stream.read()
+            _, buffer = cv2.imencode('.jpg', frame)
+            frame_data = base64.b64encode(buffer).decode('utf-8')
+
+            # Display frame in Streamlit app
+            st.image(f"data:image/jpeg;base64,{frame_data}", use_column_width=True)
+            st.experimental_rerun()
+
+    webcam_thread = Thread(target=run_webcam)
+    webcam_thread.start()
+
+    st.subheader("Ask the Assistant")
+
+    prompt = st.text_input("Enter your question:")
+
+    if st.button("Submit"):
+        if prompt:
+            assistant.answer(prompt, webcam_stream.read(encode=True))
+        else:
+            st.warning("Please enter a prompt to submit.")
+
+    if st.button("Stop Webcam"):
+        webcam_stream.stop()
+        cv2.destroyAllWindows()

+if __name__ == "__main__":
+    main()
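
For reference, a minimal sketch of how the reworked WebcamStream can be driven on its own, outside the Streamlit UI. The "app" import path and variable names below are illustrative assumptions, not part of this commit:

from app import WebcamStream  # assumes app.py is importable as a module named app

stream = WebcamStream().start()        # opens the camera and spawns the capture thread
frame = stream.read()                  # latest raw frame (numpy BGR array)
image_b64 = stream.read(encode=True)   # base64-encoded JPEG bytes, as passed to Assistant.answer
stream.stop()                          # ends the capture loop and joins the thread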