awacke1 committed on
Commit
1cc6fa9
·
verified ·
1 Parent(s): 8c6351b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -148
app.py CHANGED
@@ -1,10 +1,6 @@
1
  import base64
2
- from threading import Lock, Thread
3
-
4
  import cv2
5
  import openai
6
- import streamlit as st
7
- from cv2 import VideoCapture, imencode
8
  from dotenv import load_dotenv
9
  from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
10
  from langchain.schema.messages import SystemMessage
@@ -12,165 +8,38 @@ from langchain_community.chat_message_histories import ChatMessageHistory
12
  from langchain_core.output_parsers import StrOutputParser
13
  from langchain_core.runnables.history import RunnableWithMessageHistory
14
  from langchain_openai import ChatOpenAI
15
- from pyaudio import PyAudio, paInt16
16
- from speech_recognition import Microphone, Recognizer, UnknownValueError
17
 
18
  load_dotenv()
19
 
20
-
21
- class WebcamStream:
22
  def __init__(self):
23
- self.stream = VideoCapture(index=0)
24
- _, self.frame = self.stream.read()
25
- self.running = False
26
- self.lock = Lock()
27
-
28
- def start(self):
29
- if self.running:
30
- return self
31
-
32
- self.running = True
33
-
34
- self.thread = Thread(target=self.update, args=())
35
- self.thread.start()
36
- return self
37
-
38
- def update(self):
39
- while self.running:
40
- _, frame = self.stream.read()
41
-
42
- self.lock.acquire()
43
- self.frame = frame
44
- self.lock.release()
45
-
46
- def read(self, encode=False):
47
- self.lock.acquire()
48
- frame = self.frame.copy()
49
- self.lock.release()
50
-
51
- if encode:
52
- _, buffer = imencode(".jpeg", frame)
53
- return base64.b64encode(buffer)
54
-
55
- return frame
56
-
57
- def stop(self):
58
- self.running = False
59
- if self.thread.is_alive():
60
- self.thread.join()
61
-
62
- def __exit__(self, exc_type, exc_value, exc_traceback):
63
- self.stream.release()
64
 
 
 
 
 
65
 
66
  class Assistant:
67
- def __init__(self, model):
68
- self.chain = self._create_inference_chain(model)
69
-
70
- def answer(self, prompt, image):
71
- if not prompt:
72
- return
73
-
74
- response = self.chain.invoke(
75
- {"prompt": prompt, "image_base64": image.decode()},
76
- config={"configurable": {"session_id": "unused"}},
77
- ).strip()
78
-
79
- if response:
80
- self._tts(response)
81
-
82
- def _tts(self, response):
83
- player = PyAudio().open(format=paInt16, channels=1, rate=24000, output=True)
84
-
85
- with openai.audio.speech.with_streaming_response.create(
86
- model="tts-1",
87
- voice="alloy",
88
- response_format="pcm",
89
- input=response,
90
- ) as stream:
91
- for chunk in stream.iter_bytes(chunk_size=1024):
92
- player.write(chunk)
93
-
94
- def _create_inference_chain(self, model):
95
- SYSTEM_PROMPT = """
96
- You are a witty assistant that will use the chat history and the image
97
- provided by the user to answer its questions. Your job is to answer
98
- questions.
99
-
100
- Use few words on your answers. Go straight to the point. Do not use any
101
- emoticons or emojis.
102
-
103
- Be friendly and helpful. Show some personality.
104
- """
105
-
106
- prompt_template = ChatPromptTemplate.from_messages(
107
- [
108
- SystemMessage(content=SYSTEM_PROMPT),
109
- MessagesPlaceholder(variable_name="chat_history"),
110
- (
111
- "human",
112
- [
113
- {"type": "text", "text": "{prompt}"},
114
- {
115
- "type": "image_url",
116
- "image_url": "data:image/jpeg;base64,{image_base64}",
117
- },
118
- ],
119
- ),
120
- ]
121
- )
122
-
123
- chain = prompt_template | model | StrOutputParser()
124
-
125
- chat_message_history = ChatMessageHistory()
126
- return RunnableWithMessageHistory(
127
- chain,
128
- lambda _: chat_message_history,
129
- input_messages_key="prompt",
130
- history_messages_key="chat_history",
131
- )
132
-
133
 
134
  def main():
135
- st.title("AI Assistant with Webcam Stream")
136
-
137
- # Instantiate Webcam Stream and start it
138
- webcam_stream = WebcamStream().start()
139
 
140
  # model = ChatGoogleGenerativeAI(model="gemini-1.5-flash-latest")
141
- # You can use OpenAI's GPT-4o model instead of Gemini Flash by uncommenting the following line:
142
- model = ChatOpenAI(model="gpt-4o")
143
- assistant = Assistant(model)
144
 
145
- # UI for webcam feed
146
- st_subtitle("Webcam Feed")
147
 
148
- def run_webcam():
149
  while True:
150
- frame = webcam_stream.read()
151
- _, buffer = cv2.imencode('.jpg', frame)
152
- frame_data = base64.b64encode(buffer).decode('utf-8')
153
-
154
- # Display frame in Streamlit app
155
- st.image(f"data:image/jpeg;base64,{frame_data}", use_column_width=True)
156
- st.experimental_rerun()
157
-
158
- webcam_thread = Thread(target=run_webcam)
159
- webcam_thread.start()
160
-
161
- st.subheader("Ask the Assistant")
162
-
163
- prompt = st.text_input("Enter your question:")
164
-
165
- if st.button("Submit"):
166
- if prompt:
167
- assistant.answer(prompt, webcam_stream.read(encode=True))
168
- else:
169
- st.warning("Please enter a prompt to submit.")
170
 
171
- if st.button("Stop Webcam"):
172
- webcam_stream.stop()
173
- cv2.destroyAllWindows()
174
 
175
  if __name__ == "__main__":
176
  main()
 
1
  import base64
 
 
2
  import cv2
3
  import openai
 
 
4
  from dotenv import load_dotenv
5
  from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
6
  from langchain.schema.messages import SystemMessage
 
8
  from langchain_core.output_parsers import StrOutputParser
9
  from langchain_core.runnables.history import RunnableWithMessageHistory
10
  from langchain_openai import ChatOpenAI
11
+ import streamlit as st
 
12
 
13
  load_dotenv()
14
 
15
+ class VideoStream:
 
16
  def __init__(self):
17
+ self.cap = cv2.VideoCapture(0)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
+ def get_frame(self):
20
+ ret, frame = self.cap.read()
21
+ encoded_image = cv2.imencode('.jpg', frame)[1].tobytes()
22
+ return encoded_image
23
 
24
  class Assistant:
25
+ # ... (Same code as before for the Assistant class)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
  def main():
28
+ st.title("AI Assistant App")
29
+ video_stream = VideoStream()
 
 
30
 
31
  # model = ChatGoogleGenerativeAI(model="gemini-1.5-flash-latest")
32
+ model = ChatOpenAI(model="gpt-4o") # Using OpenAI's GPT-4 model
 
 
33
 
34
+ assistant = Assistant(model)
 
35
 
36
+ if st.button("Start"):
37
  while True:
38
+ frame = video_stream.get_frame()
39
+ st.image(frame, channels="BGR")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
+ # Add code to capture audio input and process the response here
42
+ # ...
 
43
 
44
  if __name__ == "__main__":
45
  main()