awacke1 committed on
Commit
5a813b2
·
verified ·
1 Parent(s): eb6d723

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +122 -0
app.py ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import cv2
3
+ import openai
4
+ import streamlit as st
5
+ from dotenv import load_dotenv
6
+ from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
7
+ from langchain.schema.messages import SystemMessage
8
+ from langchain_community.chat_message_histories import ChatMessageHistory
9
+ from langchain_core.output_parsers import StrOutputParser
10
+ from langchain_core.runnables.history import RunnableWithMessageHistory
11
+ from langchain_openai import ChatOpenAI
12
+ from pyaudio import PyAudio, paInt16
13
+ from speech_recognition import Microphone, Recognizer, UnknownValueError
14
+
15
+ load_dotenv()
16
+
17
class WebcamStream:
    """Capture frames from the default webcam as base64-encoded JPEG strings.

    ``read()`` returns the most recent successfully encoded frame, or ``None``
    when no frame has been captured yet.
    """

    def __init__(self):
        """Open capture device 0 and try to grab one initial frame."""
        self.stream = cv2.VideoCapture(index=0)
        ok, frame = self.stream.read()
        # Store the first frame in the same base64 form that update() produces,
        # so read() always hands out one kind of value.
        self.frame = self._encode(frame) if ok and frame is not None else None
        self.running = False

    @staticmethod
    def _encode(frame):
        """Return ``frame`` as a base64 JPEG string, or ``None`` if encoding fails."""
        ok, buffer = cv2.imencode(".jpeg", frame)
        if not ok:
            return None
        return base64.b64encode(buffer).decode()

    def start(self):
        """Mark the stream as running and return ``self`` for chaining.

        NOTE(review): this only sets the flag; nothing in this class calls
        ``update()``. Confirm that the caller drives the capture loop.
        """
        self.running = True
        return self

    def update(self):
        """Refresh ``self.frame`` in a loop until ``running`` becomes false."""
        while self.running:
            ok, frame = self.stream.read()
            if not ok or frame is None:
                # A failed grab yields no image; keep the last good frame
                # instead of passing None to the encoder.
                continue
            encoded = self._encode(frame)
            if encoded is not None:
                self.frame = encoded

    def read(self):
        """Return the latest stored frame (base64 JPEG string or ``None``)."""
        return self.frame

    def stop(self):
        """Clear the running flag and release the capture device."""
        self.running = False
        self.stream.release()
39
+
40
class Assistant:
    """Answer a text prompt about an image and speak the reply aloud.

    The prompt and a base64-encoded JPEG are sent through a chat chain that
    keeps its message history; a non-empty reply is played via OpenAI
    text-to-speech on the default audio output.
    """

    def __init__(self, model):
        """Build the inference chain around the given chat ``model``."""
        self.chain = self._create_inference_chain(model)

    def answer(self, prompt, image):
        """Run ``prompt`` and ``image`` through the chain and speak the reply.

        Args:
            prompt: The user's question as text. A falsy value is ignored.
            image: Base64-encoded JPEG data placed into the image data URL.

        Returns:
            None. The prompt and response are printed, and a non-empty
            response is played as audio.
        """
        if not prompt:
            return

        print("Prompt:", prompt)

        response = self.chain.invoke(
            {"prompt": prompt, "image_base64": image},
            # The history factory below ignores the session id, but the
            # history wrapper still expects one in the config.
            config={"configurable": {"session_id": "unused"}},
        ).strip()

        print("Response:", response)

        if response:
            self._tts(response)

    def _tts(self, response):
        """Stream ``response`` as speech to the audio output.

        The PyAudio instance and its output stream are always closed, even if
        the speech request or playback fails, so repeated answers do not leak
        audio device handles.
        """
        audio = PyAudio()
        try:
            player = audio.open(format=paInt16, channels=1, rate=24000, output=True)
            try:
                with openai.audio.speech.with_streaming_response.create(
                    model="tts-1",
                    voice="alloy",
                    response_format="pcm",
                    input=response,
                ) as stream:
                    for chunk in stream.iter_bytes(chunk_size=1024):
                        player.write(chunk)
            finally:
                player.stop_stream()
                player.close()
        finally:
            audio.terminate()

    def _create_inference_chain(self, model):
        """Return a history-aware chain: prompt template -> model -> string."""
        SYSTEM_PROMPT = """
        You are a witty assistant that will use the chat history and the image
        provided by the user to answer its questions. Your job is to answer
        questions.

        Use few words on your answers. Go straight to the point. Do not use any
        emoticons or emojis.

        Be friendly and helpful. Show some personality.
        """

        prompt_template = ChatPromptTemplate.from_messages(
            [
                SystemMessage(content=SYSTEM_PROMPT),
                MessagesPlaceholder(variable_name="chat_history"),
                (
                    "human",
                    [
                        {"type": "text", "text": "{prompt}"},
                        {
                            "type": "image_url",
                            "image_url": "data:image/jpeg;base64,{image_base64}",
                        },
                    ],
                ),
            ]
        )

        chain = prompt_template | model | StrOutputParser()

        # One history object is shared by every call, whatever session id is
        # passed in.
        chat_message_history = ChatMessageHistory()
        return RunnableWithMessageHistory(
            chain,
            lambda _: chat_message_history,
            input_messages_key="prompt",
            history_messages_key="chat_history",
        )
110
+
111
def audio_callback(recognizer, audio):
    """Transcribe ``audio`` with Whisper and pass the text to the assistant.

    NOTE(review): relies on module-level ``assistant`` and ``webcam_stream``
    names that are not defined in the visible code; confirm they exist
    before this callback can fire.
    """
    try:
        transcript = recognizer.recognize_whisper(
            audio,
            model="base",
            language="english",
        )
        current_frame = webcam_stream.read()
        assistant.answer(transcript, current_frame)
    except UnknownValueError:
        print("There was an error processing the audio.")
118
+
119
+ def main():
120
+ st.title("AI Assistant")
121
+
122
+ webcam_stream = WebcamStream().