Spaces:

awacke1
/

StreamlitSpeechAssistTest

Sleeping

App Files Files Community

awacke1 committed on Dec 3, 2024

Commit

5a813b2

verified ·

1 Parent(s): eb6d723

Create app.py

Browse files

Files changed (1) hide show

app.py +122 -0

app.py ADDED Viewed

	@@ -0,0 +1,122 @@

+import base64
+import cv2
+import openai
+import streamlit as st
+from dotenv import load_dotenv
+from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
+from langchain.schema.messages import SystemMessage
+from langchain_community.chat_message_histories import ChatMessageHistory
+from langchain_core.output_parsers import StrOutputParser
+from langchain_core.runnables.history import RunnableWithMessageHistory
+from langchain_openai import ChatOpenAI
+from pyaudio import PyAudio, paInt16
+from speech_recognition import Microphone, Recognizer, UnknownValueError
+load_dotenv()
+class WebcamStream:
+    def __init__(self):
+        self.stream = cv2.VideoCapture(index=0)
+        _, self.frame = self.stream.read()
+        self.running = False
+    def start(self):
+        self.running = True
+        return self
+    def update(self):
+        while self.running:
+            _, frame = self.stream.read()
+            _, buffer = cv2.imencode(".jpeg", frame)
+            self.frame = base64.b64encode(buffer).decode()
+    def read(self):
+        return self.frame
+    def stop(self):
+        self.running = False
+        self.stream.release()
+class Assistant:
+    def __init__(self, model):
+        self.chain = self._create_inference_chain(model)
+    def answer(self, prompt, image):
+        if not prompt:
+            return
+        print("Prompt:", prompt)
+        response = self.chain.invoke(
+            {"prompt": prompt, "image_base64": image},
+            config={"configurable": {"session_id": "unused"}},
+        ).strip()
+        print("Response:", response)
+        if response:
+            self._tts(response)
+    def _tts(self, response):
+        player = PyAudio().open(format=paInt16, channels=1, rate=24000, output=True)
+        with openai.audio.speech.with_streaming_response.create(
+            model="tts-1",
+            voice="alloy",
+            response_format="pcm",
+            input=response,
+        ) as stream:
+            for chunk in stream.iter_bytes(chunk_size=1024):
+                player.write(chunk)
+    def _create_inference_chain(self, model):
+        SYSTEM_PROMPT = """
+        You are a witty assistant that will use the chat history and the image
+        provided by the user to answer its questions. Your job is to answer
+        questions.
+        Use few words on your answers. Go straight to the point. Do not use any
+        emoticons or emojis.
+        Be friendly and helpful. Show some personality.
+        """
+        prompt_template = ChatPromptTemplate.from_messages(
+            [
+                SystemMessage(content=SYSTEM_PROMPT),
+                MessagesPlaceholder(variable_name="chat_history"),
+                (
+                    "human",
+                    [
+                        {"type": "text", "text": "{prompt}"},
+                        {
+                            "type": "image_url",
+                            "image_url": "data:image/jpeg;base64,{image_base64}",
+                        },
+                    ],
+                ),
+            ]
+        )
+        chain = prompt_template | model | StrOutputParser()
+        chat_message_history = ChatMessageHistory()
+        return RunnableWithMessageHistory(
+            chain,
+            lambda _: chat_message_history,
+            input_messages_key="prompt",
+            history_messages_key="chat_history",
+        )
+def audio_callback(recognizer, audio):
+    try:
+        prompt = recognizer.recognize_whisper(audio, model="base", language="english")
+        assistant.answer(prompt, webcam_stream.read())
+    except UnknownValueError:
+        print("There was an error processing the audio.")
+def main():
+    st.title("AI Assistant")
+    webcam_stream = WebcamStream().