Spaces:

awacke1
/

StreamlitSpeechAssistTest

Sleeping

App Files Files Community

StreamlitSpeechAssistTest / app.py

awacke1

Create app.py

5a813b2 verified 7 months ago

raw

history blame

3.74 kB

	import base64
	import cv2
	import openai
	import streamlit as st
	from dotenv import load_dotenv
	from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
	from langchain.schema.messages import SystemMessage
	from langchain_community.chat_message_histories import ChatMessageHistory
	from langchain_core.output_parsers import StrOutputParser
	from langchain_core.runnables.history import RunnableWithMessageHistory
	from langchain_openai import ChatOpenAI
	from pyaudio import PyAudio, paInt16
	from speech_recognition import Microphone, Recognizer, UnknownValueError

	# Read key/value pairs from a .env file into the process environment
	# (python-dotenv) before any client below is constructed.
	# NOTE(review): presumably this is where the OpenAI credentials come from;
	# confirm against the deployment configuration.
	load_dotenv()

	class WebcamStream:
	    """Keep the most recent frame from the default camera (device 0).

	    ``frame`` starts out as the raw image returned by the first capture in
	    ``__init__``; each successful pass of ``update`` replaces it with a
	    base64-encoded JPEG string.

	    NOTE(review): nothing in this class calls ``update``; the caller is
	    presumably expected to run it (e.g. on a background thread) after
	    ``start`` -- confirm against the code that drives this class.
	    """

	    def __init__(self):
	        """Open capture device 0 and grab one initial frame."""
	        self.stream = cv2.VideoCapture(index=0)
	        # The success flag of the first grab is ignored, so ``frame`` may
	        # be None here if the camera delivered nothing.
	        _, self.frame = self.stream.read()
	        self.running = False

	    def start(self):
	        """Mark the stream as running and return ``self`` for chaining."""
	        self.running = True
	        return self

	    def update(self):
	        """Refresh ``frame`` in a loop until ``running`` becomes False.

	        Each iteration grabs a frame, encodes it as JPEG and stores the
	        base64 text in ``self.frame``. A grab or encode that reports
	        failure is skipped and the previous frame is kept, so a read
	        against a capture that has just been released by ``stop`` no
	        longer reaches ``cv2.imencode`` with an empty frame.
	        """
	        while self.running:
	            grabbed, frame = self.stream.read()
	            if not grabbed or frame is None:
	                # Nothing captured this round; keep the last good frame
	                # and let the loop condition decide whether to go on.
	                continue

	            encoded, buffer = cv2.imencode(".jpeg", frame)
	            if not encoded:
	                continue

	            self.frame = base64.b64encode(buffer).decode()

	    def read(self):
	        """Return the most recently stored frame."""
	        return self.frame

	    def stop(self):
	        """Clear the running flag and release the capture device."""
	        self.running = False
	        self.stream.release()

	class Assistant:
	    """Answer a text prompt about an image and speak the reply aloud.

	    Wraps a chain (prompt template -> model -> string parser) together with
	    one in-memory chat history, and plays every non-empty reply through the
	    OpenAI text-to-speech endpoint.
	    """

	    def __init__(self, model):
	        """Build the inference chain around ``model``.

	        Args:
	            model: Runnable placed between the prompt template and the
	                string output parser. The template sends an ``image_url``
	                part, so the model presumably has to accept image input --
	                confirm against the caller.
	        """
	        self.chain = self._create_inference_chain(model)

	    def answer(self, prompt, image):
	        """Send ``prompt`` and ``image`` through the chain and speak the reply.

	        Args:
	            prompt: The user's question. A falsy value returns immediately
	                without invoking the chain.
	            image: Base64 image data; it is interpolated into a
	                ``data:image/jpeg;base64,...`` URL by the prompt template.

	        Returns:
	            None. The prompt and reply are printed, and a non-empty reply
	            is played back through ``_tts``.
	        """
	        if not prompt:
	            return

	        print("Prompt:", prompt)

	        response = self.chain.invoke(
	            {"prompt": prompt, "image_base64": image},
	            # The history factory ignores the session id, so any value works.
	            config={"configurable": {"session_id": "unused"}},
	        ).strip()

	        print("Response:", response)

	        if response:
	            self._tts(response)

	    def _tts(self, response):
	        """Stream ``response`` as speech to the default audio output.

	        Requests raw PCM from the ``tts-1`` model and writes it chunk by
	        chunk to a mono, 16-bit, 24 kHz output stream. The stream and the
	        PyAudio instance are always closed afterwards, so repeated answers
	        do not accumulate open audio handles.
	        """
	        audio = PyAudio()
	        try:
	            player = audio.open(
	                format=paInt16, channels=1, rate=24000, output=True
	            )
	            try:
	                with openai.audio.speech.with_streaming_response.create(
	                    model="tts-1",
	                    voice="alloy",
	                    response_format="pcm",
	                    input=response,
	                ) as stream:
	                    for chunk in stream.iter_bytes(chunk_size=1024):
	                        player.write(chunk)
	            finally:
	                player.stop_stream()
	                player.close()
	        finally:
	            audio.terminate()

	    def _create_inference_chain(self, model):
	        """Return the history-aware chain used by ``answer``.

	        The chain is ``prompt_template | model | StrOutputParser()`` wrapped
	        in ``RunnableWithMessageHistory``. One ``ChatMessageHistory`` is
	        created here and handed back for every session id, so all calls on
	        this instance share the same conversation.

	        Args:
	            model: Runnable inserted between the template and the parser.
	        """
	        SYSTEM_PROMPT = """
	You are a witty assistant that will use the chat history and the image
	provided by the user to answer its questions. Your job is to answer
	questions.

	Use few words on your answers. Go straight to the point. Do not use any
	emoticons or emojis.

	Be friendly and helpful. Show some personality.
	"""

	        prompt_template = ChatPromptTemplate.from_messages(
	            [
	                SystemMessage(content=SYSTEM_PROMPT),
	                MessagesPlaceholder(variable_name="chat_history"),
	                (
	                    "human",
	                    [
	                        {"type": "text", "text": "{prompt}"},
	                        {
	                            "type": "image_url",
	                            "image_url": "data:image/jpeg;base64,{image_base64}",
	                        },
	                    ],
	                ),
	            ]
	        )

	        chain = prompt_template | model | StrOutputParser()

	        chat_message_history = ChatMessageHistory()
	        return RunnableWithMessageHistory(
	            chain,
	            # Same history object regardless of the requested session id.
	            lambda _: chat_message_history,
	            input_messages_key="prompt",
	            history_messages_key="chat_history",
	        )

	def audio_callback(recognizer, audio):
	    """Transcribe ``audio`` with Whisper and hand the text to the assistant.

	    The transcript is passed to ``assistant.answer`` together with the
	    current ``webcam_stream.read()`` frame. An ``UnknownValueError`` raised
	    anywhere in that sequence is reported on stdout and swallowed.

	    NOTE(review): ``assistant`` and ``webcam_stream`` are not parameters;
	    they must be resolvable as module-level names when this runs -- confirm
	    where they are defined.

	    Args:
	        recognizer: Object providing ``recognize_whisper``.
	        audio: Captured audio passed through to the recognizer.
	    """
	    whisper_options = {"model": "base", "language": "english"}

	    try:
	        transcript = recognizer.recognize_whisper(audio, **whisper_options)
	        assistant.answer(transcript, webcam_stream.read())
	    except UnknownValueError:
	        print("There was an error processing the audio.")

	def main():
	st.title("AI Assistant")

	webcam_stream = WebcamStream().