# app.py — Streamlit AI assistant that answers questions about the live webcam feed.
import base64
import cv2
import streamlit as st
import openai
from threading import Lock, Thread
from cv2 import VideoCapture, imencode
from dotenv import load_dotenv
from playsound import playsound
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.schema.messages import SystemMessage
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_openai import ChatOpenAI
from streamlit_webrtc import webrtc_streamer, VideoTransformerBase
load_dotenv()
class Assistant:
    """LLM-backed assistant that answers a text prompt about a webcam frame.

    Wraps a LangChain chat model in a history-aware inference chain so the
    conversation context accumulates across calls to `answer`.
    """

    def __init__(self, model):
        # `model` is any LangChain chat model (e.g. ChatOpenAI). The chain is
        # built once here; it owns the in-memory chat history.
        self.chain = self._create_inference_chain(model)

    def answer(self, prompt, image):
        """Ask the model about `image` (base64-encoded JPEG) using `prompt`.

        Returns the stripped response text, or None when the prompt is empty
        or the model produced an empty answer.
        """
        if not prompt:
            return None
        print("Prompt:", prompt)
        response = self.chain.invoke(
            {"prompt": prompt, "image_base64": image},
            # A single fixed session id: all turns share one history.
            config={"configurable": {"session_id": "unused"}},
        ).strip()
        print("Response:", response)
        if response:
            self._tts(response)
            return response
        return None

    def _tts(self, response):
        # Simple TTS simulation: print to the console instead of speaking.
        print(f"TTS Response: {response}")

    def _create_inference_chain(self, model):
        """Build prompt | model | parser, wrapped with message history."""
        SYSTEM_PROMPT = """
        You are a witty assistant that will use the chat history and the image
        provided by the user to answer its questions. Your job is to answer
        questions.
        Use few words on your answers. Go straight to the point. Do not use any
        emoticons or emojis.
        Be friendly and helpful. Show some personality.
        """
        prompt_template = ChatPromptTemplate.from_messages(
            [
                SystemMessage(content=SYSTEM_PROMPT),
                MessagesPlaceholder(variable_name="chat_history"),
                # NOTE(review): image_url given as a plain data-URL template
                # string; newer LangChain/OpenAI content blocks also accept
                # {"url": ...} dicts — confirm against the installed version.
                ("human", [
                    {"type": "text", "text": "{prompt}"},
                    {"type": "image_url", "image_url": "data:image/jpeg;base64,{image_base64}"}
                ]),
            ]
        )
        chain = prompt_template | model | StrOutputParser()
        # Single shared history object; the session_id passed at invoke time
        # is ignored by this lambda on purpose (one global conversation).
        chat_message_history = ChatMessageHistory()
        return RunnableWithMessageHistory(
            chain,
            lambda _: chat_message_history,
            input_messages_key="prompt",
            history_messages_key="chat_history",
        )
def main():
    """Streamlit entry point: webcam stream plus a prompt box wired to the assistant."""
    st.title("AI Assistant with Webcam Stream")

    # Streamlit re-executes this script on every user interaction. Building
    # the model/assistant at module scope of the rerun would discard the chat
    # history each time, so cache them in session_state instead.
    if "assistant" not in st.session_state:
        # model = ChatGoogleGenerativeAI(model="gemini-1.5-flash-latest")
        # You can swap OpenAI's GPT-4o for Gemini Flash via the line above.
        model = ChatOpenAI(model="gpt-4o")
        st.session_state.assistant = Assistant(model)
    assistant = st.session_state.assistant

    class VideoProcessor(VideoTransformerBase):
        """Retains the most recent webcam frame; guarded by a lock because
        transform() runs on the webrtc worker thread while the Streamlit
        script thread reads via get_base64_image()."""

        def __init__(self):
            self.lock = Lock()
            self.frame = None

        def transform(self, frame):
            # Called per incoming frame; stash a BGR ndarray copy for later.
            with self.lock:
                self.frame = frame.to_ndarray(format="bgr24")
            return frame

        def get_base64_image(self):
            """Return the latest frame as a base64 JPEG string, or None if no
            frame has arrived yet or encoding failed."""
            with self.lock:
                if self.frame is None:
                    return None
                # Check imencode's success flag instead of discarding it.
                ok, buffer = cv2.imencode('.jpeg', self.frame)
                if not ok:
                    return None
                return base64.b64encode(buffer).decode('utf-8')

    ctx = webrtc_streamer(key="example", video_processor_factory=VideoProcessor)

    st.subheader("Ask the Assistant")
    prompt = st.text_input("Enter your question:")
    if ctx.video_processor:
        if st.button("Submit"):
            base64_image = ctx.video_processor.get_base64_image()
            if prompt and base64_image:
                # Show the answer in the UI when the assistant returns one
                # (in addition to the console logging done by Assistant).
                response = assistant.answer(prompt, base64_image)
                if response:
                    st.write(response)
            else:
                st.warning("Please enter a prompt and ensure webcam feed is available.")


if __name__ == "__main__":
    main()