awacke1 committed on
Commit
0c09b53
·
verified ·
1 Parent(s): 8772f59

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +163 -0
app.py ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import base64
3
+ from threading import Lock, Thread
4
+ import cv2
5
+ import openai
6
+ from dotenv import load_dotenv
7
+ from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
8
+ from langchain.schema.messages import SystemMessage
9
+ from langchain_community.chat_message_histories import ChatMessageHistory
10
+ from langchain_core.output_parsers import StrOutputParser
11
+ from langchain_core.runnables.history import RunnableWithMessageHistory
12
+ from langchain_openai import ChatOpenAI
13
+ from langchain_google_genai import ChatGoogleGenerativeAI
14
+ import pyaudio
15
+ import speech_recognition as sr
16
+ import os
17
+ import json
18
+ import random
19
+ import requests
20
+ import time
21
+ import zipfile
22
+ from PIL import Image
23
+ from urllib.parse import quote
24
+
25
+ load_dotenv()
26
+
27
class WebcamStream:
    """Continuously capture frames from the default webcam on a background thread.

    A lock guards ``self.frame`` so the capture thread and readers never see a
    partially written frame. ``start()`` is idempotent; ``stop()`` joins the
    capture thread; releasing the camera happens in ``__exit__``.
    """

    def __init__(self):
        self.stream = cv2.VideoCapture(0)  # device 0: default camera
        # Prime with one frame so read() has something to copy immediately.
        _, self.frame = self.stream.read()
        self.running = False
        self.thread = None  # fix: stop() before start() used to raise AttributeError
        self.lock = Lock()

    def __enter__(self):
        # fix: __exit__ alone does not make a context manager; provide __enter__.
        return self

    def start(self):
        """Start the background capture thread (no-op if already running).

        Returns:
            self, to allow ``WebcamStream().start()`` chaining.
        """
        if self.running:
            return self
        self.running = True
        # daemon=True: a forgotten stop() must not keep the process alive.
        self.thread = Thread(target=self.update, args=(), daemon=True)
        self.thread.start()
        return self

    def update(self):
        """Capture loop: keep overwriting the latest frame until stop() is called."""
        while self.running:
            ok, frame = self.stream.read()
            if not ok:
                # fix: a failed read used to clobber the last good frame with None,
                # which later crashed read() on frame.copy().
                continue
            with self.lock:
                self.frame = frame

    def read(self, encode=False):
        """Return a copy of the most recent frame.

        Args:
            encode: when True, return base64-encoded JPEG bytes (suitable for a
                data URL); when False, return the raw BGR ndarray.
        """
        with self.lock:
            frame = self.frame.copy()
        if encode:
            _, buffer = cv2.imencode(".jpeg", frame)
            return base64.b64encode(buffer)
        return frame

    def stop(self):
        """Signal the capture loop to exit and wait for the thread to finish."""
        self.running = False
        if self.thread is not None and self.thread.is_alive():
            self.thread.join()

    def __exit__(self, exc_type, exc_value, exc_traceback):
        # Release the OS camera handle when leaving the context.
        self.stream.release()
65
+
66
class Assistant:
    """Multimodal chat assistant: answers a text prompt about a webcam frame.

    Wraps the supplied LangChain chat model in a prompt template with per-process
    message history, and speaks each response aloud via the OpenAI TTS API.
    """

    def __init__(self, model):
        self.chain = self._create_inference_chain(model)

    def answer(self, prompt, image):
        """Run one inference turn and speak the answer.

        Args:
            prompt: user question; empty/None is a no-op.
            image: base64-encoded JPEG bytes, as produced by
                ``WebcamStream.read(encode=True)``.
        """
        if not prompt:
            return
        st.write("Prompt:", prompt)
        response = self.chain.invoke(
            {"prompt": prompt, "image_base64": image.decode()},
            # Single shared session: history persists for the process lifetime.
            config={"configurable": {"session_id": "unused"}},
        ).strip()
        st.write("Response:", response)
        if response:
            self._tts(response)

    def _tts(self, response):
        """Stream OpenAI text-to-speech PCM audio straight to the sound card."""
        audio = pyaudio.PyAudio()
        # 24 kHz mono 16-bit PCM matches the tts-1 "pcm" response format.
        player = audio.open(format=pyaudio.paInt16, channels=1, rate=24000, output=True)
        try:
            with openai.audio.speech.with_streaming_response.create(
                model="tts-1",
                voice="alloy",
                response_format="pcm",
                input=response,
            ) as stream:
                for chunk in stream.iter_bytes(chunk_size=1024):
                    player.write(chunk)
        finally:
            # fix: the original leaked the PyAudio stream and instance on every call.
            player.stop_stream()
            player.close()
            audio.terminate()

    def _create_inference_chain(self, model):
        """Build the prompt | model | parser chain with chat-history support."""
        SYSTEM_PROMPT = """
        You are a witty assistant that will use the chat history and the image
        provided by the user to answer its questions.
        Use few words on your answers. Go straight to the point. Do not use any
        emoticons or emojis. Do not ask the user any questions.
        Be friendly and helpful. Show some personality. Do not be too formal.
        """
        prompt_template = ChatPromptTemplate.from_messages([
            SystemMessage(content=SYSTEM_PROMPT),
            MessagesPlaceholder(variable_name="chat_history"),
            # Human turn carries both the text prompt and the frame as a data URL.
            ("human", [
                {"type": "text", "text": "{prompt}"},
                {"type": "image_url", "image_url": "data:image/jpeg;base64,{image_base64}"},
            ]),
        ])
        chain = prompt_template | model | StrOutputParser()
        chat_message_history = ChatMessageHistory()
        return RunnableWithMessageHistory(
            chain,
            # One in-memory history regardless of session_id.
            lambda _: chat_message_history,
            input_messages_key="prompt",
            history_messages_key="chat_history",
        )
117
+
118
def main():
    """Streamlit entry point: webcam feed plus text/voice Q&A assistant."""
    st.title("🚀🌌 Scholarly Article Document Search with Memory")

    # NOTE(review): Streamlit reruns this script on every interaction, so the
    # camera is reopened per rerun; consider caching in st.session_state.
    webcam_stream = WebcamStream().start()
    model = ChatGoogleGenerativeAI(model="gemini-1.5-flash-latest")
    assistant = Assistant(model)

    try:
        # Sidebar
        st.sidebar.title("Options")
        should_save = st.sidebar.checkbox("💾 Save", value=True, help="Save your session data.")

        # Main content
        query = st.text_input("Enter your search query:")
        if st.button("Search"):
            image = webcam_stream.read(encode=True)
            assistant.answer(query, image)

        # File upload
        uploaded_file = st.file_uploader("Upload a file for context", type=["txt", "pdf", "docx"])
        if uploaded_file:
            file_contents = uploaded_file.read()
            st.write("File contents:", file_contents)

        # Display webcam feed (frames are BGR straight from OpenCV)
        st.image(webcam_stream.read(), channels="BGR")

        # Audio recording
        if st.button("Record Audio"):
            r = sr.Recognizer()
            with sr.Microphone() as source:
                st.write("Speak now...")
                audio = r.listen(source)
            st.write("Processing audio...")
            try:
                text = r.recognize_google(audio)
                st.write("You said:", text)
                assistant.answer(text, webcam_stream.read(encode=True))
            except sr.UnknownValueError:
                st.write("Could not understand audio")
            except sr.RequestError as e:
                st.write("Could not request results; {0}".format(e))
    finally:
        # fix: stop() used to be skipped when any widget/handler raised,
        # leaking the camera capture thread.
        webcam_stream.stop()
161
+
162
# Script entry point (Streamlit executes this module top-to-bottom on each rerun).
if __name__ == "__main__":
    main()