Restoring
app.py
CHANGED
@@ -10,140 +10,94 @@ import asyncio
 from dotenv import load_dotenv
 load_dotenv()

-# [lines 13-72 of the old file: content not preserved in this view; only two references to st.session_state survive]
-# App UI
-st.title("🎤 Voice AI Assistant")
-
-# Session init
-if "conversation" not in st.session_state:
-    st.session_state.conversation = []  # list of (question, answer, audio_filename)
-if "audio_count" not in st.session_state:
-    st.session_state.audio_count = 1
-
-status = st.empty()
-status.markdown("<div class='status-bar'>🎙️ Press mic button or type to ask a question</div>", unsafe_allow_html=True)
-
-recorded_audio = audio_recorder(sample_rate=8000)
-text_input = st.chat_input("Type your question here...")
-
-# ----- INPUT HANDLER -----
-def handle_input(user_text):
-    status.markdown("<div class='status-bar'>🤖 Thinking...</div>", unsafe_allow_html=True)
-    response = answer(user_text)
-    audio_file = f"output{st.session_state.audio_count}.wav"
-    status.markdown("<div class='status-bar'>🎧 Converting response to audio...</div>", unsafe_allow_html=True)
-    asyncio.run(convert_audio(response, audio_file))
-    st.session_state.audio_count += 1
-
-    st.session_state.conversation.append((f"Q: {user_text}", f"A: {response}", audio_file))
-    status.markdown("<div class='status-bar'>✅ Ask another question...</div>", unsafe_allow_html=True)
-
-# ----- PROCESS INPUT -----
-if text_input:
-    handle_input(text_input)
-elif recorded_audio:
-    status.markdown("<div class='status-bar'>🧠 Transcribing speech...</div>", unsafe_allow_html=True)
-    data_to_file(recorded_audio)
-    transcription = audio_to_text("temp_audio.wav")
-    handle_input(transcription)
-
-# ----- SHOW CONVERSATION -----
-if st.session_state.conversation:
-    st.markdown("## 🧾 Conversation History")
-    for i, (q, a, audio_path) in enumerate(st.session_state.conversation):
-        with st.container():
-            st.markdown(f"<div class='conversation-block'>", unsafe_allow_html=True)
-            st.markdown(f"<div class='question'>{q}</div>", unsafe_allow_html=True)
-            st.markdown(f"<div class='answer'>{a}</div>", unsafe_allow_html=True)
-            st.audio(audio_path, format="audio/wav", autoplay=(i == len(st.session_state.conversation)-1))
-            st.markdown("</div>", unsafe_allow_html=True)
-
-# ----- AUDIO TO TEXT -----
-def data_to_file(audio_blob):
-    with open("temp_audio.wav", "wb") as f:
-        f.write(audio_blob)
-
-def audio_to_text(path):
-    client = Groq(api_key=os.getenv("GROQ_API_KEY"))
-    with open(path, "rb") as f:
+# Front end using Streamlit
+def frontend():
+    st.title("Voice AI Demo")
+
+    # Initialize session state variables
+    if "conversation" not in st.session_state:
+        st.session_state.conversation = []  # Stores (question, answer, audio_filename)
+    if "audio_count" not in st.session_state:
+        st.session_state.audio_count = 1  # Start numbering audio files from output1.wav
+
+    status_placeholder = st.empty()
+    status_placeholder.write("Press Mic button to start asking a question")
+
+    recorded_audio = audio_recorder(sample_rate=8000)
+    text = st.chat_input()
+
+    def process_input(user_input):
+        status_placeholder.write("Getting response...")
+        response = answer(user_input)
+        status_placeholder.write("Converting response to audio...")
+
+        # Generate unique audio filename
+        audio_filename = f"output{st.session_state.audio_count}.wav"
+        asyncio.run(convert_audio(response, audio_filename))
+        st.session_state.audio_count += 1  # Increment for next response
+
+        status_placeholder.write("Press mic button again to ask more questions")
+
+        # Append (question, answer, audio_filename) to conversation history
+        st.session_state.conversation.append((f"Q: {user_input}", f"A: {response}", audio_filename))
+
+    # Handle user input
+    if text:
+        process_input(text)
+    elif recorded_audio:
+        status_placeholder.write("Converting audio...")
+        data_to_file(recorded_audio)
+        status_placeholder.write("Uploading audio...")
+        transcription = audio_to_text("temp_audio.wav")
+        status_placeholder.write("Transcription completed.")
+        process_input(transcription)
+
+    # Display full conversation history
+    for i, (q, a, audio_file) in enumerate(st.session_state.conversation):
+        st.write(q)
+        st.write(a)
+        st.audio(audio_file, format="audio/wav", loop=False, autoplay=(i == len(st.session_state.conversation) - 1))
+
+
+# Function to convert recorded audio data to an audio file
+def data_to_file(recorded_audio):
+    temp_audio_path = "temp_audio.wav"
+    with open(temp_audio_path, "wb") as temp_file:
+        temp_file.write(recorded_audio)
+
+
+# Function for audio to text
+def audio_to_text(audio_path):
+    client = Groq(api_key=os.getenv('GROQ_API_KEY'))
+    with open(audio_path, 'rb') as file:
         transcription = client.audio.translations.create(
-            file=(path, f.read()),
+            file=(audio_path, file.read()),
             model='whisper-large-v3',
         )
     return transcription.text

-#
-def answer(question):
-    model = ChatGroq(
+# Function for answering the user query
+def answer(user_question):
+    model = ChatGroq(
+        model="llama-3.3-70b-versatile",
+        temperature=0.6
+    )
+
     prompt = ChatPromptTemplate([
-        ("system", "You are
+        ("system", "You are a highly knowledgeable AI chatbot that answers every user query confidently. The response will be converted back to speech, so do not use bullet points; write the answer as a fairly short paragraph with proper punctuation (commas, full stops, question marks) so that clean text-to-speech can be generated."),
-        ("user", "User Query: {question}")
+        ("user", "User Query: {question}"),
     ])
+
     parser = StrOutputParser()
-    chain = prompt | model | parser
-    return chain.invoke({'question': question})

-
+    chain = prompt | model | parser
+    answer = chain.invoke({'question': user_question})
+    return answer
+
+# Audio conversion
 async def convert_audio(text, filename):
     voice = "fr-FR-VivienneMultilingualNeural"
     communicate = edge_tts.Communicate(text, voice)
     await communicate.save(filename)
+
+frontend()
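The hunk starts at line 10, so the import block at the top of app.py is not part of this diff (only import asyncio is visible in the hunk header). For orientation, here is a minimal sketch of the imports the code above appears to rely on, inferred from the names it uses; the exact packages, in particular audio_recorder_streamlit as the provider of audio_recorder, are assumptions rather than something this diff confirms:

import os  # os.getenv for the Groq API key
import asyncio  # asyncio.run around the edge-tts coroutine

import streamlit as st
import edge_tts
from audio_recorder_streamlit import audio_recorder  # assumed source of audio_recorder()
from groq import Groq
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from dotenv import load_dotenv  # this one is visible at line 10 of the hunk

With something like this in place, the app is started with streamlit run app.py, and GROQ_API_KEY is supplied through the .env file that load_dotenv() reads.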