# Voice AI Assistant: take a spoken or typed question, transcribe speech with Groq
# Whisper, answer with a Groq-hosted LLM and read the answer back with edge-tts.
# Requires GROQ_API_KEY in the environment (loaded from a .env file below).
import asyncio
import os

import edge_tts
import streamlit as st
from audio_recorder_streamlit import audio_recorder
from dotenv import load_dotenv
from groq import Groq
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq

load_dotenv()

st.set_page_config(page_title="Voice AI Assistant", page_icon="🎤", layout="centered")
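
# Sidebar dark-mode toggle; the selected palette drives the CSS injected below.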
if "dark_mode" not in st.session_state:
    st.session_state.dark_mode = False
dm = st.sidebar.checkbox("🌙 Dark Mode", value=st.session_state.dark_mode)
st.session_state.dark_mode = dm

BG = "#0f1620" if dm else "#f8f9fa"
PANEL = "#1c2330" if dm else "#ffffff"
TEXT = "#e3e8f1" if dm else "#1a1a1a"
CARD = "#2a3240" if dm else "#f1f3f5"
ACCENT = "#ff5252"
BORDER = "#333" if dm else "#ddd"
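
# Theme-aware CSS for the app shell, sidebar, headings and conversation cards.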
st.markdown(f"""
<style>
.stApp {{
    background-color: {BG};
    color: {TEXT};
}}
[data-testid="stSidebar"] {{
    background-color: {PANEL};
}}
.block-container {{
    padding-top: 2rem;
    padding-bottom: 2rem;
}}
h1, h2, h3, h4 {{
    color: {TEXT};
}}
.conversation-block {{
    background-color: {CARD};
    padding: 1rem;
    border-radius: 8px;
    margin-bottom: 1rem;
    border: 1px solid {BORDER};
}}
.question {{
    font-weight: bold;
    color: {ACCENT};
}}
.answer {{
    margin-top: 0.5rem;
    color: {TEXT};
}}
.audio-player {{
    margin-top: 0.5rem;
}}
.status-bar {{
    font-style: italic;
    color: {TEXT}AA;
    margin-bottom: 1rem;
}}
</style>
""", unsafe_allow_html=True)

st.title("🎤 Voice AI Assistant")
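
# Session state: the running conversation and a counter used to name each synthesised reply.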
if "conversation" not in st.session_state:
    st.session_state.conversation = []
if "audio_count" not in st.session_state:
    st.session_state.audio_count = 1
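
# Status line plus the two input channels: microphone recording and typed chat.
# The 8 kHz sample rate keeps recordings small, at the cost of some audio fidelity.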
status = st.empty()
status.markdown("<div class='status-bar'>🎙️ Press mic button or type to ask a question</div>", unsafe_allow_html=True)

recorded_audio = audio_recorder(sample_rate=8000)
text_input = st.chat_input("Type your question here...")


def handle_input(user_text):
    """Answer a question, synthesise the reply to speech and store both in the history."""
    status.markdown("<div class='status-bar'>🤖 Thinking...</div>", unsafe_allow_html=True)
    response = answer(user_text)
    # edge-tts writes MP3 data by default, so save the reply with a matching extension.
    audio_file = f"output{st.session_state.audio_count}.mp3"
    status.markdown("<div class='status-bar'>🎧 Converting response to audio...</div>", unsafe_allow_html=True)
    asyncio.run(convert_audio(response, audio_file))
    st.session_state.audio_count += 1

    st.session_state.conversation.append((f"Q: {user_text}", f"A: {response}", audio_file))
    status.markdown("<div class='status-bar'>✅ Ask another question...</div>", unsafe_allow_html=True)
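

# Helper functions: save the recording, speech-to-text, answer generation, text-to-speech.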
def data_to_file(audio_blob):
    """Write the raw recorder bytes to disk so the Groq SDK can read them as a file."""
    with open("temp_audio.wav", "wb") as f:
        f.write(audio_blob)


def audio_to_text(path):
    """Transcribe the recording with Groq's Whisper model; the translations endpoint
    also renders non-English speech as English text."""
    client = Groq(api_key=os.getenv("GROQ_API_KEY"))
    with open(path, "rb") as f:
        transcription = client.audio.translations.create(
            file=(path, f.read()),
            model="whisper-large-v3",
        )
    return transcription.text


def answer(question):
    """Generate a concise, speech-friendly answer with a Groq-hosted LLM."""
    model = ChatGroq(model="llama-3.3-70b-versatile", temperature=0.6)
    prompt = ChatPromptTemplate([
        ("system", "You are a knowledgeable AI assistant. Keep answers clear, brief, and well-punctuated for speech conversion."),
        ("user", "User Query: {question}"),
    ])
    parser = StrOutputParser()
    chain = prompt | model | parser
    return chain.invoke({"question": question})


async def convert_audio(text, filename):
    """Synthesise the answer to an audio file with edge-tts (MP3 output by default)."""
    voice = "fr-FR-VivienneMultilingualNeural"
    communicate = edge_tts.Communicate(text, voice)
    await communicate.save(filename)


# Route the input: typed text takes priority, otherwise transcribe the recording.
if text_input:
    handle_input(text_input)
elif recorded_audio:
    status.markdown("<div class='status-bar'>🎧 Transcribing speech...</div>", unsafe_allow_html=True)
    data_to_file(recorded_audio)
    transcription = audio_to_text("temp_audio.wav")
    handle_input(transcription)


if st.session_state.conversation:
    st.markdown("## 🧾 Conversation History")
    for i, (q, a, audio_path) in enumerate(st.session_state.conversation):
        with st.container():
            st.markdown("<div class='conversation-block'>", unsafe_allow_html=True)
            st.markdown(f"<div class='question'>{q}</div>", unsafe_allow_html=True)
            st.markdown(f"<div class='answer'>{a}</div>", unsafe_allow_html=True)
            # Autoplay only the most recent reply.
            st.audio(audio_path, format="audio/mpeg", autoplay=(i == len(st.session_state.conversation) - 1))
            st.markdown("</div>", unsafe_allow_html=True)