# voice_ai/app.py
import asyncio
import os

import edge_tts
import streamlit as st
from audio_recorder_streamlit import audio_recorder
from dotenv import load_dotenv
from groq import Groq
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq

load_dotenv()
# Page config
st.set_page_config(page_title="Voice AI Assistant", page_icon="🎤", layout="centered")
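
# Fail fast when the key is missing (a small added guard, not in the original:
# both the Whisper upload and ChatGroq below read GROQ_API_KEY from the env).
if not os.getenv("GROQ_API_KEY"):
    st.error("GROQ_API_KEY is not set - add it to your .env file.")
    st.stop()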
# Theme toggle
if "dark_mode" not in st.session_state:
st.session_state.dark_mode = False # default: light mode
dm = st.sidebar.checkbox("πŸŒ™ Dark Mode", value=st.session_state.dark_mode)
st.session_state.dark_mode = dm
# Theme colors
BG = "#0f1620" if dm else "#f8f9fa"
PANEL = "#1c2330" if dm else "#ffffff"
TEXT = "#e3e8f1" if dm else "#1a1a1a"
CARD = "#2a3240" if dm else "#f1f3f5"
ACCENT = "#ff5252"
BORDER = "#333" if dm else "#ddd"
# Custom CSS
st.markdown(f"""
<style>
.stApp {{
background-color: {BG};
color: {TEXT};
}}
[data-testid="stSidebar"] {{
background-color: {PANEL};
}}
.block-container {{
padding-top: 2rem;
padding-bottom: 2rem;
}}
h1, h2, h3, h4 {{
color: {TEXT};
}}
.conversation-block {{
background-color: {CARD};
padding: 1rem;
border-radius: 8px;
margin-bottom: 1rem;
border: 1px solid {BORDER};
}}
.question {{
font-weight: bold;
color: {ACCENT};
}}
.answer {{
margin-top: 0.5rem;
color: {TEXT};
}}
.audio-player {{
margin-top: 0.5rem;
}}
.status-bar {{
font-style: italic;
color: {TEXT}AA;
margin-bottom: 1rem;
}}
</style>
""", unsafe_allow_html=True)
# App UI
st.title("🎤 Voice AI Assistant")
# Session init
if "conversation" not in st.session_state:
st.session_state.conversation = [] # list of (question, answer, audio_filename)
if "audio_count" not in st.session_state:
st.session_state.audio_count = 1
status = st.empty()
status.markdown("<div class='status-bar'>🎙️ Press mic button or type to ask a question</div>", unsafe_allow_html=True)
# Note: 8 kHz keeps uploads small, but Whisper models are trained on 16 kHz
# audio, so sample_rate=16000 may transcribe noticeably better.
recorded_audio = audio_recorder(sample_rate=8000)
text_input = st.chat_input("Type your question here...")
# ----- INPUT HANDLER -----
def handle_input(user_text):
    """Answer the question, synthesize speech for it, and log the turn."""
    status.markdown("<div class='status-bar'>🤖 Thinking...</div>", unsafe_allow_html=True)
    response = answer(user_text)
    # edge-tts saves an MP3 stream by default, so use a matching extension
    audio_file = f"output{st.session_state.audio_count}.mp3"
    status.markdown("<div class='status-bar'>🎧 Converting response to audio...</div>", unsafe_allow_html=True)
    asyncio.run(convert_audio(response, audio_file))
    st.session_state.audio_count += 1
    st.session_state.conversation.append((f"Q: {user_text}", f"A: {response}", audio_file))
    status.markdown("<div class='status-bar'>✅ Ask another question...</div>", unsafe_allow_html=True)
# ----- AUDIO TO TEXT -----
def data_to_file(audio_blob):
    """Persist the recorded bytes so they can be uploaded to Groq."""
    with open("temp_audio.wav", "wb") as f:
        f.write(audio_blob)

def audio_to_text(path):
    """Transcribe speech with Groq's hosted Whisper model."""
    client = Groq(api_key=os.getenv("GROQ_API_KEY"))
    with open(path, "rb") as f:
        transcription = client.audio.translations.create(
            file=(path, f.read()),
            model="whisper-large-v3",
        )
    return transcription.text
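
# Note: audio.translations always returns English text regardless of the
# spoken language. To keep the speaker's own language, Groq also exposes a
# plain transcription endpoint (a sketch, same file/model arguments assumed):
#   client.audio.transcriptions.create(file=(path, f.read()), model="whisper-large-v3")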
# ----- LLM ANSWER -----
def answer(question):
    """Route the question through a Groq-hosted Llama model and return plain text."""
    model = ChatGroq(model="llama-3.3-70b-versatile", temperature=0.6)
    prompt = ChatPromptTemplate.from_messages([
        ("system", "You are a knowledgeable AI assistant. Keep answers clear, brief, and well-punctuated for speech conversion."),
        ("user", "User Query: {question}"),
    ])
    parser = StrOutputParser()
    chain = prompt | model | parser
    return chain.invoke({"question": question})
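
# The chain above is stateless: each query is answered in isolation. If
# follow-ups should see earlier turns, one option (a sketch, not in the
# original) is to add a MessagesPlaceholder("history") from
# langchain_core.prompts and feed it the stored (question, answer) pairs
# from st.session_state.conversation.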
# ----- TEXT TO AUDIO -----
async def convert_audio(text, filename):
    """Synthesize speech with Edge TTS (the saved stream is MP3 by default)."""
    voice = "fr-FR-VivienneMultilingualNeural"  # multilingual neural voice; handles English input
    communicate = edge_tts.Communicate(text, voice)
    await communicate.save(filename)

# ----- PROCESS INPUT -----
# Streamlit reruns the script top to bottom, so this runs only after every
# helper above has been defined.
if text_input:
    handle_input(text_input)
elif recorded_audio:
    status.markdown("<div class='status-bar'>🧠 Transcribing speech...</div>", unsafe_allow_html=True)
    data_to_file(recorded_audio)
    transcription = audio_to_text("temp_audio.wav")
    handle_input(transcription)

# ----- SHOW CONVERSATION -----
if st.session_state.conversation:
    st.markdown("## 🧾 Conversation History")
    for i, (q, a, audio_path) in enumerate(st.session_state.conversation):
        with st.container():
            st.markdown("<div class='conversation-block'>", unsafe_allow_html=True)
            st.markdown(f"<div class='question'>{q}</div>", unsafe_allow_html=True)
            st.markdown(f"<div class='answer'>{a}</div>", unsafe_allow_html=True)
            st.audio(audio_path, format="audio/mp3", autoplay=(i == len(st.session_state.conversation) - 1))
            st.markdown("</div>", unsafe_allow_html=True)
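
# --- Optional hardening (a sketch, not in the original code) ---
# audio_recorder tends to return the same bytes on every Streamlit rerun,
# so a later widget interaction can re-answer the last recording. A minimal
# guard is to remember the previously processed clip:
#
#   if recorded_audio and recorded_audio != st.session_state.get("last_audio"):
#       st.session_state.last_audio = recorded_audio
#       ...transcribe and call handle_input() as above...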