vsj0702 committed
Commit 9e8ba15 · verified · 1 Parent(s): 5c63c06

Makeover for my application

Files changed (1)
  1. app.py +124 -78
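Before the diff itself, a quick note on runtime prerequisites: app.py loads a .env file via python-dotenv and reads GROQ_API_KEY for the Groq transcription and chat calls, and the UI is a Streamlit script. The snippet below is a minimal, illustrative environment check; the script name and messages are assumptions, not part of this commit.

# check_env.py - hypothetical helper, not part of this commit
import os

from dotenv import load_dotenv

load_dotenv()  # app.py reads its key the same way

if not os.getenv("GROQ_API_KEY"):
    raise SystemExit("GROQ_API_KEY is missing; Groq transcription and chat calls will fail.")
print("Environment looks OK. Start the UI with: streamlit run app.py")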
app.py CHANGED
@@ -10,94 +10,140 @@ import asyncio
  from dotenv import load_dotenv
  load_dotenv()

- #Front end using streamlit
- def frontend():
-     st.title("Voice AI Demo")
-
-     # Initialize session state variables
-     if "conversation" not in st.session_state:
-         st.session_state.conversation = [] # Stores (question, answer, audio_filename)
-     if "audio_count" not in st.session_state:
-         st.session_state.audio_count = 1 # Start numbering audio files from output1.wav
-
-     status_placeholder = st.empty()
-     status_placeholder.write("Press Mic button to start asking a question")
-
-     recorded_audio = audio_recorder(sample_rate=8000)
-     text = st.chat_input()
-
-     def process_input(user_input):
-         status_placeholder.write("Getting response...")
-         response = answer(user_input)
-         status_placeholder.write("Converting response to audio...")
-
-         # Generate unique audio filename
-         audio_filename = f"output{st.session_state.audio_count}.wav"
-         asyncio.run(convert_audio(response, audio_filename))
-         st.session_state.audio_count += 1 # Increment for next response
-
-         status_placeholder.write("Press mic button again to ask more questions")
-
-         # Append (question, answer, audio_filename) to conversation history
-         st.session_state.conversation.append((f"Q: {user_input}", f"A: {response}", audio_filename))
-
-     # Handle user input
-     if text:
-         process_input(text)
-     elif recorded_audio:
-         status_placeholder.write("Converting audio...")
-         data_to_file(recorded_audio)
-         status_placeholder.write("Uploading audio...")
-         transcription = audio_to_text("temp_audio.wav")
-         status_placeholder.write("Transcription completed.")
-         process_input(transcription)
-
-     # Display full conversation history
-     for i, (q, a, audio_file) in enumerate(st.session_state.conversation):
-         st.write(q)
-         st.write(a)
-         st.audio(audio_file, format="audio/wav", loop=False, autoplay=(i == len(st.session_state.conversation) - 1))
-
-
- #Fuction to convert audio data to audio file
- def data_to_file(recorded_audio):
-     temp_audio_path = "temp_audio.wav"
-     with open(temp_audio_path, "wb") as temp_file:
-         temp_file.write(recorded_audio)
-
-
- #Function for audio to text
- def audio_to_text(audio_path):
-     client = Groq(api_key=os.getenv('GROQ_API_KEY'))
-     with open(audio_path, 'rb') as file:
          transcription = client.audio.translations.create(
-             file=(audio_path, file.read()),
              model='whisper-large-v3',
          )
      return transcription.text

- #Function for answerig User Query
- def answer(user_question):
-     model = ChatGroq(
-         model="llama-3.3-70b-versatile",
-         temperature=0.6
-     )
-
      prompt = ChatPromptTemplate([
-         ("system", "You are super knowlegable AI chat bot which will answer all User Query, answer with confident, also this response will get convert back to speech, so dont make point or anything, but make your answer in para form and dont make it too large, and use proper annotation, comma, full stop, question mark, so that a better text to speach can be genrate back."),
-         ("user", "User Query: {question}"),
      ])
-
      parser = StrOutputParser()

-     chain = prompt|model|parser
-     answer = chain.invoke({'question': user_question})
-     return answer
-
- # Audio conversion
  async def convert_audio(text, filename):
      voice = "fr-FR-VivienneMultilingualNeural"
      communicate = edge_tts.Communicate(text, voice)
      await communicate.save(filename)
-
- frontend()
 
  from dotenv import load_dotenv
  load_dotenv()

+ # Page config
+ st.set_page_config(page_title="Voice AI Assistant", page_icon="🎤", layout="centered")
+
+ # Theme toggle
+ if "dark_mode" not in st.session_state:
+     st.session_state.dark_mode = False # default: light mode
+ dm = st.sidebar.checkbox("🌙 Dark Mode", value=st.session_state.dark_mode)
+ st.session_state.dark_mode = dm
+
+ # Theme colors
+ BG = "#0f1620" if dm else "#f8f9fa"
+ PANEL = "#1c2330" if dm else "#ffffff"
+ TEXT = "#e3e8f1" if dm else "#1a1a1a"
+ CARD = "#2a3240" if dm else "#f1f3f5"
+ ACCENT = "#ff5252"
+ BORDER = "#333" if dm else "#ddd"
+
+ # Custom CSS
+ st.markdown(f"""
+     <style>
+     .stApp {{
+         background-color: {BG};
+         color: {TEXT};
+     }}
+     [data-testid="stSidebar"] {{
+         background-color: {PANEL};
+     }}
+     .block-container {{
+         padding-top: 2rem;
+         padding-bottom: 2rem;
+     }}
+     h1, h2, h3, h4 {{
+         color: {TEXT};
+     }}
+     .conversation-block {{
+         background-color: {CARD};
+         padding: 1rem;
+         border-radius: 8px;
+         margin-bottom: 1rem;
+         border: 1px solid {BORDER};
+     }}
+     .question {{
+         font-weight: bold;
+         color: {ACCENT};
+     }}
+     .answer {{
+         margin-top: 0.5rem;
+         color: {TEXT};
+     }}
+     .audio-player {{
+         margin-top: 0.5rem;
+     }}
+     .status-bar {{
+         font-style: italic;
+         color: {TEXT}AA;
+         margin-bottom: 1rem;
+     }}
+     </style>
+ """, unsafe_allow_html=True)
+
+ # App UI
+ st.title("🎤 Voice AI Assistant")
+
+ # Session init
+ if "conversation" not in st.session_state:
+     st.session_state.conversation = [] # list of (question, answer, audio_filename)
+ if "audio_count" not in st.session_state:
+     st.session_state.audio_count = 1
+
+ status = st.empty()
+ status.markdown("<div class='status-bar'>🎙️ Press mic button or type to ask a question</div>", unsafe_allow_html=True)
+
+ recorded_audio = audio_recorder(sample_rate=8000)
+ text_input = st.chat_input("Type your question here...")
+
+ # ----- INPUT HANDLER -----
+ def handle_input(user_text):
+     status.markdown("<div class='status-bar'>🤖 Thinking...</div>", unsafe_allow_html=True)
+     response = answer(user_text)
+     audio_file = f"output{st.session_state.audio_count}.wav"
+     status.markdown("<div class='status-bar'>🎧 Converting response to audio...</div>", unsafe_allow_html=True)
+     asyncio.run(convert_audio(response, audio_file))
+     st.session_state.audio_count += 1
+
+     st.session_state.conversation.append((f"Q: {user_text}", f"A: {response}", audio_file))
+     status.markdown("<div class='status-bar'>✅ Ask another question...</div>", unsafe_allow_html=True)
+
+ # ----- PROCESS INPUT -----
+ if text_input:
+     handle_input(text_input)
+ elif recorded_audio:
+     status.markdown("<div class='status-bar'>🧠 Transcribing speech...</div>", unsafe_allow_html=True)
+     data_to_file(recorded_audio)
+     transcription = audio_to_text("temp_audio.wav")
+     handle_input(transcription)
+
+ # ----- SHOW CONVERSATION -----
+ if st.session_state.conversation:
+     st.markdown("## 🧾 Conversation History")
+     for i, (q, a, audio_path) in enumerate(st.session_state.conversation):
+         with st.container():
+             st.markdown(f"<div class='conversation-block'>", unsafe_allow_html=True)
+             st.markdown(f"<div class='question'>{q}</div>", unsafe_allow_html=True)
+             st.markdown(f"<div class='answer'>{a}</div>", unsafe_allow_html=True)
+             st.audio(audio_path, format="audio/wav", autoplay=(i == len(st.session_state.conversation)-1))
+             st.markdown("</div>", unsafe_allow_html=True)
+
+ # ----- AUDIO TO TEXT -----
+ def data_to_file(audio_blob):
+     with open("temp_audio.wav", "wb") as f:
+         f.write(audio_blob)
+
+ def audio_to_text(path):
+     client = Groq(api_key=os.getenv("GROQ_API_KEY"))
+     with open(path, "rb") as f:
          transcription = client.audio.translations.create(
+             file=(path, f.read()),
              model='whisper-large-v3',
          )
      return transcription.text

+ # ----- LLM ANSWER -----
+ def answer(question):
+     model = ChatGroq(model="llama-3.3-70b-versatile", temperature=0.6)
      prompt = ChatPromptTemplate([
+         ("system", "You are a knowledgeable AI assistant. Keep answers clear, brief, and well-punctuated for speech conversion."),
+         ("user", "User Query: {question}")
      ])
      parser = StrOutputParser()
+     chain = prompt | model | parser
+     return chain.invoke({'question': question})

+ # ----- TEXT TO AUDIO -----
  async def convert_audio(text, filename):
      voice = "fr-FR-VivienneMultilingualNeural"
      communicate = edge_tts.Communicate(text, voice)
      await communicate.save(filename)
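One structural note on the rewritten app.py: the top-level "PROCESS INPUT" block calls handle_input (and, through it, data_to_file, audio_to_text, answer, and convert_audio) before those helpers are defined lower in the file. Streamlit re-executes the script top to bottom on every rerun, so the first submitted question would hit a NameError; the old layout avoided this by calling frontend() at the very end. Below is a minimal sketch of the intended ordering, with stubbed helpers standing in for the real Groq and edge-tts code; only the structure is the point, the names and messages are illustrative.

# ordering_sketch.py - illustrative only: helpers defined before any top-level call
import streamlit as st

def answer(question: str) -> str:
    # stub standing in for the ChatGroq chain in app.py
    return f"(stub answer to: {question})"

def handle_input(user_text: str) -> None:
    st.session_state.setdefault("conversation", [])
    st.session_state.conversation.append((user_text, answer(user_text)))

# Top-level dispatch runs only after every helper above exists.
text_input = st.chat_input("Type your question here...")
if text_input:
    handle_input(text_input)

for q, a in st.session_state.get("conversation", []):
    st.write(f"Q: {q}")
    st.write(f"A: {a}")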