rodrigomasini committed on
Commit
d3a9b11
·
verified ·
1 Parent(s): 84837eb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -55
app.py CHANGED
@@ -91,21 +91,6 @@ def extract_user_input(transcription_response):
91
  print(f"[ERROR] KeyError in transcription response: {e}")
92
  return ""
93
 
94
- #def format_generated_response(response):
95
- # print("[DEBUG] Formatting the generated response.")
96
- # if response is None:
97
- # print("[ERROR] No response to format.")
98
- # return "Error: No valid response received."
99
- # try:
100
- # generated_text = response['choices'][0]['message']['content']
101
- # partial_text = re.sub(r'<.*?>', '', generated_text)
102
- # cleaned_text = re.sub(r'#.*?\n', '', partial_text)
103
- # print(f"[DEBUG] Formatted response: {cleaned_text.strip()}")
104
- # return cleaned_text.strip()
105
- # except (KeyError, IndexError) as e:
106
- # print(f"[ERROR] Error formatting response: {e}")
107
- # return f"Error: Missing key or index {e} in response."
108
-
109
  def generate_speech(text):
110
  print("[DEBUG] Generating speech from text.")
111
  tts_file = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
@@ -121,7 +106,7 @@ def generate_speech(text):
121
  print(f"[ERROR] Error generating TTS: {e}")
122
  return None
123
 
124
- def chatbot_conversation(audio_file_path):
125
  print("[DEBUG] Starting chatbot conversation.")
126
  try:
127
  transcription = transcript(audio_file_path)
@@ -129,23 +114,27 @@ def chatbot_conversation(audio_file_path):
129
 
130
  if not user_input:
131
  print("[ERROR] No user input extracted from transcription.")
132
- yield "I could not generate the text. Please try again.", None
133
  return
134
 
 
135
  system_message = system_instruction
136
- history = [] # If history is meant to persist, consider storing it externally
137
- messages = []
138
 
139
- # Reconstruct history if needed (currently empty)
140
- for val in history:
141
- if val[0]:
142
- messages.append({"role": "user", "content": val[0]})
143
- if val[1]:
144
- messages.append({"role": "assistant", "content": val[1]})
 
 
 
 
 
 
145
 
146
- # Current user input
147
  messages.append({"role": "user", "content": user_input})
148
- messages.insert(0, {"role": "system", "content": system_message})
149
 
150
  print("[DEBUG] Sending request to sync_client for chat completion.")
151
  print(f"[DEBUG] Messages: {messages}")
@@ -163,47 +152,49 @@ def chatbot_conversation(audio_file_path):
163
  ):
164
  token = message.choices[0].delta.content
165
  response += token
166
- # Yield partial text, no audio yet
167
- # The first output is the transcription (assistant message),
168
- # second output is audio, which we pass as None for now
169
- yield (response, None)
170
  except Exception as e:
171
  print(f"[ERROR] Error during streaming response: {e}")
172
- yield ("I could not understand you. Please try again.", None)
173
  return
174
 
175
- # Now that we have the full response, generate TTS
176
- if response:
177
- history.append([
178
- {"role": "user", "content": user_input},
179
- {"role": "assistant", "content": response}
180
- ])
181
- print("[DEBUG] Generating TTS for full response.")
182
- tts_file_name = generate_speech(response)
183
- if tts_file_name:
184
- print("[DEBUG] Returning final response and TTS file.")
185
- # Now yield again with final text and audio
186
- yield (response, tts_file_name)
187
- else:
188
- print("[ERROR] Failed to generate TTS.")
189
- yield (response, None)
190
  else:
191
- print("[ERROR] No response generated.")
192
- yield ("I could not synthesize the audio. Please try again.", None)
193
 
194
  except Exception as e:
195
  print(f"[ERROR] Exception in chatbot_conversation: {e}")
196
- yield ("I could not understand you. Please try again.", None)
197
 
198
- gr.Interface(
 
199
  fn=chatbot_conversation,
200
- inputs=gr.Audio(label="User", type="filepath", streaming=False, container=True),
 
 
 
201
  outputs=[
202
  gr.Textbox(label="Transcription"),
203
- gr.Audio(type="filepath", autoplay=True, label="MAGIC Chat")
 
204
  ],
205
  title="MAGIC VoiceChat",
206
  description="A simple example of audio conversational AI",
207
  theme="sudeepshouche/minimalist",
208
- live=True
209
- ).launch()
 
 
91
  print(f"[ERROR] KeyError in transcription response: {e}")
92
  return ""
93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  def generate_speech(text):
95
  print("[DEBUG] Generating speech from text.")
96
  tts_file = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
 
106
  print(f"[ERROR] Error generating TTS: {e}")
107
  return None
108
 
109
+ def chatbot_conversation(audio_file_path, history):
110
  print("[DEBUG] Starting chatbot conversation.")
111
  try:
112
  transcription = transcript(audio_file_path)
 
114
 
115
  if not user_input:
116
  print("[ERROR] No user input extracted from transcription.")
117
+ yield "I could not generate the text. Please try again.", None, history
118
  return
119
 
120
+ # Ensure we have a system_message
121
  system_message = system_instruction
 
 
122
 
123
+ if history is None:
124
+ history = []
125
+
126
+ # Reconstruct messages from history
127
+ messages = [{"role": "system", "content": system_message}]
128
+ for turn in history:
129
+ user_msg = turn[0].get("content") if turn[0] else ""
130
+ assistant_msg = turn[1].get("content") if turn[1] else ""
131
+ if user_msg:
132
+ messages.append({"role": "user", "content": user_msg})
133
+ if assistant_msg:
134
+ messages.append({"role": "assistant", "content": assistant_msg})
135
 
136
+ # Add the current user input
137
  messages.append({"role": "user", "content": user_input})
 
138
 
139
  print("[DEBUG] Sending request to sync_client for chat completion.")
140
  print(f"[DEBUG] Messages: {messages}")
 
152
  ):
153
  token = message.choices[0].delta.content
154
  response += token
155
+ # Yield partial text updates, no audio yet, history unchanged yet
156
+ yield (response, None, history)
 
 
157
  except Exception as e:
158
  print(f"[ERROR] Error during streaming response: {e}")
159
+ yield ("I could not understand you. Please try again.", None, history)
160
  return
161
 
162
+ # Now that we have the full response, update history
163
+ history.append([
164
+ {"role": "user", "content": user_input},
165
+ {"role": "assistant", "content": response}
166
+ ])
167
+
168
+ # Generate TTS now
169
+ print("[DEBUG] Generating TTS for full response.")
170
+ tts_file_name = generate_speech(response)
171
+ if tts_file_name:
172
+ print("[DEBUG] Returning final response and TTS file with updated history.")
173
+ # Now yield again with final text, audio, and updated history
174
+ yield (response, tts_file_name, history)
 
 
175
  else:
176
+ print("[ERROR] Failed to generate TTS.")
177
+ yield (response, None, history)
178
 
179
  except Exception as e:
180
  print(f"[ERROR] Exception in chatbot_conversation: {e}")
181
+ yield ("I could not understand you. Please try again.", None, history)
182
 
183
+ # We now have three outputs: transcription text, audio, and the updated history
184
+ interface = gr.Interface(
185
  fn=chatbot_conversation,
186
+ inputs=[
187
+ gr.Audio(label="User", type="filepath", streaming=False, container=True),
188
+ gr.State([]) # State holds the conversation history
189
+ ],
190
  outputs=[
191
  gr.Textbox(label="Transcription"),
192
+ gr.Audio(type="filepath", autoplay=True, label="MAGIC Chat"),
193
+ gr.State([]) # Return updated history
194
  ],
195
  title="MAGIC VoiceChat",
196
  description="A simple example of audio conversational AI",
197
  theme="sudeepshouche/minimalist",
198
+ )
199
+
200
+ interface.queue().launch()