Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -91,21 +91,6 @@ def extract_user_input(transcription_response):
|
|
91 |
print(f"[ERROR] KeyError in transcription response: {e}")
|
92 |
return ""
|
93 |
|
94 |
-
#def format_generated_response(response):
|
95 |
-
# print("[DEBUG] Formatting the generated response.")
|
96 |
-
# if response is None:
|
97 |
-
# print("[ERROR] No response to format.")
|
98 |
-
# return "Error: No valid response received."
|
99 |
-
# try:
|
100 |
-
# generated_text = response['choices'][0]['message']['content']
|
101 |
-
# partial_text = re.sub(r'<.*?>', '', generated_text)
|
102 |
-
# cleaned_text = re.sub(r'#.*?\n', '', partial_text)
|
103 |
-
# print(f"[DEBUG] Formatted response: {cleaned_text.strip()}")
|
104 |
-
# return cleaned_text.strip()
|
105 |
-
# except (KeyError, IndexError) as e:
|
106 |
-
# print(f"[ERROR] Error formatting response: {e}")
|
107 |
-
# return f"Error: Missing key or index {e} in response."
|
108 |
-
|
109 |
def generate_speech(text):
|
110 |
print("[DEBUG] Generating speech from text.")
|
111 |
tts_file = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
|
@@ -121,7 +106,7 @@ def generate_speech(text):
|
|
121 |
print(f"[ERROR] Error generating TTS: {e}")
|
122 |
return None
|
123 |
|
124 |
-
def chatbot_conversation(audio_file_path):
|
125 |
print("[DEBUG] Starting chatbot conversation.")
|
126 |
try:
|
127 |
transcription = transcript(audio_file_path)
|
@@ -129,23 +114,27 @@ def chatbot_conversation(audio_file_path):
|
|
129 |
|
130 |
if not user_input:
|
131 |
print("[ERROR] No user input extracted from transcription.")
|
132 |
-
yield "I could not generate the text. Please try again.", None
|
133 |
return
|
134 |
|
|
|
135 |
system_message = system_instruction
|
136 |
-
history = [] # If history is meant to persist, consider storing it externally
|
137 |
-
messages = []
|
138 |
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
145 |
|
146 |
-
#
|
147 |
messages.append({"role": "user", "content": user_input})
|
148 |
-
messages.insert(0, {"role": "system", "content": system_message})
|
149 |
|
150 |
print("[DEBUG] Sending request to sync_client for chat completion.")
|
151 |
print(f"[DEBUG] Messages: {messages}")
|
@@ -163,47 +152,49 @@ def chatbot_conversation(audio_file_path):
|
|
163 |
):
|
164 |
token = message.choices[0].delta.content
|
165 |
response += token
|
166 |
-
# Yield partial text, no audio yet
|
167 |
-
|
168 |
-
# second output is audio, which we pass as None for now
|
169 |
-
yield (response, None)
|
170 |
except Exception as e:
|
171 |
print(f"[ERROR] Error during streaming response: {e}")
|
172 |
-
yield ("I could not understand you. Please try again.", None)
|
173 |
return
|
174 |
|
175 |
-
# Now that we have the full response,
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
print("[ERROR] Failed to generate TTS.")
|
189 |
-
yield (response, None)
|
190 |
else:
|
191 |
-
print("[ERROR]
|
192 |
-
yield (
|
193 |
|
194 |
except Exception as e:
|
195 |
print(f"[ERROR] Exception in chatbot_conversation: {e}")
|
196 |
-
yield ("I could not understand you. Please try again.", None)
|
197 |
|
198 |
-
|
|
|
199 |
fn=chatbot_conversation,
|
200 |
-
inputs=
|
|
|
|
|
|
|
201 |
outputs=[
|
202 |
gr.Textbox(label="Transcription"),
|
203 |
-
gr.Audio(type="filepath", autoplay=True, label="MAGIC Chat")
|
|
|
204 |
],
|
205 |
title="MAGIC VoiceChat",
|
206 |
description="A simple example of audio conversational AI",
|
207 |
theme="sudeepshouche/minimalist",
|
208 |
-
|
209 |
-
|
|
|
|
91 |
print(f"[ERROR] KeyError in transcription response: {e}")
|
92 |
return ""
|
93 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
94 |
def generate_speech(text):
|
95 |
print("[DEBUG] Generating speech from text.")
|
96 |
tts_file = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
|
|
|
106 |
print(f"[ERROR] Error generating TTS: {e}")
|
107 |
return None
|
108 |
|
109 |
+
def chatbot_conversation(audio_file_path, history):
|
110 |
print("[DEBUG] Starting chatbot conversation.")
|
111 |
try:
|
112 |
transcription = transcript(audio_file_path)
|
|
|
114 |
|
115 |
if not user_input:
|
116 |
print("[ERROR] No user input extracted from transcription.")
|
117 |
+
yield "I could not generate the text. Please try again.", None, history
|
118 |
return
|
119 |
|
120 |
+
# Ensure we have a system_message
|
121 |
system_message = system_instruction
|
|
|
|
|
122 |
|
123 |
+
if history is None:
|
124 |
+
history = []
|
125 |
+
|
126 |
+
# Reconstruct messages from history
|
127 |
+
messages = [{"role": "system", "content": system_message}]
|
128 |
+
for turn in history:
|
129 |
+
user_msg = turn[0].get("content") if turn[0] else ""
|
130 |
+
assistant_msg = turn[1].get("content") if turn[1] else ""
|
131 |
+
if user_msg:
|
132 |
+
messages.append({"role": "user", "content": user_msg})
|
133 |
+
if assistant_msg:
|
134 |
+
messages.append({"role": "assistant", "content": assistant_msg})
|
135 |
|
136 |
+
# Add the current user input
|
137 |
messages.append({"role": "user", "content": user_input})
|
|
|
138 |
|
139 |
print("[DEBUG] Sending request to sync_client for chat completion.")
|
140 |
print(f"[DEBUG] Messages: {messages}")
|
|
|
152 |
):
|
153 |
token = message.choices[0].delta.content
|
154 |
response += token
|
155 |
+
# Yield partial text updates, no audio yet, history unchanged yet
|
156 |
+
yield (response, None, history)
|
|
|
|
|
157 |
except Exception as e:
|
158 |
print(f"[ERROR] Error during streaming response: {e}")
|
159 |
+
yield ("I could not understand you. Please try again.", None, history)
|
160 |
return
|
161 |
|
162 |
+
# Now that we have the full response, update history
|
163 |
+
history.append([
|
164 |
+
{"role": "user", "content": user_input},
|
165 |
+
{"role": "assistant", "content": response}
|
166 |
+
])
|
167 |
+
|
168 |
+
# Generate TTS now
|
169 |
+
print("[DEBUG] Generating TTS for full response.")
|
170 |
+
tts_file_name = generate_speech(response)
|
171 |
+
if tts_file_name:
|
172 |
+
print("[DEBUG] Returning final response and TTS file with updated history.")
|
173 |
+
# Now yield again with final text, audio, and updated history
|
174 |
+
yield (response, tts_file_name, history)
|
|
|
|
|
175 |
else:
|
176 |
+
print("[ERROR] Failed to generate TTS.")
|
177 |
+
yield (response, None, history)
|
178 |
|
179 |
except Exception as e:
|
180 |
print(f"[ERROR] Exception in chatbot_conversation: {e}")
|
181 |
+
yield ("I could not understand you. Please try again.", None, history)
|
182 |
|
183 |
+
# We now have three outputs: transcription text, audio, and the updated history
|
184 |
+
interface = gr.Interface(
|
185 |
fn=chatbot_conversation,
|
186 |
+
inputs=[
|
187 |
+
gr.Audio(label="User", type="filepath", streaming=False, container=True),
|
188 |
+
gr.State([]) # State holds the conversation history
|
189 |
+
],
|
190 |
outputs=[
|
191 |
gr.Textbox(label="Transcription"),
|
192 |
+
gr.Audio(type="filepath", autoplay=True, label="MAGIC Chat"),
|
193 |
+
gr.State([]) # Return updated history
|
194 |
],
|
195 |
title="MAGIC VoiceChat",
|
196 |
description="A simple example of audio conversational AI",
|
197 |
theme="sudeepshouche/minimalist",
|
198 |
+
)
|
199 |
+
|
200 |
+
interface.queue().launch()
|