audio-to-text

Runtime error

App Files Files Community

rodrigomasini committed on Dec 12, 2024

Commit

7cdad84

verified ·

1 Parent(s): 202a516

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -19

app.py CHANGED Viewed

@@ -129,21 +129,21 @@ def chatbot_conversation(audio_file_path):
         if not user_input:
             print("[ERROR] No user input extracted from transcription.")
-            return "I could not generate the text. Please try again.", None
-        # Ensure we have a system_message and history variables
         system_message = system_instruction
         history = []  # If history is meant to persist, consider storing it externally
         messages = []
-        # If you had previous conversation history, you could reconstruct messages here.
         for val in history:
             if val[0]:
                 messages.append({"role": "user", "content": val[0]})
             if val[1]:
                 messages.append({"role": "assistant", "content": val[1]})
-        # Include the current user input
         messages.append({"role": "user", "content": user_input})
         messages.insert(0, {"role": "system", "content": system_message})
@@ -151,7 +151,7 @@ def chatbot_conversation(audio_file_path):
         print(f"[DEBUG] Messages: {messages}")
         response = ""
-        # Streaming response from the API
         try:
             for message in sync_client.chat.completions.create(
                 model="tela-gpt4o",
@@ -163,38 +163,37 @@ def chatbot_conversation(audio_file_path):
             ):
                 token = message.choices[0].delta.content
                 response += token
-                # Optional: print tokens as they arrive for debugging
-                print(f"[DEBUG] Partial response token received: {token}")
-                yield response
         except Exception as e:
             print(f"[ERROR] Error during streaming response: {e}")
-            return "I could not understand you. Please try again.", None
-        #formatted_output = format_generated_response(
-        #    {"choices": [{"message": {"content": response}}]}
-        #)
         if response:
-            # Append the conversation turn to history
             history.append([
                 {"role": "user", "content": user_input},
                 {"role": "assistant", "content": response}
             ])
-            print("[DEBUG] Generating TTS for response.")
             tts_file_name = generate_speech(response)
             if tts_file_name:
                 print("[DEBUG] Returning final response and TTS file.")
-                return formatted_output, tts_file_name
             else:
                 print("[ERROR] Failed to generate TTS.")
-                return formatted_output, None
         else:
             print("[ERROR] No response generated.")
-            return "I could not synthesize the audio. Please try again.", None
     except Exception as e:
         print(f"[ERROR] Exception in chatbot_conversation: {e}")
-        return "I could not understand you. Please try again.", None
 gr.Interface(
     fn=chatbot_conversation,

         if not user_input:
             print("[ERROR] No user input extracted from transcription.")
+            yield "I could not generate the text. Please try again.", None
+            return
         system_message = system_instruction
         history = []  # If history is meant to persist, consider storing it externally
         messages = []
+        # Reconstruct history if needed (currently empty)
         for val in history:
             if val[0]:
                 messages.append({"role": "user", "content": val[0]})
             if val[1]:
                 messages.append({"role": "assistant", "content": val[1]})
+        # Current user input
         messages.append({"role": "user", "content": user_input})
         messages.insert(0, {"role": "system", "content": system_message})
         print(f"[DEBUG] Messages: {messages}")
         response = ""
+        # Stream partial responses
         try:
             for message in sync_client.chat.completions.create(
                 model="tela-gpt4o",
             ):
                 token = message.choices[0].delta.content
                 response += token
+                # Yield partial text, no audio yet
+                # The first output is the transcription (assistant message),
+                # second output is audio, which we pass as None for now
+                yield (response, None)
         except Exception as e:
             print(f"[ERROR] Error during streaming response: {e}")
+            yield ("I could not understand you. Please try again.", None)
+            return
+        # Now that we have the full response, generate TTS
         if response:
             history.append([
                 {"role": "user", "content": user_input},
                 {"role": "assistant", "content": response}
             ])
+            print("[DEBUG] Generating TTS for full response.")
             tts_file_name = generate_speech(response)
             if tts_file_name:
                 print("[DEBUG] Returning final response and TTS file.")
+                # Now yield again with final text and audio
+                yield (response, tts_file_name)
             else:
                 print("[ERROR] Failed to generate TTS.")
+                yield (response, None)
         else:
             print("[ERROR] No response generated.")
+            yield ("I could not synthesize the audio. Please try again.", None)
     except Exception as e:
         print(f"[ERROR] Exception in chatbot_conversation: {e}")
+        yield ("I could not understand you. Please try again.", None)
 gr.Interface(
     fn=chatbot_conversation,