Spaces:

abocha
/

esl-dialogue-tts

Running

App Files Community

abocha committed on May 7

Commit

635a2fb

1 Parent(s): 186ebe6

debug

Browse files

Files changed (2) hide show

app.py +10 -32
event_handlers.py +33 -18

app.py CHANGED Viewed

@@ -2,37 +2,32 @@ import gradio as gr
 import os
 import asyncio
 from openai import AsyncOpenAI
-from functools import partial # For handle_script_processing
-# Import UI creation functions and constants
 from ui_layout import (
     create_main_input_components, create_speaker_config_components,
     create_action_and_output_components, create_examples_ui,
     TTS_MODELS_AVAILABLE, MODEL_DEFAULT_ENV
 )
-# Import event handler functions
 from event_handlers import (
     handle_script_processing, handle_calculate_cost,
     update_model_controls_visibility, update_speaker_config_method_visibility,
-    load_refresh_per_speaker_ui
 )
-# --- Application Secrets and Global Client ---
 OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
 NSFW_API_URL_TEMPLATE = os.getenv("NSFW_API_URL_TEMPLATE")
 MODEL_DEFAULT_FROM_ENV = os.getenv("MODEL_DEFAULT", MODEL_DEFAULT_ENV)
-# Validate MODEL_DEFAULT_FROM_ENV or use hardcoded default
 EFFECTIVE_MODEL_DEFAULT = MODEL_DEFAULT_FROM_ENV if MODEL_DEFAULT_FROM_ENV in TTS_MODELS_AVAILABLE else MODEL_DEFAULT_ENV
 async_openai_client = None
 if not OPENAI_API_KEY:
     try:
-        # Attempt to load from Hugging Face Hub secrets if not in env
         from huggingface_hub import HfApi
         api = HfApi()
-        space_id = os.getenv("SPACE_ID") # Provided by HF Spaces
         if space_id:
             secrets = api.get_space_secrets(repo_id=space_id)
             OPENAI_API_KEY = secrets.get("OPENAI_API_KEY")
@@ -49,16 +44,13 @@ else:
     print("CRITICAL ERROR: OPENAI_API_KEY secret is not set. The application will not function properly.")
-# --- Gradio Application UI and Logic ---
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("# Dialogue Script to Speech (OpenAI TTS) - Refactored")
     if not OPENAI_API_KEY or not async_openai_client:
         gr.Markdown("<h3 style='color:red;'>⚠️ Warning: OPENAI_API_KEY not set or invalid. Audio generation will fail. Please configure it in your Space settings.</h3>")
-    # Central state for detailed speaker configurations
-    speaker_configs_state = gr.State({}) # This is crucial for dynamic UI
-    # --- Define UI Components by calling layout functions ---
     (script_input, tts_model_dropdown, pause_input,
      global_speed_input, global_instructions_input) = create_main_input_components(EFFECTIVE_MODEL_DEFAULT)
@@ -70,72 +62,58 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
      individual_lines_zip_output, merged_dialogue_mp3_output,
      status_output) = create_action_and_output_components()
-    # --- Event Wiring ---
-    # When TTS model changes, update visibility of global speed/instructions & refresh dynamic UI
     tts_model_dropdown.change(
         fn=update_model_controls_visibility,
         inputs=[tts_model_dropdown, script_input, speaker_configs_state, speaker_configs_state],
         outputs=[global_speed_input, global_instructions_input, dynamic_speaker_ui_area, speaker_configs_state]
     )
-    # When speaker config method changes, update visibility of relevant UI groups
     speaker_config_method_dropdown.change(
         fn=update_speaker_config_method_visibility,
         inputs=[speaker_config_method_dropdown],
         outputs=[single_voice_group, detailed_per_speaker_ui_group]
     )
-    # Button to load/refresh the detailed per-speaker UI configurations
     load_per_speaker_ui_button.click(
-        fn=load_refresh_per_speaker_ui,
         inputs=[script_input, speaker_configs_state, tts_model_dropdown, speaker_configs_state],
         outputs=[dynamic_speaker_ui_area, speaker_configs_state]
     )
-    # Calculate cost button
     calculate_cost_button.click(
         fn=handle_calculate_cost,
         inputs=[script_input, tts_model_dropdown],
         outputs=[cost_output]
     )
-    # Generate audio button
-    # Use functools.partial to pass fixed arguments like API key and client to the handler
-    # Gradio inputs will be appended to these fixed arguments when the handler is called.
     generate_button_fn = partial(handle_script_processing, OPENAI_API_KEY, async_openai_client, NSFW_API_URL_TEMPLATE)
     generate_button.click(
         fn=generate_button_fn,
         inputs=[
             script_input, tts_model_dropdown, pause_input,
             speaker_config_method_dropdown, global_voice_dropdown,
-            speaker_configs_state, # The gr.State object itself
             global_speed_input, global_instructions_input
         ],
         outputs=[individual_lines_zip_output, merged_dialogue_mp3_output, status_output]
     )
-    # --- Examples UI ---
     example_inputs_list = [
         script_input, tts_model_dropdown, pause_input,
         speaker_config_method_dropdown, global_voice_dropdown,
         speaker_configs_state,
         global_speed_input, global_instructions_input
     ]
     example_outputs_list = [individual_lines_zip_output, merged_dialogue_mp3_output, status_output]
-    # Make examples runnable
     example_process_fn = partial(handle_script_processing, OPENAI_API_KEY, async_openai_client, NSFW_API_URL_TEMPLATE)
     _ = create_examples_ui(
         inputs_for_examples=example_inputs_list,
-        process_fn=example_process_fn if OPENAI_API_KEY else None, # Only make runnable if API key exists
         outputs_for_examples=example_outputs_list if OPENAI_API_KEY else None
     )
-# --- Launch ---
 if __name__ == "__main__":
     if os.name == 'nt':
         asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())

 import os
 import asyncio
 from openai import AsyncOpenAI
+from functools import partial
 from ui_layout import (
     create_main_input_components, create_speaker_config_components,
     create_action_and_output_components, create_examples_ui,
     TTS_MODELS_AVAILABLE, MODEL_DEFAULT_ENV
 )
 from event_handlers import (
     handle_script_processing, handle_calculate_cost,
     update_model_controls_visibility, update_speaker_config_method_visibility,
+    handle_load_refresh_button_click # Import the new wrapper
+    # load_refresh_per_speaker_ui_core is now internal to event_handlers.py
 )
 OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
 NSFW_API_URL_TEMPLATE = os.getenv("NSFW_API_URL_TEMPLATE")
 MODEL_DEFAULT_FROM_ENV = os.getenv("MODEL_DEFAULT", MODEL_DEFAULT_ENV)
 EFFECTIVE_MODEL_DEFAULT = MODEL_DEFAULT_FROM_ENV if MODEL_DEFAULT_FROM_ENV in TTS_MODELS_AVAILABLE else MODEL_DEFAULT_ENV
 async_openai_client = None
+# ... (Secrets loading logic remains the same) ...
 if not OPENAI_API_KEY:
     try:
         from huggingface_hub import HfApi
         api = HfApi()
+        space_id = os.getenv("SPACE_ID")
         if space_id:
             secrets = api.get_space_secrets(repo_id=space_id)
             OPENAI_API_KEY = secrets.get("OPENAI_API_KEY")
     print("CRITICAL ERROR: OPENAI_API_KEY secret is not set. The application will not function properly.")
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("# Dialogue Script to Speech (OpenAI TTS) - Refactored")
     if not OPENAI_API_KEY or not async_openai_client:
         gr.Markdown("<h3 style='color:red;'>⚠️ Warning: OPENAI_API_KEY not set or invalid. Audio generation will fail. Please configure it in your Space settings.</h3>")
+    speaker_configs_state = gr.State({})
     (script_input, tts_model_dropdown, pause_input,
      global_speed_input, global_instructions_input) = create_main_input_components(EFFECTIVE_MODEL_DEFAULT)
      individual_lines_zip_output, merged_dialogue_mp3_output,
      status_output) = create_action_and_output_components()
     tts_model_dropdown.change(
         fn=update_model_controls_visibility,
         inputs=[tts_model_dropdown, script_input, speaker_configs_state, speaker_configs_state],
         outputs=[global_speed_input, global_instructions_input, dynamic_speaker_ui_area, speaker_configs_state]
     )
     speaker_config_method_dropdown.change(
         fn=update_speaker_config_method_visibility,
         inputs=[speaker_config_method_dropdown],
         outputs=[single_voice_group, detailed_per_speaker_ui_group]
     )
+    # MODIFIED: Button click now uses the new wrapper handle_load_refresh_button_click
     load_per_speaker_ui_button.click(
+        fn=handle_load_refresh_button_click, # Use the new wrapper
         inputs=[script_input, speaker_configs_state, tts_model_dropdown, speaker_configs_state],
         outputs=[dynamic_speaker_ui_area, speaker_configs_state]
     )
     calculate_cost_button.click(
         fn=handle_calculate_cost,
         inputs=[script_input, tts_model_dropdown],
         outputs=[cost_output]
     )
     generate_button_fn = partial(handle_script_processing, OPENAI_API_KEY, async_openai_client, NSFW_API_URL_TEMPLATE)
     generate_button.click(
         fn=generate_button_fn,
         inputs=[
             script_input, tts_model_dropdown, pause_input,
             speaker_config_method_dropdown, global_voice_dropdown,
+            speaker_configs_state,
             global_speed_input, global_instructions_input
         ],
         outputs=[individual_lines_zip_output, merged_dialogue_mp3_output, status_output]
     )
     example_inputs_list = [
         script_input, tts_model_dropdown, pause_input,
         speaker_config_method_dropdown, global_voice_dropdown,
         speaker_configs_state,
         global_speed_input, global_instructions_input
     ]
     example_outputs_list = [individual_lines_zip_output, merged_dialogue_mp3_output, status_output]
     example_process_fn = partial(handle_script_processing, OPENAI_API_KEY, async_openai_client, NSFW_API_URL_TEMPLATE)
     _ = create_examples_ui(
         inputs_for_examples=example_inputs_list,
+        process_fn=example_process_fn if OPENAI_API_KEY else None,
         outputs_for_examples=example_outputs_list if OPENAI_API_KEY else None
     )
 if __name__ == "__main__":
     if os.name == 'nt':
         asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())

event_handlers.py CHANGED Viewed

@@ -1,4 +1,5 @@
 import gradio as gr
 import os
 import asyncio
 import tempfile
@@ -43,16 +44,24 @@ def handle_dynamic_input_change(new_value, current_configs_state_dict: dict, spe
     return current_configs_state_dict
-def load_refresh_per_speaker_ui(script_text: str, current_configs_state_dict: dict, tts_model: str, speaker_configs_state_component: gr.State):
     # --- START OF PHASE 1 DEBUGGING (returns list of components directly) ---
-    print("DEBUG: load_refresh_per_speaker_ui CALLED - Phase 1: HARDCODED RETURN (direct list for Column)")
-    debug_markdown = gr.Markdown("## !! Dynamic Area Test Content Loaded (Direct list for Column) !!")
-    # Return the list of components and the state update
     return [debug_markdown], {}
     # --- END OF PHASE 1 DEBUGGING ---
     # --- ORIGINAL LOGIC (Commented out for Phase 1) ---
-    # ... (original logic would eventually return: new_ui_components, current_configs_state_dict)
     # --- END OF ORIGINAL LOGIC ---
@@ -66,7 +75,7 @@ async def handle_script_processing(
     global_instructions: str,
     progress=gr.Progress(track_tqdm=True)
 ):
-    # ... (content of this function remains unchanged from the previous correct version) ...
     if not openai_api_key or not async_openai_client:
         return None, None, "Error: OpenAI API Key or client is not configured."
     if not dialogue_script.strip():
@@ -202,23 +211,30 @@ def handle_calculate_cost(dialogue_script: str, tts_model: str):
     except Exception as e:
         return f"An unexpected error occurred during cost calculation: {str(e)}"
-def update_model_controls_visibility(selected_model: str, script_text_for_refresh: str, current_speaker_configs_for_refresh: dict, speaker_configs_state_comp: gr.State):
     """Updates visibility of global controls and refreshes per-speaker UI when TTS model changes."""
     print(f"Model changed to: {selected_model}. Refreshing dynamic UI and controls.")
     try:
-        # load_refresh_per_speaker_ui returns (list_of_components, updated_state_dict)
-        dynamic_ui_components_list, updated_state_dict = load_refresh_per_speaker_ui(
-            script_text_for_refresh, current_speaker_configs_for_refresh, selected_model, speaker_configs_state_comp
         )
-        # The list of components is passed directly for the Column output.
-        # Gradio should handle replacing children of 'dynamic_speaker_ui_area' (a gr.Column)
-        # with this new list of components.
     except Exception as e:
-        print(f"Error in load_refresh_per_speaker_ui called from model_controls_visibility: {e}")
         error_markdown = gr.Markdown(f"Error refreshing per-speaker UI: {e}")
-        dynamic_ui_components_list = [error_markdown] # Fallback to an error message list
-        updated_state_dict = current_speaker_configs_for_refresh
     is_tts1_family = selected_model in ["tts-1", "tts-1-hd"]
     is_gpt_mini_tts = selected_model == "gpt-4o-mini-tts"
@@ -226,11 +242,10 @@ def update_model_controls_visibility(selected_model: str, script_text_for_refres
     return (
         gr.update(visible=is_tts1_family, interactive=is_tts1_family),
         gr.update(visible=is_gpt_mini_tts, interactive=is_gpt_mini_tts),
-        dynamic_ui_components_list, # Pass the list of components directly
         updated_state_dict
     )
 def update_speaker_config_method_visibility(method: str):
     # ... (no change) ...
     is_single = (method == "Single Voice (Global)")

 import gradio as gr
+# ... (other imports remain the same) ...
 import os
 import asyncio
 import tempfile
     return current_configs_state_dict
+def load_refresh_per_speaker_ui_core(script_text: str, current_configs_state_dict: dict, tts_model: str, speaker_configs_state_component: gr.State):
+    """
+    Core logic for generating per-speaker UI components.
+    Returns: (list_of_components_for_column, updated_state_dict)
+    """
     # --- START OF PHASE 1 DEBUGGING (returns list of components directly) ---
+    print("DEBUG: load_refresh_per_speaker_ui_core CALLED - Phase 1: HARDCODED RETURN (direct list for Column)")
+    debug_markdown = gr.Markdown("## !! Dynamic Area Test Content (Button Click Path) !!")
     return [debug_markdown], {}
     # --- END OF PHASE 1 DEBUGGING ---
     # --- ORIGINAL LOGIC (Commented out for Phase 1) ---
+    # print(f"load_refresh_per_speaker_ui_core CALLED. TTS Model: {tts_model}")
+    # unique_speakers = get_speakers_from_script(script_text)
+    # new_ui_components = []
+    # # ... (rest of original logic from previous load_refresh_per_speaker_ui) ...
+    # # Make sure this original logic path would also return:
+    # # return new_ui_components, current_configs_state_dict
     # --- END OF ORIGINAL LOGIC ---
     global_instructions: str,
     progress=gr.Progress(track_tqdm=True)
 ):
+    # ... (content of this function remains unchanged) ...
     if not openai_api_key or not async_openai_client:
         return None, None, "Error: OpenAI API Key or client is not configured."
     if not dialogue_script.strip():
     except Exception as e:
         return f"An unexpected error occurred during cost calculation: {str(e)}"
+# Wrapper for the "Load/Refresh Per-Speaker UI Button" click
+def handle_load_refresh_button_click(script_text: str, current_configs_state_dict: dict, tts_model: str, speaker_configs_state_comp: gr.State):
+    components_list, new_state_dict = load_refresh_per_speaker_ui_core(
+        script_text, current_configs_state_dict, tts_model, speaker_configs_state_comp
+    )
+    # Return gr.update for the column, and the raw state dict for the gr.State component
+    return gr.update(children=components_list), new_state_dict
+def update_model_controls_visibility(selected_model: str, script_text_for_refresh: str, current_configs_state_dict: dict, speaker_configs_state_comp: gr.State):
     """Updates visibility of global controls and refreshes per-speaker UI when TTS model changes."""
     print(f"Model changed to: {selected_model}. Refreshing dynamic UI and controls.")
     try:
+        # load_refresh_per_speaker_ui_core returns (list_of_components, updated_state_dict)
+        dynamic_ui_components_list, updated_state_dict = load_refresh_per_speaker_ui_core(
+            script_text_for_refresh, current_configs_state_dict, selected_model, speaker_configs_state_comp
         )
+        # Wrap the list of components in gr.update(children=...) here
+        dynamic_ui_update_for_column = gr.update(children=dynamic_ui_components_list)
     except Exception as e:
+        print(f"Error in load_refresh_per_speaker_ui_core called from model_controls_visibility: {e}")
         error_markdown = gr.Markdown(f"Error refreshing per-speaker UI: {e}")
+        dynamic_ui_update_for_column = gr.update(children=[error_markdown])
+        updated_state_dict = current_configs_state_dict
     is_tts1_family = selected_model in ["tts-1", "tts-1-hd"]
     is_gpt_mini_tts = selected_model == "gpt-4o-mini-tts"
     return (
         gr.update(visible=is_tts1_family, interactive=is_tts1_family),
         gr.update(visible=is_gpt_mini_tts, interactive=is_gpt_mini_tts),
+        dynamic_ui_update_for_column,
         updated_state_dict
     )
 def update_speaker_config_method_visibility(method: str):
     # ... (no change) ...
     is_single = (method == "Single Voice (Global)")