Spaces:

abocha
/

esl-dialogue-tts

Running

App Files Community

abocha committed on May 7

Commit

f0f7952

1 Parent(s): 05e4a98

debug

Browse files

Files changed (1) hide show

event_handlers.py +96 -109

event_handlers.py CHANGED Viewed

@@ -36,8 +36,7 @@ def get_speakers_from_script(script_text: str):
 def handle_dynamic_input_change(new_value, current_configs_state_dict: dict, speaker_name: str, config_key: str, tts_model: str):
     """Handles changes from dynamically generated UI elements for per-speaker settings."""
-    # print(f"Dynamic change for {speaker_name}, key {config_key}: {new_value}. State: {current_configs_state_dict}")
-    if current_configs_state_dict is None: # Should ideally be initialized by Gradio's gr.State
         current_configs_state_dict = {}
     if speaker_name not in current_configs_state_dict:
         current_configs_state_dict[speaker_name] = {}
@@ -51,107 +50,102 @@ def load_refresh_per_speaker_ui(script_text: str, current_configs_state_dict: di
     Generates or refreshes the dynamic UI components (accordions) for each speaker.
     Returns a list of Gradio components to populate the dynamic UI area and the updated state.
     """
-    # event_handlers.py - inside load_refresh_per_speaker_ui
-    print("DEBUG: load_refresh_per_speaker_ui CALLED - HARDCODED RETURN")
-    debug_markdown = gr.Markdown("## !! Dynamic Area Test Content Loaded !!")
     # Return this simple component and an empty dict for state for now
     return [debug_markdown], {}
-    # Comment out ALL original logic in this function for this test.
-    # print(f"Load/Refresh UI called. TTS Model: {tts_model}") # Debug
     # unique_speakers = get_speakers_from_script(script_text)
     # new_ui_components = []
     # if current_configs_state_dict is None:
-        # current_configs_state_dict = {}
-    # # Ensure a default voice for safety
     # safe_default_voice = APP_AVAILABLE_VOICES[0] if APP_AVAILABLE_VOICES else "alloy"
     # for speaker_name in unique_speakers:
-        # if speaker_name not in current_configs_state_dict:
-            # current_configs_state_dict[speaker_name] = {
-                # "voice": safe_default_voice, "speed": 1.0,
-                # "vibe": DEFAULT_VIBE, "custom_instructions": ""
-            # }
-        # # Ensure all keys exist with defaults
-        # current_configs_state_dict[speaker_name].setdefault("voice", safe_default_voice)
-        # current_configs_state_dict[speaker_name].setdefault("speed", 1.0)
-        # current_configs_state_dict[speaker_name].setdefault("vibe", DEFAULT_VIBE)
-        # current_configs_state_dict[speaker_name].setdefault("custom_instructions", "")
     # if not unique_speakers:
-        # print("No unique speakers found, returning markdown.") # Debug
-        # new_ui_components.append(gr.Markdown("No speakers detected in the script, or script is empty. Type a script and click 'Load/Refresh' again, or change the script content."))
-        # return new_ui_components, current_configs_state_dict
-    # print(f"Found speakers: {unique_speakers}. Building UI...") # Debug
     # for speaker_name in unique_speakers:
-        # speaker_cfg = current_configs_state_dict[speaker_name]
-        # speed_interactive = tts_model in ["tts-1", "tts-1-hd"]
-        # instructions_relevant = tts_model == "gpt-4o-mini-tts"
-        # # Use a unique elem_id for each accordion to help Gradio differentiate if needed
-        # accordion_elem_id = f"accordion_speaker_{speaker_name.replace(' ', '_')}"
-        # with gr.Accordion(label=f"Settings for: {speaker_name}", open=False, elem_id=accordion_elem_id) as speaker_accordion:
-            # # Voice Dropdown
-            # voice_dd = gr.Dropdown(
-                # label="Voice", choices=APP_AVAILABLE_VOICES, value=speaker_cfg.get("voice", safe_default_voice), interactive=True
-            # )
-            # voice_dd.change(
-                # fn=partial(handle_dynamic_input_change, speaker_name=speaker_name, config_key="voice", tts_model=tts_model),
-                # inputs=[voice_dd, speaker_configs_state_component],
-                # outputs=[speaker_configs_state_component]
-            # )
-            # # Speed Slider
-            # speed_slider_label = "Speech Speed" + (" (Active for tts-1/hd)" if speed_interactive else " (N/A for this model)")
-            # speed_slider = gr.Slider(
-                # label=speed_slider_label, minimum=0.25, maximum=4.0, value=float(speaker_cfg.get("speed", 1.0)),
-                # step=0.05, interactive=speed_interactive
-            # )
-            # if speed_interactive:
-                # speed_slider.release(
-                    # fn=partial(handle_dynamic_input_change, speaker_name=speaker_name, config_key="speed", tts_model=tts_model),
-                    # inputs=[speed_slider, speaker_configs_state_component],
-                    # outputs=[speaker_configs_state_component]
-                # )
-            # # Vibe Dropdown
-            # vibe_label = "Vibe/Emotion Preset" + (" (For gpt-4o-mini-tts)" if instructions_relevant else " (Less impact on other models)")
-            # vibe_dd = gr.Dropdown(
-                # label=vibe_label, choices=VIBE_CHOICES, value=speaker_cfg.get("vibe", DEFAULT_VIBE), interactive=True
-            # )
-            # vibe_dd.change(
-                # fn=partial(handle_dynamic_input_change, speaker_name=speaker_name, config_key="vibe", tts_model=tts_model),
-                # inputs=[vibe_dd, speaker_configs_state_component],
-                # outputs=[speaker_configs_state_component]
-            # )
-            # # Custom Instructions Textbox
-            # custom_instr_label = "Custom Instructions"
-            # custom_instr_placeholder = "Used if Vibe is 'Custom...'. Overrides Vibe preset."
-            # custom_instr_tb = gr.Textbox(
-                # label=custom_instr_label,
-                # value=speaker_cfg.get("custom_instructions", ""),
-                # placeholder=custom_instr_placeholder,
-                # lines=2, interactive=True
-            # )
-            # custom_instr_tb.input(
-                # fn=partial(handle_dynamic_input_change, speaker_name=speaker_name, config_key="custom_instructions", tts_model=tts_model),
-                # inputs=[custom_instr_tb, speaker_configs_state_component],
-                # outputs=[speaker_configs_state_component]
-            # )
-        # new_ui_components.append(speaker_accordion)
-    # print(f"Returning {len(new_ui_components)} UI components for dynamic area.") # Debug
     # return new_ui_components, current_configs_state_dict
 async def handle_script_processing(
-    openai_api_key: str, async_openai_client, nsfw_api_url_template: str, # Passed from app.py
     dialogue_script: str, tts_model: str, pause_ms: int,
     speaker_config_method: str, global_voice_selection: str,
     speaker_configs_state_dict: dict,
@@ -179,14 +173,13 @@ async def handle_script_processing(
     if speaker_configs_state_dict is None: speaker_configs_state_dict = {}
-    # Ensure a default voice for safety
     safe_default_global_voice = global_voice_selection if global_voice_selection in APP_AVAILABLE_VOICES else DEFAULT_FALLBACK_VOICE
     speaker_voice_map = {}
     if speaker_config_method in ["Random per Speaker", "A/B Round Robin"]:
         unique_script_speakers_for_map = get_speakers_from_script(dialogue_script)
         temp_voices_pool = APP_AVAILABLE_VOICES.copy()
-        if not temp_voices_pool: temp_voices_pool = [DEFAULT_FALLBACK_VOICE] # Ensure pool isn't empty
         if speaker_config_method == "Random per Speaker":
             for spk_name in unique_script_speakers_for_map:
@@ -196,7 +189,6 @@ async def handle_script_processing(
                 speaker_voice_map[spk_name] = temp_voices_pool[i % len(temp_voices_pool)]
     tasks = []
-    # line_audio_files map to store results by original line ID for correct ordering
     line_audio_files_map = {}
     for i, line_data in enumerate(parsed_lines):
@@ -240,7 +232,7 @@ async def handle_script_processing(
     results = await asyncio.gather(*tasks, return_exceptions=True)
     for idx, res_path_or_exc in enumerate(results):
-        original_line_id = parsed_lines[idx]['id'] # Get original ID from the parsed line
         if isinstance(res_path_or_exc, Exception):
             print(f"Error synthesizing line ID {original_line_id} ({parsed_lines[idx]['speaker']}): {res_path_or_exc}")
             line_audio_files_map[original_line_id] = None
@@ -250,14 +242,13 @@ async def handle_script_processing(
         else:
             line_audio_files_map[original_line_id] = res_path_or_exc
-    # Reconstruct ordered list of files for merging, using original line IDs
     ordered_files_for_merge_and_zip = []
     for p_line in parsed_lines:
         file_path = line_audio_files_map.get(p_line['id'])
         if file_path and os.path.exists(file_path) and os.path.getsize(file_path) > 0:
             ordered_files_for_merge_and_zip.append(file_path)
         else:
-            ordered_files_for_merge_and_zip.append(None) # Keep placeholder for failed lines for merge logic
     valid_files_for_zip = [f for f in ordered_files_for_merge_and_zip if f]
@@ -271,7 +262,6 @@ async def handle_script_processing(
             zf.write(f_path, os.path.basename(f_path))
     merged_fn = os.path.join(job_audio_path_prefix, "merged_dialogue.mp3")
-    # For merge_mp3_files, pass only the list of existing files in order
     files_to_actually_merge = [f for f in ordered_files_for_merge_and_zip if f]
     merged_path = merge_mp3_files(files_to_actually_merge, merged_fn, pause_ms)
@@ -300,31 +290,26 @@ def handle_calculate_cost(dialogue_script: str, tts_model: str):
 def update_model_controls_visibility(selected_model: str, script_text_for_refresh: str, current_speaker_configs_for_refresh: dict, speaker_configs_state_comp: gr.State):
     """Updates visibility of global controls and refreshes per-speaker UI when TTS model changes."""
-    print(f"Model changed to: {selected_model}. Refreshing dynamic UI and controls.") # Debug
     try:
-        # load_refresh_per_speaker_ui might return components or markdown
-        # It now takes speaker_configs_state_comp as an argument to wire up .change() correctly
         dynamic_ui_output, updated_state = load_refresh_per_speaker_ui(
             script_text_for_refresh, current_speaker_configs_for_refresh, selected_model, speaker_configs_state_comp
         )
     except Exception as e:
         print(f"Error in load_refresh_per_speaker_ui called from model_controls_visibility: {e}")
-        # Fallback: clear dynamic UI and keep state as is, or return an error message component
         dynamic_ui_output = [gr.Markdown(f"Error refreshing per-speaker UI: {e}")]
-        updated_state = current_speaker_configs_for_refresh # or {} to reset
     is_tts1_family = selected_model in ["tts-1", "tts-1-hd"]
     is_gpt_mini_tts = selected_model == "gpt-4o-mini-tts"
-    # The keys in this dictionary must match the Gradio components passed in the `outputs` list
-    # of the .change() event.
-    updates = {
-        "global_speed_input": gr.update(visible=is_tts1_family, interactive=is_tts1_family),
-        "global_instructions_input": gr.update(visible=is_gpt_mini_tts, interactive=is_gpt_mini_tts),
-        "dynamic_speaker_ui_area": dynamic_ui_output, # This directly provides the new children for the Column
-        "speaker_configs_state": updated_state
-    }
-    return updates["global_speed_input"], updates["global_instructions_input"], updates["dynamic_speaker_ui_area"], updates["speaker_configs_state"]
 def update_speaker_config_method_visibility(method: str):
@@ -332,8 +317,10 @@ def update_speaker_config_method_visibility(method: str):
     is_single = (method == "Single Voice (Global)")
     is_detailed_per_speaker = (method == "Detailed Configuration (Per Speaker UI)")
-    # Keys here must match the Gradio components in the .change() event's `outputs` list.
-    return {
-        "single_voice_group": gr.update(visible=is_single),
-        "detailed_per_speaker_ui_group": gr.update(visible=is_detailed_per_speaker),
-    }

 def handle_dynamic_input_change(new_value, current_configs_state_dict: dict, speaker_name: str, config_key: str, tts_model: str):
     """Handles changes from dynamically generated UI elements for per-speaker settings."""
+    if current_configs_state_dict is None:
         current_configs_state_dict = {}
     if speaker_name not in current_configs_state_dict:
         current_configs_state_dict[speaker_name] = {}
     Generates or refreshes the dynamic UI components (accordions) for each speaker.
     Returns a list of Gradio components to populate the dynamic UI area and the updated state.
     """
+    # --- START OF PHASE 1 DEBUGGING ---
+    print("DEBUG: load_refresh_per_speaker_ui CALLED - Phase 1: HARDCODED RETURN")
+    debug_markdown = gr.Markdown("## !! Dynamic Area Test Content Loaded via Load/Refresh Button !!")
     # Return this simple component and an empty dict for state for now
     return [debug_markdown], {}
+    # --- END OF PHASE 1 DEBUGGING ---
+    # --- ORIGINAL LOGIC (Commented out for Phase 1) ---
+    # print(f"Load/Refresh UI called. TTS Model: {tts_model}")
     # unique_speakers = get_speakers_from_script(script_text)
     # new_ui_components = []
     # if current_configs_state_dict is None:
+    #     current_configs_state_dict = {}
     # safe_default_voice = APP_AVAILABLE_VOICES[0] if APP_AVAILABLE_VOICES else "alloy"
     # for speaker_name in unique_speakers:
+    #     if speaker_name not in current_configs_state_dict:
+    #         current_configs_state_dict[speaker_name] = {
+    #             "voice": safe_default_voice, "speed": 1.0,
+    #             "vibe": DEFAULT_VIBE, "custom_instructions": ""
+    #         }
+    #     current_configs_state_dict[speaker_name].setdefault("voice", safe_default_voice)
+    #     current_configs_state_dict[speaker_name].setdefault("speed", 1.0)
+    #     current_configs_state_dict[speaker_name].setdefault("vibe", DEFAULT_VIBE)
+    #     current_configs_state_dict[speaker_name].setdefault("custom_instructions", "")
     # if not unique_speakers:
+    #     print("No unique speakers found, returning markdown.")
+    #     new_ui_components.append(gr.Markdown("No speakers detected in the script, or script is empty. Type a script and click 'Load/Refresh' again, or change the script content."))
+    #     return new_ui_components, current_configs_state_dict
+    # print(f"Found speakers: {unique_speakers}. Building UI...")
     # for speaker_name in unique_speakers:
+    #     speaker_cfg = current_configs_state_dict[speaker_name]
+    #     speed_interactive = tts_model in ["tts-1", "tts-1-hd"]
+    #     instructions_relevant = tts_model == "gpt-4o-mini-tts"
+    #     accordion_elem_id = f"accordion_speaker_{speaker_name.replace(' ', '_')}"
+    #     with gr.Accordion(label=f"Settings for: {speaker_name}", open=False, elem_id=accordion_elem_id) as speaker_accordion:
+    #         voice_dd = gr.Dropdown(
+    #             label="Voice", choices=APP_AVAILABLE_VOICES, value=speaker_cfg.get("voice", safe_default_voice), interactive=True
+    #         )
+    #         voice_dd.change(
+    #             fn=partial(handle_dynamic_input_change, speaker_name=speaker_name, config_key="voice", tts_model=tts_model),
+    #             inputs=[voice_dd, speaker_configs_state_component],
+    #             outputs=[speaker_configs_state_component]
+    #         )
+    #         speed_slider_label = "Speech Speed" + (" (Active for tts-1/hd)" if speed_interactive else " (N/A for this model)")
+    #         speed_slider = gr.Slider(
+    #             label=speed_slider_label, minimum=0.25, maximum=4.0, value=float(speaker_cfg.get("speed", 1.0)),
+    #             step=0.05, interactive=speed_interactive
+    #         )
+    #         if speed_interactive:
+    #             speed_slider.release(
+    #                 fn=partial(handle_dynamic_input_change, speaker_name=speaker_name, config_key="speed", tts_model=tts_model),
+    #                 inputs=[speed_slider, speaker_configs_state_component],
+    #                 outputs=[speaker_configs_state_component]
+    #             )
+    #         vibe_label = "Vibe/Emotion Preset" + (" (For gpt-4o-mini-tts)" if instructions_relevant else " (Less impact on other models)")
+    #         vibe_dd = gr.Dropdown(
+    #             label=vibe_label, choices=VIBE_CHOICES, value=speaker_cfg.get("vibe", DEFAULT_VIBE), interactive=True
+    #         )
+    #         vibe_dd.change(
+    #             fn=partial(handle_dynamic_input_change, speaker_name=speaker_name, config_key="vibe", tts_model=tts_model),
+    #             inputs=[vibe_dd, speaker_configs_state_component],
+    #             outputs=[speaker_configs_state_component]
+    #         )
+    #         custom_instr_label = "Custom Instructions"
+    #         custom_instr_placeholder = "Used if Vibe is 'Custom...'. Overrides Vibe preset."
+    #         custom_instr_tb = gr.Textbox(
+    #             label=custom_instr_label,
+    #             value=speaker_cfg.get("custom_instructions", ""),
+    #             placeholder=custom_instr_placeholder,
+    #             lines=2, interactive=True
+    #         )
+    #         custom_instr_tb.input(
+    #             fn=partial(handle_dynamic_input_change, speaker_name=speaker_name, config_key="custom_instructions", tts_model=tts_model),
+    #             inputs=[custom_instr_tb, speaker_configs_state_component],
+    #             outputs=[speaker_configs_state_component]
+    #         )
+    #     new_ui_components.append(speaker_accordion)
+    # print(f"Returning {len(new_ui_components)} UI components for dynamic area.")
     # return new_ui_components, current_configs_state_dict
+    # --- END OF ORIGINAL LOGIC ---
 async def handle_script_processing(
+    openai_api_key: str, async_openai_client, nsfw_api_url_template: str,
     dialogue_script: str, tts_model: str, pause_ms: int,
     speaker_config_method: str, global_voice_selection: str,
     speaker_configs_state_dict: dict,
     if speaker_configs_state_dict is None: speaker_configs_state_dict = {}
     safe_default_global_voice = global_voice_selection if global_voice_selection in APP_AVAILABLE_VOICES else DEFAULT_FALLBACK_VOICE
     speaker_voice_map = {}
     if speaker_config_method in ["Random per Speaker", "A/B Round Robin"]:
         unique_script_speakers_for_map = get_speakers_from_script(dialogue_script)
         temp_voices_pool = APP_AVAILABLE_VOICES.copy()
+        if not temp_voices_pool: temp_voices_pool = [DEFAULT_FALLBACK_VOICE]
         if speaker_config_method == "Random per Speaker":
             for spk_name in unique_script_speakers_for_map:
                 speaker_voice_map[spk_name] = temp_voices_pool[i % len(temp_voices_pool)]
     tasks = []
     line_audio_files_map = {}
     for i, line_data in enumerate(parsed_lines):
     results = await asyncio.gather(*tasks, return_exceptions=True)
     for idx, res_path_or_exc in enumerate(results):
+        original_line_id = parsed_lines[idx]['id']
         if isinstance(res_path_or_exc, Exception):
             print(f"Error synthesizing line ID {original_line_id} ({parsed_lines[idx]['speaker']}): {res_path_or_exc}")
             line_audio_files_map[original_line_id] = None
         else:
             line_audio_files_map[original_line_id] = res_path_or_exc
     ordered_files_for_merge_and_zip = []
     for p_line in parsed_lines:
         file_path = line_audio_files_map.get(p_line['id'])
         if file_path and os.path.exists(file_path) and os.path.getsize(file_path) > 0:
             ordered_files_for_merge_and_zip.append(file_path)
         else:
+            ordered_files_for_merge_and_zip.append(None)
     valid_files_for_zip = [f for f in ordered_files_for_merge_and_zip if f]
             zf.write(f_path, os.path.basename(f_path))
     merged_fn = os.path.join(job_audio_path_prefix, "merged_dialogue.mp3")
     files_to_actually_merge = [f for f in ordered_files_for_merge_and_zip if f]
     merged_path = merge_mp3_files(files_to_actually_merge, merged_fn, pause_ms)
 def update_model_controls_visibility(selected_model: str, script_text_for_refresh: str, current_speaker_configs_for_refresh: dict, speaker_configs_state_comp: gr.State):
     """Updates visibility of global controls and refreshes per-speaker UI when TTS model changes."""
+    print(f"Model changed to: {selected_model}. Refreshing dynamic UI and controls.")
     try:
         dynamic_ui_output, updated_state = load_refresh_per_speaker_ui(
             script_text_for_refresh, current_speaker_configs_for_refresh, selected_model, speaker_configs_state_comp
         )
     except Exception as e:
         print(f"Error in load_refresh_per_speaker_ui called from model_controls_visibility: {e}")
         dynamic_ui_output = [gr.Markdown(f"Error refreshing per-speaker UI: {e}")]
+        updated_state = current_speaker_configs_for_refresh
     is_tts1_family = selected_model in ["tts-1", "tts-1-hd"]
     is_gpt_mini_tts = selected_model == "gpt-4o-mini-tts"
+    # Return a TUPLE of updates, matching the order of components in 'outputs' list of the .change() event
+    return (
+        gr.update(visible=is_tts1_family, interactive=is_tts1_family), # For global_speed_input
+        gr.update(visible=is_gpt_mini_tts, interactive=is_gpt_mini_tts), # For global_instructions_input
+        dynamic_ui_output, # For dynamic_speaker_ui_area
+        updated_state      # For speaker_configs_state
+    )
 def update_speaker_config_method_visibility(method: str):
     is_single = (method == "Single Voice (Global)")
     is_detailed_per_speaker = (method == "Detailed Configuration (Per Speaker UI)")
+    # Return a TUPLE of gr.update objects, in the order expected by the outputs list
+    # of the speaker_config_method_dropdown.change() event in app.py
+    # The order in app.py is: outputs=[single_voice_group, detailed_per_speaker_ui_group]
+    return (
+        gr.update(visible=is_single),                   # For single_voice_group
+        gr.update(visible=is_detailed_per_speaker)      # For detailed_per_speaker_ui_group
+    )