abocha committed on
Commit
f0f7952
·
1 Parent(s): 05e4a98
Files changed (1) hide show
  1. event_handlers.py +96 -109
event_handlers.py CHANGED
@@ -36,8 +36,7 @@ def get_speakers_from_script(script_text: str):
36
 
37
  def handle_dynamic_input_change(new_value, current_configs_state_dict: dict, speaker_name: str, config_key: str, tts_model: str):
38
  """Handles changes from dynamically generated UI elements for per-speaker settings."""
39
- # print(f"Dynamic change for {speaker_name}, key {config_key}: {new_value}. State: {current_configs_state_dict}")
40
- if current_configs_state_dict is None: # Should ideally be initialized by Gradio's gr.State
41
  current_configs_state_dict = {}
42
  if speaker_name not in current_configs_state_dict:
43
  current_configs_state_dict[speaker_name] = {}
@@ -51,107 +50,102 @@ def load_refresh_per_speaker_ui(script_text: str, current_configs_state_dict: di
51
  Generates or refreshes the dynamic UI components (accordions) for each speaker.
52
  Returns a list of Gradio components to populate the dynamic UI area and the updated state.
53
  """
54
- # event_handlers.py - inside load_refresh_per_speaker_ui
55
- print("DEBUG: load_refresh_per_speaker_ui CALLED - HARDCODED RETURN")
56
- debug_markdown = gr.Markdown("## !! Dynamic Area Test Content Loaded !!")
57
  # Return this simple component and an empty dict for state for now
58
  return [debug_markdown], {}
59
- # Comment out ALL original logic in this function for this test.
60
-
61
- # print(f"Load/Refresh UI called. TTS Model: {tts_model}") # Debug
 
62
  # unique_speakers = get_speakers_from_script(script_text)
63
  # new_ui_components = []
64
 
65
  # if current_configs_state_dict is None:
66
- # current_configs_state_dict = {}
67
 
68
- # # Ensure a default voice for safety
69
  # safe_default_voice = APP_AVAILABLE_VOICES[0] if APP_AVAILABLE_VOICES else "alloy"
70
 
71
  # for speaker_name in unique_speakers:
72
- # if speaker_name not in current_configs_state_dict:
73
- # current_configs_state_dict[speaker_name] = {
74
- # "voice": safe_default_voice, "speed": 1.0,
75
- # "vibe": DEFAULT_VIBE, "custom_instructions": ""
76
- # }
77
- # # Ensure all keys exist with defaults
78
- # current_configs_state_dict[speaker_name].setdefault("voice", safe_default_voice)
79
- # current_configs_state_dict[speaker_name].setdefault("speed", 1.0)
80
- # current_configs_state_dict[speaker_name].setdefault("vibe", DEFAULT_VIBE)
81
- # current_configs_state_dict[speaker_name].setdefault("custom_instructions", "")
82
 
83
  # if not unique_speakers:
84
- # print("No unique speakers found, returning markdown.") # Debug
85
- # new_ui_components.append(gr.Markdown("No speakers detected in the script, or script is empty. Type a script and click 'Load/Refresh' again, or change the script content."))
86
- # return new_ui_components, current_configs_state_dict
87
 
88
- # print(f"Found speakers: {unique_speakers}. Building UI...") # Debug
89
  # for speaker_name in unique_speakers:
90
- # speaker_cfg = current_configs_state_dict[speaker_name]
91
-
92
- # speed_interactive = tts_model in ["tts-1", "tts-1-hd"]
93
- # instructions_relevant = tts_model == "gpt-4o-mini-tts"
94
-
95
- # # Use a unique elem_id for each accordion to help Gradio differentiate if needed
96
- # accordion_elem_id = f"accordion_speaker_{speaker_name.replace(' ', '_')}"
97
-
98
- # with gr.Accordion(label=f"Settings for: {speaker_name}", open=False, elem_id=accordion_elem_id) as speaker_accordion:
99
- # # Voice Dropdown
100
- # voice_dd = gr.Dropdown(
101
- # label="Voice", choices=APP_AVAILABLE_VOICES, value=speaker_cfg.get("voice", safe_default_voice), interactive=True
102
- # )
103
- # voice_dd.change(
104
- # fn=partial(handle_dynamic_input_change, speaker_name=speaker_name, config_key="voice", tts_model=tts_model),
105
- # inputs=[voice_dd, speaker_configs_state_component],
106
- # outputs=[speaker_configs_state_component]
107
- # )
108
-
109
- # # Speed Slider
110
- # speed_slider_label = "Speech Speed" + (" (Active for tts-1/hd)" if speed_interactive else " (N/A for this model)")
111
- # speed_slider = gr.Slider(
112
- # label=speed_slider_label, minimum=0.25, maximum=4.0, value=float(speaker_cfg.get("speed", 1.0)),
113
- # step=0.05, interactive=speed_interactive
114
- # )
115
- # if speed_interactive:
116
- # speed_slider.release(
117
- # fn=partial(handle_dynamic_input_change, speaker_name=speaker_name, config_key="speed", tts_model=tts_model),
118
- # inputs=[speed_slider, speaker_configs_state_component],
119
- # outputs=[speaker_configs_state_component]
120
- # )
121
 
122
- # # Vibe Dropdown
123
- # vibe_label = "Vibe/Emotion Preset" + (" (For gpt-4o-mini-tts)" if instructions_relevant else " (Less impact on other models)")
124
- # vibe_dd = gr.Dropdown(
125
- # label=vibe_label, choices=VIBE_CHOICES, value=speaker_cfg.get("vibe", DEFAULT_VIBE), interactive=True
126
- # )
127
- # vibe_dd.change(
128
- # fn=partial(handle_dynamic_input_change, speaker_name=speaker_name, config_key="vibe", tts_model=tts_model),
129
- # inputs=[vibe_dd, speaker_configs_state_component],
130
- # outputs=[speaker_configs_state_component]
131
- # )
132
-
133
- # # Custom Instructions Textbox
134
- # custom_instr_label = "Custom Instructions"
135
- # custom_instr_placeholder = "Used if Vibe is 'Custom...'. Overrides Vibe preset."
136
- # custom_instr_tb = gr.Textbox(
137
- # label=custom_instr_label,
138
- # value=speaker_cfg.get("custom_instructions", ""),
139
- # placeholder=custom_instr_placeholder,
140
- # lines=2, interactive=True
141
- # )
142
- # custom_instr_tb.input(
143
- # fn=partial(handle_dynamic_input_change, speaker_name=speaker_name, config_key="custom_instructions", tts_model=tts_model),
144
- # inputs=[custom_instr_tb, speaker_configs_state_component],
145
- # outputs=[speaker_configs_state_component]
146
- # )
147
- # new_ui_components.append(speaker_accordion)
148
 
149
- # print(f"Returning {len(new_ui_components)} UI components for dynamic area.") # Debug
150
  # return new_ui_components, current_configs_state_dict
 
151
 
152
 
153
  async def handle_script_processing(
154
- openai_api_key: str, async_openai_client, nsfw_api_url_template: str, # Passed from app.py
155
  dialogue_script: str, tts_model: str, pause_ms: int,
156
  speaker_config_method: str, global_voice_selection: str,
157
  speaker_configs_state_dict: dict,
@@ -179,14 +173,13 @@ async def handle_script_processing(
179
 
180
  if speaker_configs_state_dict is None: speaker_configs_state_dict = {}
181
 
182
- # Ensure a default voice for safety
183
  safe_default_global_voice = global_voice_selection if global_voice_selection in APP_AVAILABLE_VOICES else DEFAULT_FALLBACK_VOICE
184
 
185
  speaker_voice_map = {}
186
  if speaker_config_method in ["Random per Speaker", "A/B Round Robin"]:
187
  unique_script_speakers_for_map = get_speakers_from_script(dialogue_script)
188
  temp_voices_pool = APP_AVAILABLE_VOICES.copy()
189
- if not temp_voices_pool: temp_voices_pool = [DEFAULT_FALLBACK_VOICE] # Ensure pool isn't empty
190
 
191
  if speaker_config_method == "Random per Speaker":
192
  for spk_name in unique_script_speakers_for_map:
@@ -196,7 +189,6 @@ async def handle_script_processing(
196
  speaker_voice_map[spk_name] = temp_voices_pool[i % len(temp_voices_pool)]
197
 
198
  tasks = []
199
- # line_audio_files map to store results by original line ID for correct ordering
200
  line_audio_files_map = {}
201
 
202
  for i, line_data in enumerate(parsed_lines):
@@ -240,7 +232,7 @@ async def handle_script_processing(
240
  results = await asyncio.gather(*tasks, return_exceptions=True)
241
 
242
  for idx, res_path_or_exc in enumerate(results):
243
- original_line_id = parsed_lines[idx]['id'] # Get original ID from the parsed line
244
  if isinstance(res_path_or_exc, Exception):
245
  print(f"Error synthesizing line ID {original_line_id} ({parsed_lines[idx]['speaker']}): {res_path_or_exc}")
246
  line_audio_files_map[original_line_id] = None
@@ -250,14 +242,13 @@ async def handle_script_processing(
250
  else:
251
  line_audio_files_map[original_line_id] = res_path_or_exc
252
 
253
- # Reconstruct ordered list of files for merging, using original line IDs
254
  ordered_files_for_merge_and_zip = []
255
  for p_line in parsed_lines:
256
  file_path = line_audio_files_map.get(p_line['id'])
257
  if file_path and os.path.exists(file_path) and os.path.getsize(file_path) > 0:
258
  ordered_files_for_merge_and_zip.append(file_path)
259
  else:
260
- ordered_files_for_merge_and_zip.append(None) # Keep placeholder for failed lines for merge logic
261
 
262
  valid_files_for_zip = [f for f in ordered_files_for_merge_and_zip if f]
263
 
@@ -271,7 +262,6 @@ async def handle_script_processing(
271
  zf.write(f_path, os.path.basename(f_path))
272
 
273
  merged_fn = os.path.join(job_audio_path_prefix, "merged_dialogue.mp3")
274
- # For merge_mp3_files, pass only the list of existing files in order
275
  files_to_actually_merge = [f for f in ordered_files_for_merge_and_zip if f]
276
  merged_path = merge_mp3_files(files_to_actually_merge, merged_fn, pause_ms)
277
 
@@ -300,31 +290,26 @@ def handle_calculate_cost(dialogue_script: str, tts_model: str):
300
 
301
  def update_model_controls_visibility(selected_model: str, script_text_for_refresh: str, current_speaker_configs_for_refresh: dict, speaker_configs_state_comp: gr.State):
302
  """Updates visibility of global controls and refreshes per-speaker UI when TTS model changes."""
303
- print(f"Model changed to: {selected_model}. Refreshing dynamic UI and controls.") # Debug
304
  try:
305
- # load_refresh_per_speaker_ui might return components or markdown
306
- # It now takes speaker_configs_state_comp as an argument to wire up .change() correctly
307
  dynamic_ui_output, updated_state = load_refresh_per_speaker_ui(
308
  script_text_for_refresh, current_speaker_configs_for_refresh, selected_model, speaker_configs_state_comp
309
  )
310
  except Exception as e:
311
  print(f"Error in load_refresh_per_speaker_ui called from model_controls_visibility: {e}")
312
- # Fallback: clear dynamic UI and keep state as is, or return an error message component
313
  dynamic_ui_output = [gr.Markdown(f"Error refreshing per-speaker UI: {e}")]
314
- updated_state = current_speaker_configs_for_refresh # or {} to reset
315
 
316
  is_tts1_family = selected_model in ["tts-1", "tts-1-hd"]
317
  is_gpt_mini_tts = selected_model == "gpt-4o-mini-tts"
318
 
319
- # The keys in this dictionary must match the Gradio components passed in the `outputs` list
320
- # of the .change() event.
321
- updates = {
322
- "global_speed_input": gr.update(visible=is_tts1_family, interactive=is_tts1_family),
323
- "global_instructions_input": gr.update(visible=is_gpt_mini_tts, interactive=is_gpt_mini_tts),
324
- "dynamic_speaker_ui_area": dynamic_ui_output, # This directly provides the new children for the Column
325
- "speaker_configs_state": updated_state
326
- }
327
- return updates["global_speed_input"], updates["global_instructions_input"], updates["dynamic_speaker_ui_area"], updates["speaker_configs_state"]
328
 
329
 
330
  def update_speaker_config_method_visibility(method: str):
@@ -332,8 +317,10 @@ def update_speaker_config_method_visibility(method: str):
332
  is_single = (method == "Single Voice (Global)")
333
  is_detailed_per_speaker = (method == "Detailed Configuration (Per Speaker UI)")
334
 
335
- # Keys here must match the Gradio components in the .change() event's `outputs` list.
336
- return {
337
- "single_voice_group": gr.update(visible=is_single),
338
- "detailed_per_speaker_ui_group": gr.update(visible=is_detailed_per_speaker),
339
- }
 
 
 
36
 
37
  def handle_dynamic_input_change(new_value, current_configs_state_dict: dict, speaker_name: str, config_key: str, tts_model: str):
38
  """Handles changes from dynamically generated UI elements for per-speaker settings."""
39
+ if current_configs_state_dict is None:
 
40
  current_configs_state_dict = {}
41
  if speaker_name not in current_configs_state_dict:
42
  current_configs_state_dict[speaker_name] = {}
 
50
  Generates or refreshes the dynamic UI components (accordions) for each speaker.
51
  Returns a list of Gradio components to populate the dynamic UI area and the updated state.
52
  """
53
+ # --- START OF PHASE 1 DEBUGGING ---
54
+ print("DEBUG: load_refresh_per_speaker_ui CALLED - Phase 1: HARDCODED RETURN")
55
+ debug_markdown = gr.Markdown("## !! Dynamic Area Test Content Loaded via Load/Refresh Button !!")
56
  # Return this simple component and an empty dict for state for now
57
  return [debug_markdown], {}
58
+ # --- END OF PHASE 1 DEBUGGING ---
59
+
60
+ # --- ORIGINAL LOGIC (Commented out for Phase 1) ---
61
+ # print(f"Load/Refresh UI called. TTS Model: {tts_model}")
62
  # unique_speakers = get_speakers_from_script(script_text)
63
  # new_ui_components = []
64
 
65
  # if current_configs_state_dict is None:
66
+ # current_configs_state_dict = {}
67
 
 
68
  # safe_default_voice = APP_AVAILABLE_VOICES[0] if APP_AVAILABLE_VOICES else "alloy"
69
 
70
  # for speaker_name in unique_speakers:
71
+ # if speaker_name not in current_configs_state_dict:
72
+ # current_configs_state_dict[speaker_name] = {
73
+ # "voice": safe_default_voice, "speed": 1.0,
74
+ # "vibe": DEFAULT_VIBE, "custom_instructions": ""
75
+ # }
76
+ # current_configs_state_dict[speaker_name].setdefault("voice", safe_default_voice)
77
+ # current_configs_state_dict[speaker_name].setdefault("speed", 1.0)
78
+ # current_configs_state_dict[speaker_name].setdefault("vibe", DEFAULT_VIBE)
79
+ # current_configs_state_dict[speaker_name].setdefault("custom_instructions", "")
 
80
 
81
  # if not unique_speakers:
82
+ # print("No unique speakers found, returning markdown.")
83
+ # new_ui_components.append(gr.Markdown("No speakers detected in the script, or script is empty. Type a script and click 'Load/Refresh' again, or change the script content."))
84
+ # return new_ui_components, current_configs_state_dict
85
 
86
+ # print(f"Found speakers: {unique_speakers}. Building UI...")
87
  # for speaker_name in unique_speakers:
88
+ # speaker_cfg = current_configs_state_dict[speaker_name]
89
+
90
+ # speed_interactive = tts_model in ["tts-1", "tts-1-hd"]
91
+ # instructions_relevant = tts_model == "gpt-4o-mini-tts"
92
+
93
+ # accordion_elem_id = f"accordion_speaker_{speaker_name.replace(' ', '_')}"
94
+
95
+ # with gr.Accordion(label=f"Settings for: {speaker_name}", open=False, elem_id=accordion_elem_id) as speaker_accordion:
96
+ # voice_dd = gr.Dropdown(
97
+ # label="Voice", choices=APP_AVAILABLE_VOICES, value=speaker_cfg.get("voice", safe_default_voice), interactive=True
98
+ # )
99
+ # voice_dd.change(
100
+ # fn=partial(handle_dynamic_input_change, speaker_name=speaker_name, config_key="voice", tts_model=tts_model),
101
+ # inputs=[voice_dd, speaker_configs_state_component],
102
+ # outputs=[speaker_configs_state_component]
103
+ # )
104
+
105
+ # speed_slider_label = "Speech Speed" + (" (Active for tts-1/hd)" if speed_interactive else " (N/A for this model)")
106
+ # speed_slider = gr.Slider(
107
+ # label=speed_slider_label, minimum=0.25, maximum=4.0, value=float(speaker_cfg.get("speed", 1.0)),
108
+ # step=0.05, interactive=speed_interactive
109
+ # )
110
+ # if speed_interactive:
111
+ # speed_slider.release(
112
+ # fn=partial(handle_dynamic_input_change, speaker_name=speaker_name, config_key="speed", tts_model=tts_model),
113
+ # inputs=[speed_slider, speaker_configs_state_component],
114
+ # outputs=[speaker_configs_state_component]
115
+ # )
 
 
 
116
 
117
+ # vibe_label = "Vibe/Emotion Preset" + (" (For gpt-4o-mini-tts)" if instructions_relevant else " (Less impact on other models)")
118
+ # vibe_dd = gr.Dropdown(
119
+ # label=vibe_label, choices=VIBE_CHOICES, value=speaker_cfg.get("vibe", DEFAULT_VIBE), interactive=True
120
+ # )
121
+ # vibe_dd.change(
122
+ # fn=partial(handle_dynamic_input_change, speaker_name=speaker_name, config_key="vibe", tts_model=tts_model),
123
+ # inputs=[vibe_dd, speaker_configs_state_component],
124
+ # outputs=[speaker_configs_state_component]
125
+ # )
126
+
127
+ # custom_instr_label = "Custom Instructions"
128
+ # custom_instr_placeholder = "Used if Vibe is 'Custom...'. Overrides Vibe preset."
129
+ # custom_instr_tb = gr.Textbox(
130
+ # label=custom_instr_label,
131
+ # value=speaker_cfg.get("custom_instructions", ""),
132
+ # placeholder=custom_instr_placeholder,
133
+ # lines=2, interactive=True
134
+ # )
135
+ # custom_instr_tb.input(
136
+ # fn=partial(handle_dynamic_input_change, speaker_name=speaker_name, config_key="custom_instructions", tts_model=tts_model),
137
+ # inputs=[custom_instr_tb, speaker_configs_state_component],
138
+ # outputs=[speaker_configs_state_component]
139
+ # )
140
+ # new_ui_components.append(speaker_accordion)
 
 
141
 
142
+ # print(f"Returning {len(new_ui_components)} UI components for dynamic area.")
143
  # return new_ui_components, current_configs_state_dict
144
+ # --- END OF ORIGINAL LOGIC ---
145
 
146
 
147
  async def handle_script_processing(
148
+ openai_api_key: str, async_openai_client, nsfw_api_url_template: str,
149
  dialogue_script: str, tts_model: str, pause_ms: int,
150
  speaker_config_method: str, global_voice_selection: str,
151
  speaker_configs_state_dict: dict,
 
173
 
174
  if speaker_configs_state_dict is None: speaker_configs_state_dict = {}
175
 
 
176
  safe_default_global_voice = global_voice_selection if global_voice_selection in APP_AVAILABLE_VOICES else DEFAULT_FALLBACK_VOICE
177
 
178
  speaker_voice_map = {}
179
  if speaker_config_method in ["Random per Speaker", "A/B Round Robin"]:
180
  unique_script_speakers_for_map = get_speakers_from_script(dialogue_script)
181
  temp_voices_pool = APP_AVAILABLE_VOICES.copy()
182
+ if not temp_voices_pool: temp_voices_pool = [DEFAULT_FALLBACK_VOICE]
183
 
184
  if speaker_config_method == "Random per Speaker":
185
  for spk_name in unique_script_speakers_for_map:
 
189
  speaker_voice_map[spk_name] = temp_voices_pool[i % len(temp_voices_pool)]
190
 
191
  tasks = []
 
192
  line_audio_files_map = {}
193
 
194
  for i, line_data in enumerate(parsed_lines):
 
232
  results = await asyncio.gather(*tasks, return_exceptions=True)
233
 
234
  for idx, res_path_or_exc in enumerate(results):
235
+ original_line_id = parsed_lines[idx]['id']
236
  if isinstance(res_path_or_exc, Exception):
237
  print(f"Error synthesizing line ID {original_line_id} ({parsed_lines[idx]['speaker']}): {res_path_or_exc}")
238
  line_audio_files_map[original_line_id] = None
 
242
  else:
243
  line_audio_files_map[original_line_id] = res_path_or_exc
244
 
 
245
  ordered_files_for_merge_and_zip = []
246
  for p_line in parsed_lines:
247
  file_path = line_audio_files_map.get(p_line['id'])
248
  if file_path and os.path.exists(file_path) and os.path.getsize(file_path) > 0:
249
  ordered_files_for_merge_and_zip.append(file_path)
250
  else:
251
+ ordered_files_for_merge_and_zip.append(None)
252
 
253
  valid_files_for_zip = [f for f in ordered_files_for_merge_and_zip if f]
254
 
 
262
  zf.write(f_path, os.path.basename(f_path))
263
 
264
  merged_fn = os.path.join(job_audio_path_prefix, "merged_dialogue.mp3")
 
265
  files_to_actually_merge = [f for f in ordered_files_for_merge_and_zip if f]
266
  merged_path = merge_mp3_files(files_to_actually_merge, merged_fn, pause_ms)
267
 
 
290
 
291
  def update_model_controls_visibility(selected_model: str, script_text_for_refresh: str, current_speaker_configs_for_refresh: dict, speaker_configs_state_comp: gr.State):
292
  """Updates visibility of global controls and refreshes per-speaker UI when TTS model changes."""
293
+ print(f"Model changed to: {selected_model}. Refreshing dynamic UI and controls.")
294
  try:
 
 
295
  dynamic_ui_output, updated_state = load_refresh_per_speaker_ui(
296
  script_text_for_refresh, current_speaker_configs_for_refresh, selected_model, speaker_configs_state_comp
297
  )
298
  except Exception as e:
299
  print(f"Error in load_refresh_per_speaker_ui called from model_controls_visibility: {e}")
 
300
  dynamic_ui_output = [gr.Markdown(f"Error refreshing per-speaker UI: {e}")]
301
+ updated_state = current_speaker_configs_for_refresh
302
 
303
  is_tts1_family = selected_model in ["tts-1", "tts-1-hd"]
304
  is_gpt_mini_tts = selected_model == "gpt-4o-mini-tts"
305
 
306
+ # Return a TUPLE of updates, matching the order of components in 'outputs' list of the .change() event
307
+ return (
308
+ gr.update(visible=is_tts1_family, interactive=is_tts1_family), # For global_speed_input
309
+ gr.update(visible=is_gpt_mini_tts, interactive=is_gpt_mini_tts), # For global_instructions_input
310
+ dynamic_ui_output, # For dynamic_speaker_ui_area
311
+ updated_state # For speaker_configs_state
312
+ )
 
 
313
 
314
 
315
  def update_speaker_config_method_visibility(method: str):
 
317
  is_single = (method == "Single Voice (Global)")
318
  is_detailed_per_speaker = (method == "Detailed Configuration (Per Speaker UI)")
319
 
320
+ # Return a TUPLE of gr.update objects, in the order expected by the outputs list
321
+ # of the speaker_config_method_dropdown.change() event in app.py
322
+ # The order in app.py is: outputs=[single_voice_group, detailed_per_speaker_ui_group]
323
+ return (
324
+ gr.update(visible=is_single), # For single_voice_group
325
+ gr.update(visible=is_detailed_per_speaker) # For detailed_per_speaker_ui_group
326
+ )