Spaces:
Running
Running
debug
Browse files- event_handlers.py +96 -109
event_handlers.py
CHANGED
@@ -36,8 +36,7 @@ def get_speakers_from_script(script_text: str):
|
|
36 |
|
37 |
def handle_dynamic_input_change(new_value, current_configs_state_dict: dict, speaker_name: str, config_key: str, tts_model: str):
|
38 |
"""Handles changes from dynamically generated UI elements for per-speaker settings."""
|
39 |
-
|
40 |
-
if current_configs_state_dict is None: # Should ideally be initialized by Gradio's gr.State
|
41 |
current_configs_state_dict = {}
|
42 |
if speaker_name not in current_configs_state_dict:
|
43 |
current_configs_state_dict[speaker_name] = {}
|
@@ -51,107 +50,102 @@ def load_refresh_per_speaker_ui(script_text: str, current_configs_state_dict: di
|
|
51 |
Generates or refreshes the dynamic UI components (accordions) for each speaker.
|
52 |
Returns a list of Gradio components to populate the dynamic UI area and the updated state.
|
53 |
"""
|
54 |
-
#
|
55 |
-
print("DEBUG: load_refresh_per_speaker_ui CALLED - HARDCODED RETURN")
|
56 |
-
debug_markdown = gr.Markdown("## !! Dynamic Area Test Content Loaded !!")
|
57 |
# Return this simple component and an empty dict for state for now
|
58 |
return [debug_markdown], {}
|
59 |
-
#
|
60 |
-
|
61 |
-
#
|
|
|
62 |
# unique_speakers = get_speakers_from_script(script_text)
|
63 |
# new_ui_components = []
|
64 |
|
65 |
# if current_configs_state_dict is None:
|
66 |
-
|
67 |
|
68 |
-
# # Ensure a default voice for safety
|
69 |
# safe_default_voice = APP_AVAILABLE_VOICES[0] if APP_AVAILABLE_VOICES else "alloy"
|
70 |
|
71 |
# for speaker_name in unique_speakers:
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
# current_configs_state_dict[speaker_name].setdefault("custom_instructions", "")
|
82 |
|
83 |
# if not unique_speakers:
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
|
88 |
-
# print(f"Found speakers: {unique_speakers}. Building UI...")
|
89 |
# for speaker_name in unique_speakers:
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
# inputs=[speed_slider, speaker_configs_state_component],
|
119 |
-
# outputs=[speaker_configs_state_component]
|
120 |
-
# )
|
121 |
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
# )
|
147 |
-
# new_ui_components.append(speaker_accordion)
|
148 |
|
149 |
-
# print(f"Returning {len(new_ui_components)} UI components for dynamic area.")
|
150 |
# return new_ui_components, current_configs_state_dict
|
|
|
151 |
|
152 |
|
153 |
async def handle_script_processing(
|
154 |
-
openai_api_key: str, async_openai_client, nsfw_api_url_template: str,
|
155 |
dialogue_script: str, tts_model: str, pause_ms: int,
|
156 |
speaker_config_method: str, global_voice_selection: str,
|
157 |
speaker_configs_state_dict: dict,
|
@@ -179,14 +173,13 @@ async def handle_script_processing(
|
|
179 |
|
180 |
if speaker_configs_state_dict is None: speaker_configs_state_dict = {}
|
181 |
|
182 |
-
# Ensure a default voice for safety
|
183 |
safe_default_global_voice = global_voice_selection if global_voice_selection in APP_AVAILABLE_VOICES else DEFAULT_FALLBACK_VOICE
|
184 |
|
185 |
speaker_voice_map = {}
|
186 |
if speaker_config_method in ["Random per Speaker", "A/B Round Robin"]:
|
187 |
unique_script_speakers_for_map = get_speakers_from_script(dialogue_script)
|
188 |
temp_voices_pool = APP_AVAILABLE_VOICES.copy()
|
189 |
-
if not temp_voices_pool: temp_voices_pool = [DEFAULT_FALLBACK_VOICE]
|
190 |
|
191 |
if speaker_config_method == "Random per Speaker":
|
192 |
for spk_name in unique_script_speakers_for_map:
|
@@ -196,7 +189,6 @@ async def handle_script_processing(
|
|
196 |
speaker_voice_map[spk_name] = temp_voices_pool[i % len(temp_voices_pool)]
|
197 |
|
198 |
tasks = []
|
199 |
-
# line_audio_files map to store results by original line ID for correct ordering
|
200 |
line_audio_files_map = {}
|
201 |
|
202 |
for i, line_data in enumerate(parsed_lines):
|
@@ -240,7 +232,7 @@ async def handle_script_processing(
|
|
240 |
results = await asyncio.gather(*tasks, return_exceptions=True)
|
241 |
|
242 |
for idx, res_path_or_exc in enumerate(results):
|
243 |
-
original_line_id = parsed_lines[idx]['id']
|
244 |
if isinstance(res_path_or_exc, Exception):
|
245 |
print(f"Error synthesizing line ID {original_line_id} ({parsed_lines[idx]['speaker']}): {res_path_or_exc}")
|
246 |
line_audio_files_map[original_line_id] = None
|
@@ -250,14 +242,13 @@ async def handle_script_processing(
|
|
250 |
else:
|
251 |
line_audio_files_map[original_line_id] = res_path_or_exc
|
252 |
|
253 |
-
# Reconstruct ordered list of files for merging, using original line IDs
|
254 |
ordered_files_for_merge_and_zip = []
|
255 |
for p_line in parsed_lines:
|
256 |
file_path = line_audio_files_map.get(p_line['id'])
|
257 |
if file_path and os.path.exists(file_path) and os.path.getsize(file_path) > 0:
|
258 |
ordered_files_for_merge_and_zip.append(file_path)
|
259 |
else:
|
260 |
-
ordered_files_for_merge_and_zip.append(None)
|
261 |
|
262 |
valid_files_for_zip = [f for f in ordered_files_for_merge_and_zip if f]
|
263 |
|
@@ -271,7 +262,6 @@ async def handle_script_processing(
|
|
271 |
zf.write(f_path, os.path.basename(f_path))
|
272 |
|
273 |
merged_fn = os.path.join(job_audio_path_prefix, "merged_dialogue.mp3")
|
274 |
-
# For merge_mp3_files, pass only the list of existing files in order
|
275 |
files_to_actually_merge = [f for f in ordered_files_for_merge_and_zip if f]
|
276 |
merged_path = merge_mp3_files(files_to_actually_merge, merged_fn, pause_ms)
|
277 |
|
@@ -300,31 +290,26 @@ def handle_calculate_cost(dialogue_script: str, tts_model: str):
|
|
300 |
|
301 |
def update_model_controls_visibility(selected_model: str, script_text_for_refresh: str, current_speaker_configs_for_refresh: dict, speaker_configs_state_comp: gr.State):
|
302 |
"""Updates visibility of global controls and refreshes per-speaker UI when TTS model changes."""
|
303 |
-
print(f"Model changed to: {selected_model}. Refreshing dynamic UI and controls.")
|
304 |
try:
|
305 |
-
# load_refresh_per_speaker_ui might return components or markdown
|
306 |
-
# It now takes speaker_configs_state_comp as an argument to wire up .change() correctly
|
307 |
dynamic_ui_output, updated_state = load_refresh_per_speaker_ui(
|
308 |
script_text_for_refresh, current_speaker_configs_for_refresh, selected_model, speaker_configs_state_comp
|
309 |
)
|
310 |
except Exception as e:
|
311 |
print(f"Error in load_refresh_per_speaker_ui called from model_controls_visibility: {e}")
|
312 |
-
# Fallback: clear dynamic UI and keep state as is, or return an error message component
|
313 |
dynamic_ui_output = [gr.Markdown(f"Error refreshing per-speaker UI: {e}")]
|
314 |
-
updated_state = current_speaker_configs_for_refresh
|
315 |
|
316 |
is_tts1_family = selected_model in ["tts-1", "tts-1-hd"]
|
317 |
is_gpt_mini_tts = selected_model == "gpt-4o-mini-tts"
|
318 |
|
319 |
-
#
|
320 |
-
|
321 |
-
|
322 |
-
|
323 |
-
|
324 |
-
|
325 |
-
|
326 |
-
}
|
327 |
-
return updates["global_speed_input"], updates["global_instructions_input"], updates["dynamic_speaker_ui_area"], updates["speaker_configs_state"]
|
328 |
|
329 |
|
330 |
def update_speaker_config_method_visibility(method: str):
|
@@ -332,8 +317,10 @@ def update_speaker_config_method_visibility(method: str):
|
|
332 |
is_single = (method == "Single Voice (Global)")
|
333 |
is_detailed_per_speaker = (method == "Detailed Configuration (Per Speaker UI)")
|
334 |
|
335 |
-
#
|
336 |
-
|
337 |
-
|
338 |
-
|
339 |
-
|
|
|
|
|
|
36 |
|
37 |
def handle_dynamic_input_change(new_value, current_configs_state_dict: dict, speaker_name: str, config_key: str, tts_model: str):
|
38 |
"""Handles changes from dynamically generated UI elements for per-speaker settings."""
|
39 |
+
if current_configs_state_dict is None:
|
|
|
40 |
current_configs_state_dict = {}
|
41 |
if speaker_name not in current_configs_state_dict:
|
42 |
current_configs_state_dict[speaker_name] = {}
|
|
|
50 |
Generates or refreshes the dynamic UI components (accordions) for each speaker.
|
51 |
Returns a list of Gradio components to populate the dynamic UI area and the updated state.
|
52 |
"""
|
53 |
+
# --- START OF PHASE 1 DEBUGGING ---
|
54 |
+
print("DEBUG: load_refresh_per_speaker_ui CALLED - Phase 1: HARDCODED RETURN")
|
55 |
+
debug_markdown = gr.Markdown("## !! Dynamic Area Test Content Loaded via Load/Refresh Button !!")
|
56 |
# Return this simple component and an empty dict for state for now
|
57 |
return [debug_markdown], {}
|
58 |
+
# --- END OF PHASE 1 DEBUGGING ---
|
59 |
+
|
60 |
+
# --- ORIGINAL LOGIC (Commented out for Phase 1) ---
|
61 |
+
# print(f"Load/Refresh UI called. TTS Model: {tts_model}")
|
62 |
# unique_speakers = get_speakers_from_script(script_text)
|
63 |
# new_ui_components = []
|
64 |
|
65 |
# if current_configs_state_dict is None:
|
66 |
+
# current_configs_state_dict = {}
|
67 |
|
|
|
68 |
# safe_default_voice = APP_AVAILABLE_VOICES[0] if APP_AVAILABLE_VOICES else "alloy"
|
69 |
|
70 |
# for speaker_name in unique_speakers:
|
71 |
+
# if speaker_name not in current_configs_state_dict:
|
72 |
+
# current_configs_state_dict[speaker_name] = {
|
73 |
+
# "voice": safe_default_voice, "speed": 1.0,
|
74 |
+
# "vibe": DEFAULT_VIBE, "custom_instructions": ""
|
75 |
+
# }
|
76 |
+
# current_configs_state_dict[speaker_name].setdefault("voice", safe_default_voice)
|
77 |
+
# current_configs_state_dict[speaker_name].setdefault("speed", 1.0)
|
78 |
+
# current_configs_state_dict[speaker_name].setdefault("vibe", DEFAULT_VIBE)
|
79 |
+
# current_configs_state_dict[speaker_name].setdefault("custom_instructions", "")
|
|
|
80 |
|
81 |
# if not unique_speakers:
|
82 |
+
# print("No unique speakers found, returning markdown.")
|
83 |
+
# new_ui_components.append(gr.Markdown("No speakers detected in the script, or script is empty. Type a script and click 'Load/Refresh' again, or change the script content."))
|
84 |
+
# return new_ui_components, current_configs_state_dict
|
85 |
|
86 |
+
# print(f"Found speakers: {unique_speakers}. Building UI...")
|
87 |
# for speaker_name in unique_speakers:
|
88 |
+
# speaker_cfg = current_configs_state_dict[speaker_name]
|
89 |
+
|
90 |
+
# speed_interactive = tts_model in ["tts-1", "tts-1-hd"]
|
91 |
+
# instructions_relevant = tts_model == "gpt-4o-mini-tts"
|
92 |
+
|
93 |
+
# accordion_elem_id = f"accordion_speaker_{speaker_name.replace(' ', '_')}"
|
94 |
+
|
95 |
+
# with gr.Accordion(label=f"Settings for: {speaker_name}", open=False, elem_id=accordion_elem_id) as speaker_accordion:
|
96 |
+
# voice_dd = gr.Dropdown(
|
97 |
+
# label="Voice", choices=APP_AVAILABLE_VOICES, value=speaker_cfg.get("voice", safe_default_voice), interactive=True
|
98 |
+
# )
|
99 |
+
# voice_dd.change(
|
100 |
+
# fn=partial(handle_dynamic_input_change, speaker_name=speaker_name, config_key="voice", tts_model=tts_model),
|
101 |
+
# inputs=[voice_dd, speaker_configs_state_component],
|
102 |
+
# outputs=[speaker_configs_state_component]
|
103 |
+
# )
|
104 |
+
|
105 |
+
# speed_slider_label = "Speech Speed" + (" (Active for tts-1/hd)" if speed_interactive else " (N/A for this model)")
|
106 |
+
# speed_slider = gr.Slider(
|
107 |
+
# label=speed_slider_label, minimum=0.25, maximum=4.0, value=float(speaker_cfg.get("speed", 1.0)),
|
108 |
+
# step=0.05, interactive=speed_interactive
|
109 |
+
# )
|
110 |
+
# if speed_interactive:
|
111 |
+
# speed_slider.release(
|
112 |
+
# fn=partial(handle_dynamic_input_change, speaker_name=speaker_name, config_key="speed", tts_model=tts_model),
|
113 |
+
# inputs=[speed_slider, speaker_configs_state_component],
|
114 |
+
# outputs=[speaker_configs_state_component]
|
115 |
+
# )
|
|
|
|
|
|
|
116 |
|
117 |
+
# vibe_label = "Vibe/Emotion Preset" + (" (For gpt-4o-mini-tts)" if instructions_relevant else " (Less impact on other models)")
|
118 |
+
# vibe_dd = gr.Dropdown(
|
119 |
+
# label=vibe_label, choices=VIBE_CHOICES, value=speaker_cfg.get("vibe", DEFAULT_VIBE), interactive=True
|
120 |
+
# )
|
121 |
+
# vibe_dd.change(
|
122 |
+
# fn=partial(handle_dynamic_input_change, speaker_name=speaker_name, config_key="vibe", tts_model=tts_model),
|
123 |
+
# inputs=[vibe_dd, speaker_configs_state_component],
|
124 |
+
# outputs=[speaker_configs_state_component]
|
125 |
+
# )
|
126 |
+
|
127 |
+
# custom_instr_label = "Custom Instructions"
|
128 |
+
# custom_instr_placeholder = "Used if Vibe is 'Custom...'. Overrides Vibe preset."
|
129 |
+
# custom_instr_tb = gr.Textbox(
|
130 |
+
# label=custom_instr_label,
|
131 |
+
# value=speaker_cfg.get("custom_instructions", ""),
|
132 |
+
# placeholder=custom_instr_placeholder,
|
133 |
+
# lines=2, interactive=True
|
134 |
+
# )
|
135 |
+
# custom_instr_tb.input(
|
136 |
+
# fn=partial(handle_dynamic_input_change, speaker_name=speaker_name, config_key="custom_instructions", tts_model=tts_model),
|
137 |
+
# inputs=[custom_instr_tb, speaker_configs_state_component],
|
138 |
+
# outputs=[speaker_configs_state_component]
|
139 |
+
# )
|
140 |
+
# new_ui_components.append(speaker_accordion)
|
|
|
|
|
141 |
|
142 |
+
# print(f"Returning {len(new_ui_components)} UI components for dynamic area.")
|
143 |
# return new_ui_components, current_configs_state_dict
|
144 |
+
# --- END OF ORIGINAL LOGIC ---
|
145 |
|
146 |
|
147 |
async def handle_script_processing(
|
148 |
+
openai_api_key: str, async_openai_client, nsfw_api_url_template: str,
|
149 |
dialogue_script: str, tts_model: str, pause_ms: int,
|
150 |
speaker_config_method: str, global_voice_selection: str,
|
151 |
speaker_configs_state_dict: dict,
|
|
|
173 |
|
174 |
if speaker_configs_state_dict is None: speaker_configs_state_dict = {}
|
175 |
|
|
|
176 |
safe_default_global_voice = global_voice_selection if global_voice_selection in APP_AVAILABLE_VOICES else DEFAULT_FALLBACK_VOICE
|
177 |
|
178 |
speaker_voice_map = {}
|
179 |
if speaker_config_method in ["Random per Speaker", "A/B Round Robin"]:
|
180 |
unique_script_speakers_for_map = get_speakers_from_script(dialogue_script)
|
181 |
temp_voices_pool = APP_AVAILABLE_VOICES.copy()
|
182 |
+
if not temp_voices_pool: temp_voices_pool = [DEFAULT_FALLBACK_VOICE]
|
183 |
|
184 |
if speaker_config_method == "Random per Speaker":
|
185 |
for spk_name in unique_script_speakers_for_map:
|
|
|
189 |
speaker_voice_map[spk_name] = temp_voices_pool[i % len(temp_voices_pool)]
|
190 |
|
191 |
tasks = []
|
|
|
192 |
line_audio_files_map = {}
|
193 |
|
194 |
for i, line_data in enumerate(parsed_lines):
|
|
|
232 |
results = await asyncio.gather(*tasks, return_exceptions=True)
|
233 |
|
234 |
for idx, res_path_or_exc in enumerate(results):
|
235 |
+
original_line_id = parsed_lines[idx]['id']
|
236 |
if isinstance(res_path_or_exc, Exception):
|
237 |
print(f"Error synthesizing line ID {original_line_id} ({parsed_lines[idx]['speaker']}): {res_path_or_exc}")
|
238 |
line_audio_files_map[original_line_id] = None
|
|
|
242 |
else:
|
243 |
line_audio_files_map[original_line_id] = res_path_or_exc
|
244 |
|
|
|
245 |
ordered_files_for_merge_and_zip = []
|
246 |
for p_line in parsed_lines:
|
247 |
file_path = line_audio_files_map.get(p_line['id'])
|
248 |
if file_path and os.path.exists(file_path) and os.path.getsize(file_path) > 0:
|
249 |
ordered_files_for_merge_and_zip.append(file_path)
|
250 |
else:
|
251 |
+
ordered_files_for_merge_and_zip.append(None)
|
252 |
|
253 |
valid_files_for_zip = [f for f in ordered_files_for_merge_and_zip if f]
|
254 |
|
|
|
262 |
zf.write(f_path, os.path.basename(f_path))
|
263 |
|
264 |
merged_fn = os.path.join(job_audio_path_prefix, "merged_dialogue.mp3")
|
|
|
265 |
files_to_actually_merge = [f for f in ordered_files_for_merge_and_zip if f]
|
266 |
merged_path = merge_mp3_files(files_to_actually_merge, merged_fn, pause_ms)
|
267 |
|
|
|
290 |
|
291 |
def update_model_controls_visibility(selected_model: str, script_text_for_refresh: str, current_speaker_configs_for_refresh: dict, speaker_configs_state_comp: gr.State):
|
292 |
"""Updates visibility of global controls and refreshes per-speaker UI when TTS model changes."""
|
293 |
+
print(f"Model changed to: {selected_model}. Refreshing dynamic UI and controls.")
|
294 |
try:
|
|
|
|
|
295 |
dynamic_ui_output, updated_state = load_refresh_per_speaker_ui(
|
296 |
script_text_for_refresh, current_speaker_configs_for_refresh, selected_model, speaker_configs_state_comp
|
297 |
)
|
298 |
except Exception as e:
|
299 |
print(f"Error in load_refresh_per_speaker_ui called from model_controls_visibility: {e}")
|
|
|
300 |
dynamic_ui_output = [gr.Markdown(f"Error refreshing per-speaker UI: {e}")]
|
301 |
+
updated_state = current_speaker_configs_for_refresh
|
302 |
|
303 |
is_tts1_family = selected_model in ["tts-1", "tts-1-hd"]
|
304 |
is_gpt_mini_tts = selected_model == "gpt-4o-mini-tts"
|
305 |
|
306 |
+
# Return a TUPLE of updates, matching the order of components in 'outputs' list of the .change() event
|
307 |
+
return (
|
308 |
+
gr.update(visible=is_tts1_family, interactive=is_tts1_family), # For global_speed_input
|
309 |
+
gr.update(visible=is_gpt_mini_tts, interactive=is_gpt_mini_tts), # For global_instructions_input
|
310 |
+
dynamic_ui_output, # For dynamic_speaker_ui_area
|
311 |
+
updated_state # For speaker_configs_state
|
312 |
+
)
|
|
|
|
|
313 |
|
314 |
|
315 |
def update_speaker_config_method_visibility(method: str):
|
|
|
317 |
is_single = (method == "Single Voice (Global)")
|
318 |
is_detailed_per_speaker = (method == "Detailed Configuration (Per Speaker UI)")
|
319 |
|
320 |
+
# Return a TUPLE of gr.update objects, in the order expected by the outputs list
|
321 |
+
# of the speaker_config_method_dropdown.change() event in app.py
|
322 |
+
# The order in app.py is: outputs=[single_voice_group, detailed_per_speaker_ui_group]
|
323 |
+
return (
|
324 |
+
gr.update(visible=is_single), # For single_voice_group
|
325 |
+
gr.update(visible=is_detailed_per_speaker) # For detailed_per_speaker_ui_group
|
326 |
+
)
|