abocha committed on
Commit
024b7b9
·
1 Parent(s): 801009c

progress bar fix

Browse files
Files changed (1) hide show
  1. event_handlers.py +63 -88
event_handlers.py CHANGED
@@ -1,4 +1,4 @@
1
- # FILE: event_handlers.py
2
  import gradio as gr
3
  import os
4
  import asyncio
@@ -7,11 +7,11 @@ import shutil
7
  import zipfile
8
  import random
9
  from functools import partial
10
- import datetime
11
 
12
  from utils.script_parser import parse_dialogue_script, calculate_cost
13
- from utils.openai_tts import synthesize_speech_line
14
- from utils.merge_audio import merge_mp3_files
15
 
16
  from ui_layout import APP_AVAILABLE_VOICES, DEFAULT_VIBE, VIBE_CHOICES, PREDEFINED_VIBES, DEFAULT_GLOBAL_VOICE
17
 
@@ -20,18 +20,18 @@ def get_speakers_from_script(script_text: str) -> list:
20
  if not script_text or not script_text.strip():
21
  return []
22
  try:
23
- parsed_lines, _ = parse_dialogue_script(script_text) # Assuming this returns (list_of_dicts, total_chars)
24
  if not parsed_lines:
25
  return []
26
  seen_speakers = set()
27
  ordered_unique_speakers = []
28
  for line_data in parsed_lines:
29
  speaker = line_data.get("speaker")
30
- if speaker and speaker not in seen_speakers: # Ensure speaker is not None or empty
31
  ordered_unique_speakers.append(speaker)
32
  seen_speakers.add(speaker)
33
  return ordered_unique_speakers
34
- except ValueError:
35
  print("ValueError during script parsing in get_speakers_from_script.")
36
  return []
37
  except Exception as e:
@@ -40,33 +40,28 @@ def get_speakers_from_script(script_text: str) -> list:
40
 
41
 
42
  def handle_dynamic_accordion_input_change(
43
- new_value, # Value from the changed component (e.g., voice_dropdown)
44
- current_speaker_configs: dict, # Current value of speaker_configs_state
45
- speaker_name: str, # Passed via partial from the event listener
46
- config_key: str # Passed via partial from the event listener
47
  ):
48
- """
49
- Updates the speaker_configs_state when a dynamic UI element within an Accordion changes.
50
- """
51
  if not isinstance(current_speaker_configs, dict):
52
  print(f"Warning: current_speaker_configs was not a dict in handle_dynamic_accordion_input_change. Type: {type(current_speaker_configs)}. Re-initializing.")
53
  current_speaker_configs = {}
54
 
55
- updated_configs = current_speaker_configs.copy()
56
 
57
  if speaker_name not in updated_configs:
58
  updated_configs[speaker_name] = {}
59
-
60
  updated_configs[speaker_name][config_key] = new_value
61
-
62
  updated_configs["_last_dynamic_update_details"] = f"Speaker: {speaker_name}, Key: {config_key}, Val: {str(new_value)[:20]}, TS: {datetime.datetime.now().isoformat()}"
63
-
64
  print(f"DEBUG (dynamic_input_change): Speaker '{speaker_name}' config '{config_key}' to '{str(new_value)[:50]}'. New state hint: {updated_configs.get('_last_dynamic_update_details')}")
65
  return updated_configs
66
 
67
 
68
  async def handle_script_processing(
69
- openai_api_key: str, async_openai_client, nsfw_api_url_template: str,
70
  dialogue_script: str, tts_model: str, pause_ms: int,
71
  speaker_config_method: str, global_voice_selection: str,
72
  speaker_configs_state_dict: dict,
@@ -90,31 +85,35 @@ async def handle_script_processing(
90
  except ValueError as e:
91
  shutil.rmtree(job_audio_path_prefix); return None, None, f"Script parsing error: {str(e)}"
92
 
93
- if not isinstance(speaker_configs_state_dict, dict):
94
  print(f"Warning: speaker_configs_state_dict was not a dict in handle_script_processing. Re-initializing. Type: {type(speaker_configs_state_dict)}")
95
  speaker_configs_state_dict = {}
96
-
97
  safe_default_global_voice = global_voice_selection if global_voice_selection in APP_AVAILABLE_VOICES else (APP_AVAILABLE_VOICES[0] if APP_AVAILABLE_VOICES else "alloy")
98
 
99
- speaker_voice_map = {}
100
  if speaker_config_method in ["Random per Speaker", "A/B Round Robin"]:
101
  unique_script_speakers_for_map = get_speakers_from_script(dialogue_script)
102
  temp_voices_pool = APP_AVAILABLE_VOICES.copy()
103
  if not temp_voices_pool: temp_voices_pool = [safe_default_global_voice]
104
-
105
  if speaker_config_method == "Random per Speaker":
106
  for spk_name in unique_script_speakers_for_map:
107
  speaker_voice_map[spk_name] = random.choice(temp_voices_pool)
108
  elif speaker_config_method == "A/B Round Robin" and temp_voices_pool:
109
  for i, spk_name in enumerate(unique_script_speakers_for_map):
110
  speaker_voice_map[spk_name] = temp_voices_pool[i % len(temp_voices_pool)]
111
-
112
- task_info_list = []
 
 
 
113
  for i, line_data in enumerate(parsed_lines):
114
  speaker_name = line_data["speaker"]
115
  line_text = line_data["text"]
116
  line_id = line_data["id"]
117
 
 
118
  line_voice = safe_default_global_voice
119
  line_speed = global_speed
120
  line_instructions = global_instructions.strip() if global_instructions and global_instructions.strip() else None
@@ -122,68 +121,57 @@ async def handle_script_processing(
122
  if speaker_config_method == "Detailed Configuration (Per Speaker UI)":
123
  spk_cfg = speaker_configs_state_dict.get(speaker_name, {})
124
  line_voice = spk_cfg.get("voice", safe_default_global_voice)
125
-
126
  if tts_model in ["tts-1", "tts-1-hd"]:
127
  line_speed = float(spk_cfg.get("speed", global_speed))
128
- else: line_speed = 1.0
129
-
130
  if tts_model == "gpt-4o-mini-tts":
131
  vibe = spk_cfg.get("vibe", DEFAULT_VIBE)
132
  custom_instr_raw = spk_cfg.get("custom_instructions", "")
133
  custom_instr = custom_instr_raw.strip() if custom_instr_raw else ""
134
-
135
  current_line_specific_instructions = None
136
  if vibe == "Custom..." and custom_instr:
137
  current_line_specific_instructions = custom_instr
138
  elif vibe != "None" and vibe != "Custom..." and PREDEFINED_VIBES.get(vibe):
139
  current_line_specific_instructions = PREDEFINED_VIBES[vibe]
140
-
141
- # If per-speaker instructions are set, they take precedence. Otherwise, fall back to global instructions.
142
  line_instructions = current_line_specific_instructions if current_line_specific_instructions is not None else line_instructions
143
- else: # tts-1, tts-1-hd do not use vibe/custom_instructions from per-speaker UI
144
- # They will use the global_instructions if set.
145
- pass # line_instructions already set to global_instructions or None
146
-
147
  elif speaker_config_method in ["Random per Speaker", "A/B Round Robin"]:
148
  line_voice = speaker_voice_map.get(speaker_name, safe_default_global_voice)
149
- # For these methods, speed and instructions remain global
150
- if tts_model not in ["tts-1", "tts-1-hd"]: line_speed = 1.0
151
-
152
- if tts_model not in ["tts-1", "tts-1-hd"]: line_speed = 1.0
 
153
 
154
  out_fn = os.path.join(job_audio_path_prefix, f"line_{line_id}_{speaker_name.replace(' ','_')}.mp3")
155
- progress(i / len(parsed_lines), desc=f"Synthesizing: Line {i+1}/{len(parsed_lines)} ({speaker_name})")
156
-
157
- current_task = synthesize_speech_line(
158
- client=async_openai_client, text=line_text, voice=line_voice,
159
- output_path=out_fn, model=tts_model, speed=line_speed,
160
- instructions=line_instructions, nsfw_api_url_template=nsfw_api_url_template,
161
- line_index=line_id
162
- )
163
- task_info_list.append({"id": line_id, "speaker": speaker_name, "task": current_task, "out_fn": out_fn})
164
-
165
- processed_results_map = {} # Store by line_id for easier lookup
166
- for info in task_info_list:
167
  try:
168
- result_path = await info['task']
169
- processed_results_map[info['id']] = {"path": result_path, "speaker": info['speaker']}
 
 
 
 
 
170
  except Exception as e:
171
- print(f"Error synthesizing line ID {info['id']} ({info['speaker']}): {e}")
172
- processed_results_map[info['id']] = {"path": None, "error": e, "speaker": info['speaker']}
 
 
173
 
174
  ordered_files_for_merge_and_zip = []
175
- for p_line in parsed_lines: # Iterate through original parsed lines to maintain order
176
  line_id = p_line['id']
177
  res = processed_results_map.get(line_id)
178
  if res and res.get("path") and os.path.exists(res["path"]) and os.path.getsize(res["path"]) > 0:
179
  ordered_files_for_merge_and_zip.append(res["path"])
180
  else:
181
- # File was not successfully created or result not found, append None placeholder
182
- ordered_files_for_merge_and_zip.append(None)
183
- if res: print(f"Skipped or failed synthesizing line ID {line_id} ({res.get('speaker', 'Unknown')}) for merge/zip.")
184
  else: print(f"Result for line ID {line_id} not found in processed_results_map.")
185
 
186
-
187
  valid_files_for_zip = [f for f in ordered_files_for_merge_and_zip if f]
188
 
189
  if not valid_files_for_zip:
@@ -193,21 +181,25 @@ async def handle_script_processing(
193
  with zipfile.ZipFile(zip_fn, 'w') as zf:
194
  for f_path in valid_files_for_zip:
195
  zf.write(f_path, os.path.basename(f_path))
196
-
197
- files_to_actually_merge = valid_files_for_zip # Already ordered and filtered
198
  merged_fn = os.path.join(job_audio_path_prefix, "merged_dialogue.mp3")
199
  merged_path = merge_mp3_files(files_to_actually_merge, merged_fn, pause_ms)
200
 
201
  status_msg = f"Successfully processed {len(valid_files_for_zip)} out of {len(parsed_lines)} lines. "
202
- if len(valid_files_for_zip) < len(parsed_lines): status_msg += "Some lines may have failed. "
203
  if not merged_path and len(valid_files_for_zip) > 0 : status_msg += "Merging audio failed. "
204
  elif not merged_path: status_msg = "No audio to merge (all lines failed or were skipped)."
205
  else: status_msg += "Merged audio generated."
206
-
 
 
207
  return (zip_fn if os.path.exists(zip_fn) else None,
208
  merged_path if merged_path and os.path.exists(merged_path) else None,
209
  status_msg)
210
 
 
 
211
  def handle_calculate_cost(dialogue_script: str, tts_model: str):
212
  if not dialogue_script or not dialogue_script.strip(): return "Cost: $0.00 (Script is empty)"
213
  try:
@@ -222,15 +214,6 @@ def handle_load_refresh_per_speaker_ui_trigger(script_text: str, current_speaker
222
  print(f"DEBUG (Load/Refresh Trigger): Script: '{script_text[:30]}...', Model: {tts_model}, Current State Keys: {list(current_speaker_configs.keys()) if isinstance(current_speaker_configs, dict) else 'Not a dict'}")
223
  if not isinstance(current_speaker_configs, dict): current_speaker_configs = {}
224
  updated_configs = current_speaker_configs.copy()
225
-
226
- # Optionally, you might want to pre-populate settings for new speakers here
227
- # unique_speakers_in_script = get_speakers_from_script(script_text)
228
- # for speaker in unique_speakers_in_script:
229
- # if speaker not in updated_configs:
230
- # updated_configs[speaker] = {"voice": DEFAULT_GLOBAL_VOICE} # Default init
231
- # if tts_model in ["tts-1", "tts-1-hd"]: updated_configs[speaker]["speed"] = 1.0
232
- # elif tts_model == "gpt-4o-mini-tts": updated_configs[speaker]["vibe"] = DEFAULT_VIBE
233
-
234
  updated_configs["_last_action_source"] = "load_refresh_button"
235
  updated_configs["_last_action_timestamp"] = datetime.datetime.now().isoformat()
236
  return updated_configs
@@ -239,11 +222,8 @@ def handle_tts_model_change(selected_model: str, current_speaker_configs: dict):
239
  print(f"DEBUG (TTS Model Change): Model: {selected_model}, Current State Keys: {list(current_speaker_configs.keys()) if isinstance(current_speaker_configs, dict) else 'Not a dict'}")
240
  if not isinstance(current_speaker_configs, dict): current_speaker_configs = {}
241
  updated_configs = current_speaker_configs.copy()
242
-
243
- # When model changes, you might want to reset or adjust model-specific settings for all speakers
244
- # For example, 'speed' is for tts-1, 'vibe' for gpt-4o-mini-tts
245
- for speaker_name_key in list(updated_configs.keys()): # Iterate over keys if modifying dict
246
- if isinstance(updated_configs[speaker_name_key], dict): # Check if it's a speaker config dict
247
  if selected_model == "gpt-4o-mini-tts":
248
  updated_configs[speaker_name_key].pop("speed", None)
249
  if "vibe" not in updated_configs[speaker_name_key]:
@@ -253,26 +233,21 @@ def handle_tts_model_change(selected_model: str, current_speaker_configs: dict):
253
  updated_configs[speaker_name_key].pop("custom_instructions", None)
254
  if "speed" not in updated_configs[speaker_name_key]:
255
  updated_configs[speaker_name_key]["speed"] = 1.0
256
- # Add other model-specific adjustments if needed
257
-
258
  updated_configs["_last_action_source"] = "tts_model_change"
259
  updated_configs["_last_action_timestamp"] = datetime.datetime.now().isoformat()
260
-
261
  is_tts1_family = selected_model in ["tts-1", "tts-1-hd"]
262
  is_gpt_mini_tts = selected_model == "gpt-4o-mini-tts"
263
-
264
  return (
265
- gr.update(visible=is_tts1_family, interactive=is_tts1_family),
266
- gr.update(visible=is_gpt_mini_tts, interactive=is_gpt_mini_tts),
267
- updated_configs
268
  )
269
 
270
  def handle_speaker_config_method_visibility_change(method: str):
271
  print(f"DEBUG (Config Method Change): Method: {method}")
272
  is_single_voice_visible = (method == "Single Voice (Global)")
273
  is_detailed_per_speaker_container_visible = (method == "Detailed Configuration (Per Speaker UI)")
274
-
275
  return (
276
- gr.update(visible=is_single_voice_visible),
277
- gr.update(visible=is_detailed_per_speaker_container_visible)
278
  )
 
1
+ # FILE: esl-dialogue-tts/event_handlers.py
2
  import gradio as gr
3
  import os
4
  import asyncio
 
7
  import zipfile
8
  import random
9
  from functools import partial
10
+ import datetime
11
 
12
  from utils.script_parser import parse_dialogue_script, calculate_cost
13
+ from utils.openai_tts import synthesize_speech_line
14
+ from utils.merge_audio import merge_mp3_files
15
 
16
  from ui_layout import APP_AVAILABLE_VOICES, DEFAULT_VIBE, VIBE_CHOICES, PREDEFINED_VIBES, DEFAULT_GLOBAL_VOICE
17
 
 
20
  if not script_text or not script_text.strip():
21
  return []
22
  try:
23
+ parsed_lines, _ = parse_dialogue_script(script_text)
24
  if not parsed_lines:
25
  return []
26
  seen_speakers = set()
27
  ordered_unique_speakers = []
28
  for line_data in parsed_lines:
29
  speaker = line_data.get("speaker")
30
+ if speaker and speaker not in seen_speakers:
31
  ordered_unique_speakers.append(speaker)
32
  seen_speakers.add(speaker)
33
  return ordered_unique_speakers
34
+ except ValueError:
35
  print("ValueError during script parsing in get_speakers_from_script.")
36
  return []
37
  except Exception as e:
 
40
 
41
 
42
  def handle_dynamic_accordion_input_change(
43
+ new_value,
44
+ current_speaker_configs: dict,
45
+ speaker_name: str,
46
+ config_key: str
47
  ):
 
 
 
48
  if not isinstance(current_speaker_configs, dict):
49
  print(f"Warning: current_speaker_configs was not a dict in handle_dynamic_accordion_input_change. Type: {type(current_speaker_configs)}. Re-initializing.")
50
  current_speaker_configs = {}
51
 
52
+ updated_configs = current_speaker_configs.copy()
53
 
54
  if speaker_name not in updated_configs:
55
  updated_configs[speaker_name] = {}
56
+
57
  updated_configs[speaker_name][config_key] = new_value
 
58
  updated_configs["_last_dynamic_update_details"] = f"Speaker: {speaker_name}, Key: {config_key}, Val: {str(new_value)[:20]}, TS: {datetime.datetime.now().isoformat()}"
 
59
  print(f"DEBUG (dynamic_input_change): Speaker '{speaker_name}' config '{config_key}' to '{str(new_value)[:50]}'. New state hint: {updated_configs.get('_last_dynamic_update_details')}")
60
  return updated_configs
61
 
62
 
63
  async def handle_script_processing(
64
+ openai_api_key: str, async_openai_client, nsfw_api_url_template: str,
65
  dialogue_script: str, tts_model: str, pause_ms: int,
66
  speaker_config_method: str, global_voice_selection: str,
67
  speaker_configs_state_dict: dict,
 
85
  except ValueError as e:
86
  shutil.rmtree(job_audio_path_prefix); return None, None, f"Script parsing error: {str(e)}"
87
 
88
+ if not isinstance(speaker_configs_state_dict, dict):
89
  print(f"Warning: speaker_configs_state_dict was not a dict in handle_script_processing. Re-initializing. Type: {type(speaker_configs_state_dict)}")
90
  speaker_configs_state_dict = {}
91
+
92
  safe_default_global_voice = global_voice_selection if global_voice_selection in APP_AVAILABLE_VOICES else (APP_AVAILABLE_VOICES[0] if APP_AVAILABLE_VOICES else "alloy")
93
 
94
+ speaker_voice_map = {} # Calculated once if needed
95
  if speaker_config_method in ["Random per Speaker", "A/B Round Robin"]:
96
  unique_script_speakers_for_map = get_speakers_from_script(dialogue_script)
97
  temp_voices_pool = APP_AVAILABLE_VOICES.copy()
98
  if not temp_voices_pool: temp_voices_pool = [safe_default_global_voice]
99
+
100
  if speaker_config_method == "Random per Speaker":
101
  for spk_name in unique_script_speakers_for_map:
102
  speaker_voice_map[spk_name] = random.choice(temp_voices_pool)
103
  elif speaker_config_method == "A/B Round Robin" and temp_voices_pool:
104
  for i, spk_name in enumerate(unique_script_speakers_for_map):
105
  speaker_voice_map[spk_name] = temp_voices_pool[i % len(temp_voices_pool)]
106
+
107
+ processed_results_map = {}
108
+ total_lines = len(parsed_lines)
109
+ progress(0, desc="Starting: Preparing for audio synthesis...")
110
+
111
  for i, line_data in enumerate(parsed_lines):
112
  speaker_name = line_data["speaker"]
113
  line_text = line_data["text"]
114
  line_id = line_data["id"]
115
 
116
+ # Determine voice, speed, and instructions for the current line
117
  line_voice = safe_default_global_voice
118
  line_speed = global_speed
119
  line_instructions = global_instructions.strip() if global_instructions and global_instructions.strip() else None
 
121
  if speaker_config_method == "Detailed Configuration (Per Speaker UI)":
122
  spk_cfg = speaker_configs_state_dict.get(speaker_name, {})
123
  line_voice = spk_cfg.get("voice", safe_default_global_voice)
 
124
  if tts_model in ["tts-1", "tts-1-hd"]:
125
  line_speed = float(spk_cfg.get("speed", global_speed))
126
+ # For gpt-4o-mini-tts, detailed instructions/vibe
 
127
  if tts_model == "gpt-4o-mini-tts":
128
  vibe = spk_cfg.get("vibe", DEFAULT_VIBE)
129
  custom_instr_raw = spk_cfg.get("custom_instructions", "")
130
  custom_instr = custom_instr_raw.strip() if custom_instr_raw else ""
 
131
  current_line_specific_instructions = None
132
  if vibe == "Custom..." and custom_instr:
133
  current_line_specific_instructions = custom_instr
134
  elif vibe != "None" and vibe != "Custom..." and PREDEFINED_VIBES.get(vibe):
135
  current_line_specific_instructions = PREDEFINED_VIBES[vibe]
 
 
136
  line_instructions = current_line_specific_instructions if current_line_specific_instructions is not None else line_instructions
 
 
 
 
137
  elif speaker_config_method in ["Random per Speaker", "A/B Round Robin"]:
138
  line_voice = speaker_voice_map.get(speaker_name, safe_default_global_voice)
139
+ # Speed and instructions remain global for these methods
140
+
141
+ # Ensure speed is 1.0 if model does not support it explicitly, or handled globally
142
+ if tts_model not in ["tts-1", "tts-1-hd"]:
143
+ line_speed = 1.0
144
 
145
  out_fn = os.path.join(job_audio_path_prefix, f"line_{line_id}_{speaker_name.replace(' ','_')}.mp3")
146
+
147
+ # Update progress BEFORE awaiting the synthesis for this line
148
+ progress_fraction = (i + 1) / total_lines
149
+ progress(progress_fraction, desc=f"Synthesizing: Line {i+1}/{total_lines} ('{speaker_name}')")
150
+
 
 
 
 
 
 
 
151
  try:
152
+ result_path = await synthesize_speech_line(
153
+ client=async_openai_client, text=line_text, voice=line_voice,
154
+ output_path=out_fn, model=tts_model, speed=line_speed,
155
+ instructions=line_instructions, nsfw_api_url_template=nsfw_api_url_template,
156
+ line_index=line_id
157
+ )
158
+ processed_results_map[line_id] = {"path": result_path, "speaker": speaker_name}
159
  except Exception as e:
160
+ print(f"Error synthesizing line ID {line_id} ({speaker_name}): {e}")
161
+ processed_results_map[line_id] = {"path": None, "error": str(e), "speaker": speaker_name}
162
+
163
+ progress(1.0, desc="Finalizing: Assembling audio files...")
164
 
165
  ordered_files_for_merge_and_zip = []
166
+ for p_line in parsed_lines:
167
  line_id = p_line['id']
168
  res = processed_results_map.get(line_id)
169
  if res and res.get("path") and os.path.exists(res["path"]) and os.path.getsize(res["path"]) > 0:
170
  ordered_files_for_merge_and_zip.append(res["path"])
171
  else:
172
+ if res: print(f"Skipped or failed synthesizing line ID {line_id} ({res.get('speaker', 'Unknown')}) for merge/zip. Error: {res.get('error')}")
 
 
173
  else: print(f"Result for line ID {line_id} not found in processed_results_map.")
174
 
 
175
  valid_files_for_zip = [f for f in ordered_files_for_merge_and_zip if f]
176
 
177
  if not valid_files_for_zip:
 
181
  with zipfile.ZipFile(zip_fn, 'w') as zf:
182
  for f_path in valid_files_for_zip:
183
  zf.write(f_path, os.path.basename(f_path))
184
+
185
+ files_to_actually_merge = valid_files_for_zip
186
  merged_fn = os.path.join(job_audio_path_prefix, "merged_dialogue.mp3")
187
  merged_path = merge_mp3_files(files_to_actually_merge, merged_fn, pause_ms)
188
 
189
  status_msg = f"Successfully processed {len(valid_files_for_zip)} out of {len(parsed_lines)} lines. "
190
+ if len(valid_files_for_zip) < len(parsed_lines): status_msg += "Some lines may have failed. Check console for details. "
191
  if not merged_path and len(valid_files_for_zip) > 0 : status_msg += "Merging audio failed. "
192
  elif not merged_path: status_msg = "No audio to merge (all lines failed or were skipped)."
193
  else: status_msg += "Merged audio generated."
194
+
195
+ progress(1.0, desc="Processing complete!") # Final update
196
+
197
  return (zip_fn if os.path.exists(zip_fn) else None,
198
  merged_path if merged_path and os.path.exists(merged_path) else None,
199
  status_msg)
200
 
201
+ # ... (rest of the event_handlers.py file remains the same) ...
202
+
203
  def handle_calculate_cost(dialogue_script: str, tts_model: str):
204
  if not dialogue_script or not dialogue_script.strip(): return "Cost: $0.00 (Script is empty)"
205
  try:
 
214
  print(f"DEBUG (Load/Refresh Trigger): Script: '{script_text[:30]}...', Model: {tts_model}, Current State Keys: {list(current_speaker_configs.keys()) if isinstance(current_speaker_configs, dict) else 'Not a dict'}")
215
  if not isinstance(current_speaker_configs, dict): current_speaker_configs = {}
216
  updated_configs = current_speaker_configs.copy()
 
 
 
 
 
 
 
 
 
217
  updated_configs["_last_action_source"] = "load_refresh_button"
218
  updated_configs["_last_action_timestamp"] = datetime.datetime.now().isoformat()
219
  return updated_configs
 
222
  print(f"DEBUG (TTS Model Change): Model: {selected_model}, Current State Keys: {list(current_speaker_configs.keys()) if isinstance(current_speaker_configs, dict) else 'Not a dict'}")
223
  if not isinstance(current_speaker_configs, dict): current_speaker_configs = {}
224
  updated_configs = current_speaker_configs.copy()
225
+ for speaker_name_key in list(updated_configs.keys()):
226
+ if isinstance(updated_configs[speaker_name_key], dict):
 
 
 
227
  if selected_model == "gpt-4o-mini-tts":
228
  updated_configs[speaker_name_key].pop("speed", None)
229
  if "vibe" not in updated_configs[speaker_name_key]:
 
233
  updated_configs[speaker_name_key].pop("custom_instructions", None)
234
  if "speed" not in updated_configs[speaker_name_key]:
235
  updated_configs[speaker_name_key]["speed"] = 1.0
 
 
236
  updated_configs["_last_action_source"] = "tts_model_change"
237
  updated_configs["_last_action_timestamp"] = datetime.datetime.now().isoformat()
 
238
  is_tts1_family = selected_model in ["tts-1", "tts-1-hd"]
239
  is_gpt_mini_tts = selected_model == "gpt-4o-mini-tts"
 
240
  return (
241
+ gr.update(visible=is_tts1_family, interactive=is_tts1_family),
242
+ gr.update(visible=is_gpt_mini_tts, interactive=is_gpt_mini_tts),
243
+ updated_configs
244
  )
245
 
246
  def handle_speaker_config_method_visibility_change(method: str):
247
  print(f"DEBUG (Config Method Change): Method: {method}")
248
  is_single_voice_visible = (method == "Single Voice (Global)")
249
  is_detailed_per_speaker_container_visible = (method == "Detailed Configuration (Per Speaker UI)")
 
250
  return (
251
+ gr.update(visible=is_single_voice_visible),
252
+ gr.update(visible=is_detailed_per_speaker_container_visible)
253
  )