Spaces:
Running
Running
debug new 2
Browse files- app.py +9 -14
- event_handlers.py +43 -48
- ui_layout.py +14 -22
app.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import os
|
| 3 |
import asyncio
|
|
@@ -13,8 +14,7 @@ from ui_layout import (
|
|
| 13 |
from event_handlers import (
|
| 14 |
handle_script_processing, handle_calculate_cost,
|
| 15 |
update_model_controls_visibility, update_speaker_config_method_visibility,
|
| 16 |
-
handle_load_refresh_button_click
|
| 17 |
-
# load_refresh_per_speaker_ui_core is now internal to event_handlers.py
|
| 18 |
)
|
| 19 |
|
| 20 |
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
|
@@ -22,7 +22,7 @@ NSFW_API_URL_TEMPLATE = os.getenv("NSFW_API_URL_TEMPLATE")
|
|
| 22 |
MODEL_DEFAULT_FROM_ENV = os.getenv("MODEL_DEFAULT", MODEL_DEFAULT_ENV)
|
| 23 |
EFFECTIVE_MODEL_DEFAULT = MODEL_DEFAULT_FROM_ENV if MODEL_DEFAULT_FROM_ENV in TTS_MODELS_AVAILABLE else MODEL_DEFAULT_ENV
|
| 24 |
async_openai_client = None
|
| 25 |
-
|
| 26 |
if not OPENAI_API_KEY:
|
| 27 |
try:
|
| 28 |
from huggingface_hub import HfApi
|
|
@@ -54,22 +54,19 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
| 54 |
(script_input, tts_model_dropdown, pause_input,
|
| 55 |
global_speed_input, global_instructions_input) = create_main_input_components(EFFECTIVE_MODEL_DEFAULT)
|
| 56 |
|
| 57 |
-
# Destructure the returned components from create_speaker_config_components
|
| 58 |
(speaker_config_method_dropdown, single_voice_group, global_voice_dropdown,
|
| 59 |
detailed_per_speaker_ui_group, load_per_speaker_ui_button,
|
| 60 |
-
|
| 61 |
|
| 62 |
(calculate_cost_button, generate_button, cost_output,
|
| 63 |
individual_lines_zip_output, merged_dialogue_mp3_output,
|
| 64 |
status_output) = create_action_and_output_components()
|
| 65 |
|
| 66 |
-
# Event handler for TTS Model Dropdown
|
| 67 |
tts_model_dropdown.change(
|
| 68 |
fn=update_model_controls_visibility,
|
| 69 |
-
#
|
| 70 |
-
inputs=[tts_model_dropdown, script_input, speaker_configs_state],
|
| 71 |
-
|
| 72 |
-
outputs=[global_speed_input, global_instructions_input, dynamic_speaker_ui_wrapper, speaker_configs_state]
|
| 73 |
)
|
| 74 |
|
| 75 |
speaker_config_method_dropdown.change(
|
|
@@ -78,13 +75,11 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
| 78 |
outputs=[single_voice_group, detailed_per_speaker_ui_group]
|
| 79 |
)
|
| 80 |
|
| 81 |
-
# Event handler for the "Load/Refresh" Button
|
| 82 |
load_per_speaker_ui_button.click(
|
| 83 |
fn=handle_load_refresh_button_click,
|
| 84 |
-
#
|
| 85 |
inputs=[script_input, speaker_configs_state, tts_model_dropdown],
|
| 86 |
-
|
| 87 |
-
outputs=[dynamic_speaker_ui_wrapper, speaker_configs_state]
|
| 88 |
)
|
| 89 |
|
| 90 |
calculate_cost_button.click(
|
|
|
|
| 1 |
+
# FILE: app.py
|
| 2 |
import gradio as gr
|
| 3 |
import os
|
| 4 |
import asyncio
|
|
|
|
| 14 |
from event_handlers import (
|
| 15 |
handle_script_processing, handle_calculate_cost,
|
| 16 |
update_model_controls_visibility, update_speaker_config_method_visibility,
|
| 17 |
+
handle_load_refresh_button_click
|
|
|
|
| 18 |
)
|
| 19 |
|
| 20 |
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
|
|
|
| 22 |
MODEL_DEFAULT_FROM_ENV = os.getenv("MODEL_DEFAULT", MODEL_DEFAULT_ENV)
|
| 23 |
EFFECTIVE_MODEL_DEFAULT = MODEL_DEFAULT_FROM_ENV if MODEL_DEFAULT_FROM_ENV in TTS_MODELS_AVAILABLE else MODEL_DEFAULT_ENV
|
| 24 |
async_openai_client = None
|
| 25 |
+
|
| 26 |
if not OPENAI_API_KEY:
|
| 27 |
try:
|
| 28 |
from huggingface_hub import HfApi
|
|
|
|
| 54 |
(script_input, tts_model_dropdown, pause_input,
|
| 55 |
global_speed_input, global_instructions_input) = create_main_input_components(EFFECTIVE_MODEL_DEFAULT)
|
| 56 |
|
|
|
|
| 57 |
(speaker_config_method_dropdown, single_voice_group, global_voice_dropdown,
|
| 58 |
detailed_per_speaker_ui_group, load_per_speaker_ui_button,
|
| 59 |
+
dynamic_speaker_ui_area) = create_speaker_config_components() # dynamic_speaker_ui_area is now a gr.Group
|
| 60 |
|
| 61 |
(calculate_cost_button, generate_button, cost_output,
|
| 62 |
individual_lines_zip_output, merged_dialogue_mp3_output,
|
| 63 |
status_output) = create_action_and_output_components()
|
| 64 |
|
|
|
|
| 65 |
tts_model_dropdown.change(
|
| 66 |
fn=update_model_controls_visibility,
|
| 67 |
+
# MODIFIED: Removed duplicate speaker_configs_state
|
| 68 |
+
inputs=[tts_model_dropdown, script_input, speaker_configs_state],
|
| 69 |
+
outputs=[global_speed_input, global_instructions_input, dynamic_speaker_ui_area, speaker_configs_state]
|
|
|
|
| 70 |
)
|
| 71 |
|
| 72 |
speaker_config_method_dropdown.change(
|
|
|
|
| 75 |
outputs=[single_voice_group, detailed_per_speaker_ui_group]
|
| 76 |
)
|
| 77 |
|
|
|
|
| 78 |
load_per_speaker_ui_button.click(
|
| 79 |
fn=handle_load_refresh_button_click,
|
| 80 |
+
# MODIFIED: Removed duplicate speaker_configs_state
|
| 81 |
inputs=[script_input, speaker_configs_state, tts_model_dropdown],
|
| 82 |
+
outputs=[dynamic_speaker_ui_area, speaker_configs_state]
|
|
|
|
| 83 |
)
|
| 84 |
|
| 85 |
calculate_cost_button.click(
|
event_handlers.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
-
# ... (other imports remain the same) ...
|
| 3 |
import os
|
| 4 |
import asyncio
|
| 5 |
import tempfile
|
|
@@ -18,7 +18,6 @@ DEFAULT_FALLBACK_VOICE = APP_AVAILABLE_VOICES[0] if APP_AVAILABLE_VOICES else "a
|
|
| 18 |
|
| 19 |
|
| 20 |
def get_speakers_from_script(script_text: str):
|
| 21 |
-
# ... (no change) ...
|
| 22 |
if not script_text.strip():
|
| 23 |
return []
|
| 24 |
try:
|
|
@@ -34,7 +33,6 @@ def get_speakers_from_script(script_text: str):
|
|
| 34 |
return []
|
| 35 |
|
| 36 |
def handle_dynamic_input_change(new_value, current_configs_state_dict: dict, speaker_name: str, config_key: str, tts_model: str):
|
| 37 |
-
# ... (no change) ...
|
| 38 |
if current_configs_state_dict is None:
|
| 39 |
current_configs_state_dict = {}
|
| 40 |
if speaker_name not in current_configs_state_dict:
|
|
@@ -44,29 +42,37 @@ def handle_dynamic_input_change(new_value, current_configs_state_dict: dict, spe
|
|
| 44 |
return current_configs_state_dict
|
| 45 |
|
| 46 |
|
| 47 |
-
#
|
| 48 |
def load_refresh_per_speaker_ui_core(script_text: str, current_configs_state_dict: dict, tts_model: str):
|
| 49 |
"""
|
| 50 |
Core logic for generating per-speaker UI components.
|
| 51 |
-
Returns: (
|
| 52 |
"""
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
|
| 59 |
async def handle_script_processing(
|
| 60 |
openai_api_key: str, async_openai_client, nsfw_api_url_template: str,
|
| 61 |
dialogue_script: str, tts_model: str, pause_ms: int,
|
| 62 |
-
# ... (rest of function signature and body unchanged) ...
|
| 63 |
speaker_config_method: str, global_voice_selection: str,
|
| 64 |
speaker_configs_state_dict: dict,
|
| 65 |
global_speed: float,
|
| 66 |
global_instructions: str,
|
| 67 |
progress=gr.Progress(track_tqdm=True)
|
| 68 |
):
|
| 69 |
-
# ... (content of this function remains unchanged) ...
|
| 70 |
if not openai_api_key or not async_openai_client:
|
| 71 |
return None, None, "Error: OpenAI API Key or client is not configured."
|
| 72 |
if not dialogue_script.strip():
|
|
@@ -123,15 +129,15 @@ async def handle_script_processing(
|
|
| 123 |
line_instructions = custom_instr
|
| 124 |
elif vibe != "None" and vibe != "Custom...":
|
| 125 |
line_instructions = PREDEFINED_VIBES.get(vibe, "")
|
| 126 |
-
if not line_instructions and global_instructions and global_instructions.strip():
|
| 127 |
line_instructions = global_instructions
|
| 128 |
-
elif not line_instructions:
|
| 129 |
line_instructions = None
|
| 130 |
elif speaker_config_method in ["Random per Speaker", "A/B Round Robin"]:
|
| 131 |
line_voice = speaker_voice_map.get(speaker_name, safe_default_global_voice)
|
| 132 |
|
| 133 |
-
if tts_model not in ["tts-1", "tts-1-hd"]:
|
| 134 |
-
line_speed = 1.0
|
| 135 |
|
| 136 |
out_fn = os.path.join(job_audio_path_prefix, f"line_{line_data['id']}_{speaker_name.replace(' ','_')}.mp3")
|
| 137 |
progress(i / len(parsed_lines), desc=f"Synthesizing: Line {i+1}/{len(parsed_lines)} ({speaker_name})")
|
|
@@ -176,13 +182,13 @@ async def handle_script_processing(
|
|
| 176 |
zf.write(f_path, os.path.basename(f_path))
|
| 177 |
|
| 178 |
merged_fn = os.path.join(job_audio_path_prefix, "merged_dialogue.mp3")
|
| 179 |
-
files_to_actually_merge = [f for f in ordered_files_for_merge_and_zip if f]
|
| 180 |
merged_path = merge_mp3_files(files_to_actually_merge, merged_fn, pause_ms)
|
| 181 |
|
| 182 |
status = f"Successfully processed {len(valid_files_for_zip)} out of {len(parsed_lines)} lines. "
|
| 183 |
if len(valid_files_for_zip) < len(parsed_lines): status += "Some lines may have failed. "
|
| 184 |
if not merged_path and len(valid_files_for_zip) > 0: status += "Merging audio failed. "
|
| 185 |
-
elif not merged_path: status = "No audio to merge."
|
| 186 |
else: status += "Merged audio generated."
|
| 187 |
|
| 188 |
return (zip_fn if os.path.exists(zip_fn) else None,
|
|
@@ -190,7 +196,6 @@ async def handle_script_processing(
|
|
| 190 |
status)
|
| 191 |
|
| 192 |
def handle_calculate_cost(dialogue_script: str, tts_model: str):
|
| 193 |
-
# ... (no change) ...
|
| 194 |
if not dialogue_script.strip(): return "Cost: $0.00 (Script is empty)"
|
| 195 |
try:
|
| 196 |
parsed, chars = parse_dialogue_script(dialogue_script)
|
|
@@ -199,46 +204,37 @@ def handle_calculate_cost(dialogue_script: str, tts_model: str):
|
|
| 199 |
return f"Estimated Cost for {len(parsed)} lines ({chars} chars): ${cost:.6f}"
|
| 200 |
except ValueError as e:
|
| 201 |
return f"Cost calculation error: {str(e)}"
|
| 202 |
-
except Exception as e:
|
| 203 |
return f"An unexpected error occurred during cost calculation: {str(e)}"
|
| 204 |
|
| 205 |
-
#
|
| 206 |
-
# CLEANUP: Signature changed
|
| 207 |
def handle_load_refresh_button_click(script_text: str, current_configs_state_dict: dict, tts_model: str):
|
| 208 |
-
|
| 209 |
-
script_text, current_configs_state_dict, tts_model #
|
| 210 |
)
|
| 211 |
-
|
| 212 |
-
# Create a new instance of the inner gr.Column with the dynamic children
|
| 213 |
-
new_inner_column = gr.Column(children=components_list_for_inner_column, elem_id="dynamic_ui_area_for_speakers")
|
| 214 |
-
|
| 215 |
-
# Return gr.update for the wrapper group (to replace its children with the new_inner_column),
|
| 216 |
# and the raw state dict for the gr.State component.
|
| 217 |
-
return gr.update(children=
|
| 218 |
|
| 219 |
-
#
|
| 220 |
-
# CLEANUP: Signature changed
|
| 221 |
def update_model_controls_visibility(selected_model: str, script_text_for_refresh: str, current_configs_state_dict: dict):
|
| 222 |
"""Updates visibility of global controls and refreshes per-speaker UI when TTS model changes."""
|
| 223 |
-
print(f"Model changed to: {selected_model}. Refreshing dynamic UI
|
|
|
|
|
|
|
|
|
|
| 224 |
try:
|
| 225 |
-
|
| 226 |
-
script_text_for_refresh, current_configs_state_dict, selected_model #
|
| 227 |
)
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
new_inner_column_for_model_change = gr.Column(children=dynamic_ui_children_list, elem_id="dynamic_ui_area_for_speakers")
|
| 231 |
-
|
| 232 |
-
# This will be the update for the dynamic_speaker_ui_wrapper
|
| 233 |
-
dynamic_ui_update_for_wrapper = gr.update(children=[new_inner_column_for_model_change])
|
| 234 |
|
| 235 |
except Exception as e:
|
| 236 |
print(f"Error in load_refresh_per_speaker_ui_core called from model_controls_visibility: {e}")
|
| 237 |
error_markdown = gr.Markdown(f"Error refreshing per-speaker UI: {e}")
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
dynamic_ui_update_for_wrapper = gr.update(children=[error_inner_column])
|
| 241 |
-
updated_state_dict = current_configs_state_dict
|
| 242 |
|
| 243 |
is_tts1_family = selected_model in ["tts-1", "tts-1-hd"]
|
| 244 |
is_gpt_mini_tts = selected_model == "gpt-4o-mini-tts"
|
|
@@ -246,12 +242,11 @@ def update_model_controls_visibility(selected_model: str, script_text_for_refres
|
|
| 246 |
return (
|
| 247 |
gr.update(visible=is_tts1_family, interactive=is_tts1_family),
|
| 248 |
gr.update(visible=is_gpt_mini_tts, interactive=is_gpt_mini_tts),
|
| 249 |
-
|
| 250 |
-
|
| 251 |
)
|
| 252 |
|
| 253 |
def update_speaker_config_method_visibility(method: str):
|
| 254 |
-
# ... (no change) ...
|
| 255 |
is_single = (method == "Single Voice (Global)")
|
| 256 |
is_detailed_per_speaker = (method == "Detailed Configuration (Per Speaker UI)")
|
| 257 |
|
|
|
|
| 1 |
+
# FILE: event_handlers.py
|
| 2 |
import gradio as gr
|
|
|
|
| 3 |
import os
|
| 4 |
import asyncio
|
| 5 |
import tempfile
|
|
|
|
| 18 |
|
| 19 |
|
| 20 |
def get_speakers_from_script(script_text: str):
|
|
|
|
| 21 |
if not script_text.strip():
|
| 22 |
return []
|
| 23 |
try:
|
|
|
|
| 33 |
return []
|
| 34 |
|
| 35 |
def handle_dynamic_input_change(new_value, current_configs_state_dict: dict, speaker_name: str, config_key: str, tts_model: str):
|
|
|
|
| 36 |
if current_configs_state_dict is None:
|
| 37 |
current_configs_state_dict = {}
|
| 38 |
if speaker_name not in current_configs_state_dict:
|
|
|
|
| 42 |
return current_configs_state_dict
|
| 43 |
|
| 44 |
|
| 45 |
+
# MODIFIED: Removed speaker_configs_state_component from signature
|
| 46 |
def load_refresh_per_speaker_ui_core(script_text: str, current_configs_state_dict: dict, tts_model: str):
|
| 47 |
"""
|
| 48 |
Core logic for generating per-speaker UI components.
|
| 49 |
+
Returns: (list_of_components_for_group, updated_state_dict)
|
| 50 |
"""
|
| 51 |
+
# --- START OF PHASE 1 DEBUGGING (returns list of components directly) ---
|
| 52 |
+
print(f"DEBUG: load_refresh_per_speaker_ui_core CALLED. TTS Model: {tts_model}. Script: '{script_text[:50]}...'")
|
| 53 |
+
# For now, we just return a list containing a single Markdown component.
|
| 54 |
+
# This list will become the children of the dynamic_speaker_ui_area (which is a gr.Group).
|
| 55 |
+
debug_markdown = gr.Markdown(f"## !! Dynamic Area Test Content !!\nModel: {tts_model}\nTimestamp: {gr.utils.now()}", ) # Added timestamp for visual refresh confirmation
|
| 56 |
+
|
| 57 |
+
# When implementing the full logic, this function should:
|
| 58 |
+
# 1. Parse script_text to get unique_speakers.
|
| 59 |
+
# 2. For each speaker, create a gr.Accordion with relevant controls (voice dropdown, speed slider OR vibe/instructions).
|
| 60 |
+
# 3. Populate these controls based on current_configs_state_dict or defaults.
|
| 61 |
+
# 4. Ensure these controls are set up to call handle_dynamic_input_change, updating speaker_configs_state.
|
| 62 |
+
# 5. Return the list of gr.Accordion components and the (potentially modified) current_configs_state_dict.
|
| 63 |
+
|
| 64 |
+
# For PHASE 1, we return a one-element list and the state dict unchanged.
|
| 65 |
+
return [debug_markdown], current_configs_state_dict # Return current_configs_state_dict as it might be used/updated later
|
| 66 |
|
| 67 |
async def handle_script_processing(
|
| 68 |
openai_api_key: str, async_openai_client, nsfw_api_url_template: str,
|
| 69 |
dialogue_script: str, tts_model: str, pause_ms: int,
|
|
|
|
| 70 |
speaker_config_method: str, global_voice_selection: str,
|
| 71 |
speaker_configs_state_dict: dict,
|
| 72 |
global_speed: float,
|
| 73 |
global_instructions: str,
|
| 74 |
progress=gr.Progress(track_tqdm=True)
|
| 75 |
):
|
|
|
|
| 76 |
if not openai_api_key or not async_openai_client:
|
| 77 |
return None, None, "Error: OpenAI API Key or client is not configured."
|
| 78 |
if not dialogue_script.strip():
|
|
|
|
| 129 |
line_instructions = custom_instr
|
| 130 |
elif vibe != "None" and vibe != "Custom...":
|
| 131 |
line_instructions = PREDEFINED_VIBES.get(vibe, "")
|
| 132 |
+
if not line_instructions and global_instructions and global_instructions.strip(): # Fallback to global if specific instructions are empty
|
| 133 |
line_instructions = global_instructions
|
| 134 |
+
elif not line_instructions: # Ensure it's None if truly empty
|
| 135 |
line_instructions = None
|
| 136 |
elif speaker_config_method in ["Random per Speaker", "A/B Round Robin"]:
|
| 137 |
line_voice = speaker_voice_map.get(speaker_name, safe_default_global_voice)
|
| 138 |
|
| 139 |
+
if tts_model not in ["tts-1", "tts-1-hd"]: # Ensure speed is only applied to tts-1 models
|
| 140 |
+
line_speed = 1.0 # Default speed for other models
|
| 141 |
|
| 142 |
out_fn = os.path.join(job_audio_path_prefix, f"line_{line_data['id']}_{speaker_name.replace(' ','_')}.mp3")
|
| 143 |
progress(i / len(parsed_lines), desc=f"Synthesizing: Line {i+1}/{len(parsed_lines)} ({speaker_name})")
|
|
|
|
| 182 |
zf.write(f_path, os.path.basename(f_path))
|
| 183 |
|
| 184 |
merged_fn = os.path.join(job_audio_path_prefix, "merged_dialogue.mp3")
|
| 185 |
+
files_to_actually_merge = [f for f in ordered_files_for_merge_and_zip if f] # Only non-None paths
|
| 186 |
merged_path = merge_mp3_files(files_to_actually_merge, merged_fn, pause_ms)
|
| 187 |
|
| 188 |
status = f"Successfully processed {len(valid_files_for_zip)} out of {len(parsed_lines)} lines. "
|
| 189 |
if len(valid_files_for_zip) < len(parsed_lines): status += "Some lines may have failed. "
|
| 190 |
if not merged_path and len(valid_files_for_zip) > 0: status += "Merging audio failed. "
|
| 191 |
+
elif not merged_path: status = "No audio to merge." # Or all failed
|
| 192 |
else: status += "Merged audio generated."
|
| 193 |
|
| 194 |
return (zip_fn if os.path.exists(zip_fn) else None,
|
|
|
|
| 196 |
status)
|
| 197 |
|
| 198 |
def handle_calculate_cost(dialogue_script: str, tts_model: str):
|
|
|
|
| 199 |
if not dialogue_script.strip(): return "Cost: $0.00 (Script is empty)"
|
| 200 |
try:
|
| 201 |
parsed, chars = parse_dialogue_script(dialogue_script)
|
|
|
|
| 204 |
return f"Estimated Cost for {len(parsed)} lines ({chars} chars): ${cost:.6f}"
|
| 205 |
except ValueError as e:
|
| 206 |
return f"Cost calculation error: {str(e)}"
|
| 207 |
+
except Exception as e: # Generic fallback
|
| 208 |
return f"An unexpected error occurred during cost calculation: {str(e)}"
|
| 209 |
|
| 210 |
+
# MODIFIED: Removed speaker_configs_state_comp from signature
|
|
|
|
| 211 |
def handle_load_refresh_button_click(script_text: str, current_configs_state_dict: dict, tts_model: str):
|
| 212 |
+
components_list_for_group, new_state_dict = load_refresh_per_speaker_ui_core(
|
| 213 |
+
script_text, current_configs_state_dict, tts_model # MODIFIED: Call without speaker_configs_state_comp
|
| 214 |
)
|
| 215 |
+
# Return gr.update for the dynamic_speaker_ui_area (which is a gr.Group),
|
|
|
|
|
|
|
|
|
|
|
|
|
| 216 |
# and the raw state dict for the gr.State component.
|
| 217 |
+
return gr.update(children=components_list_for_group), new_state_dict
|
| 218 |
|
| 219 |
+
# MODIFIED: Removed speaker_configs_state_comp from signature
|
|
|
|
| 220 |
def update_model_controls_visibility(selected_model: str, script_text_for_refresh: str, current_configs_state_dict: dict):
|
| 221 |
"""Updates visibility of global controls and refreshes per-speaker UI when TTS model changes."""
|
| 222 |
+
print(f"Model changed to: {selected_model}. Refreshing dynamic UI and controls.")
|
| 223 |
+
dynamic_ui_update_for_group = None
|
| 224 |
+
updated_state_dict_for_return = current_configs_state_dict
|
| 225 |
+
|
| 226 |
try:
|
| 227 |
+
dynamic_ui_components_list, updated_state_dict = load_refresh_per_speaker_ui_core(
|
| 228 |
+
script_text_for_refresh, current_configs_state_dict, selected_model # MODIFIED: Call without speaker_configs_state_comp
|
| 229 |
)
|
| 230 |
+
dynamic_ui_update_for_group = gr.update(children=dynamic_ui_components_list)
|
| 231 |
+
updated_state_dict_for_return = updated_state_dict
|
|
|
|
|
|
|
|
|
|
|
|
|
| 232 |
|
| 233 |
except Exception as e:
|
| 234 |
print(f"Error in load_refresh_per_speaker_ui_core called from model_controls_visibility: {e}")
|
| 235 |
error_markdown = gr.Markdown(f"Error refreshing per-speaker UI: {e}")
|
| 236 |
+
dynamic_ui_update_for_group = gr.update(children=[error_markdown])
|
| 237 |
+
# Keep current_configs_state_dict as is on error
|
|
|
|
|
|
|
| 238 |
|
| 239 |
is_tts1_family = selected_model in ["tts-1", "tts-1-hd"]
|
| 240 |
is_gpt_mini_tts = selected_model == "gpt-4o-mini-tts"
|
|
|
|
| 242 |
return (
|
| 243 |
gr.update(visible=is_tts1_family, interactive=is_tts1_family),
|
| 244 |
gr.update(visible=is_gpt_mini_tts, interactive=is_gpt_mini_tts),
|
| 245 |
+
dynamic_ui_update_for_group,
|
| 246 |
+
updated_state_dict_for_return
|
| 247 |
)
|
| 248 |
|
| 249 |
def update_speaker_config_method_visibility(method: str):
|
|
|
|
| 250 |
is_single = (method == "Single Voice (Global)")
|
| 251 |
is_detailed_per_speaker = (method == "Detailed Configuration (Per Speaker UI)")
|
| 252 |
|
ui_layout.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
from utils.openai_tts import OPENAI_VOICES as ALL_TTS_VOICES # Import directly for APP_AVAILABLE_VOICES
|
| 3 |
|
| 4 |
-
# --- UI Constants and Configuration
|
| 5 |
TTS_MODELS_AVAILABLE = ["tts-1", "tts-1-hd", "gpt-4o-mini-tts"]
|
| 6 |
MODEL_DEFAULT_ENV = "tts-1-hd" # Default if env var not set or invalid
|
| 7 |
|
|
@@ -42,7 +42,7 @@ def create_main_input_components(model_default_value):
|
|
| 42 |
"""Creates the main input components for script, model, pause, and global settings."""
|
| 43 |
with gr.Row():
|
| 44 |
with gr.Column(scale=2):
|
| 45 |
-
script_input = gr.TextArea(label="Dialogue Script", placeholder="[Speaker1] Hello world
|
| 46 |
with gr.Column(scale=1):
|
| 47 |
tts_model_dropdown = gr.Dropdown(TTS_MODELS_AVAILABLE, label="TTS Model", value=model_default_value)
|
| 48 |
pause_input = gr.Number(label="Pause Between Lines (ms)", value=500, minimum=0, maximum=5000, step=50)
|
|
@@ -80,23 +80,17 @@ def create_speaker_config_components():
|
|
| 80 |
with gr.Column(visible=(DEFAULT_SPEAKER_CONFIG_METHOD == "Detailed Configuration (Per Speaker UI)")) as detailed_per_speaker_ui_group:
|
| 81 |
load_per_speaker_ui_button = gr.Button("Load/Refresh Per-Speaker Settings UI (from Script Above)")
|
| 82 |
gr.Markdown("<small>Click button above to populate settings for each speaker found in the script. Settings are applied per-speaker. If script changes, click again to refresh.</small>")
|
| 83 |
-
|
| 84 |
-
#
|
| 85 |
-
|
| 86 |
-
dynamic_speaker_ui_wrapper = gr.Group(elem_id="dynamic_ui_wrapper_for_speakers")
|
| 87 |
-
with dynamic_speaker_ui_wrapper:
|
| 88 |
-
# The actual column is now a child of the wrapper.
|
| 89 |
-
# It can be initially empty or have a placeholder if desired.
|
| 90 |
-
dynamic_speaker_ui_area = gr.Column(elem_id="dynamic_ui_area_for_speakers")
|
| 91 |
|
| 92 |
-
# Ensure the new wrapper is returned correctly
|
| 93 |
return (
|
| 94 |
speaker_config_method_dropdown,
|
| 95 |
single_voice_group,
|
| 96 |
global_voice_dropdown,
|
| 97 |
detailed_per_speaker_ui_group,
|
| 98 |
load_per_speaker_ui_button,
|
| 99 |
-
|
| 100 |
)
|
| 101 |
|
| 102 |
def create_action_and_output_components():
|
|
@@ -112,11 +106,11 @@ def create_action_and_output_components():
|
|
| 112 |
status_output = gr.Textbox(label="Status", interactive=False, lines=2, max_lines=5)
|
| 113 |
return calculate_cost_button, generate_button, cost_output, individual_lines_zip_output, merged_dialogue_mp3_output, status_output
|
| 114 |
|
| 115 |
-
def create_examples_ui(inputs_for_examples, process_fn, outputs_for_examples=None):
|
| 116 |
"""Creates the examples section."""
|
| 117 |
gr.Markdown("## Example Scripts")
|
| 118 |
-
example_script_1 = "[Alice] Hello Bob, this is a test using the detailed configuration method
|
| 119 |
-
example_script_2 = "[Narrator] This is a short story
|
| 120 |
|
| 121 |
examples_data = [
|
| 122 |
[example_script_1, "tts-1-hd", 300, "Detailed Configuration (Per Speaker UI)", DEFAULT_GLOBAL_VOICE, {}, 1.0, ""],
|
|
@@ -136,23 +130,21 @@ def create_examples_ui(inputs_for_examples, process_fn, outputs_for_examples=Non
|
|
| 136 |
gr.Markdown("<p style='color: orange;'>No valid examples could be loaded due to configuration mismatch.</p>")
|
| 137 |
return None
|
| 138 |
|
| 139 |
-
# If process_fn and outputs_for_examples are provided, make examples runnable
|
| 140 |
if process_fn and outputs_for_examples:
|
| 141 |
return gr.Examples(
|
| 142 |
examples=valid_examples_data,
|
| 143 |
inputs=inputs_for_examples,
|
| 144 |
outputs=outputs_for_examples,
|
| 145 |
fn=process_fn,
|
| 146 |
-
cache_examples=False,
|
| 147 |
-
examples_per_page=5,
|
| 148 |
label="Example Scripts (Click to Load & Run)",
|
| 149 |
-
run_on_click=True
|
| 150 |
)
|
| 151 |
-
else:
|
| 152 |
return gr.Examples(
|
| 153 |
examples=valid_examples_data,
|
| 154 |
inputs=inputs_for_examples,
|
| 155 |
-
examples_per_page=5,
|
| 156 |
label="Example Scripts (Click to Load Inputs)",
|
| 157 |
-
# No fn, outputs, or run_on_click if process_fn is None
|
| 158 |
)
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
from utils.openai_tts import OPENAI_VOICES as ALL_TTS_VOICES # Import directly for APP_AVAILABLE_VOICES
|
| 3 |
|
| 4 |
+
# --- UI Constants and Configuration ---
|
| 5 |
TTS_MODELS_AVAILABLE = ["tts-1", "tts-1-hd", "gpt-4o-mini-tts"]
|
| 6 |
MODEL_DEFAULT_ENV = "tts-1-hd" # Default if env var not set or invalid
|
| 7 |
|
|
|
|
| 42 |
"""Creates the main input components for script, model, pause, and global settings."""
|
| 43 |
with gr.Row():
|
| 44 |
with gr.Column(scale=2):
|
| 45 |
+
script_input = gr.TextArea(label="Dialogue Script", placeholder="[Speaker1] Hello world!\\n[Speaker2] How are you today?", lines=10)
|
| 46 |
with gr.Column(scale=1):
|
| 47 |
tts_model_dropdown = gr.Dropdown(TTS_MODELS_AVAILABLE, label="TTS Model", value=model_default_value)
|
| 48 |
pause_input = gr.Number(label="Pause Between Lines (ms)", value=500, minimum=0, maximum=5000, step=50)
|
|
|
|
| 80 |
with gr.Column(visible=(DEFAULT_SPEAKER_CONFIG_METHOD == "Detailed Configuration (Per Speaker UI)")) as detailed_per_speaker_ui_group:
|
| 81 |
load_per_speaker_ui_button = gr.Button("Load/Refresh Per-Speaker Settings UI (from Script Above)")
|
| 82 |
gr.Markdown("<small>Click button above to populate settings for each speaker found in the script. Settings are applied per-speaker. If script changes, click again to refresh.</small>")
|
| 83 |
+
# MODIFIED: dynamic_speaker_ui_area is now a gr.Group.
|
| 84 |
+
# Children will be added/updated dynamically to this group.
|
| 85 |
+
dynamic_speaker_ui_area = gr.Group(elem_id="dynamic_ui_area_for_speakers")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
|
|
|
|
| 87 |
return (
|
| 88 |
speaker_config_method_dropdown,
|
| 89 |
single_voice_group,
|
| 90 |
global_voice_dropdown,
|
| 91 |
detailed_per_speaker_ui_group,
|
| 92 |
load_per_speaker_ui_button,
|
| 93 |
+
dynamic_speaker_ui_area # This is now a gr.Group
|
| 94 |
)
|
| 95 |
|
| 96 |
def create_action_and_output_components():
|
|
|
|
| 106 |
status_output = gr.Textbox(label="Status", interactive=False, lines=2, max_lines=5)
|
| 107 |
return calculate_cost_button, generate_button, cost_output, individual_lines_zip_output, merged_dialogue_mp3_output, status_output
|
| 108 |
|
| 109 |
+
def create_examples_ui(inputs_for_examples, process_fn, outputs_for_examples=None):
|
| 110 |
"""Creates the examples section."""
|
| 111 |
gr.Markdown("## Example Scripts")
|
| 112 |
+
example_script_1 = "[Alice] Hello Bob, this is a test using the detailed configuration method.\\n[Bob] Hi Alice! I'm Bob, and I'll have my own voice settings.\\n[Alice] Let's see how this sounds."
|
| 113 |
+
example_script_2 = "[Narrator] This is a short story.\\n[CharacterA] Once upon a time...\\n[Narrator] ...there was a Gradio app.\\n[CharacterB] And it could talk!"
|
| 114 |
|
| 115 |
examples_data = [
|
| 116 |
[example_script_1, "tts-1-hd", 300, "Detailed Configuration (Per Speaker UI)", DEFAULT_GLOBAL_VOICE, {}, 1.0, ""],
|
|
|
|
| 130 |
gr.Markdown("<p style='color: orange;'>No valid examples could be loaded due to configuration mismatch.</p>")
|
| 131 |
return None
|
| 132 |
|
|
|
|
| 133 |
if process_fn and outputs_for_examples:
|
| 134 |
return gr.Examples(
|
| 135 |
examples=valid_examples_data,
|
| 136 |
inputs=inputs_for_examples,
|
| 137 |
outputs=outputs_for_examples,
|
| 138 |
fn=process_fn,
|
| 139 |
+
cache_examples=False,
|
| 140 |
+
examples_per_page=5,
|
| 141 |
label="Example Scripts (Click to Load & Run)",
|
| 142 |
+
run_on_click=True
|
| 143 |
)
|
| 144 |
+
else:
|
| 145 |
return gr.Examples(
|
| 146 |
examples=valid_examples_data,
|
| 147 |
inputs=inputs_for_examples,
|
| 148 |
+
examples_per_page=5,
|
| 149 |
label="Example Scripts (Click to Load Inputs)",
|
|
|
|
| 150 |
)
|