Spaces:
Running
Running
File size: 11,156 Bytes
e5a707f 1190db4 635a2fb 49a48a4 8468afb 66f012e 8468afb 0b876d2 49a48a4 66f012e 8468afb 92c9b3d 7633d98 49a48a4 8468afb 7633d98 66f012e d44dfc0 7633d98 92c9b3d 2e02a22 66f012e 1190db4 92c9b3d d44dfc0 0b876d2 8468afb d44dfc0 8468afb 49a48a4 92c9b3d d44dfc0 8468afb 5c85d81 7633d98 66f012e 92c9b3d 66f012e 0b876d2 92c9b3d 7633d98 0b876d2 b7680b4 0b876d2 66f012e 0b876d2 66f012e 8468afb 62d5317 66f012e b95b786 8ccb15b b95b786 b1347ef b95b786 8ccb15b b95b786 b1347ef b95b786 66f012e b1347ef 62d5317 66f012e 0b876d2 66f012e 0b876d2 66f012e 0b876d2 66f012e 0b876d2 66f012e b1347ef 66f012e 0b876d2 66f012e 0b876d2 b7680b4 7633d98 1190db4 49a48a4 d48101f b7680b4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 |
# FILE: app.py
import gradio as gr
import os
import asyncio
from openai import AsyncOpenAI
from functools import partial
import datetime
# NOTE: create_examples_ui is no longer imported from ui_layout; the examples section is built inline in this file.
from ui_layout import (
create_main_input_components, create_speaker_config_components,
create_action_and_output_components, # Removed create_examples_ui
TTS_MODELS_AVAILABLE, MODEL_DEFAULT_ENV, APP_AVAILABLE_VOICES,
DEFAULT_GLOBAL_VOICE, VIBE_CHOICES, DEFAULT_VIBE, PREDEFINED_VIBES
)
from event_handlers import (
handle_script_processing, handle_calculate_cost,
handle_speaker_config_method_visibility_change,
handle_tts_model_change,
handle_load_refresh_per_speaker_ui_trigger,
handle_dynamic_accordion_input_change,
get_speakers_from_script
)
# --- Secrets and client setup ---
# All values below are read once from the process environment at import time.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
NSFW_API_URL_TEMPLATE = os.getenv("NSFW_API_URL_TEMPLATE")

# MODEL_DEFAULT may override the packaged default, but only when it names one
# of the models the app offers; anything else falls back to MODEL_DEFAULT_ENV.
MODEL_DEFAULT_FROM_ENV = os.getenv("MODEL_DEFAULT", MODEL_DEFAULT_ENV)
if MODEL_DEFAULT_FROM_ENV in TTS_MODELS_AVAILABLE:
    EFFECTIVE_MODEL_DEFAULT = MODEL_DEFAULT_FROM_ENV
else:
    EFFECTIVE_MODEL_DEFAULT = MODEL_DEFAULT_ENV

# NOTE(review): this spot previously held an empty "(secret loading logic)"
# placeholder that did nothing when the key was missing; add a real fallback
# here if one is needed.

# The async client only exists when a key is available. The UI checks for a
# missing client and shows a warning instead of failing at import.
if OPENAI_API_KEY:
    async_openai_client = AsyncOpenAI(api_key=OPENAI_API_KEY)
else:
    async_openai_client = None
    print("CRITICAL ERROR: OPENAI_API_KEY secret is not set.")
# --- Main Blocks UI Definition ---
# Builds the whole page inside a single gr.Blocks context: the static input
# widgets, the per-speaker settings that are re-rendered from the script, the
# event wiring, and a list of clickable example scripts.
with gr.Blocks(theme=gr.themes.Soft(), elem_id="main_blocks_ui") as demo:
    gr.Markdown("# Dialogue Script to Speech (OpenAI TTS) 💪💪💪 TTS = Teachers Together Strong 💪💪💪")
    if not OPENAI_API_KEY or not async_openai_client:
        gr.Markdown("<h3 style='color:red;'>⚠️ Warning: OPENAI_API_KEY not set or invalid. Audio generation will fail.</h3>")

    # Per-speaker settings, looked up by speaker name in the render function.
    speaker_configs_state = gr.State({})

    # --- Create Main UI Components ---
    (
        script_input,
        tts_model_dropdown,
        pause_input,
        global_speed_input,
        global_instructions_input,
    ) = create_main_input_components(EFFECTIVE_MODEL_DEFAULT)
    (
        speaker_config_method_dropdown,
        single_voice_group,
        global_voice_dropdown,
        detailed_per_speaker_ui_group_container,
        load_per_speaker_ui_button,
    ) = create_speaker_config_components()
    (
        calculate_cost_button,
        generate_button,
        cost_output,
        individual_lines_zip_output,
        merged_dialogue_mp3_output,
        status_output,
    ) = create_action_and_output_components()

    # --- Dynamic UI (@gr.render) Definition ---
    with detailed_per_speaker_ui_group_container:
        @gr.render(
            inputs=[script_input, speaker_configs_state, tts_model_dropdown],
            triggers=[load_per_speaker_ui_button.click, tts_model_dropdown.change],
        )
        def render_dynamic_speaker_ui(current_script_text: str, current_speaker_configs: dict, current_tts_model: str):
            """Render one settings accordion per speaker found in the script.

            Re-runs when the "load/refresh" button is clicked or the TTS model
            changes. Every accordion gets a voice dropdown; "tts-1"/"tts-1-hd"
            additionally get a speed slider, and "gpt-4o-mini-tts" gets a vibe
            dropdown plus a custom-instructions textbox that is only visible
            while the vibe is "Custom...".

            Args:
                current_script_text: Current value of the script input.
                current_speaker_configs: Current value of speaker_configs_state;
                    anything that is not a dict is treated as empty.
                current_tts_model: Currently selected TTS model name.
            """

            def persist_on_change(component, speaker_name, config_key):
                # Route a widget's change event through the shared handler, with
                # speaker_configs_state as both input and output.
                return component.change(
                    fn=partial(
                        handle_dynamic_accordion_input_change,
                        speaker_name=speaker_name,
                        config_key=config_key,
                    ),
                    inputs=[component, speaker_configs_state],
                    outputs=[speaker_configs_state],
                )

            # The script value may arrive as None; the debug preview must not
            # raise in that case.
            script_preview = (current_script_text or "")[:30]
            state_keys = list(current_speaker_configs.keys()) if isinstance(current_speaker_configs, dict) else 'Not a dict'
            print(f"DEBUG: @gr.render CALLED. Model: {current_tts_model}. Script: '{script_preview}...'. State Keys: {state_keys}")

            unique_speakers = get_speakers_from_script(current_script_text)
            if not unique_speakers:
                gr.Markdown("<p style='color: #888; margin-top:10px;'>Enter script & click 'Load/Refresh' for per-speaker settings.</p>")
                return

            # Normalise the state once instead of on every loop iteration.
            if not isinstance(current_speaker_configs, dict):
                current_speaker_configs = {}

            for speaker_idx, speaker_name in enumerate(unique_speakers):
                speaker_specific_config = current_speaker_configs.get(speaker_name, {})
                accordion_elem_id = f"accordion_spk_{speaker_idx}_{speaker_name.replace(' ','_').lower()}"
                with gr.Accordion(f"Settings for Speaker: {speaker_name}", open=False, elem_id=accordion_elem_id):
                    gr.Markdown(f"Configure voice for **{speaker_name}** using **{current_tts_model}** model.")

                    voice_dropdown = gr.Dropdown(
                        APP_AVAILABLE_VOICES,
                        value=speaker_specific_config.get("voice", DEFAULT_GLOBAL_VOICE),
                        label="Voice",
                        elem_id=f"voice_dd_spk_{speaker_idx}",
                    )
                    persist_on_change(voice_dropdown, speaker_name, "voice")

                    if current_tts_model in ["tts-1", "tts-1-hd"]:
                        speed_slider = gr.Slider(
                            minimum=0.25,
                            maximum=4.0,
                            value=float(speaker_specific_config.get("speed", 1.0)),
                            step=0.05,
                            label="Speed",
                            elem_id=f"speed_slider_spk_{speaker_idx}",
                        )
                        persist_on_change(speed_slider, speaker_name, "speed")
                    elif current_tts_model == "gpt-4o-mini-tts":
                        default_vibe = speaker_specific_config.get("vibe", DEFAULT_VIBE)
                        vibe_dropdown = gr.Dropdown(
                            VIBE_CHOICES,
                            value=default_vibe,
                            label="Vibe/Emotion",
                            elem_id=f"vibe_dd_spk_{speaker_idx}",
                        )
                        custom_instructions_textbox = gr.Textbox(
                            label="Custom Instructions",
                            value=speaker_specific_config.get("custom_instructions", ""),
                            placeholder="e.g., Speak slightly hesitant.",
                            lines=2,
                            visible=(default_vibe == "Custom..."),
                            elem_id=f"custom_instr_tb_spk_{speaker_idx}",
                        )
                        # Store the vibe first, then show or hide the free-text
                        # box to match the new selection.
                        persist_on_change(vibe_dropdown, speaker_name, "vibe").then(
                            fn=lambda vibe_val: gr.update(visible=(vibe_val == "Custom...")),
                            inputs=[vibe_dropdown],
                            outputs=[custom_instructions_textbox],
                        )
                        persist_on_change(custom_instructions_textbox, speaker_name, "custom_instructions")

    # --- Event Listeners ---
    tts_model_dropdown.change(
        fn=handle_tts_model_change,
        inputs=[tts_model_dropdown, speaker_configs_state],
        outputs=[global_speed_input, global_instructions_input, speaker_configs_state],
    )
    speaker_config_method_dropdown.change(
        fn=handle_speaker_config_method_visibility_change,
        inputs=[speaker_config_method_dropdown],
        outputs=[single_voice_group, detailed_per_speaker_ui_group_container],
    )
    load_per_speaker_ui_button.click(
        fn=handle_load_refresh_per_speaker_ui_trigger,
        inputs=[script_input, speaker_configs_state, tts_model_dropdown],
        outputs=[speaker_configs_state],
    )
    calculate_cost_button.click(
        fn=handle_calculate_cost,
        inputs=[script_input, tts_model_dropdown],
        outputs=[cost_output],
    )
    # The key, client and NSFW URL template are bound up front; Gradio supplies
    # the eight component values listed in `inputs`.
    generate_button_fn = partial(handle_script_processing, OPENAI_API_KEY, async_openai_client, NSFW_API_URL_TEMPLATE)
    generate_button.click(
        fn=generate_button_fn,
        inputs=[
            script_input,
            tts_model_dropdown,
            pause_input,
            speaker_config_method_dropdown,
            global_voice_dropdown,
            speaker_configs_state,
            global_speed_input,
            global_instructions_input,
        ],
        outputs=[individual_lines_zip_output, merged_dialogue_mp3_output, status_output],
    )

    # --- Examples Section Definition ---
    gr.Markdown("## Example Scripts")
    # Components filled in when an example row is clicked. Each example row
    # below must have exactly one value per component, in this order.
    example_inputs_list_comps = [
        script_input,
        tts_model_dropdown,
        pause_input,
        speaker_config_method_dropdown,
        global_voice_dropdown,
        global_speed_input,
        global_instructions_input,
    ]
    examples_data = [
        [
            (
                "[Alice] Hello Bob, this is a test using the detailed configuration method.\n"
                "[Bob] Hi Alice! I'm Bob, and I'll have my own voice settings.\n"
                "[Alice] Let's see how this sounds."
            ),
            "tts-1-hd",
            300,
            "Random per Speaker",
            DEFAULT_GLOBAL_VOICE,
            1.0,
            "",
        ],
        [
            (
                "[Narrator] Once upon a time, there was a gentle breeze over the hills.\n"
                "[Narrator] The village below prepared for the annual festival as the sun set."
            ),
            "gpt-4o-mini-tts",
            200,
            "Detailed Configuration (Per Speaker UI)",
            DEFAULT_GLOBAL_VOICE,
            1.0,
            "Speak with a gentle, storytelling tone.",
        ],
        [
            "[Solo] This is a quick single‑voice demo for testing purposes.",
            "tts-1",
            0,
            "Single Voice (Global)",
            "fable",
            1.2,
            "",
        ],
    ]

    # Drop rows whose length does not match the input components.
    num_inputs_expected = len(example_inputs_list_comps)
    valid_examples_data_inline = []
    for ex_data in examples_data:
        if len(ex_data) != num_inputs_expected:
            print(f"Warning (Inline Examples): Example data mismatch. Expected {num_inputs_expected}, got {len(ex_data)}. Skipping.")
            continue
        valid_examples_data_inline.append(ex_data)

    if valid_examples_data_inline:
        # Examples only populate the inputs. No `fn`/`outputs` are passed:
        # per the Gradio docs, `fn` is not invoked when cache_examples and
        # run_on_click are both False, and handle_script_processing is wired
        # for eight inputs (including speaker_configs_state) while an example
        # row supplies seven. Running examples on click would need an adapter
        # that provides the speaker configuration.
        gr.Examples(
            examples=valid_examples_data_inline,
            inputs=example_inputs_list_comps,
            cache_examples=False,
            examples_per_page=5,
            label=(
                "Example Scripts (Click to Load)"
                if OPENAI_API_KEY
                else "Example Scripts (Click to Load Inputs)"
            ),
            run_on_click=False,
        )
    else:
        gr.Markdown("<p style='color: orange;'>No valid examples could be loaded due to configuration mismatch.</p>")
# --- Launch ---
# Only start the server when this file is executed directly, not on import.
if __name__ == "__main__":
    running_on_windows = os.name == 'nt'
    if running_on_windows:
        # NOTE(review): presumably works around issues with the default Windows
        # event loop - confirm this is still required.
        selector_policy = asyncio.WindowsSelectorEventLoopPolicy()
        asyncio.set_event_loop_policy(selector_policy)
    # Enable request queuing, then serve locally without a public share link.
    queued_demo = demo.queue()
    queued_demo.launch(debug=True, share=False)