# (Hugging Face Space page residue removed — kept as a comment for provenance)
# abocha — "Update app.py" — commit 2e02a22 (verified)
# FILE: app.py
import gradio as gr
import os
import asyncio
from openai import AsyncOpenAI
from functools import partial
import datetime
# Remove create_examples_ui from ui_layout imports if it's not used elsewhere
from ui_layout import (
create_main_input_components, create_speaker_config_components,
create_action_and_output_components, # Removed create_examples_ui
TTS_MODELS_AVAILABLE, MODEL_DEFAULT_ENV, APP_AVAILABLE_VOICES,
DEFAULT_GLOBAL_VOICE, VIBE_CHOICES, DEFAULT_VIBE, PREDEFINED_VIBES
)
from event_handlers import (
handle_script_processing, handle_calculate_cost,
handle_speaker_config_method_visibility_change,
handle_tts_model_change,
handle_load_refresh_per_speaker_ui_trigger,
handle_dynamic_accordion_input_change,
get_speakers_from_script
)
# --- Secrets and Client Setup (Same as before) ---
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
NSFW_API_URL_TEMPLATE = os.getenv("NSFW_API_URL_TEMPLATE")
MODEL_DEFAULT_FROM_ENV = os.getenv("MODEL_DEFAULT", MODEL_DEFAULT_ENV)

# Honour the env-supplied model only when the UI actually offers it;
# otherwise fall back to the packaged default.
EFFECTIVE_MODEL_DEFAULT = (
    MODEL_DEFAULT_FROM_ENV
    if MODEL_DEFAULT_FROM_ENV in TTS_MODELS_AVAILABLE
    else MODEL_DEFAULT_ENV
)

# ... (secret loading logic) ...
# Build the async OpenAI client only when a key is present; the UI below
# shows a warning banner and degrades gracefully when it stays None.
async_openai_client = AsyncOpenAI(api_key=OPENAI_API_KEY) if OPENAI_API_KEY else None
if async_openai_client is None:
    print("CRITICAL ERROR: OPENAI_API_KEY secret is not set.")
# --- Main Blocks UI Definition ---
# NOTE(review): inside gr.Blocks, component creation order defines the page
# layout, so the statement order below is load-bearing — do not reorder.
with gr.Blocks(theme=gr.themes.Soft(), elem_id="main_blocks_ui") as demo:
    gr.Markdown("# Dialogue Script to Speech (OpenAI TTS) 💪💪💪 TTS = Teachers Together Strong 💪💪💪")
    if not OPENAI_API_KEY or not async_openai_client:
        gr.Markdown("<h3 style='color:red;'>⚠️ Warning: OPENAI_API_KEY not set or invalid. Audio generation will fail.</h3>")
    # Per-session dict of per-speaker settings (voice / speed / vibe / custom
    # instructions), keyed by speaker name; read and written by the dynamic
    # accordion handlers below.
    speaker_configs_state = gr.State({})
    # --- Create Main UI Components ---
    (script_input, tts_model_dropdown, pause_input,
     global_speed_input, global_instructions_input) = create_main_input_components(EFFECTIVE_MODEL_DEFAULT)
    (speaker_config_method_dropdown, single_voice_group, global_voice_dropdown,
     detailed_per_speaker_ui_group_container,
     load_per_speaker_ui_button) = create_speaker_config_components()
    (calculate_cost_button, generate_button, cost_output,
     individual_lines_zip_output, merged_dialogue_mp3_output,
     status_output) = create_action_and_output_components()
    # --- Dynamic UI (@gr.render) Definition (Same as before) ---
    with detailed_per_speaker_ui_group_container:
        @gr.render(
            inputs=[script_input, speaker_configs_state, tts_model_dropdown],
            triggers=[load_per_speaker_ui_button.click, tts_model_dropdown.change]
        )
        def render_dynamic_speaker_ui(current_script_text: str, current_speaker_configs: dict, current_tts_model: str):
            """Rebuild one settings accordion per speaker found in the script.

            Re-runs on 'Load/Refresh' clicks and TTS-model changes. Which
            controls appear depends on the model: a speed slider for
            tts-1 / tts-1-hd, vibe + custom-instructions for gpt-4o-mini-tts.
            """
            # ... (Full @gr.render implementation from previous correct step) ...
            print(f"DEBUG: @gr.render CALLED. Model: {current_tts_model}. Script: '{current_script_text[:30]}...'. State Keys: {list(current_speaker_configs.keys()) if isinstance(current_speaker_configs,dict) else 'Not a dict'}")
            unique_speakers = get_speakers_from_script(current_script_text)
            if not unique_speakers:
                gr.Markdown("<p style='color: #888; margin-top:10px;'>Enter script & click 'Load/Refresh' for per-speaker settings.</p>")
                return
            for speaker_idx, speaker_name in enumerate(unique_speakers):
                # State may arrive in a non-dict shape on early renders; normalize.
                if not isinstance(current_speaker_configs, dict): current_speaker_configs = {}
                speaker_specific_config = current_speaker_configs.get(speaker_name, {})
                accordion_elem_id = f"accordion_spk_{speaker_idx}_{speaker_name.replace(' ','_').lower()}"
                with gr.Accordion(f"Settings for Speaker: {speaker_name}", open=False, elem_id=accordion_elem_id):
                    gr.Markdown(f"Configure voice for **{speaker_name}** using **{current_tts_model}** model.")
                    # Voice selector is shown for every model.
                    default_voice = speaker_specific_config.get("voice", DEFAULT_GLOBAL_VOICE)
                    voice_dd_elem_id = f"voice_dd_spk_{speaker_idx}"
                    voice_dropdown = gr.Dropdown(APP_AVAILABLE_VOICES, value=default_voice, label="Voice", elem_id=voice_dd_elem_id)
                    voice_dropdown.change(fn=partial(handle_dynamic_accordion_input_change, speaker_name=speaker_name, config_key="voice"), inputs=[voice_dropdown, speaker_configs_state], outputs=[speaker_configs_state])
                    if current_tts_model in ["tts-1", "tts-1-hd"]:
                        # Classic TTS models: expose a per-speaker playback speed.
                        default_speed = float(speaker_specific_config.get("speed", 1.0))
                        speed_slider_elem_id = f"speed_slider_spk_{speaker_idx}"
                        speed_slider = gr.Slider(minimum=0.25, maximum=4.0, value=default_speed, step=0.05, label="Speed", elem_id=speed_slider_elem_id)
                        speed_slider.change(fn=partial(handle_dynamic_accordion_input_change, speaker_name=speaker_name, config_key="speed"), inputs=[speed_slider, speaker_configs_state], outputs=[speaker_configs_state])
                    elif current_tts_model == "gpt-4o-mini-tts":
                        # gpt-4o-mini-tts: vibe preset plus optional free-text instructions.
                        default_vibe = speaker_specific_config.get("vibe", DEFAULT_VIBE)
                        vibe_dd_elem_id = f"vibe_dd_spk_{speaker_idx}"
                        vibe_dropdown = gr.Dropdown(VIBE_CHOICES, value=default_vibe, label="Vibe/Emotion", elem_id=vibe_dd_elem_id)
                        default_custom_instructions = speaker_specific_config.get("custom_instructions", "")
                        custom_instr_tb_elem_id = f"custom_instr_tb_spk_{speaker_idx}"
                        # Free-text box is only visible when the 'Custom...' vibe is selected.
                        custom_instructions_textbox = gr.Textbox(label="Custom Instructions", value=default_custom_instructions, placeholder="e.g., Speak slightly hesitant.", lines=2, visible=(default_vibe == "Custom..."), elem_id=custom_instr_tb_elem_id)
                        # Persist the vibe first, then toggle the custom box's visibility.
                        vibe_dropdown.change(fn=partial(handle_dynamic_accordion_input_change, speaker_name=speaker_name, config_key="vibe"), inputs=[vibe_dropdown, speaker_configs_state], outputs=[speaker_configs_state]).then(fn=lambda vibe_val: gr.update(visible=(vibe_val == "Custom...")), inputs=[vibe_dropdown], outputs=[custom_instructions_textbox])
                        custom_instructions_textbox.change(fn=partial(handle_dynamic_accordion_input_change, speaker_name=speaker_name, config_key="custom_instructions"), inputs=[custom_instructions_textbox, speaker_configs_state], outputs=[speaker_configs_state])
    # --- Event Listeners (Same as before) ---
    tts_model_dropdown.change(fn=handle_tts_model_change, inputs=[tts_model_dropdown, speaker_configs_state], outputs=[global_speed_input, global_instructions_input, speaker_configs_state])
    speaker_config_method_dropdown.change(fn=handle_speaker_config_method_visibility_change, inputs=[speaker_config_method_dropdown], outputs=[single_voice_group, detailed_per_speaker_ui_group_container])
    load_per_speaker_ui_button.click(fn=handle_load_refresh_per_speaker_ui_trigger, inputs=[script_input, speaker_configs_state, tts_model_dropdown], outputs=[speaker_configs_state])
    calculate_cost_button.click(fn=handle_calculate_cost, inputs=[script_input, tts_model_dropdown], outputs=[cost_output])
    # Bind the secrets/client once so the click handler receives only UI inputs.
    generate_button_fn = partial(handle_script_processing, OPENAI_API_KEY, async_openai_client, NSFW_API_URL_TEMPLATE)
    generate_button.click(fn=generate_button_fn, inputs=[script_input, tts_model_dropdown, pause_input, speaker_config_method_dropdown, global_voice_dropdown, speaker_configs_state, global_speed_input, global_instructions_input], outputs=[individual_lines_zip_output, merged_dialogue_mp3_output, status_output])
    # --- Examples Section Definition (Moved here) ---
    gr.Markdown("## Example Scripts") # Keep the header if desired
    # Define the lists needed for Examples right here
    example_inputs_list_comps = [
        script_input, tts_model_dropdown, pause_input,
        speaker_config_method_dropdown, global_voice_dropdown,
        global_speed_input, global_instructions_input
    ]
    example_outputs_list_comps = [individual_lines_zip_output, merged_dialogue_mp3_output, status_output]
    example_process_fn_actual = partial(handle_script_processing, OPENAI_API_KEY, async_openai_client, NSFW_API_URL_TEMPLATE) if OPENAI_API_KEY else None
    # Define the example data directly
    # Each row matches example_inputs_list_comps positionally:
    # [script, model, pause, config method, global voice, speed, instructions]
    examples_data = [
        [
            """[Alice] Hello Bob, this is a test using the detailed configuration method.
[Bob] Hi Alice! I'm Bob, and I'll have my own voice settings.
[Alice] Let's see how this sounds.""",
            "tts-1-hd",
            300,
            "Random per Speaker",
            DEFAULT_GLOBAL_VOICE,
            1.0,
            ""
        ],
        [
            """[Narrator] Once upon a time, there was a gentle breeze over the hills.
[Narrator] The village below prepared for the annual festival as the sun set.""",
            "gpt-4o-mini-tts",
            200,
            "Detailed Configuration (Per Speaker UI)",
            DEFAULT_GLOBAL_VOICE,
            1.0,
            "Speak with a gentle, storytelling tone."
        ],
        [
            """[Solo] This is a quick single‑voice demo for testing purposes.""",
            "tts-1",
            0,
            "Single Voice (Global)",
            "fable",
            1.2,
            ""
        ],
    ]
    # Validate example data length against input components length
    num_inputs_expected = len(example_inputs_list_comps)
    valid_examples_data_inline = []
    for ex_data in examples_data:
        if len(ex_data) == num_inputs_expected:
            valid_examples_data_inline.append(ex_data)
        else:
            print(f"Warning (Inline Examples): Example data mismatch. Expected {num_inputs_expected}, got {len(ex_data)}. Skipping.")
    # Directly instantiate gr.Examples if valid data exists
    if valid_examples_data_inline:
        if example_process_fn_actual:
            gr.Examples(
                examples=valid_examples_data_inline,
                inputs=example_inputs_list_comps,
                outputs=example_outputs_list_comps,
                fn=example_process_fn_actual,
                cache_examples=False,
                examples_per_page=5,
                label="Example Scripts (Click to Load)", # Label is optional if header exists
                run_on_click=False
            )
        else:
            # No API key: examples only populate the inputs, nothing runs.
            gr.Examples(
                examples=valid_examples_data_inline,
                inputs=example_inputs_list_comps,
                examples_per_page=5,
                label="Example Scripts (Click to Load Inputs)", # Label is optional if header exists
            )
    else:
        gr.Markdown("<p style='color: orange;'>No valid examples could be loaded due to configuration mismatch.</p>")
# --- Launch ---
if __name__ == "__main__":
    # On Windows, switch asyncio to the selector-based loop — presumably
    # because the default proactor loop misbehaves with some async clients
    # (see Python asyncio event-loop-policy docs).
    if os.name == "nt":
        asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
    app = demo.queue()
    app.launch(debug=True, share=False)