# FILE: app.py
"""Gradio front-end: dialogue script -> multi-speaker speech via OpenAI TTS.

Wires the static layout built by ``ui_layout`` to the async handlers in
``event_handlers``.  Per-speaker voice settings are rendered dynamically with
``@gr.render`` so the accordion list always matches the speakers found in the
current script.  Configuration comes from environment variables first, with a
best-effort fallback to Hugging Face Space secrets.
"""

import asyncio
import datetime  # noqa: F401  (imported by the original module; kept)
import os
from functools import partial

import gradio as gr
from openai import AsyncOpenAI

from ui_layout import (
    create_main_input_components,
    create_speaker_config_components,
    create_action_and_output_components,
    create_examples_ui,
    TTS_MODELS_AVAILABLE,
    MODEL_DEFAULT_ENV,
    APP_AVAILABLE_VOICES,
    DEFAULT_GLOBAL_VOICE,
    VIBE_CHOICES,
    DEFAULT_VIBE,
    PREDEFINED_VIBES,  # noqa: F401  (kept: may be used by other modules importing app)
)
from event_handlers import (
    handle_script_processing,
    handle_calculate_cost,
    handle_speaker_config_method_visibility_change,
    handle_tts_model_change,
    handle_load_refresh_per_speaker_ui_trigger,
    handle_dynamic_accordion_input_change,  # For controls inside @gr.render
    get_speakers_from_script,
)

# --- Configuration: environment first ---------------------------------------
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
NSFW_API_URL_TEMPLATE = os.getenv("NSFW_API_URL_TEMPLATE")
MODEL_DEFAULT_FROM_ENV = os.getenv("MODEL_DEFAULT", MODEL_DEFAULT_ENV)
# Fall back to the ui_layout default when the env value names an unknown model.
EFFECTIVE_MODEL_DEFAULT = (
    MODEL_DEFAULT_FROM_ENV
    if MODEL_DEFAULT_FROM_ENV in TTS_MODELS_AVAILABLE
    else MODEL_DEFAULT_ENV
)

async_openai_client = None

if not OPENAI_API_KEY:
    # Best effort: when running inside a HF Space, read secrets via the Hub
    # API.  Deliberately broad except: the app must still build (and show its
    # warning banner) when secrets cannot be fetched.
    try:
        from huggingface_hub import HfApi

        api = HfApi()
        space_id = os.getenv("SPACE_ID")
        if space_id:
            secrets = api.get_space_secrets(repo_id=space_id)
            OPENAI_API_KEY = secrets.get("OPENAI_API_KEY")
            NSFW_API_URL_TEMPLATE = secrets.get(
                "NSFW_API_URL_TEMPLATE", NSFW_API_URL_TEMPLATE
            )
            MODEL_DEFAULT_FROM_HUB = secrets.get(
                "MODEL_DEFAULT", EFFECTIVE_MODEL_DEFAULT
            )
            EFFECTIVE_MODEL_DEFAULT = (
                MODEL_DEFAULT_FROM_HUB
                if MODEL_DEFAULT_FROM_HUB in TTS_MODELS_AVAILABLE
                else EFFECTIVE_MODEL_DEFAULT
            )
            print("Loaded secrets from Hugging Face Hub.")
    except Exception as e:
        print(
            f"Could not retrieve secrets from Hugging Face Hub: {e}. "
            "OPENAI_API_KEY might be missing."
        )

if OPENAI_API_KEY:
    async_openai_client = AsyncOpenAI(api_key=OPENAI_API_KEY)
else:
    print(
        "CRITICAL ERROR: OPENAI_API_KEY secret is not set. "
        "The application will not function properly."
    )

with gr.Blocks(theme=gr.themes.Soft(), elem_id="main_blocks_ui") as demo:
    gr.Markdown("# Dialogue Script to Speech (OpenAI TTS) - Using @gr.render")

    if not OPENAI_API_KEY or not async_openai_client:
        # NOTE(review): the original warning text was lost in the chunked copy
        # this edit was made from — confirm wording against the repository.
        gr.Markdown(
            "## ⚠️ OPENAI_API_KEY is not configured. "
            "Audio generation is disabled."
        )

    # NOTE(review): the static-component construction section was lost in the
    # chunked copy.  The calls and unpacked names below are reconstructed from
    # the ui_layout imports and from the names the event wiring relies on —
    # confirm the exact return tuples against ui_layout.py.
    script_input, tts_model_dropdown, pause_input = create_main_input_components(
        EFFECTIVE_MODEL_DEFAULT
    )
    (
        speaker_config_method_dropdown,
        single_voice_group,
        global_voice_dropdown,
        detailed_per_speaker_ui_group_container,
        load_per_speaker_ui_button,
        global_speed_input,
        global_instructions_input,
    ) = create_speaker_config_components()
    (
        calculate_cost_button,
        cost_output,
        generate_button,
        individual_lines_zip_output,
        merged_dialogue_mp3_output,
        status_output,
    ) = create_action_and_output_components()

    # Per-speaker settings keyed by speaker name; mutated by the handlers.
    speaker_configs_state = gr.State({})

    # NOTE(review): the @gr.render decorator/signature was lost in the chunked
    # copy; reconstructed from the parameter names used in the visible body.
    @gr.render(
        inputs=[script_input, speaker_configs_state, tts_model_dropdown],
        triggers=[load_per_speaker_ui_button.click],
    )
    def render_per_speaker_settings(script_text, current_speaker_configs, current_tts_model):
        """Render one settings accordion per speaker found in the script.

        Controls created here write back into ``speaker_configs_state`` via
        ``handle_dynamic_accordion_input_change``.
        """
        unique_speakers = get_speakers_from_script(script_text)
        if not unique_speakers:
            gr.Markdown(
                "Enter a script with speaker tags (e.g., `[SpeakerName] Text`) and click 'Load/Refresh' to see per-speaker settings."
            )
            return

        for speaker_idx, speaker_name in enumerate(unique_speakers):
            # Ensure current_speaker_configs is a valid dictionary
            if not isinstance(current_speaker_configs, dict):
                current_speaker_configs = {}
            speaker_specific_config = current_speaker_configs.get(speaker_name, {})

            accordion_elem_id = f"accordion_spk_{speaker_idx}_{speaker_name.replace(' ','_').lower()}"
            with gr.Accordion(
                f"Settings for Speaker: {speaker_name}",
                open=False,
                elem_id=accordion_elem_id,
            ):
                gr.Markdown(
                    f"Configure voice for **{speaker_name}** using **{current_tts_model}** model."
                )

                # Voice choice applies to every TTS model.
                default_voice = speaker_specific_config.get("voice", DEFAULT_GLOBAL_VOICE)
                voice_dd_elem_id = f"voice_dd_spk_{speaker_idx}"
                voice_dropdown = gr.Dropdown(
                    APP_AVAILABLE_VOICES,
                    value=default_voice,
                    label="Voice",
                    elem_id=voice_dd_elem_id,
                )
                voice_dropdown.change(
                    fn=partial(
                        handle_dynamic_accordion_input_change,
                        speaker_name=speaker_name,
                        config_key="voice",
                    ),
                    inputs=[voice_dropdown, speaker_configs_state],  # Pass current state to handler
                    outputs=[speaker_configs_state],
                )

                if current_tts_model in ["tts-1", "tts-1-hd"]:
                    # Classic models expose a playback-speed control.
                    default_speed = float(speaker_specific_config.get("speed", 1.0))
                    speed_slider_elem_id = f"speed_slider_spk_{speaker_idx}"
                    speed_slider = gr.Slider(
                        minimum=0.25,
                        maximum=4.0,
                        value=default_speed,
                        step=0.05,
                        label="Speed",
                        elem_id=speed_slider_elem_id,
                    )
                    speed_slider.change(
                        fn=partial(
                            handle_dynamic_accordion_input_change,
                            speaker_name=speaker_name,
                            config_key="speed",
                        ),
                        inputs=[speed_slider, speaker_configs_state],
                        outputs=[speaker_configs_state],
                    )
                elif current_tts_model == "gpt-4o-mini-tts":
                    # gpt-4o-mini-tts takes a "vibe" plus optional free-form
                    # instructions instead of a speed slider.
                    default_vibe = speaker_specific_config.get("vibe", DEFAULT_VIBE)
                    vibe_dd_elem_id = f"vibe_dd_spk_{speaker_idx}"
                    vibe_dropdown = gr.Dropdown(
                        VIBE_CHOICES,
                        value=default_vibe,
                        label="Vibe/Emotion",
                        elem_id=vibe_dd_elem_id,
                    )
                    default_custom_instructions = speaker_specific_config.get(
                        "custom_instructions", ""
                    )
                    custom_instr_tb_elem_id = f"custom_instr_tb_spk_{speaker_idx}"
                    custom_instructions_textbox = gr.Textbox(
                        label="Custom Instructions",
                        value=default_custom_instructions,
                        placeholder="e.g., Speak with a slightly hesitant tone.",
                        lines=2,
                        visible=(default_vibe == "Custom..."),  # Initial visibility
                        elem_id=custom_instr_tb_elem_id,
                    )
                    vibe_dropdown.change(
                        fn=partial(
                            handle_dynamic_accordion_input_change,
                            speaker_name=speaker_name,
                            config_key="vibe",
                        ),
                        inputs=[vibe_dropdown, speaker_configs_state],
                        outputs=[speaker_configs_state],
                    ).then(  # Chain another update for visibility
                        fn=lambda vibe_val: gr.update(visible=(vibe_val == "Custom...")),
                        inputs=[vibe_dropdown],
                        outputs=[custom_instructions_textbox],
                    )
                    custom_instructions_textbox.change(
                        fn=partial(
                            handle_dynamic_accordion_input_change,
                            speaker_name=speaker_name,
                            config_key="custom_instructions",
                        ),
                        inputs=[custom_instructions_textbox, speaker_configs_state],
                        outputs=[speaker_configs_state],
                    )

    # --- Event Listeners for other UI elements --------------------------------
    tts_model_dropdown.change(
        fn=handle_tts_model_change,
        inputs=[tts_model_dropdown, speaker_configs_state],
        outputs=[global_speed_input, global_instructions_input, speaker_configs_state],
    )
    speaker_config_method_dropdown.change(
        fn=handle_speaker_config_method_visibility_change,
        inputs=[speaker_config_method_dropdown],
        outputs=[single_voice_group, detailed_per_speaker_ui_group_container],
    )
    load_per_speaker_ui_button.click(
        fn=handle_load_refresh_per_speaker_ui_trigger,
        inputs=[script_input, speaker_configs_state, tts_model_dropdown],
        outputs=[speaker_configs_state],
    )
    calculate_cost_button.click(
        fn=handle_calculate_cost,
        inputs=[script_input, tts_model_dropdown],
        outputs=[cost_output],
    )

    # Bind the runtime credentials/config into the processing handler once.
    generate_button_fn = partial(
        handle_script_processing, OPENAI_API_KEY, async_openai_client, NSFW_API_URL_TEMPLATE
    )
    generate_button.click(
        fn=generate_button_fn,
        inputs=[
            script_input,
            tts_model_dropdown,
            pause_input,
            speaker_config_method_dropdown,
            global_voice_dropdown,
            speaker_configs_state,
            global_speed_input,
            global_instructions_input,
        ],
        outputs=[individual_lines_zip_output, merged_dialogue_mp3_output, status_output],
    )

    # Examples reuse the same handler; disabled (inputs only) when no API key.
    example_inputs_list = [
        script_input,
        tts_model_dropdown,
        pause_input,
        speaker_config_method_dropdown,
        global_voice_dropdown,
        speaker_configs_state,
        global_speed_input,
        global_instructions_input,
    ]
    example_outputs_list = [
        individual_lines_zip_output,
        merged_dialogue_mp3_output,
        status_output,
    ]
    example_process_fn = partial(
        handle_script_processing, OPENAI_API_KEY, async_openai_client, NSFW_API_URL_TEMPLATE
    )
    _ = create_examples_ui(
        inputs_for_examples=example_inputs_list,
        process_fn=example_process_fn if OPENAI_API_KEY else None,
        outputs_for_examples=example_outputs_list if OPENAI_API_KEY else None,
    )

if __name__ == "__main__":
    if os.name == "nt":
        # Selector policy avoids Proactor event-loop issues with some async
        # libraries on Windows.
        asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
    demo.queue().launch(debug=True, share=False)