# NOTE: removed Hugging Face Spaces page chrome ("Spaces:" / "Running" x2)
# that was copy-paste residue at the top of this file, not Python source.
# FILE: app.py
import gradio as gr
import os
import asyncio
from openai import AsyncOpenAI
from functools import partial
import datetime
from ui_layout import (
    create_main_input_components, create_speaker_config_components,
    create_action_and_output_components, create_examples_ui,
    TTS_MODELS_AVAILABLE, MODEL_DEFAULT_ENV, APP_AVAILABLE_VOICES,
    DEFAULT_GLOBAL_VOICE, VIBE_CHOICES, DEFAULT_VIBE, PREDEFINED_VIBES
)
from event_handlers import (
    handle_script_processing, handle_calculate_cost,
    handle_speaker_config_method_visibility_change,
    handle_tts_model_change,
    handle_load_refresh_per_speaker_ui_trigger,
    handle_dynamic_accordion_input_change,  # For controls inside @gr.render
    get_speakers_from_script
)

# --- Runtime configuration ---
# Primary source: process environment variables (works locally and on hosts
# that inject secrets as env vars).
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
NSFW_API_URL_TEMPLATE = os.getenv("NSFW_API_URL_TEMPLATE")
MODEL_DEFAULT_FROM_ENV = os.getenv("MODEL_DEFAULT", MODEL_DEFAULT_ENV)
# Only honor the env override when it names a model this app supports.
EFFECTIVE_MODEL_DEFAULT = MODEL_DEFAULT_FROM_ENV if MODEL_DEFAULT_FROM_ENV in TTS_MODELS_AVAILABLE else MODEL_DEFAULT_ENV

async_openai_client = None

if not OPENAI_API_KEY:
    # Fallback: when running as a Hugging Face Space without the env var set,
    # attempt to fetch the Space's secrets through the Hub API.
    # NOTE(review): this assumes HfApi exposes get_space_secrets and that the
    # runtime is authenticated with access to this Space — confirm; on failure
    # we deliberately degrade to "no key" rather than crash at import time.
    try:
        from huggingface_hub import HfApi
        api = HfApi()
        space_id = os.getenv("SPACE_ID")
        if space_id:
            secrets = api.get_space_secrets(repo_id=space_id)
            OPENAI_API_KEY = secrets.get("OPENAI_API_KEY")
            NSFW_API_URL_TEMPLATE = secrets.get("NSFW_API_URL_TEMPLATE", NSFW_API_URL_TEMPLATE)
            MODEL_DEFAULT_FROM_HUB = secrets.get("MODEL_DEFAULT", EFFECTIVE_MODEL_DEFAULT)
            # Same validation as the env path: ignore unknown model names.
            EFFECTIVE_MODEL_DEFAULT = MODEL_DEFAULT_FROM_HUB if MODEL_DEFAULT_FROM_HUB in TTS_MODELS_AVAILABLE else EFFECTIVE_MODEL_DEFAULT
            print("Loaded secrets from Hugging Face Hub.")
    except Exception as e:
        # Best-effort boundary: log and continue; the UI shows a warning below.
        print(f"Could not retrieve secrets from Hugging Face Hub: {e}. OPENAI_API_KEY might be missing.")

if OPENAI_API_KEY:
    async_openai_client = AsyncOpenAI(api_key=OPENAI_API_KEY)
else:
    print("CRITICAL ERROR: OPENAI_API_KEY secret is not set. The application will not function properly.")
with gr.Blocks(theme=gr.themes.Soft(), elem_id="main_blocks_ui") as demo:
    gr.Markdown("# Dialogue Script to Speech (OpenAI TTS) - Using @gr.render")
    if not OPENAI_API_KEY or not async_openai_client:
        gr.Markdown("<h3 style='color:red;'>⚠️ Warning: OPENAI_API_KEY not set or invalid. Audio generation will fail. Please configure it in your Space settings.</h3>")

    # Per-speaker settings keyed by speaker name,
    # e.g. {"Alice": {"voice": ..., "speed": ..., "vibe": ..., "custom_instructions": ...}}.
    speaker_configs_state = gr.State({})

    # --- Static layout (built once by the ui_layout helpers) ---
    (script_input, tts_model_dropdown, pause_input,
     global_speed_input, global_instructions_input) = create_main_input_components(EFFECTIVE_MODEL_DEFAULT)
    (speaker_config_method_dropdown, single_voice_group, global_voice_dropdown,
     detailed_per_speaker_ui_group_container,
     load_per_speaker_ui_button) = create_speaker_config_components()
    (calculate_cost_button, generate_button, cost_output,
     individual_lines_zip_output, merged_dialogue_mp3_output,
     status_output) = create_action_and_output_components()

    with detailed_per_speaker_ui_group_container:  # Define @gr.render inside this column
        # FIX: the @gr.render decorator was missing, so this function was defined
        # but never invoked by Gradio and the per-speaker UI never appeared.
        # The decorator re-runs the function whenever any listed input changes;
        # the Load/Refresh button updates speaker_configs_state, which in turn
        # re-triggers this render.
        @gr.render(inputs=[script_input, speaker_configs_state, tts_model_dropdown])
        def render_dynamic_speaker_ui(current_script_text: str, current_speaker_configs: dict, current_tts_model: str):
            """Defines and re-renders the dynamic UI for per-speaker configuration."""
            print(f"DEBUG: @gr.render CALLED. Model: {current_tts_model}. Script: '{current_script_text[:30]}...'. State Keys: {list(current_speaker_configs.keys()) if isinstance(current_speaker_configs,dict) else 'Not a dict'}")
            unique_speakers = get_speakers_from_script(current_script_text)
            if not unique_speakers:
                gr.Markdown("<p style='color: #888; margin-top:10px;'>Enter a script with speaker tags (e.g., `[SpeakerName] Text`) and click 'Load/Refresh' to see per-speaker settings.</p>")
                return
            # Guard against a corrupted state value (hoisted out of the loop —
            # the check is loop-invariant).
            if not isinstance(current_speaker_configs, dict):
                current_speaker_configs = {}
            for speaker_idx, speaker_name in enumerate(unique_speakers):
                speaker_specific_config = current_speaker_configs.get(speaker_name, {})
                accordion_elem_id = f"accordion_spk_{speaker_idx}_{speaker_name.replace(' ','_').lower()}"
                with gr.Accordion(f"Settings for Speaker: {speaker_name}", open=False, elem_id=accordion_elem_id):
                    gr.Markdown(f"Configure voice for **{speaker_name}** using **{current_tts_model}** model.")
                    # Voice selector is common to every TTS model.
                    default_voice = speaker_specific_config.get("voice", DEFAULT_GLOBAL_VOICE)
                    voice_dd_elem_id = f"voice_dd_spk_{speaker_idx}"
                    voice_dropdown = gr.Dropdown(
                        APP_AVAILABLE_VOICES,
                        value=default_voice,
                        label="Voice",
                        elem_id=voice_dd_elem_id
                    )
                    voice_dropdown.change(
                        fn=partial(handle_dynamic_accordion_input_change, speaker_name=speaker_name, config_key="voice"),
                        inputs=[voice_dropdown, speaker_configs_state],  # Pass current state to handler
                        outputs=[speaker_configs_state]
                    )
                    if current_tts_model in ["tts-1", "tts-1-hd"]:
                        # Classic TTS models expose a per-speaker playback speed.
                        default_speed = float(speaker_specific_config.get("speed", 1.0))
                        speed_slider_elem_id = f"speed_slider_spk_{speaker_idx}"
                        speed_slider = gr.Slider(
                            minimum=0.25, maximum=4.0, value=default_speed, step=0.05,
                            label="Speed", elem_id=speed_slider_elem_id
                        )
                        speed_slider.change(
                            fn=partial(handle_dynamic_accordion_input_change, speaker_name=speaker_name, config_key="speed"),
                            inputs=[speed_slider, speaker_configs_state],
                            outputs=[speaker_configs_state]
                        )
                    elif current_tts_model == "gpt-4o-mini-tts":
                        # The mini model takes a "vibe" preset plus optional
                        # free-form instructions (shown only for "Custom...").
                        default_vibe = speaker_specific_config.get("vibe", DEFAULT_VIBE)
                        vibe_dd_elem_id = f"vibe_dd_spk_{speaker_idx}"
                        vibe_dropdown = gr.Dropdown(
                            VIBE_CHOICES, value=default_vibe, label="Vibe/Emotion", elem_id=vibe_dd_elem_id
                        )
                        default_custom_instructions = speaker_specific_config.get("custom_instructions", "")
                        custom_instr_tb_elem_id = f"custom_instr_tb_spk_{speaker_idx}"
                        custom_instructions_textbox = gr.Textbox(
                            label="Custom Instructions",
                            value=default_custom_instructions,
                            placeholder="e.g., Speak with a slightly hesitant tone.",
                            lines=2,
                            visible=(default_vibe == "Custom..."),  # Initial visibility
                            elem_id=custom_instr_tb_elem_id
                        )
                        vibe_dropdown.change(
                            fn=partial(handle_dynamic_accordion_input_change, speaker_name=speaker_name, config_key="vibe"),
                            inputs=[vibe_dropdown, speaker_configs_state],
                            outputs=[speaker_configs_state]
                        ).then(  # Chain another update for visibility
                            fn=lambda vibe_val: gr.update(visible=(vibe_val == "Custom...")),
                            inputs=[vibe_dropdown],
                            outputs=[custom_instructions_textbox]
                        )
                        custom_instructions_textbox.change(
                            fn=partial(handle_dynamic_accordion_input_change, speaker_name=speaker_name, config_key="custom_instructions"),
                            inputs=[custom_instructions_textbox, speaker_configs_state],
                            outputs=[speaker_configs_state]
                        )

    # --- Event Listeners for other UI elements ---
    tts_model_dropdown.change(
        fn=handle_tts_model_change,
        inputs=[tts_model_dropdown, speaker_configs_state],
        outputs=[global_speed_input, global_instructions_input, speaker_configs_state]
    )
    speaker_config_method_dropdown.change(
        fn=handle_speaker_config_method_visibility_change,
        inputs=[speaker_config_method_dropdown],
        outputs=[single_voice_group, detailed_per_speaker_ui_group_container]
    )
    load_per_speaker_ui_button.click(
        fn=handle_load_refresh_per_speaker_ui_trigger,
        inputs=[script_input, speaker_configs_state, tts_model_dropdown],
        outputs=[speaker_configs_state]
    )
    calculate_cost_button.click(
        fn=handle_calculate_cost,
        inputs=[script_input, tts_model_dropdown],
        outputs=[cost_output]
    )

    # One shared processing callable for both the Generate button and the
    # Examples UI (previously built twice with identical arguments).
    process_script_fn = partial(handle_script_processing, OPENAI_API_KEY, async_openai_client, NSFW_API_URL_TEMPLATE)
    generate_button.click(
        fn=process_script_fn,
        inputs=[
            script_input, tts_model_dropdown, pause_input,
            speaker_config_method_dropdown, global_voice_dropdown,
            speaker_configs_state,
            global_speed_input, global_instructions_input
        ],
        outputs=[individual_lines_zip_output, merged_dialogue_mp3_output, status_output]
    )

    example_inputs_list = [
        script_input, tts_model_dropdown, pause_input,
        speaker_config_method_dropdown, global_voice_dropdown,
        speaker_configs_state,
        global_speed_input, global_instructions_input
    ]
    example_outputs_list = [individual_lines_zip_output, merged_dialogue_mp3_output, status_output]
    # Examples only run the pipeline when an API key is configured.
    _ = create_examples_ui(
        inputs_for_examples=example_inputs_list,
        process_fn=process_script_fn if OPENAI_API_KEY else None,
        outputs_for_examples=example_outputs_list if OPENAI_API_KEY else None
    )
if __name__ == "__main__":
    # On Windows, switch to the selector event loop policy; the default
    # Proactor loop is incompatible with some async libraries.
    if os.name == 'nt':
        asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
    # queue() enables request queuing so long-running TTS jobs don't time out.
    demo.queue().launch(debug=True, share=False)