# FILE: app.py
import gradio as gr
import os
import asyncio
from openai import AsyncOpenAI
from functools import partial
from ui_layout import (
    create_main_input_components, create_speaker_config_components,
    create_action_and_output_components, create_examples_ui,
    TTS_MODELS_AVAILABLE, MODEL_DEFAULT_ENV, APP_AVAILABLE_VOICES,
    DEFAULT_GLOBAL_VOICE, VIBE_CHOICES, DEFAULT_VIBE, PREDEFINED_VIBES
)
from event_handlers import (
    handle_script_processing, handle_calculate_cost,
    handle_speaker_config_method_visibility_change,
    handle_tts_model_change,
    handle_load_refresh_per_speaker_ui_trigger,
    handle_dynamic_accordion_input_change,  # For controls inside @gr.render
    get_speakers_from_script
)
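
# --- Configuration ---
# Settings are read from environment variables first; MODEL_DEFAULT falls back
# to the default from ui_layout when unset or not in TTS_MODELS_AVAILABLE.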
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
NSFW_API_URL_TEMPLATE = os.getenv("NSFW_API_URL_TEMPLATE")
MODEL_DEFAULT_FROM_ENV = os.getenv("MODEL_DEFAULT", MODEL_DEFAULT_ENV)
EFFECTIVE_MODEL_DEFAULT = MODEL_DEFAULT_FROM_ENV if MODEL_DEFAULT_FROM_ENV in TTS_MODELS_AVAILABLE else MODEL_DEFAULT_ENV
async_openai_client = None
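# Fallback: if no key is in the environment, try reading the Space's secrets
# through the Hub API. Note that on Hugging Face Spaces, secrets are normally
# injected as environment variables at runtime, and whether HfApi exposes a
# get_space_secrets() method may depend on the huggingface_hub version, so any
# failure here is caught and logged below.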
if not OPENAI_API_KEY:
    try:
        from huggingface_hub import HfApi
        api = HfApi()
        space_id = os.getenv("SPACE_ID")
        if space_id:
            secrets = api.get_space_secrets(repo_id=space_id)
            OPENAI_API_KEY = secrets.get("OPENAI_API_KEY")
            NSFW_API_URL_TEMPLATE = secrets.get("NSFW_API_URL_TEMPLATE", NSFW_API_URL_TEMPLATE)
            MODEL_DEFAULT_FROM_HUB = secrets.get("MODEL_DEFAULT", EFFECTIVE_MODEL_DEFAULT)
            EFFECTIVE_MODEL_DEFAULT = MODEL_DEFAULT_FROM_HUB if MODEL_DEFAULT_FROM_HUB in TTS_MODELS_AVAILABLE else EFFECTIVE_MODEL_DEFAULT
            print("Loaded secrets from Hugging Face Hub.")
    except Exception as e:
        print(f"Could not retrieve secrets from Hugging Face Hub: {e}. OPENAI_API_KEY might be missing.")
if OPENAI_API_KEY:
    async_openai_client = AsyncOpenAI(api_key=OPENAI_API_KEY)
else:
    print("CRITICAL ERROR: OPENAI_API_KEY secret is not set. The application will not function properly.")
with gr.Blocks(theme=gr.themes.Soft(), elem_id="main_blocks_ui") as demo:
    gr.Markdown("# Dialogue Script to Speech (OpenAI TTS) - Using @gr.render")
    if not OPENAI_API_KEY or not async_openai_client:
        gr.Markdown("<h3 style='color:red;'>⚠️ Warning: OPENAI_API_KEY not set or invalid. Audio generation will fail. Please configure it in your Space settings.</h3>")
    speaker_configs_state = gr.State({})
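    # speaker_configs_state maps speaker name -> settings dict, e.g.
    # {"Alice": {"voice": "nova", "speed": 1.1}} (illustrative values).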
    (script_input, tts_model_dropdown, pause_input,
     global_speed_input, global_instructions_input) = create_main_input_components(EFFECTIVE_MODEL_DEFAULT)
    (speaker_config_method_dropdown, single_voice_group, global_voice_dropdown,
     detailed_per_speaker_ui_group_container,
     load_per_speaker_ui_button) = create_speaker_config_components()
    (calculate_cost_button, generate_button, cost_output,
     individual_lines_zip_output, merged_dialogue_mp3_output,
     status_output) = create_action_and_output_components()
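    # @gr.render re-runs the decorated function whenever one of its triggers
    # fires, rebuilding the per-speaker accordions to match the current script.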
    with detailed_per_speaker_ui_group_container:  # Define @gr.render inside this column
        @gr.render(
            inputs=[script_input, speaker_configs_state, tts_model_dropdown],
            triggers=[load_per_speaker_ui_button.click, tts_model_dropdown.change]
        )
        def render_dynamic_speaker_ui(current_script_text: str, current_speaker_configs: dict, current_tts_model: str):
            """Defines and re-renders the dynamic UI for per-speaker configuration."""
            print(f"DEBUG: @gr.render CALLED. Model: {current_tts_model}. Script: '{current_script_text[:30]}...'. State Keys: {list(current_speaker_configs.keys()) if isinstance(current_speaker_configs, dict) else 'Not a dict'}")
            unique_speakers = get_speakers_from_script(current_script_text)
            if not unique_speakers:
                gr.Markdown("<p style='color: #888; margin-top:10px;'>Enter a script with speaker tags (e.g., `[SpeakerName] Text`) and click 'Load/Refresh' to see per-speaker settings.</p>")
                return
            # Ensure current_speaker_configs is a valid dictionary before reading from it.
            if not isinstance(current_speaker_configs, dict):
                current_speaker_configs = {}
            for speaker_idx, speaker_name in enumerate(unique_speakers):
                speaker_specific_config = current_speaker_configs.get(speaker_name, {})
                accordion_elem_id = f"accordion_spk_{speaker_idx}_{speaker_name.replace(' ', '_').lower()}"
                with gr.Accordion(f"Settings for Speaker: {speaker_name}", open=False, elem_id=accordion_elem_id):
                    gr.Markdown(f"Configure voice for **{speaker_name}** using **{current_tts_model}** model.")
                    default_voice = speaker_specific_config.get("voice", DEFAULT_GLOBAL_VOICE)
                    voice_dd_elem_id = f"voice_dd_spk_{speaker_idx}"
                    voice_dropdown = gr.Dropdown(
                        APP_AVAILABLE_VOICES,
                        value=default_voice,
                        label="Voice",
                        elem_id=voice_dd_elem_id
                    )
                    voice_dropdown.change(
                        fn=partial(handle_dynamic_accordion_input_change, speaker_name=speaker_name, config_key="voice"),
                        inputs=[voice_dropdown, speaker_configs_state],  # Pass current state to handler
                        outputs=[speaker_configs_state]
                    )
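                    # Model-specific controls: tts-1 / tts-1-hd expose a speed
                    # slider, while gpt-4o-mini-tts takes vibe / instruction text.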
                    if current_tts_model in ["tts-1", "tts-1-hd"]:
                        default_speed = float(speaker_specific_config.get("speed", 1.0))
                        speed_slider_elem_id = f"speed_slider_spk_{speaker_idx}"
                        speed_slider = gr.Slider(
                            minimum=0.25, maximum=4.0, value=default_speed, step=0.05,
                            label="Speed", elem_id=speed_slider_elem_id
                        )
                        speed_slider.change(
                            fn=partial(handle_dynamic_accordion_input_change, speaker_name=speaker_name, config_key="speed"),
                            inputs=[speed_slider, speaker_configs_state],
                            outputs=[speaker_configs_state]
                        )
                    elif current_tts_model == "gpt-4o-mini-tts":
                        default_vibe = speaker_specific_config.get("vibe", DEFAULT_VIBE)
                        vibe_dd_elem_id = f"vibe_dd_spk_{speaker_idx}"
                        vibe_dropdown = gr.Dropdown(
                            VIBE_CHOICES, value=default_vibe, label="Vibe/Emotion", elem_id=vibe_dd_elem_id
                        )
                        default_custom_instructions = speaker_specific_config.get("custom_instructions", "")
                        custom_instr_tb_elem_id = f"custom_instr_tb_spk_{speaker_idx}"
                        custom_instructions_textbox = gr.Textbox(
                            label="Custom Instructions",
                            value=default_custom_instructions,
                            placeholder="e.g., Speak with a slightly hesitant tone.",
                            lines=2,
                            visible=(default_vibe == "Custom..."),  # Initial visibility
                            elem_id=custom_instr_tb_elem_id
                        )
                        vibe_dropdown.change(
                            fn=partial(handle_dynamic_accordion_input_change, speaker_name=speaker_name, config_key="vibe"),
                            inputs=[vibe_dropdown, speaker_configs_state],
                            outputs=[speaker_configs_state]
                        ).then(  # Chain another update for visibility
                            fn=lambda vibe_val: gr.update(visible=(vibe_val == "Custom...")),
                            inputs=[vibe_dropdown],
                            outputs=[custom_instructions_textbox]
                        )
                        custom_instructions_textbox.change(
                            fn=partial(handle_dynamic_accordion_input_change, speaker_name=speaker_name, config_key="custom_instructions"),
                            inputs=[custom_instructions_textbox, speaker_configs_state],
                            outputs=[speaker_configs_state]
                        )
    # --- Event Listeners for other UI elements ---
    tts_model_dropdown.change(
        fn=handle_tts_model_change,
        inputs=[tts_model_dropdown, speaker_configs_state],
        outputs=[global_speed_input, global_instructions_input, speaker_configs_state]
    )
    speaker_config_method_dropdown.change(
        fn=handle_speaker_config_method_visibility_change,
        inputs=[speaker_config_method_dropdown],
        outputs=[single_voice_group, detailed_per_speaker_ui_group_container]
    )
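    # The click handler only updates speaker_configs_state; the visible rebuild
    # happens because load_per_speaker_ui_button.click is also a @gr.render trigger.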
    load_per_speaker_ui_button.click(
        fn=handle_load_refresh_per_speaker_ui_trigger,
        inputs=[script_input, speaker_configs_state, tts_model_dropdown],
        outputs=[speaker_configs_state]
    )
    calculate_cost_button.click(
        fn=handle_calculate_cost,
        inputs=[script_input, tts_model_dropdown],
        outputs=[cost_output]
    )
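    # partial() pre-binds the API credentials so the Gradio callback receives
    # only component values.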
    generate_button_fn = partial(handle_script_processing, OPENAI_API_KEY, async_openai_client, NSFW_API_URL_TEMPLATE)
    generate_button.click(
        fn=generate_button_fn,
        inputs=[
            script_input, tts_model_dropdown, pause_input,
            speaker_config_method_dropdown, global_voice_dropdown,
            speaker_configs_state,
            global_speed_input, global_instructions_input
        ],
        outputs=[individual_lines_zip_output, merged_dialogue_mp3_output, status_output]
    )
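    # Examples reuse the same processing function, but are only wired to run it
    # when an API key is configured.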
    example_inputs_list = [
        script_input, tts_model_dropdown, pause_input,
        speaker_config_method_dropdown, global_voice_dropdown,
        speaker_configs_state,
        global_speed_input, global_instructions_input
    ]
    example_outputs_list = [individual_lines_zip_output, merged_dialogue_mp3_output, status_output]
    example_process_fn = partial(handle_script_processing, OPENAI_API_KEY, async_openai_client, NSFW_API_URL_TEMPLATE)
    _ = create_examples_ui(
        inputs_for_examples=example_inputs_list,
        process_fn=example_process_fn if OPENAI_API_KEY else None,
        outputs_for_examples=example_outputs_list if OPENAI_API_KEY else None
    )
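
# The default Windows (Proactor) event loop can misbehave with some async
# libraries, so fall back to the selector policy there.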
if __name__ == "__main__":
    if os.name == 'nt':
        asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
    demo.queue().launch(debug=True, share=False)