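"""Dialogue Script to Speech (OpenAI TTS): Gradio app entry point.

Builds the UI from ui_layout, wires it to the callbacks in event_handlers,
and resolves OPENAI_API_KEY and related settings from the environment
(with a Hugging Face Hub secrets fallback).
"""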
import gradio as gr
import os
import asyncio
from openai import AsyncOpenAI
from functools import partial # For handle_script_processing
# Import UI creation functions and constants
from ui_layout import (
    create_main_input_components, create_speaker_config_components,
    create_action_and_output_components, create_examples_ui,
    TTS_MODELS_AVAILABLE, MODEL_DEFAULT_ENV
)
# Import event handler functions
from event_handlers import (
    handle_script_processing, handle_calculate_cost,
    update_model_controls_visibility, update_speaker_config_method_visibility,
    load_refresh_per_speaker_ui
)

# --- Application Secrets and Global Client ---
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
NSFW_API_URL_TEMPLATE = os.getenv("NSFW_API_URL_TEMPLATE")
MODEL_DEFAULT_FROM_ENV = os.getenv("MODEL_DEFAULT", MODEL_DEFAULT_ENV)
# Validate MODEL_DEFAULT_FROM_ENV or use hardcoded default
EFFECTIVE_MODEL_DEFAULT = MODEL_DEFAULT_FROM_ENV if MODEL_DEFAULT_FROM_ENV in TTS_MODELS_AVAILABLE else MODEL_DEFAULT_ENV

async_openai_client = None
if not OPENAI_API_KEY:
    try:
        # Attempt to load from Hugging Face Hub secrets if not in env
        from huggingface_hub import HfApi
        api = HfApi()
        space_id = os.getenv("SPACE_ID")  # Provided by HF Spaces
        if space_id:
            secrets = api.get_space_secrets(repo_id=space_id)
            OPENAI_API_KEY = secrets.get("OPENAI_API_KEY")
            NSFW_API_URL_TEMPLATE = secrets.get("NSFW_API_URL_TEMPLATE", NSFW_API_URL_TEMPLATE)
            MODEL_DEFAULT_FROM_HUB = secrets.get("MODEL_DEFAULT", EFFECTIVE_MODEL_DEFAULT)
            EFFECTIVE_MODEL_DEFAULT = MODEL_DEFAULT_FROM_HUB if MODEL_DEFAULT_FROM_HUB in TTS_MODELS_AVAILABLE else EFFECTIVE_MODEL_DEFAULT
            print("Loaded secrets from Hugging Face Hub.")
    except Exception as e:
        print(f"Could not retrieve secrets from Hugging Face Hub: {e}. OPENAI_API_KEY might be missing.")
if OPENAI_API_KEY:
    async_openai_client = AsyncOpenAI(api_key=OPENAI_API_KEY)
else:
    print("CRITICAL ERROR: OPENAI_API_KEY secret is not set. The application will not function properly.")

# --- Gradio Application UI and Logic ---
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# Dialogue Script to Speech (OpenAI TTS) - Refactored")
    if not OPENAI_API_KEY or not async_openai_client:
        gr.Markdown("<h3 style='color:red;'>⚠️ Warning: OPENAI_API_KEY not set or invalid. Audio generation will fail. Please configure it in your Space settings.</h3>")

    # Central state for detailed speaker configurations
    speaker_configs_state = gr.State({})  # This is crucial for the dynamic UI

    # --- Define UI Components by calling layout functions ---
    (script_input, tts_model_dropdown, pause_input,
     global_speed_input, global_instructions_input) = create_main_input_components(EFFECTIVE_MODEL_DEFAULT)
    (speaker_config_method_dropdown, single_voice_group, global_voice_dropdown,
     detailed_per_speaker_ui_group, load_per_speaker_ui_button,
     dynamic_speaker_ui_area) = create_speaker_config_components()
    (calculate_cost_button, generate_button, cost_output,
     individual_lines_zip_output, merged_dialogue_mp3_output,
     status_output) = create_action_and_output_components()
    # --- Event Wiring ---
    # When the TTS model changes, update visibility of global speed/instructions and refresh the dynamic UI.
    # speaker_configs_state is listed twice, so the handler receives its current value as two
    # separate arguments (Gradio passes one value per listed input component).
    tts_model_dropdown.change(
        fn=update_model_controls_visibility,
        inputs=[tts_model_dropdown, script_input, speaker_configs_state, speaker_configs_state],
        outputs=[global_speed_input, global_instructions_input, dynamic_speaker_ui_area, speaker_configs_state]
    )
    # When the speaker config method changes, update visibility of the relevant UI groups
    speaker_config_method_dropdown.change(
        fn=update_speaker_config_method_visibility,
        inputs=[speaker_config_method_dropdown],
        outputs=[single_voice_group, detailed_per_speaker_ui_group]
    )
    # Button to load/refresh the detailed per-speaker UI configurations
    load_per_speaker_ui_button.click(
        fn=load_refresh_per_speaker_ui,
        inputs=[script_input, speaker_configs_state, tts_model_dropdown, speaker_configs_state],
        outputs=[dynamic_speaker_ui_area, speaker_configs_state]
    )
    # Calculate cost button
    calculate_cost_button.click(
        fn=handle_calculate_cost,
        inputs=[script_input, tts_model_dropdown],
        outputs=[cost_output]
    )
    # Generate audio button.
    # Use functools.partial to bind fixed arguments (API key, client, NSFW URL template) to the handler;
    # Gradio appends the values of the listed inputs to these fixed arguments when the handler is called.
    generate_button_fn = partial(handle_script_processing, OPENAI_API_KEY, async_openai_client, NSFW_API_URL_TEMPLATE)
    generate_button.click(
        fn=generate_button_fn,
        inputs=[
            script_input, tts_model_dropdown, pause_input,
            speaker_config_method_dropdown, global_voice_dropdown,
            speaker_configs_state,  # The gr.State object itself
            global_speed_input, global_instructions_input
        ],
        outputs=[individual_lines_zip_output, merged_dialogue_mp3_output, status_output]
    )
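    # Effectively, each click calls:
    #   handle_script_processing(OPENAI_API_KEY, async_openai_client, NSFW_API_URL_TEMPLATE,
    #                            <script>, <model>, <pause>, <config method>, <global voice>,
    #                            <speaker configs>, <speed>, <instructions>)
    # (placeholder names are illustrative; see event_handlers for the actual signature)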
    # --- Examples UI ---
    example_inputs_list = [
        script_input, tts_model_dropdown, pause_input,
        speaker_config_method_dropdown, global_voice_dropdown,
        speaker_configs_state,
        global_speed_input, global_instructions_input
    ]
    example_outputs_list = [individual_lines_zip_output, merged_dialogue_mp3_output, status_output]

    # Make examples runnable
    example_process_fn = partial(handle_script_processing, OPENAI_API_KEY, async_openai_client, NSFW_API_URL_TEMPLATE)
    _ = create_examples_ui(
        inputs_for_examples=example_inputs_list,
        process_fn=example_process_fn if OPENAI_API_KEY else None,  # Only make runnable if API key exists
        outputs_for_examples=example_outputs_list if OPENAI_API_KEY else None
    )

# --- Launch ---
if __name__ == "__main__":
    # On Windows, the default proactor event loop can cause issues with some async libraries,
    # so fall back to the selector event loop policy.
    if os.name == 'nt':
        asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
    demo.queue().launch(debug=True, share=False)
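
# To run outside of Spaces (assuming this file is saved as app.py): python app.py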