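"""Dialogue Script to Speech (OpenAI TTS): Gradio app entry point.

Builds the UI from ui_layout, wires it to the callbacks in event_handlers,
and resolves OPENAI_API_KEY and related settings from the environment
(with a Hugging Face Hub secrets fallback).
"""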
import gradio as gr
import os
import asyncio
from openai import AsyncOpenAI
from functools import partial # For handle_script_processing
# Import UI creation functions and constants
from ui_layout import (
    create_main_input_components, create_speaker_config_components,
    create_action_and_output_components, create_examples_ui,
    TTS_MODELS_AVAILABLE, MODEL_DEFAULT_ENV
)
# Import event handler functions
from event_handlers import (
    handle_script_processing, handle_calculate_cost,
    update_model_controls_visibility, update_speaker_config_method_visibility,
    load_refresh_per_speaker_ui
)

# --- Application Secrets and Global Client ---
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
NSFW_API_URL_TEMPLATE = os.getenv("NSFW_API_URL_TEMPLATE")
MODEL_DEFAULT_FROM_ENV = os.getenv("MODEL_DEFAULT", MODEL_DEFAULT_ENV)
# Validate MODEL_DEFAULT_FROM_ENV or use hardcoded default
EFFECTIVE_MODEL_DEFAULT = MODEL_DEFAULT_FROM_ENV if MODEL_DEFAULT_FROM_ENV in TTS_MODELS_AVAILABLE else MODEL_DEFAULT_ENV

async_openai_client = None
if not OPENAI_API_KEY:
    try:
        # Attempt to load from Hugging Face Hub secrets if not in env
        from huggingface_hub import HfApi
        api = HfApi()
        space_id = os.getenv("SPACE_ID")  # Provided by HF Spaces
        if space_id:
            secrets = api.get_space_secrets(repo_id=space_id)
            OPENAI_API_KEY = secrets.get("OPENAI_API_KEY")
            NSFW_API_URL_TEMPLATE = secrets.get("NSFW_API_URL_TEMPLATE", NSFW_API_URL_TEMPLATE)
            MODEL_DEFAULT_FROM_HUB = secrets.get("MODEL_DEFAULT", EFFECTIVE_MODEL_DEFAULT)
            EFFECTIVE_MODEL_DEFAULT = MODEL_DEFAULT_FROM_HUB if MODEL_DEFAULT_FROM_HUB in TTS_MODELS_AVAILABLE else EFFECTIVE_MODEL_DEFAULT
            print("Loaded secrets from Hugging Face Hub.")
    except Exception as e:
        print(f"Could not retrieve secrets from Hugging Face Hub: {e}. OPENAI_API_KEY might be missing.")
if OPENAI_API_KEY:
    async_openai_client = AsyncOpenAI(api_key=OPENAI_API_KEY)
else:
    print("CRITICAL ERROR: OPENAI_API_KEY secret is not set. The application will not function properly.")

# --- Gradio Application UI and Logic ---
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# Dialogue Script to Speech (OpenAI TTS) - Refactored")
    if not OPENAI_API_KEY or not async_openai_client:
        gr.Markdown("<h3 style='color:red;'>⚠️ Warning: OPENAI_API_KEY not set or invalid. Audio generation will fail. Please configure it in your Space settings.</h3>")

    # Central state for detailed speaker configurations
    speaker_configs_state = gr.State({})  # This is crucial for the dynamic UI

    # --- Define UI Components by calling layout functions ---
    (script_input, tts_model_dropdown, pause_input,
     global_speed_input, global_instructions_input) = create_main_input_components(EFFECTIVE_MODEL_DEFAULT)
    (speaker_config_method_dropdown, single_voice_group, global_voice_dropdown,
     detailed_per_speaker_ui_group, load_per_speaker_ui_button,
     dynamic_speaker_ui_area) = create_speaker_config_components()
    (calculate_cost_button, generate_button, cost_output,
     individual_lines_zip_output, merged_dialogue_mp3_output,
     status_output) = create_action_and_output_components()
    # --- Event Wiring ---
    # When the TTS model changes, update visibility of global speed/instructions and refresh the dynamic UI.
    # speaker_configs_state is listed twice, so the handler receives its current value as two
    # separate arguments (Gradio passes one value per listed input component).
    tts_model_dropdown.change(
        fn=update_model_controls_visibility,
        inputs=[tts_model_dropdown, script_input, speaker_configs_state, speaker_configs_state],
        outputs=[global_speed_input, global_instructions_input, dynamic_speaker_ui_area, speaker_configs_state]
    )
    # When the speaker config method changes, update visibility of the relevant UI groups
    speaker_config_method_dropdown.change(
        fn=update_speaker_config_method_visibility,
        inputs=[speaker_config_method_dropdown],
        outputs=[single_voice_group, detailed_per_speaker_ui_group]
    )
    # Button to load/refresh the detailed per-speaker UI configurations
    load_per_speaker_ui_button.click(
        fn=load_refresh_per_speaker_ui,
        inputs=[script_input, speaker_configs_state, tts_model_dropdown, speaker_configs_state],
        outputs=[dynamic_speaker_ui_area, speaker_configs_state]
    )
    # Calculate cost button
    calculate_cost_button.click(
        fn=handle_calculate_cost,
        inputs=[script_input, tts_model_dropdown],
        outputs=[cost_output]
    )
    # Generate audio button.
    # Use functools.partial to bind fixed arguments (API key, client, NSFW URL template) to the handler;
    # Gradio appends the values of the listed inputs to these fixed arguments when the handler is called.
    generate_button_fn = partial(handle_script_processing, OPENAI_API_KEY, async_openai_client, NSFW_API_URL_TEMPLATE)
    generate_button.click(
        fn=generate_button_fn,
        inputs=[
            script_input, tts_model_dropdown, pause_input,
            speaker_config_method_dropdown, global_voice_dropdown,
            speaker_configs_state,  # The gr.State object itself
            global_speed_input, global_instructions_input
        ],
        outputs=[individual_lines_zip_output, merged_dialogue_mp3_output, status_output]
    )
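    # Effectively, each click calls:
    #   handle_script_processing(OPENAI_API_KEY, async_openai_client, NSFW_API_URL_TEMPLATE,
    #                            <script>, <model>, <pause>, <config method>, <global voice>,
    #                            <speaker configs>, <speed>, <instructions>)
    # (placeholder names are illustrative; see event_handlers for the actual signature)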
    # --- Examples UI ---
    example_inputs_list = [
        script_input, tts_model_dropdown, pause_input,
        speaker_config_method_dropdown, global_voice_dropdown,
        speaker_configs_state,
        global_speed_input, global_instructions_input
    ]
    example_outputs_list = [individual_lines_zip_output, merged_dialogue_mp3_output, status_output]

    # Make examples runnable
    example_process_fn = partial(handle_script_processing, OPENAI_API_KEY, async_openai_client, NSFW_API_URL_TEMPLATE)
    _ = create_examples_ui(
        inputs_for_examples=example_inputs_list,
        process_fn=example_process_fn if OPENAI_API_KEY else None,  # Only make runnable if API key exists
        outputs_for_examples=example_outputs_list if OPENAI_API_KEY else None
    )

# --- Launch ---
if __name__ == "__main__":
    # On Windows, the default proactor event loop can cause issues with some async libraries,
    # so fall back to the selector event loop policy.
    if os.name == 'nt':
        asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
    demo.queue().launch(debug=True, share=False)
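
# To run outside of Spaces (assuming this file is saved as app.py): python app.py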