Spaces:
Running
Running
File size: 8,461 Bytes
92c9b3d 8468afb 92c9b3d 8468afb 92c9b3d 8468afb 49a48a4 92c9b3d 8468afb 92c9b3d 8468afb 49a48a4 8468afb 49a48a4 92c9b3d 49a48a4 92c9b3d 8468afb 92c9b3d 8468afb 92c9b3d 8468afb 92c9b3d 8468afb 92c9b3d 8468afb 49a48a4 8468afb 92c9b3d 8468afb 92c9b3d 49a48a4 8468afb e73ce82 49a48a4 92c9b3d e73ce82 8468afb 49a48a4 92c9b3d 8468afb 92c9b3d 49a48a4 92c9b3d 8468afb e5a707f 8468afb e5a707f 8468afb b7680b4 8468afb b7680b4 49a48a4 e5a707f b7680b4 92c9b3d b7680b4 e5a707f b7680b4 e5a707f b7680b4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 |
# FILE: ui_layout.py
import gradio as gr
# --- UI Constants and Configuration ---
# Model names offered in the "TTS Model" dropdown.
TTS_MODELS_AVAILABLE = ["tts-1", "tts-1-hd", "gpt-4o-mini-tts"]
MODEL_DEFAULT_ENV = "tts-1-hd"

# Strategies for assigning a voice to each speaker found in the script.
SPEAKER_CONFIG_METHODS = [
    "Single Voice (Global)",
    "Random per Speaker",
    "A/B Round Robin",
    "Detailed Configuration (Per Speaker UI)",
]
DEFAULT_SPEAKER_CONFIG_METHOD = "Random per Speaker"

# Voices used when utils.openai_tts is missing or exposes no voices.
# Kept private and copied on use so callers cannot mutate the fallback itself.
_FALLBACK_VOICES = ("alloy", "echo", "fable", "onyx", "nova", "shimmer")

# utils.openai_tts.OPENAI_VOICES is expected to provide the voice names.
# Only the import is guarded, so unrelated errors are not masked.
try:
    from utils.openai_tts import OPENAI_VOICES as ALL_TTS_VOICES
except ImportError:
    print("Warning: Could not import OPENAI_VOICES from utils.openai_tts. Using default list.")
    APP_AVAILABLE_VOICES = list(_FALLBACK_VOICES)
else:
    # list(...) copies any iterable (list, tuple, set, ...); the previous
    # .copy() call raised AttributeError when OPENAI_VOICES was a tuple.
    # An empty or None value falls back to the default voices.
    APP_AVAILABLE_VOICES = list(ALL_TTS_VOICES or ()) or list(_FALLBACK_VOICES)

# Display name -> instruction text. "None" maps to an empty instruction and
# "Custom..." maps to the sentinel string "CUSTOM".
PREDEFINED_VIBES = {
    "None": "",
    "Calm": "Speak in a calm, composed, and relaxed manner.",
    "Excited": "Speak with an energetic, enthusiastic, and lively tone.",
    "Happy": "Speak with a cheerful, bright, and joyful voice.",
    "Sad": "Speak with a sorrowful, melancholic, and dejected tone.",
    "Whisper": "Speak softly, as if whispering.",
    "Angry": "Speak with a strong, firm, and possibly agitated voice.",
    "Fearful": "Speak with a trembling, hesitant, and scared voice.",
    "Formal": "Speak in a clear, precise, and professional tone, suitable for a formal address.",
    "Authoritative": "Speak with a commanding, confident, and firm voice.",
    "Friendly": "Speak in a warm, approachable, and amiable manner.",
    "Custom...": "CUSTOM",
}
VIBE_CHOICES = list(PREDEFINED_VIBES.keys())
DEFAULT_VIBE = "None"

# First available voice; the "alloy" guard is kept as a defensive default.
DEFAULT_GLOBAL_VOICE = APP_AVAILABLE_VOICES[0] if APP_AVAILABLE_VOICES else "alloy"
# --- UI Element Creation Functions ---
def create_main_input_components(model_default_value):
    """Create the main input components for script, model, pause, and global settings.

    Must be called inside an active Gradio layout context; the components are
    attached to whatever container is open at call time.

    Args:
        model_default_value: Model name pre-selected in the TTS model
            dropdown. It also decides which global control starts visible:
            the speed slider for "tts-1"/"tts-1-hd", the instructions box
            for "gpt-4o-mini-tts".

    Returns:
        A 5-tuple ``(script_input, tts_model_dropdown, pause_input,
        global_speed_input, global_instructions_input)``.
    """
    # Initial visibility of the two model-specific global controls.
    is_tts1_family_default = model_default_value in ["tts-1", "tts-1-hd"]
    is_gpt_mini_tts_default = model_default_value == "gpt-4o-mini-tts"

    with gr.Row(elem_id="main_input_row"):
        with gr.Column(scale=2, elem_id="script_input_column"):
            script_input = gr.TextArea(
                label="Dialogue Script",
                # A real newline, so the hint shows one speaker per line; the
                # previous double-escaped form displayed a literal backslash-n.
                placeholder="[Speaker1] Hello world!\n[Speaker2] How are you today?",
                lines=10,
                elem_id="script_input_area",
            )
        # NOTE(review): the global controls are assumed to live in this column
        # next to the model selector -- confirm against the intended layout.
        with gr.Column(scale=1, elem_id="model_config_column"):
            tts_model_dropdown = gr.Dropdown(
                TTS_MODELS_AVAILABLE,
                label="TTS Model",
                value=model_default_value,
                elem_id="tts_model_dropdown",
            )
            pause_input = gr.Number(
                label="Pause Between Lines (ms)",
                value=500,
                minimum=0,
                maximum=5000,
                step=50,
                elem_id="pause_input",
            )
            global_speed_input = gr.Slider(
                minimum=0.25, maximum=4.0, value=1.0, step=0.05,
                label="Global Speed (for tts-1/hd)",
                visible=is_tts1_family_default, interactive=True, elem_id="global_speed_input",
            )
            global_instructions_input = gr.Textbox(
                label="Global Instructions (for gpt-4o-mini-tts)",
                placeholder="e.g., Speak with a calm tone.",
                visible=is_gpt_mini_tts_default, interactive=True, lines=2,
                elem_id="global_instructions_input",
            )
    return script_input, tts_model_dropdown, pause_input, global_speed_input, global_instructions_input
def create_speaker_config_components():
    """Build the speaker-configuration selector and its method-specific containers.

    Creates, in order: a section heading, the configuration-method dropdown,
    a group holding the global voice dropdown, and a column holding the
    "load per-speaker settings" button with its hint text. The group and the
    column start visible only when the default method is their own.

    Returns:
        A 5-tuple ``(speaker_config_method_dropdown, single_voice_group,
        global_voice_dropdown, detailed_per_speaker_ui_group_container,
        load_per_speaker_ui_button)``.
    """
    # Initial visibility of each method-specific container.
    show_single_voice = DEFAULT_SPEAKER_CONFIG_METHOD == "Single Voice (Global)"
    show_detailed = DEFAULT_SPEAKER_CONFIG_METHOD == "Detailed Configuration (Per Speaker UI)"

    gr.Markdown("### Speaker Voice & Style Configuration")
    method_selector = gr.Dropdown(
        choices=SPEAKER_CONFIG_METHODS,
        label="Configuration Method",
        value=DEFAULT_SPEAKER_CONFIG_METHOD,
        elem_id="speaker_config_method_dropdown",
    )

    voice_group = gr.Group(visible=show_single_voice, elem_id="single_voice_config_group")
    with voice_group:
        voice_selector = gr.Dropdown(
            choices=APP_AVAILABLE_VOICES,
            label="Global Voice",
            value=DEFAULT_GLOBAL_VOICE,
            interactive=True,
            elem_id="global_voice_dropdown",
        )

    with gr.Column(
        visible=show_detailed,
        elem_id="detailed_per_speaker_ui_group_container",
    ) as detailed_container:
        refresh_button = gr.Button(
            value="Load/Refresh Per-Speaker Settings UI (from Script Above)",
            elem_id="load_per_speaker_ui_button",
        )
        gr.Markdown(
            "<small>Click button above to populate settings for each speaker found in the script. "
            "Settings are applied per-speaker. If script changes, click again to refresh.</small>"
        )
        # The @gr.render dynamic UI will be placed here by app.py

    return method_selector, voice_group, voice_selector, detailed_container, refresh_button
def create_action_and_output_components():
    """Build the action buttons and the widgets that display results.

    Layout, top to bottom: a row with the cost and generate buttons, the
    read-only cost textbox, a row with the ZIP download and merged-audio
    player, and a read-only status textbox.

    Returns:
        A 6-tuple ``(calculate_cost_button, generate_button, cost_output,
        individual_lines_zip_output, merged_dialogue_mp3_output,
        status_output)``.
    """
    with gr.Row(elem_id="action_buttons_row"):
        cost_btn = gr.Button(value="Calculate Estimated Cost", elem_id="calculate_cost_button")
        generate_btn = gr.Button(
            value="Generate Audio",
            variant="primary",
            elem_id="generate_button",
        )

    cost_box = gr.Textbox(label="Estimated Cost", interactive=False, elem_id="cost_output")

    with gr.Row(elem_id="output_files_row"):
        zip_file = gr.File(
            label="Download Individual Lines (ZIP)",
            elem_id="individual_lines_zip_output",
        )
        merged_audio = gr.Audio(
            label="Play/Download Merged Dialogue (MP3)",
            type="filepath",
            elem_id="merged_dialogue_mp3_output",
        )

    status_box = gr.Textbox(
        label="Status",
        interactive=False,
        lines=2,
        max_lines=5,
        elem_id="status_output",
    )

    return cost_btn, generate_btn, cost_box, zip_file, merged_audio, status_box
def create_examples_ui(inputs_for_examples, process_fn, outputs_for_examples=None):
    """Create the examples section.

    Example rows whose length does not match ``inputs_for_examples`` are
    skipped with a printed warning, so a mismatch never reaches
    ``gr.Examples``.

    Args:
        inputs_for_examples: Input components the example rows populate; its
            length defines how many values a valid row must contain.
        process_fn: Callable run when an example is clicked. Used only when
            ``outputs_for_examples`` is also truthy.
        outputs_for_examples: Output components for ``process_fn``. When it
            or ``process_fn`` is falsy, examples only load the inputs.

    Returns:
        The created ``gr.Examples`` instance, or ``None`` when no example row
        matches the expected number of inputs (an orange notice is rendered
        instead).
    """
    gr.Markdown("## Example Scripts")

    # Real newlines separate the dialogue lines. The previous double-escaped
    # form put a literal backslash-n in the script, so a multi-speaker example
    # loaded as one single line.
    example_script_1 = (
        "[Alice] Hello Bob, this is a test using the detailed configuration method.\n"
        "[Bob] Hi Alice! I'm Bob, and I'll have my own voice settings.\n"
        "[Alice] Let's see how this sounds."
    )
    example_script_2 = (
        "[Narrator] This is a short story.\n"
        "[CharacterA] Once upon a time...\n"
        "[Narrator] ...there was a Gradio app.\n"
        "[CharacterB] And it could talk!"
    )

    # Row layout: script, model, pause (ms), config method, global voice,
    # an empty dict, speed, instructions.
    # NOTE(review): the dict presumably stands for per-speaker settings state;
    # verify against the caller's inputs_for_examples order.
    examples_data = [
        [example_script_1, "tts-1-hd", 300, "Detailed Configuration (Per Speaker UI)", DEFAULT_GLOBAL_VOICE, {}, 1.0, ""],
        [example_script_2, "gpt-4o-mini-tts", 200, "Random per Speaker", DEFAULT_GLOBAL_VOICE, {}, 1.0, "Speak with a gentle, storytelling voice for the narrator."],
        ["[Solo] Just one line, using global voice and speed.", "tts-1", 0, "Single Voice (Global)", "fable", {}, 1.2, ""],
    ]

    # Keep only rows that supply exactly one value per input component.
    num_inputs = len(inputs_for_examples)
    valid_examples_data = []
    for ex_data in examples_data:
        if len(ex_data) == num_inputs:
            valid_examples_data.append(ex_data)
        else:
            print(f"Warning: Example data mismatch. Expected {num_inputs} items, got {len(ex_data)}. Skipping example: {ex_data[0][:30]}...")

    if not valid_examples_data:
        gr.Markdown("<p style='color: orange;'>No valid examples could be loaded due to configuration mismatch.</p>")
        return None

    if process_fn and outputs_for_examples:
        # Clicking an example loads the inputs and immediately runs process_fn.
        return gr.Examples(
            examples=valid_examples_data,
            inputs=inputs_for_examples,
            outputs=outputs_for_examples,
            fn=process_fn,
            cache_examples=False,
            examples_per_page=5,
            label="Example Scripts (Click to Load & Run)",
            run_on_click=True,
        )

    # No processing function or outputs: clicking only fills in the inputs.
    return gr.Examples(
        examples=valid_examples_data,
        inputs=inputs_for_examples,
        examples_per_page=5,
        label="Example Scripts (Click to Load Inputs)",
    )