File size: 7,772 Bytes
8468afb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b7680b4
8468afb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b7680b4
8468afb
b7680b4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
import gradio as gr
from utils.openai_tts import OPENAI_VOICES as ALL_TTS_VOICES # Import directly for APP_AVAILABLE_VOICES

# --- UI Constants and Configuration ---
# Model names offered in the "TTS Model" dropdown.
TTS_MODELS_AVAILABLE = [
    "tts-1",
    "tts-1-hd",
    "gpt-4o-mini-tts",
]
# Fallback model name; per its name, presumably used by the caller when an
# environment-provided model is missing or invalid (that logic is not here).
MODEL_DEFAULT_ENV = "tts-1-hd"

# Strategies for assigning voices to speakers; the literal strings are also
# compared against when deciding which configuration panels start visible.
SPEAKER_CONFIG_METHODS = [
    "Single Voice (Global)",
    "Random per Speaker",
    "A/B Round Robin",
    "Detailed Configuration (Per Speaker UI)",
]
DEFAULT_SPEAKER_CONFIG_METHOD = "Random per Speaker"

# Take a private copy of the imported voice collection, and fall back to a
# single known voice if that copy turns out to be empty.
APP_AVAILABLE_VOICES = ALL_TTS_VOICES.copy() or ["alloy"]

# Display name -> instruction text. "None" maps to an empty instruction and
# "Custom..." maps to the sentinel string "CUSTOM".
PREDEFINED_VIBES = {
    "None": "",
    "Calm": "Speak in a calm, composed, and relaxed manner.",
    "Excited": "Speak with an energetic, enthusiastic, and lively tone.",
    "Happy": "Speak with a cheerful, bright, and joyful voice.",
    "Sad": "Speak with a sorrowful, melancholic, and dejected tone.",
    "Whisper": "Speak softly, as if whispering.",
    "Angry": "Speak with a strong, firm, and possibly agitated voice.",
    "Fearful": "Speak with a trembling, hesitant, and scared voice.",
    "Formal": "Speak in a clear, precise, and professional tone, suitable for a formal address.",
    "Authoritative": "Speak with a commanding, confident, and firm voice.",
    "Friendly": "Speak in a warm, approachable, and amiable manner.",
    "Custom...": "CUSTOM",
}
# Dropdown choices, in the insertion order of the mapping above.
VIBE_CHOICES = list(PREDEFINED_VIBES)
DEFAULT_VIBE = "None"
# APP_AVAILABLE_VOICES is guaranteed non-empty by the fallback above, so its
# first entry can be taken directly.
DEFAULT_GLOBAL_VOICE = APP_AVAILABLE_VOICES[0]


# --- UI Element Creation Functions ---

def create_main_input_components(model_default_value):
    """Build the script, model, pause and model-specific global inputs.

    The components are placed in a single row: a wide column holding the
    dialogue script box, and a narrower column holding the model dropdown,
    the pause setting, the global speed slider and the global instructions
    box.

    Args:
        model_default_value: Initial value of the model dropdown. It also
            decides which of the two model-specific controls starts out
            visible: the speed slider for "tts-1"/"tts-1-hd", the
            instructions box for "gpt-4o-mini-tts".

    Returns:
        Tuple of (script input, model dropdown, pause input, global speed
        slider, global instructions textbox).
    """
    # Initial visibility of the model-specific controls; computed up front
    # because it depends only on the argument.
    show_speed = model_default_value in ("tts-1", "tts-1-hd")
    show_instructions = model_default_value == "gpt-4o-mini-tts"

    with gr.Row():
        with gr.Column(scale=2):
            script_box = gr.TextArea(
                label="Dialogue Script",
                placeholder="[Speaker1] Hello world!\n[Speaker2] How are you today?",
                lines=10,
            )
        with gr.Column(scale=1):
            model_picker = gr.Dropdown(
                TTS_MODELS_AVAILABLE,
                label="TTS Model",
                value=model_default_value,
            )
            pause_field = gr.Number(
                label="Pause Between Lines (ms)",
                value=500,
                minimum=0,
                maximum=5000,
                step=50,
            )
            speed_slider = gr.Slider(
                label="Global Speed (for tts-1/hd)",
                minimum=0.25,
                maximum=4.0,
                value=1.0,
                step=0.05,
                visible=show_speed,
                interactive=True,
            )
            instructions_box = gr.Textbox(
                label="Global Instructions (for gpt-4o-mini-tts)",
                placeholder="e.g., Speak with a calm tone.",
                lines=2,
                visible=show_instructions,
                interactive=True,
            )

    return (
        script_box,
        model_picker,
        pause_field,
        speed_slider,
        instructions_box,
    )

def create_speaker_config_components():
    """Build the speaker configuration method selector and its sub-panels.

    Creates a heading, the configuration-method dropdown, a group holding the
    global voice dropdown, and a column holding the per-speaker controls (a
    load/refresh button, a hint, and an empty column with the element id
    "dynamic_ui_area_for_speakers").

    Returns:
        Tuple of (method dropdown, single-voice group, global voice dropdown,
        detailed per-speaker column, load/refresh button, dynamic speaker
        UI column).
    """
    # Initial visibility only, derived from the default method. Any later
    # toggling is presumably done by event handlers wired up elsewhere.
    single_voice_visible = DEFAULT_SPEAKER_CONFIG_METHOD == "Single Voice (Global)"
    detailed_visible = DEFAULT_SPEAKER_CONFIG_METHOD == "Detailed Configuration (Per Speaker UI)"

    gr.Markdown("### Speaker Voice & Style Configuration")
    method_picker = gr.Dropdown(
        SPEAKER_CONFIG_METHODS,
        label="Configuration Method",
        value=DEFAULT_SPEAKER_CONFIG_METHOD,
    )

    with gr.Group(visible=single_voice_visible) as single_voice_panel:
        voice_picker = gr.Dropdown(
            APP_AVAILABLE_VOICES,
            label="Global Voice",
            value=DEFAULT_GLOBAL_VOICE,
            interactive=True,
        )

    with gr.Column(visible=detailed_visible) as detailed_panel:
        refresh_button = gr.Button("Load/Refresh Per-Speaker Settings UI (from Script Above)")
        gr.Markdown("<small>Click button above to populate settings for each speaker found in the script. Settings are applied per-speaker. If script changes, click again to refresh.</small>")
        # Empty placeholder column, created inside the detailed panel.
        speaker_area = gr.Column(elem_id="dynamic_ui_area_for_speakers")

    return (
        method_picker,
        single_voice_panel,
        voice_picker,
        detailed_panel,
        refresh_button,
        speaker_area,
    )

def create_action_and_output_components():
    """Build the action buttons and the result/output widgets.

    Layout, top to bottom: a row with the cost and generate buttons, the
    read-only cost textbox, a row with the ZIP file output and the merged
    audio output, and a read-only status textbox.

    Returns:
        Tuple of (calculate-cost button, generate button, cost textbox,
        ZIP file output, merged audio output, status textbox).
    """
    with gr.Row():
        cost_button = gr.Button("Calculate Estimated Cost")
        run_button = gr.Button("Generate Audio", variant="primary")

    cost_box = gr.Textbox(label="Estimated Cost", interactive=False)

    with gr.Row():
        zip_download = gr.File(label="Download Individual Lines (ZIP)")
        merged_audio = gr.Audio(
            label="Play/Download Merged Dialogue (MP3)",
            type="filepath",
        )

    status_box = gr.Textbox(
        label="Status",
        interactive=False,
        lines=2,
        max_lines=5,
    )

    return (
        cost_button,
        run_button,
        cost_box,
        zip_download,
        merged_audio,
        status_box,
    )

def create_examples_ui(inputs_for_examples, process_fn, outputs_for_examples=None):
    """Render the "Example Scripts" section and return its examples widget.

    Args:
        inputs_for_examples: Components that receive an example's values.
            Each example row must have exactly one value per component;
            rows of any other length are skipped with a printed warning.
        process_fn: Callable to run when an example is clicked. Used only
            when ``outputs_for_examples`` is also truthy.
        outputs_for_examples: Components that receive ``process_fn``'s
            results. Defaults to None, in which case clicking an example
            only loads its inputs.

    Returns:
        The ``gr.Examples`` instance, or None when no example row matches
        the number of input components (a notice is rendered instead).
    """
    gr.Markdown("## Example Scripts")

    dialogue_example = (
        "[Alice] Hello Bob, this is a test using the detailed configuration method.\n"
        "[Bob] Hi Alice! I'm Bob, and I'll have my own voice settings.\n"
        "[Alice] Let's see how this sounds."
    )
    story_example = (
        "[Narrator] This is a short story.\n"
        "[CharacterA] Once upon a time...\n"
        "[Narrator] ...there was a Gradio app.\n"
        "[CharacterB] And it could talk!"
    )

    # One value per input component, by position. Presumably: script, model,
    # pause (ms), config method, global voice, per-speaker settings, speed,
    # instructions -- confirm against the caller's input list.
    candidate_rows = [
        [dialogue_example, "tts-1-hd", 300, "Detailed Configuration (Per Speaker UI)", DEFAULT_GLOBAL_VOICE, {}, 1.0, ""],
        [story_example, "gpt-4o-mini-tts", 200, "Random per Speaker", DEFAULT_GLOBAL_VOICE, {}, 1.0, "Speak with a gentle, storytelling voice for the narrator."],
        ["[Solo] Just one line, using global voice and speed.", "tts-1", 0, "Single Voice (Global)", "fable", {}, 1.2, ""],
    ]

    # Keep only rows whose length matches the number of input components.
    expected_len = len(inputs_for_examples)
    usable_rows = []
    for row in candidate_rows:
        if len(row) != expected_len:
            print(f"Warning: Example data mismatch. Expected {expected_len} items, got {len(row)}. Skipping example: {row[0][:30]}...")
            continue
        usable_rows.append(row)

    if not usable_rows:
        gr.Markdown("<p style='color: orange;'>No valid examples could be loaded due to configuration mismatch.</p>")
        return None

    # Arguments common to both the runnable and the load-only variants.
    examples_kwargs = {
        "examples": usable_rows,
        "inputs": inputs_for_examples,
        "examples_per_page": 5,
    }
    if process_fn and outputs_for_examples:
        # Runnable examples: clicking loads the inputs and calls process_fn.
        # Caching is disabled (the original notes it is complex with
        # external API calls).
        examples_kwargs.update(
            outputs=outputs_for_examples,
            fn=process_fn,
            cache_examples=False,
            label="Example Scripts (Click to Load & Run)",
            run_on_click=True,
        )
    else:
        # Load-only examples: no fn, outputs, or run_on_click.
        examples_kwargs["label"] = "Example Scripts (Click to Load Inputs)"

    return gr.Examples(**examples_kwargs)