app.py
CHANGED
@@ -6,12 +6,12 @@ from openai import AsyncOpenAI
 from functools import partial
 import datetime
 
+# Remove create_examples_ui from ui_layout imports if it's not used elsewhere
 from ui_layout import (
     create_main_input_components, create_speaker_config_components,
     create_action_and_output_components, # Removed create_examples_ui
     TTS_MODELS_AVAILABLE, MODEL_DEFAULT_ENV, APP_AVAILABLE_VOICES,
-    DEFAULT_GLOBAL_VOICE, VIBE_CHOICES, DEFAULT_VIBE, PREDEFINED_VIBES
-    DEFAULT_SPEAKER_CONFIG_METHOD # Added default config method for examples
+    DEFAULT_GLOBAL_VOICE, VIBE_CHOICES, DEFAULT_VIBE, PREDEFINED_VIBES
 )
 
 from event_handlers import (
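Hunk 1 keeps "from functools import partial", which the later hunks lean on to pre-bind fixed arguments (API key, client, per-speaker keys) into Gradio callbacks so that only live UI values travel through inputs=[...]. A minimal sketch of that pattern, assuming hypothetical handler and component names rather than the app's real event_handlers:

import os
from functools import partial

import gradio as gr

def handle_generate(api_key, client, script_text):
    # api_key and client are pre-bound; only script_text arrives from the UI.
    return f"Would synthesize {len(script_text)} characters (key set: {bool(api_key)})"

with gr.Blocks() as demo:
    script_box = gr.Textbox(label="Script")
    status_box = gr.Textbox(label="Status")
    generate_btn = gr.Button("Generate")
    # Mirrors generate_button_fn in the diff: bind secrets once, wire UI values per click.
    generate_btn.click(
        fn=partial(handle_generate, os.getenv("OPENAI_API_KEY"), None),
        inputs=[script_box],
        outputs=[status_box],
    )

demo.launch()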
@@ -24,11 +24,25 @@ from event_handlers import (
 )
 
 # --- Secrets and Client Setup (Same as before) ---
-
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+NSFW_API_URL_TEMPLATE = os.getenv("NSFW_API_URL_TEMPLATE")
+MODEL_DEFAULT_FROM_ENV = os.getenv("MODEL_DEFAULT", MODEL_DEFAULT_ENV)
+EFFECTIVE_MODEL_DEFAULT = MODEL_DEFAULT_FROM_ENV if MODEL_DEFAULT_FROM_ENV in TTS_MODELS_AVAILABLE else MODEL_DEFAULT_ENV
+async_openai_client = None
+if not OPENAI_API_KEY:
+    # ... (secret loading logic) ...
+    pass
+if OPENAI_API_KEY:
+    async_openai_client = AsyncOpenAI(api_key=OPENAI_API_KEY)
+else:
+    print("CRITICAL ERROR: OPENAI_API_KEY secret is not set.")
+
 
 # --- Main Blocks UI Definition ---
 with gr.Blocks(theme=gr.themes.Soft(), elem_id="main_blocks_ui") as demo:
-    #
+    gr.Markdown("# Dialogue Script to Speech (OpenAI TTS) - Using @gr.render")
+    if not OPENAI_API_KEY or not async_openai_client:
+        gr.Markdown("<h3 style='color:red;'>⚠️ Warning: OPENAI_API_KEY not set or invalid. Audio generation will fail.</h3>")
 
     speaker_configs_state = gr.State({})
 
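Hunk 2 swaps the elided secrets placeholder for explicit os.getenv lookups and a module-level AsyncOpenAI client. The per-line synthesis itself lives in event_handlers.py and is not part of this diff; the sketch below is only an assumption about how such a client is typically consumed with the OpenAI Python SDK (response handling can differ between SDK versions):

import asyncio
import os

from openai import AsyncOpenAI

async def synthesize_line(client: AsyncOpenAI, text: str, voice: str = "alloy") -> bytes:
    # One TTS request per dialogue line; model and voice would come from the UI state.
    response = await client.audio.speech.create(model="tts-1", voice=voice, input=text)
    return response.content  # raw MP3 bytes; exact accessor may vary by SDK version

async def main() -> None:
    client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))
    audio = await synthesize_line(client, "Hello Bob, this is a test.")
    with open("line_001.mp3", "wb") as fh:
        fh.write(audio)

if __name__ == "__main__":
    asyncio.run(main())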
@@ -45,16 +59,45 @@ with gr.Blocks(theme=gr.themes.Soft(), elem_id="main_blocks_ui") as demo:
     status_output) = create_action_and_output_components()
 
     # --- Dynamic UI (@gr.render) Definition (Same as before) ---
-    # ... (Keep the full @gr.render block here) ...
     with detailed_per_speaker_ui_group_container:
-        @gr.render(
+        @gr.render(
+            inputs=[script_input, speaker_configs_state, tts_model_dropdown],
+            triggers=[load_per_speaker_ui_button.click, tts_model_dropdown.change]
+        )
         def render_dynamic_speaker_ui(current_script_text: str, current_speaker_configs: dict, current_tts_model: str):
-            # ... (@gr.render
-
+            # ... (Full @gr.render implementation from previous correct step) ...
+            print(f"DEBUG: @gr.render CALLED. Model: {current_tts_model}. Script: '{current_script_text[:30]}...'. State Keys: {list(current_speaker_configs.keys()) if isinstance(current_speaker_configs,dict) else 'Not a dict'}")
+            unique_speakers = get_speakers_from_script(current_script_text)
+            if not unique_speakers:
+                gr.Markdown("<p style='color: #888; margin-top:10px;'>Enter script & click 'Load/Refresh' for per-speaker settings.</p>")
+                return
+            for speaker_idx, speaker_name in enumerate(unique_speakers):
+                if not isinstance(current_speaker_configs, dict): current_speaker_configs = {}
+                speaker_specific_config = current_speaker_configs.get(speaker_name, {})
+                accordion_elem_id = f"accordion_spk_{speaker_idx}_{speaker_name.replace(' ','_').lower()}"
+                with gr.Accordion(f"Settings for Speaker: {speaker_name}", open=False, elem_id=accordion_elem_id):
+                    gr.Markdown(f"Configure voice for **{speaker_name}** using **{current_tts_model}** model.")
+                    default_voice = speaker_specific_config.get("voice", DEFAULT_GLOBAL_VOICE)
+                    voice_dd_elem_id = f"voice_dd_spk_{speaker_idx}"
+                    voice_dropdown = gr.Dropdown(APP_AVAILABLE_VOICES, value=default_voice, label="Voice", elem_id=voice_dd_elem_id)
+                    voice_dropdown.change(fn=partial(handle_dynamic_accordion_input_change, speaker_name=speaker_name, config_key="voice"), inputs=[voice_dropdown, speaker_configs_state], outputs=[speaker_configs_state])
+                    if current_tts_model in ["tts-1", "tts-1-hd"]:
+                        default_speed = float(speaker_specific_config.get("speed", 1.0))
+                        speed_slider_elem_id = f"speed_slider_spk_{speaker_idx}"
+                        speed_slider = gr.Slider(minimum=0.25, maximum=4.0, value=default_speed, step=0.05, label="Speed", elem_id=speed_slider_elem_id)
+                        speed_slider.change(fn=partial(handle_dynamic_accordion_input_change, speaker_name=speaker_name, config_key="speed"), inputs=[speed_slider, speaker_configs_state], outputs=[speaker_configs_state])
+                    elif current_tts_model == "gpt-4o-mini-tts":
+                        default_vibe = speaker_specific_config.get("vibe", DEFAULT_VIBE)
+                        vibe_dd_elem_id = f"vibe_dd_spk_{speaker_idx}"
+                        vibe_dropdown = gr.Dropdown(VIBE_CHOICES, value=default_vibe, label="Vibe/Emotion", elem_id=vibe_dd_elem_id)
+                        default_custom_instructions = speaker_specific_config.get("custom_instructions", "")
+                        custom_instr_tb_elem_id = f"custom_instr_tb_spk_{speaker_idx}"
+                        custom_instructions_textbox = gr.Textbox(label="Custom Instructions", value=default_custom_instructions, placeholder="e.g., Speak slightly hesitant.", lines=2, visible=(default_vibe == "Custom..."), elem_id=custom_instr_tb_elem_id)
+                        vibe_dropdown.change(fn=partial(handle_dynamic_accordion_input_change, speaker_name=speaker_name, config_key="vibe"), inputs=[vibe_dropdown, speaker_configs_state], outputs=[speaker_configs_state]).then(fn=lambda vibe_val: gr.update(visible=(vibe_val == "Custom...")), inputs=[vibe_dropdown], outputs=[custom_instructions_textbox])
+                        custom_instructions_textbox.change(fn=partial(handle_dynamic_accordion_input_change, speaker_name=speaker_name, config_key="custom_instructions"), inputs=[custom_instructions_textbox, speaker_configs_state], outputs=[speaker_configs_state])
 
 
     # --- Event Listeners (Same as before) ---
-    # ... (Keep all event listeners: tts_model_dropdown.change, speaker_config_method_dropdown.change, etc.) ...
     tts_model_dropdown.change(fn=handle_tts_model_change, inputs=[tts_model_dropdown, speaker_configs_state], outputs=[global_speed_input, global_instructions_input, speaker_configs_state])
     speaker_config_method_dropdown.change(fn=handle_speaker_config_method_visibility_change, inputs=[speaker_config_method_dropdown], outputs=[single_voice_group, detailed_per_speaker_ui_group_container])
     load_per_speaker_ui_button.click(fn=handle_load_refresh_per_speaker_ui_trigger, inputs=[script_input, speaker_configs_state, tts_model_dropdown], outputs=[speaker_configs_state])
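Hunk 3 fills in the @gr.render arguments: the decorated function re-runs whenever one of the listed triggers fires and receives the current values of inputs, rebuilding the per-speaker accordions each time. A stripped-down sketch of the same mechanism, with illustrative component names and a toy speaker parser in place of get_speakers_from_script:

import gradio as gr

with gr.Blocks() as demo:
    script_box = gr.Textbox(label="Script", placeholder="[Alice] Hi\n[Bob] Hello")
    refresh_btn = gr.Button("Load/Refresh speaker settings")

    @gr.render(inputs=[script_box], triggers=[refresh_btn.click])
    def render_speaker_rows(script_text: str):
        # Re-executed on every trigger; components created here replace the previous render.
        speakers = {line.split("]")[0].strip("[ ")
                    for line in script_text.splitlines() if line.startswith("[")}
        if not speakers:
            gr.Markdown("Enter a script and click the button.")
            return
        for name in sorted(speakers):
            with gr.Accordion(f"Settings for {name}", open=False):
                gr.Dropdown(["alloy", "fable", "nova"], label=f"Voice for {name}")

demo.launch()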
@@ -62,67 +105,56 @@ with gr.Blocks(theme=gr.themes.Soft(), elem_id="main_blocks_ui") as demo:
     generate_button_fn = partial(handle_script_processing, OPENAI_API_KEY, async_openai_client, NSFW_API_URL_TEMPLATE)
     generate_button.click(fn=generate_button_fn, inputs=[script_input, tts_model_dropdown, pause_input, speaker_config_method_dropdown, global_voice_dropdown, speaker_configs_state, global_speed_input, global_instructions_input], outputs=[individual_lines_zip_output, merged_dialogue_mp3_output, status_output])
 
+    # --- Examples Section Definition (Moved here) ---
+    gr.Markdown("## Example Scripts") # Keep the header if desired
 
-    #
-
-
-
-
-    example_inputs_list_comps_simplified = [
-        script_input, # Always Visible
-        tts_model_dropdown, # Always Visible
-        pause_input, # Always Visible
-        speaker_config_method_dropdown, # Always Visible
-        # Excluded: global_voice_dropdown (visibility depends on speaker_config_method)
-        # Excluded: speaker_configs_state (not a standard UI input for examples)
-        # Excluded: global_speed_input (visibility depends on tts_model)
-        # Excluded: global_instructions_input (visibility depends on tts_model)
+    # Define the lists needed for Examples right here
+    example_inputs_list_comps = [
+        script_input, tts_model_dropdown, pause_input,
+        speaker_config_method_dropdown, global_voice_dropdown,
+        global_speed_input, global_instructions_input
     ]
-
-    # Define the example data matching the SIMPLIFIED inputs list (length should be 4)
-    examples_data_simplified = [
-        # Script, TTS Model, Pause, Config Method
-        ["[Alice] Hello Bob, this is a test using the detailed configuration method.\n[Bob] Hi Alice! I'm Bob, and I'll have my own voice settings.\n[Alice] Let's see how this sounds.",
-         "tts-1-hd", 300, "Detailed Configuration (Per Speaker UI)"],
-        ["[Narrator] This is a short story.\n[CharacterA] Once upon a time...\n[Narrator] ...there was a Gradio app.\n[CharacterB] And it could talk!",
-         "gpt-4o-mini-tts", 200, "Random per Speaker"],
-        ["[Solo] Just one line, using global voice and speed.",
-         "tts-1", 0, "Single Voice (Global)"],
-    ]
-
-    # Outputs list remains the same
     example_outputs_list_comps = [individual_lines_zip_output, merged_dialogue_mp3_output, status_output]
+    example_process_fn_actual = partial(handle_script_processing, OPENAI_API_KEY, async_openai_client, NSFW_API_URL_TEMPLATE) if OPENAI_API_KEY else None
+
+    # Define the example data directly
+    examples_data = [
+        ["[Alice] Hello Bob, this is a test using the detailed configuration method.\n[Bob] Hi Alice! I'm Bob, and I'll have my own voice settings.\n[Alice] Let's see how this sounds.", "tts-1-hd", 300, "Detailed Configuration (Per Speaker UI)", DEFAULT_GLOBAL_VOICE, 1.0, ""],
+        ["[Narrator] This is a short story.\n[CharacterA] Once upon a time...\n[Narrator] ...there was a Gradio app.\n[CharacterB] And it could talk!", "gpt-4o-mini-tts", 200, "Random per Speaker", DEFAULT_GLOBAL_VOICE, 1.0, "Speak with a gentle, storytelling voice for the narrator."],
+        ["[Solo] Just one line, using global voice and speed.", "tts-1", 0, "Single Voice (Global)", "fable", 1.2, ""],
+    ]
 
-    #
-
-    # For RENDERING TEST purposes, we might temporarily disable running from examples.
-    # example_process_fn_actual = partial(handle_script_processing, OPENAI_API_KEY, async_openai_client, NSFW_API_URL_TEMPLATE) if OPENAI_API_KEY else None
-    # Set to None for RENDERING TEST:
-    example_process_fn_actual = None
-
-    num_inputs_expected = len(example_inputs_list_comps_simplified)
+    # Validate example data length against input components length
+    num_inputs_expected = len(example_inputs_list_comps)
     valid_examples_data_inline = []
-    for ex_data in examples_data_simplified:
+    for ex_data in examples_data:
         if len(ex_data) == num_inputs_expected:
             valid_examples_data_inline.append(ex_data)
         else:
-            print(f"Warning (Inline Examples
+            print(f"Warning (Inline Examples): Example data mismatch. Expected {num_inputs_expected}, got {len(ex_data)}. Skipping.")
 
+    # Directly instantiate gr.Examples if valid data exists
     if valid_examples_data_inline:
-
-
-
-
-
-
-
-
-
-
-
-
+        if example_process_fn_actual:
+            gr.Examples(
+                examples=valid_examples_data_inline,
+                inputs=example_inputs_list_comps,
+                outputs=example_outputs_list_comps,
+                fn=example_process_fn_actual,
+                cache_examples=False,
+                examples_per_page=5,
+                label="Example Scripts (Click to Load & Run)", # Label is optional if header exists
+                run_on_click=True
+            )
+        else:
+            gr.Examples(
+                examples=valid_examples_data_inline,
+                inputs=example_inputs_list_comps,
+                examples_per_page=5,
+                label="Example Scripts (Click to Load Inputs)", # Label is optional if header exists
+            )
     else:
-        gr.Markdown("<p style='color: orange;'>No valid examples
+        gr.Markdown("<p style='color: orange;'>No valid examples could be loaded due to configuration mismatch.</p>")
 
 
     # --- Launch ---
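Hunk 4 replaces the simplified four-column examples with rows covering all seven input components and instantiates gr.Examples inline, enabling run_on_click=True only when a processing function is actually available. A self-contained sketch of that branching, with generic components standing in for the app's own:

import gradio as gr

def echo(text, speed):
    # Stand-in for the real partial-bound handle_script_processing.
    return f"{text} (speed={speed})"

with gr.Blocks() as demo:
    text_in = gr.Textbox(label="Script line")
    speed_in = gr.Slider(0.25, 4.0, value=1.0, label="Speed")
    status_out = gr.Textbox(label="Status")

    inputs_list = [text_in, speed_in]
    examples_data = [["[Solo] Just one line.", 1.2], ["[Alice] Hello Bob.", 1.0]]

    # Keep only rows whose length matches the inputs list, as the diff does.
    valid_rows = [row for row in examples_data if len(row) == len(inputs_list)]

    process_fn = echo  # would be None if no API key were configured
    if valid_rows:
        if process_fn:
            gr.Examples(examples=valid_rows, inputs=inputs_list, outputs=[status_out],
                        fn=process_fn, cache_examples=False, run_on_click=True,
                        label="Example Scripts (Click to Load & Run)")
        else:
            gr.Examples(examples=valid_rows, inputs=inputs_list,
                        label="Example Scripts (Click to Load Inputs)")
    else:
        gr.Markdown("No valid examples could be loaded.")

demo.launch()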