abocha committed
Commit 66f012e · Parent: 62d5317
Files changed (1)
  1. app.py +91 -59
app.py CHANGED
@@ -6,12 +6,12 @@ from openai import AsyncOpenAI
from functools import partial
import datetime

+# Remove create_examples_ui from ui_layout imports if it's not used elsewhere
from ui_layout import (
    create_main_input_components, create_speaker_config_components,
    create_action_and_output_components, # Removed create_examples_ui
    TTS_MODELS_AVAILABLE, MODEL_DEFAULT_ENV, APP_AVAILABLE_VOICES,
-    DEFAULT_GLOBAL_VOICE, VIBE_CHOICES, DEFAULT_VIBE, PREDEFINED_VIBES,
-    DEFAULT_SPEAKER_CONFIG_METHOD # Added default config method for examples
+    DEFAULT_GLOBAL_VOICE, VIBE_CHOICES, DEFAULT_VIBE, PREDEFINED_VIBES
)

from event_handlers import (
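Note: ui_layout.py is not part of this commit, so the constants imported above are shown below only as hypothetical placeholders for orientation. The model names, the "Custom..." vibe, and the "fable" voice appear elsewhere in this diff, and APP_AVAILABLE_VOICES follows OpenAI's published TTS voice names; every other value here is made up.

# Hypothetical placeholders, NOT the actual contents of ui_layout.py.
TTS_MODELS_AVAILABLE = ["tts-1", "tts-1-hd", "gpt-4o-mini-tts"]
MODEL_DEFAULT_ENV = "tts-1"
APP_AVAILABLE_VOICES = ["alloy", "echo", "fable", "onyx", "nova", "shimmer"]
DEFAULT_GLOBAL_VOICE = "alloy"
VIBE_CHOICES = ["Neutral", "Excited", "Calm", "Custom..."]
DEFAULT_VIBE = "Neutral"
PREDEFINED_VIBES = {"Neutral": "", "Excited": "Speak with energy.", "Calm": "Speak slowly and softly."}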
@@ -24,11 +24,25 @@ from event_handlers import (
)

# --- Secrets and Client Setup (Same as before) ---
-# ...
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+NSFW_API_URL_TEMPLATE = os.getenv("NSFW_API_URL_TEMPLATE")
+MODEL_DEFAULT_FROM_ENV = os.getenv("MODEL_DEFAULT", MODEL_DEFAULT_ENV)
+EFFECTIVE_MODEL_DEFAULT = MODEL_DEFAULT_FROM_ENV if MODEL_DEFAULT_FROM_ENV in TTS_MODELS_AVAILABLE else MODEL_DEFAULT_ENV
+async_openai_client = None
+if not OPENAI_API_KEY:
+    # ... (secret loading logic) ...
+    pass
+if OPENAI_API_KEY:
+    async_openai_client = AsyncOpenAI(api_key=OPENAI_API_KEY)
+else:
+    print("CRITICAL ERROR: OPENAI_API_KEY secret is not set.")
+

# --- Main Blocks UI Definition ---
with gr.Blocks(theme=gr.themes.Soft(), elem_id="main_blocks_ui") as demo:
-    # ... (Markdown Header, API Key Check) ...
+    gr.Markdown("# Dialogue Script to Speech (OpenAI TTS) - Using @gr.render")
+    if not OPENAI_API_KEY or not async_openai_client:
+        gr.Markdown("<h3 style='color:red;'>⚠️ Warning: OPENAI_API_KEY not set or invalid. Audio generation will fail.</h3>")

    speaker_configs_state = gr.State({})

@@ -45,16 +59,45 @@ with gr.Blocks(theme=gr.themes.Soft(), elem_id="main_blocks_ui") as demo:
     status_output) = create_action_and_output_components()

    # --- Dynamic UI (@gr.render) Definition (Same as before) ---
-    # ... (Keep the full @gr.render block here) ...
    with detailed_per_speaker_ui_group_container:
-        @gr.render( inputs=[script_input, speaker_configs_state, tts_model_dropdown], triggers=[load_per_speaker_ui_button.click, tts_model_dropdown.change])
+        @gr.render(
+            inputs=[script_input, speaker_configs_state, tts_model_dropdown],
+            triggers=[load_per_speaker_ui_button.click, tts_model_dropdown.change]
+        )
        def render_dynamic_speaker_ui(current_script_text: str, current_speaker_configs: dict, current_tts_model: str):
-            # ... (@gr.render content from previous step) ...
-            pass # Keep full implementation
+            # ... (Full @gr.render implementation from previous correct step) ...
+            print(f"DEBUG: @gr.render CALLED. Model: {current_tts_model}. Script: '{current_script_text[:30]}...'. State Keys: {list(current_speaker_configs.keys()) if isinstance(current_speaker_configs,dict) else 'Not a dict'}")
+            unique_speakers = get_speakers_from_script(current_script_text)
+            if not unique_speakers:
+                gr.Markdown("<p style='color: #888; margin-top:10px;'>Enter script & click 'Load/Refresh' for per-speaker settings.</p>")
+                return
+            for speaker_idx, speaker_name in enumerate(unique_speakers):
+                if not isinstance(current_speaker_configs, dict): current_speaker_configs = {}
+                speaker_specific_config = current_speaker_configs.get(speaker_name, {})
+                accordion_elem_id = f"accordion_spk_{speaker_idx}_{speaker_name.replace(' ','_').lower()}"
+                with gr.Accordion(f"Settings for Speaker: {speaker_name}", open=False, elem_id=accordion_elem_id):
+                    gr.Markdown(f"Configure voice for **{speaker_name}** using **{current_tts_model}** model.")
+                    default_voice = speaker_specific_config.get("voice", DEFAULT_GLOBAL_VOICE)
+                    voice_dd_elem_id = f"voice_dd_spk_{speaker_idx}"
+                    voice_dropdown = gr.Dropdown(APP_AVAILABLE_VOICES, value=default_voice, label="Voice", elem_id=voice_dd_elem_id)
+                    voice_dropdown.change(fn=partial(handle_dynamic_accordion_input_change, speaker_name=speaker_name, config_key="voice"), inputs=[voice_dropdown, speaker_configs_state], outputs=[speaker_configs_state])
+                    if current_tts_model in ["tts-1", "tts-1-hd"]:
+                        default_speed = float(speaker_specific_config.get("speed", 1.0))
+                        speed_slider_elem_id = f"speed_slider_spk_{speaker_idx}"
+                        speed_slider = gr.Slider(minimum=0.25, maximum=4.0, value=default_speed, step=0.05, label="Speed", elem_id=speed_slider_elem_id)
+                        speed_slider.change(fn=partial(handle_dynamic_accordion_input_change, speaker_name=speaker_name, config_key="speed"), inputs=[speed_slider, speaker_configs_state], outputs=[speaker_configs_state])
+                    elif current_tts_model == "gpt-4o-mini-tts":
+                        default_vibe = speaker_specific_config.get("vibe", DEFAULT_VIBE)
+                        vibe_dd_elem_id = f"vibe_dd_spk_{speaker_idx}"
+                        vibe_dropdown = gr.Dropdown(VIBE_CHOICES, value=default_vibe, label="Vibe/Emotion", elem_id=vibe_dd_elem_id)
+                        default_custom_instructions = speaker_specific_config.get("custom_instructions", "")
+                        custom_instr_tb_elem_id = f"custom_instr_tb_spk_{speaker_idx}"
+                        custom_instructions_textbox = gr.Textbox(label="Custom Instructions", value=default_custom_instructions, placeholder="e.g., Speak slightly hesitant.", lines=2, visible=(default_vibe == "Custom..."), elem_id=custom_instr_tb_elem_id)
+                        vibe_dropdown.change(fn=partial(handle_dynamic_accordion_input_change, speaker_name=speaker_name, config_key="vibe"), inputs=[vibe_dropdown, speaker_configs_state], outputs=[speaker_configs_state]).then(fn=lambda vibe_val: gr.update(visible=(vibe_val == "Custom...")), inputs=[vibe_dropdown], outputs=[custom_instructions_textbox])
+                        custom_instructions_textbox.change(fn=partial(handle_dynamic_accordion_input_change, speaker_name=speaker_name, config_key="custom_instructions"), inputs=[custom_instructions_textbox, speaker_configs_state], outputs=[speaker_configs_state])


    # --- Event Listeners (Same as before) ---
-    # ... (Keep all event listeners: tts_model_dropdown.change, speaker_config_method_dropdown.change, etc.) ...
    tts_model_dropdown.change(fn=handle_tts_model_change, inputs=[tts_model_dropdown, speaker_configs_state], outputs=[global_speed_input, global_instructions_input, speaker_configs_state])
    speaker_config_method_dropdown.change(fn=handle_speaker_config_method_visibility_change, inputs=[speaker_config_method_dropdown], outputs=[single_voice_group, detailed_per_speaker_ui_group_container])
    load_per_speaker_ui_button.click(fn=handle_load_refresh_per_speaker_ui_trigger, inputs=[script_input, speaker_configs_state, tts_model_dropdown], outputs=[speaker_configs_state])
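The @gr.render block above relies on get_speakers_from_script and handle_dynamic_accordion_input_change, both imported from event_handlers.py, which this commit does not modify. A minimal sketch of plausible implementations, inferred only from the call sites (the real code may differ):

import re
from typing import Any

def get_speakers_from_script(script_text: str) -> list:
    # Collect speaker tags like "[Alice]" at the start of each line,
    # preserving first-appearance order and dropping duplicates.
    seen = []
    for match in re.finditer(r"^\[([^\]]+)\]", script_text or "", flags=re.MULTILINE):
        name = match.group(1).strip()
        if name and name not in seen:
            seen.append(name)
    return seen

def handle_dynamic_accordion_input_change(new_value: Any, current_state: dict,
                                          speaker_name: str = "", config_key: str = "") -> dict:
    # Gradio passes the changed component value and the shared gr.State dict
    # positionally; speaker_name/config_key are bound via functools.partial.
    state = dict(current_state) if isinstance(current_state, dict) else {}
    speaker_cfg = dict(state.get(speaker_name, {}))
    speaker_cfg[config_key] = new_value
    state[speaker_name] = speaker_cfg
    return state

The handler returns the updated dict so that Gradio writes it back through outputs=[speaker_configs_state].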
@@ -62,67 +105,56 @@ with gr.Blocks(theme=gr.themes.Soft(), elem_id="main_blocks_ui") as demo:
    generate_button_fn = partial(handle_script_processing, OPENAI_API_KEY, async_openai_client, NSFW_API_URL_TEMPLATE)
    generate_button.click(fn=generate_button_fn, inputs=[script_input, tts_model_dropdown, pause_input, speaker_config_method_dropdown, global_voice_dropdown, speaker_configs_state, global_speed_input, global_instructions_input], outputs=[individual_lines_zip_output, merged_dialogue_mp3_output, status_output])

+    # --- Examples Section Definition (Moved here) ---
+    gr.Markdown("## Example Scripts") # Keep the header if desired

-    # --- Examples Section Definition (SIMPLIFIED INPUTS) ---
-    gr.Markdown("## Example Scripts")
-
-    # Define the SIMPLIFIED list of input components for Examples
-    # Only include components that are always visible or whose values are fundamental.
-    example_inputs_list_comps_simplified = [
-        script_input,                   # Always Visible
-        tts_model_dropdown,             # Always Visible
-        pause_input,                    # Always Visible
-        speaker_config_method_dropdown, # Always Visible
-        # Excluded: global_voice_dropdown (visibility depends on speaker_config_method)
-        # Excluded: speaker_configs_state (not a standard UI input for examples)
-        # Excluded: global_speed_input (visibility depends on tts_model)
-        # Excluded: global_instructions_input (visibility depends on tts_model)
+    # Define the lists needed for Examples right here
+    example_inputs_list_comps = [
+        script_input, tts_model_dropdown, pause_input,
+        speaker_config_method_dropdown, global_voice_dropdown,
+        global_speed_input, global_instructions_input
    ]
-
-    # Define the example data matching the SIMPLIFIED inputs list (length should be 4)
-    examples_data_simplified = [
-        # Script, TTS Model, Pause, Config Method
-        ["[Alice] Hello Bob, this is a test using the detailed configuration method.\n[Bob] Hi Alice! I'm Bob, and I'll have my own voice settings.\n[Alice] Let's see how this sounds.",
-         "tts-1-hd", 300, "Detailed Configuration (Per Speaker UI)"],
-        ["[Narrator] This is a short story.\n[CharacterA] Once upon a time...\n[Narrator] ...there was a Gradio app.\n[CharacterB] And it could talk!",
-         "gpt-4o-mini-tts", 200, "Random per Speaker"],
-        ["[Solo] Just one line, using global voice and speed.",
-         "tts-1", 0, "Single Voice (Global)"],
-    ]
-
-    # Outputs list remains the same
    example_outputs_list_comps = [individual_lines_zip_output, merged_dialogue_mp3_output, status_output]
+    example_process_fn_actual = partial(handle_script_processing, OPENAI_API_KEY, async_openai_client, NSFW_API_URL_TEMPLATE) if OPENAI_API_KEY else None
+
+    # Define the example data directly
+    examples_data = [
+        ["[Alice] Hello Bob, this is a test using the detailed configuration method.\n[Bob] Hi Alice! I'm Bob, and I'll have my own voice settings.\n[Alice] Let's see how this sounds.", "tts-1-hd", 300, "Detailed Configuration (Per Speaker UI)", DEFAULT_GLOBAL_VOICE, 1.0, ""],
+        ["[Narrator] This is a short story.\n[CharacterA] Once upon a time...\n[Narrator] ...there was a Gradio app.\n[CharacterB] And it could talk!", "gpt-4o-mini-tts", 200, "Random per Speaker", DEFAULT_GLOBAL_VOICE, 1.0, "Speak with a gentle, storytelling voice for the narrator."],
+        ["[Solo] Just one line, using global voice and speed.", "tts-1", 0, "Single Voice (Global)", "fable", 1.2, ""],
+    ]

-    # Process function remains the same, BUT it will receive fewer inputs now when run from Examples.
-    # This might cause an error inside handle_script_processing if it expects all original inputs.
-    # For RENDERING TEST purposes, we might temporarily disable running from examples.
-    # example_process_fn_actual = partial(handle_script_processing, OPENAI_API_KEY, async_openai_client, NSFW_API_URL_TEMPLATE) if OPENAI_API_KEY else None
-    # Set to None for RENDERING TEST:
-    example_process_fn_actual = None
-
-    num_inputs_expected = len(example_inputs_list_comps_simplified)
+    # Validate example data length against input components length
+    num_inputs_expected = len(example_inputs_list_comps)
    valid_examples_data_inline = []
-    for ex_data in examples_data_simplified:
+    for ex_data in examples_data:
        if len(ex_data) == num_inputs_expected:
            valid_examples_data_inline.append(ex_data)
        else:
-            print(f"Warning (Inline Examples Simplified): Example data mismatch. Expected {num_inputs_expected}, got {len(ex_data)}. Skipping.")
+            print(f"Warning (Inline Examples): Example data mismatch. Expected {num_inputs_expected}, got {len(ex_data)}. Skipping.")

+    # Directly instantiate gr.Examples if valid data exists
    if valid_examples_data_inline:
-        # Instantiate gr.Examples with simplified inputs
-        # Temporarily disable running from examples (fn=None) to avoid errors due to changed input list length
-        gr.Examples(
-            examples=valid_examples_data_inline,
-            inputs=example_inputs_list_comps_simplified, # Use simplified inputs list
-            # outputs=example_outputs_list_comps, # Not needed if fn=None
-            # fn=example_process_fn_actual, # Set to None for rendering test
-            cache_examples=False,
-            examples_per_page=5,
-            label="Example Scripts (Click to Load Simplified Inputs)",
-            # run_on_click=True # Cannot run if fn=None
-        )
+        if example_process_fn_actual:
+            gr.Examples(
+                examples=valid_examples_data_inline,
+                inputs=example_inputs_list_comps,
+                outputs=example_outputs_list_comps,
+                fn=example_process_fn_actual,
+                cache_examples=False,
+                examples_per_page=5,
+                label="Example Scripts (Click to Load & Run)", # Label is optional if header exists
+                run_on_click=True
+            )
+        else:
+            gr.Examples(
+                examples=valid_examples_data_inline,
+                inputs=example_inputs_list_comps,
+                examples_per_page=5,
+                label="Example Scripts (Click to Load Inputs)", # Label is optional if header exists
+            )
    else:
-        gr.Markdown("<p style='color: orange;'>No valid examples (simplified) could be loaded.</p>")
+        gr.Markdown("<p style='color: orange;'>No valid examples could be loaded due to configuration mismatch.</p>")


    # --- Launch ---
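Why the row-length validation in the Examples hunk matters: gr.Examples expects each example row to provide exactly one value per component listed in inputs, which is why this commit widens the rows to seven values to match the seven input components. A standalone illustration of that rule, with assumed component choices, not part of app.py:

import gradio as gr

with gr.Blocks() as sketch:
    script = gr.Textbox(label="Script")
    model = gr.Dropdown(["tts-1", "tts-1-hd", "gpt-4o-mini-tts"], label="Model")
    pause = gr.Number(label="Pause (ms)")
    # One value per input component, in the same order as `inputs`.
    gr.Examples(
        examples=[["[Solo] Hello there.", "tts-1", 0]],
        inputs=[script, model, pause],
    )

if __name__ == "__main__":
    sketch.launch()

When fn and outputs are also supplied, as in the first branch of the diff above, run_on_click=True makes clicking an example both populate the inputs and run the handler; without fn, clicking only loads the values, which is what the fallback branch relies on.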
 