abocha committed on
Commit
635a2fb
·
1 Parent(s): 186ebe6
Files changed (2) hide show
  1. app.py +10 -32
  2. event_handlers.py +33 -18
app.py CHANGED
@@ -2,37 +2,32 @@ import gradio as gr
2
  import os
3
  import asyncio
4
  from openai import AsyncOpenAI
5
- from functools import partial # For handle_script_processing
6
 
7
- # Import UI creation functions and constants
8
  from ui_layout import (
9
  create_main_input_components, create_speaker_config_components,
10
  create_action_and_output_components, create_examples_ui,
11
  TTS_MODELS_AVAILABLE, MODEL_DEFAULT_ENV
12
  )
13
 
14
- # Import event handler functions
15
  from event_handlers import (
16
  handle_script_processing, handle_calculate_cost,
17
  update_model_controls_visibility, update_speaker_config_method_visibility,
18
- load_refresh_per_speaker_ui
 
19
  )
20
 
21
- # --- Application Secrets and Global Client ---
22
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
23
  NSFW_API_URL_TEMPLATE = os.getenv("NSFW_API_URL_TEMPLATE")
24
  MODEL_DEFAULT_FROM_ENV = os.getenv("MODEL_DEFAULT", MODEL_DEFAULT_ENV)
25
-
26
- # Validate MODEL_DEFAULT_FROM_ENV or use hardcoded default
27
  EFFECTIVE_MODEL_DEFAULT = MODEL_DEFAULT_FROM_ENV if MODEL_DEFAULT_FROM_ENV in TTS_MODELS_AVAILABLE else MODEL_DEFAULT_ENV
28
-
29
  async_openai_client = None
 
30
  if not OPENAI_API_KEY:
31
  try:
32
- # Attempt to load from Hugging Face Hub secrets if not in env
33
  from huggingface_hub import HfApi
34
  api = HfApi()
35
- space_id = os.getenv("SPACE_ID") # Provided by HF Spaces
36
  if space_id:
37
  secrets = api.get_space_secrets(repo_id=space_id)
38
  OPENAI_API_KEY = secrets.get("OPENAI_API_KEY")
@@ -49,16 +44,13 @@ else:
49
  print("CRITICAL ERROR: OPENAI_API_KEY secret is not set. The application will not function properly.")
50
 
51
 
52
- # --- Gradio Application UI and Logic ---
53
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
54
  gr.Markdown("# Dialogue Script to Speech (OpenAI TTS) - Refactored")
55
  if not OPENAI_API_KEY or not async_openai_client:
56
  gr.Markdown("<h3 style='color:red;'>⚠️ Warning: OPENAI_API_KEY not set or invalid. Audio generation will fail. Please configure it in your Space settings.</h3>")
57
 
58
- # Central state for detailed speaker configurations
59
- speaker_configs_state = gr.State({}) # This is crucial for dynamic UI
60
 
61
- # --- Define UI Components by calling layout functions ---
62
  (script_input, tts_model_dropdown, pause_input,
63
  global_speed_input, global_instructions_input) = create_main_input_components(EFFECTIVE_MODEL_DEFAULT)
64
 
@@ -70,72 +62,58 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
70
  individual_lines_zip_output, merged_dialogue_mp3_output,
71
  status_output) = create_action_and_output_components()
72
 
73
- # --- Event Wiring ---
74
-
75
- # When TTS model changes, update visibility of global speed/instructions & refresh dynamic UI
76
  tts_model_dropdown.change(
77
  fn=update_model_controls_visibility,
78
  inputs=[tts_model_dropdown, script_input, speaker_configs_state, speaker_configs_state],
79
  outputs=[global_speed_input, global_instructions_input, dynamic_speaker_ui_area, speaker_configs_state]
80
  )
81
 
82
- # When speaker config method changes, update visibility of relevant UI groups
83
  speaker_config_method_dropdown.change(
84
  fn=update_speaker_config_method_visibility,
85
  inputs=[speaker_config_method_dropdown],
86
  outputs=[single_voice_group, detailed_per_speaker_ui_group]
87
  )
88
 
89
- # Button to load/refresh the detailed per-speaker UI configurations
90
  load_per_speaker_ui_button.click(
91
- fn=load_refresh_per_speaker_ui,
92
  inputs=[script_input, speaker_configs_state, tts_model_dropdown, speaker_configs_state],
93
  outputs=[dynamic_speaker_ui_area, speaker_configs_state]
94
  )
95
 
96
- # Calculate cost button
97
  calculate_cost_button.click(
98
  fn=handle_calculate_cost,
99
  inputs=[script_input, tts_model_dropdown],
100
  outputs=[cost_output]
101
  )
102
 
103
- # Generate audio button
104
- # Use functools.partial to pass fixed arguments like API key and client to the handler
105
- # Gradio inputs will be appended to these fixed arguments when the handler is called.
106
  generate_button_fn = partial(handle_script_processing, OPENAI_API_KEY, async_openai_client, NSFW_API_URL_TEMPLATE)
107
  generate_button.click(
108
  fn=generate_button_fn,
109
  inputs=[
110
  script_input, tts_model_dropdown, pause_input,
111
  speaker_config_method_dropdown, global_voice_dropdown,
112
- speaker_configs_state, # The gr.State object itself
113
  global_speed_input, global_instructions_input
114
  ],
115
  outputs=[individual_lines_zip_output, merged_dialogue_mp3_output, status_output]
116
  )
117
 
118
- # --- Examples UI ---
119
  example_inputs_list = [
120
  script_input, tts_model_dropdown, pause_input,
121
  speaker_config_method_dropdown, global_voice_dropdown,
122
  speaker_configs_state,
123
  global_speed_input, global_instructions_input
124
  ]
125
-
126
  example_outputs_list = [individual_lines_zip_output, merged_dialogue_mp3_output, status_output]
127
-
128
- # Make examples runnable
129
  example_process_fn = partial(handle_script_processing, OPENAI_API_KEY, async_openai_client, NSFW_API_URL_TEMPLATE)
130
 
131
  _ = create_examples_ui(
132
  inputs_for_examples=example_inputs_list,
133
- process_fn=example_process_fn if OPENAI_API_KEY else None, # Only make runnable if API key exists
134
  outputs_for_examples=example_outputs_list if OPENAI_API_KEY else None
135
  )
136
 
137
-
138
- # --- Launch ---
139
  if __name__ == "__main__":
140
  if os.name == 'nt':
141
  asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
 
2
  import os
3
  import asyncio
4
  from openai import AsyncOpenAI
5
+ from functools import partial
6
 
 
7
  from ui_layout import (
8
  create_main_input_components, create_speaker_config_components,
9
  create_action_and_output_components, create_examples_ui,
10
  TTS_MODELS_AVAILABLE, MODEL_DEFAULT_ENV
11
  )
12
 
 
13
  from event_handlers import (
14
  handle_script_processing, handle_calculate_cost,
15
  update_model_controls_visibility, update_speaker_config_method_visibility,
16
+ handle_load_refresh_button_click # Import the new wrapper
17
+ # load_refresh_per_speaker_ui_core is now internal to event_handlers.py
18
  )
19
 
 
20
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
21
  NSFW_API_URL_TEMPLATE = os.getenv("NSFW_API_URL_TEMPLATE")
22
  MODEL_DEFAULT_FROM_ENV = os.getenv("MODEL_DEFAULT", MODEL_DEFAULT_ENV)
 
 
23
  EFFECTIVE_MODEL_DEFAULT = MODEL_DEFAULT_FROM_ENV if MODEL_DEFAULT_FROM_ENV in TTS_MODELS_AVAILABLE else MODEL_DEFAULT_ENV
 
24
  async_openai_client = None
25
+ # ... (Secrets loading logic remains the same) ...
26
  if not OPENAI_API_KEY:
27
  try:
 
28
  from huggingface_hub import HfApi
29
  api = HfApi()
30
+ space_id = os.getenv("SPACE_ID")
31
  if space_id:
32
  secrets = api.get_space_secrets(repo_id=space_id)
33
  OPENAI_API_KEY = secrets.get("OPENAI_API_KEY")
 
44
  print("CRITICAL ERROR: OPENAI_API_KEY secret is not set. The application will not function properly.")
45
 
46
 
 
47
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
48
  gr.Markdown("# Dialogue Script to Speech (OpenAI TTS) - Refactored")
49
  if not OPENAI_API_KEY or not async_openai_client:
50
  gr.Markdown("<h3 style='color:red;'>⚠️ Warning: OPENAI_API_KEY not set or invalid. Audio generation will fail. Please configure it in your Space settings.</h3>")
51
 
52
+ speaker_configs_state = gr.State({})
 
53
 
 
54
  (script_input, tts_model_dropdown, pause_input,
55
  global_speed_input, global_instructions_input) = create_main_input_components(EFFECTIVE_MODEL_DEFAULT)
56
 
 
62
  individual_lines_zip_output, merged_dialogue_mp3_output,
63
  status_output) = create_action_and_output_components()
64
 
 
 
 
65
  tts_model_dropdown.change(
66
  fn=update_model_controls_visibility,
67
  inputs=[tts_model_dropdown, script_input, speaker_configs_state, speaker_configs_state],
68
  outputs=[global_speed_input, global_instructions_input, dynamic_speaker_ui_area, speaker_configs_state]
69
  )
70
 
 
71
  speaker_config_method_dropdown.change(
72
  fn=update_speaker_config_method_visibility,
73
  inputs=[speaker_config_method_dropdown],
74
  outputs=[single_voice_group, detailed_per_speaker_ui_group]
75
  )
76
 
77
+ # MODIFIED: Button click now uses the new wrapper handle_load_refresh_button_click
78
  load_per_speaker_ui_button.click(
79
+ fn=handle_load_refresh_button_click, # Use the new wrapper
80
  inputs=[script_input, speaker_configs_state, tts_model_dropdown, speaker_configs_state],
81
  outputs=[dynamic_speaker_ui_area, speaker_configs_state]
82
  )
83
 
 
84
  calculate_cost_button.click(
85
  fn=handle_calculate_cost,
86
  inputs=[script_input, tts_model_dropdown],
87
  outputs=[cost_output]
88
  )
89
 
 
 
 
90
  generate_button_fn = partial(handle_script_processing, OPENAI_API_KEY, async_openai_client, NSFW_API_URL_TEMPLATE)
91
  generate_button.click(
92
  fn=generate_button_fn,
93
  inputs=[
94
  script_input, tts_model_dropdown, pause_input,
95
  speaker_config_method_dropdown, global_voice_dropdown,
96
+ speaker_configs_state,
97
  global_speed_input, global_instructions_input
98
  ],
99
  outputs=[individual_lines_zip_output, merged_dialogue_mp3_output, status_output]
100
  )
101
 
 
102
  example_inputs_list = [
103
  script_input, tts_model_dropdown, pause_input,
104
  speaker_config_method_dropdown, global_voice_dropdown,
105
  speaker_configs_state,
106
  global_speed_input, global_instructions_input
107
  ]
 
108
  example_outputs_list = [individual_lines_zip_output, merged_dialogue_mp3_output, status_output]
 
 
109
  example_process_fn = partial(handle_script_processing, OPENAI_API_KEY, async_openai_client, NSFW_API_URL_TEMPLATE)
110
 
111
  _ = create_examples_ui(
112
  inputs_for_examples=example_inputs_list,
113
+ process_fn=example_process_fn if OPENAI_API_KEY else None,
114
  outputs_for_examples=example_outputs_list if OPENAI_API_KEY else None
115
  )
116
 
 
 
117
  if __name__ == "__main__":
118
  if os.name == 'nt':
119
  asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
event_handlers.py CHANGED
@@ -1,4 +1,5 @@
1
  import gradio as gr
 
2
  import os
3
  import asyncio
4
  import tempfile
@@ -43,16 +44,24 @@ def handle_dynamic_input_change(new_value, current_configs_state_dict: dict, spe
43
  return current_configs_state_dict
44
 
45
 
46
- def load_refresh_per_speaker_ui(script_text: str, current_configs_state_dict: dict, tts_model: str, speaker_configs_state_component: gr.State):
 
 
 
 
47
  # --- START OF PHASE 1 DEBUGGING (returns list of components directly) ---
48
- print("DEBUG: load_refresh_per_speaker_ui CALLED - Phase 1: HARDCODED RETURN (direct list for Column)")
49
- debug_markdown = gr.Markdown("## !! Dynamic Area Test Content Loaded (Direct list for Column) !!")
50
- # Return the list of components and the state update
51
  return [debug_markdown], {}
52
  # --- END OF PHASE 1 DEBUGGING ---
53
 
54
  # --- ORIGINAL LOGIC (Commented out for Phase 1) ---
55
- # ... (original logic would eventually return: new_ui_components, current_configs_state_dict)
 
 
 
 
 
56
  # --- END OF ORIGINAL LOGIC ---
57
 
58
 
@@ -66,7 +75,7 @@ async def handle_script_processing(
66
  global_instructions: str,
67
  progress=gr.Progress(track_tqdm=True)
68
  ):
69
- # ... (content of this function remains unchanged from the previous correct version) ...
70
  if not openai_api_key or not async_openai_client:
71
  return None, None, "Error: OpenAI API Key or client is not configured."
72
  if not dialogue_script.strip():
@@ -202,23 +211,30 @@ def handle_calculate_cost(dialogue_script: str, tts_model: str):
202
  except Exception as e:
203
  return f"An unexpected error occurred during cost calculation: {str(e)}"
204
 
205
- def update_model_controls_visibility(selected_model: str, script_text_for_refresh: str, current_speaker_configs_for_refresh: dict, speaker_configs_state_comp: gr.State):
 
 
 
 
 
 
 
 
206
  """Updates visibility of global controls and refreshes per-speaker UI when TTS model changes."""
207
  print(f"Model changed to: {selected_model}. Refreshing dynamic UI and controls.")
208
  try:
209
- # load_refresh_per_speaker_ui returns (list_of_components, updated_state_dict)
210
- dynamic_ui_components_list, updated_state_dict = load_refresh_per_speaker_ui(
211
- script_text_for_refresh, current_speaker_configs_for_refresh, selected_model, speaker_configs_state_comp
212
  )
213
- # The list of components is passed directly for the Column output.
214
- # Gradio should handle replacing children of 'dynamic_speaker_ui_area' (a gr.Column)
215
- # with this new list of components.
216
 
217
  except Exception as e:
218
- print(f"Error in load_refresh_per_speaker_ui called from model_controls_visibility: {e}")
219
  error_markdown = gr.Markdown(f"Error refreshing per-speaker UI: {e}")
220
- dynamic_ui_components_list = [error_markdown] # Fallback to an error message list
221
- updated_state_dict = current_speaker_configs_for_refresh
222
 
223
  is_tts1_family = selected_model in ["tts-1", "tts-1-hd"]
224
  is_gpt_mini_tts = selected_model == "gpt-4o-mini-tts"
@@ -226,11 +242,10 @@ def update_model_controls_visibility(selected_model: str, script_text_for_refres
226
  return (
227
  gr.update(visible=is_tts1_family, interactive=is_tts1_family),
228
  gr.update(visible=is_gpt_mini_tts, interactive=is_gpt_mini_tts),
229
- dynamic_ui_components_list, # Pass the list of components directly
230
  updated_state_dict
231
  )
232
 
233
-
234
  def update_speaker_config_method_visibility(method: str):
235
  # ... (no change) ...
236
  is_single = (method == "Single Voice (Global)")
 
1
  import gradio as gr
2
+ # ... (other imports remain the same) ...
3
  import os
4
  import asyncio
5
  import tempfile
 
44
  return current_configs_state_dict
45
 
46
 
47
+ def load_refresh_per_speaker_ui_core(script_text: str, current_configs_state_dict: dict, tts_model: str, speaker_configs_state_component: gr.State):
48
+ """
49
+ Core logic for generating per-speaker UI components.
50
+ Returns: (list_of_components_for_column, updated_state_dict)
51
+ """
52
  # --- START OF PHASE 1 DEBUGGING (returns list of components directly) ---
53
+ print("DEBUG: load_refresh_per_speaker_ui_core CALLED - Phase 1: HARDCODED RETURN (direct list for Column)")
54
+ debug_markdown = gr.Markdown("## !! Dynamic Area Test Content (Button Click Path) !!")
 
55
  return [debug_markdown], {}
56
  # --- END OF PHASE 1 DEBUGGING ---
57
 
58
  # --- ORIGINAL LOGIC (Commented out for Phase 1) ---
59
+ # print(f"load_refresh_per_speaker_ui_core CALLED. TTS Model: {tts_model}")
60
+ # unique_speakers = get_speakers_from_script(script_text)
61
+ # new_ui_components = []
62
+ # # ... (rest of original logic from previous load_refresh_per_speaker_ui) ...
63
+ # # Make sure this original logic path would also return:
64
+ # # return new_ui_components, current_configs_state_dict
65
  # --- END OF ORIGINAL LOGIC ---
66
 
67
 
 
75
  global_instructions: str,
76
  progress=gr.Progress(track_tqdm=True)
77
  ):
78
+ # ... (content of this function remains unchanged) ...
79
  if not openai_api_key or not async_openai_client:
80
  return None, None, "Error: OpenAI API Key or client is not configured."
81
  if not dialogue_script.strip():
 
211
  except Exception as e:
212
  return f"An unexpected error occurred during cost calculation: {str(e)}"
213
 
214
+ # Wrapper for the "Load/Refresh Per-Speaker UI Button" click
215
+ def handle_load_refresh_button_click(script_text: str, current_configs_state_dict: dict, tts_model: str, speaker_configs_state_comp: gr.State):
216
+ components_list, new_state_dict = load_refresh_per_speaker_ui_core(
217
+ script_text, current_configs_state_dict, tts_model, speaker_configs_state_comp
218
+ )
219
+ # Return gr.update for the column, and the raw state dict for the gr.State component
220
+ return gr.update(children=components_list), new_state_dict
221
+
222
+ def update_model_controls_visibility(selected_model: str, script_text_for_refresh: str, current_configs_state_dict: dict, speaker_configs_state_comp: gr.State):
223
  """Updates visibility of global controls and refreshes per-speaker UI when TTS model changes."""
224
  print(f"Model changed to: {selected_model}. Refreshing dynamic UI and controls.")
225
  try:
226
+ # load_refresh_per_speaker_ui_core returns (list_of_components, updated_state_dict)
227
+ dynamic_ui_components_list, updated_state_dict = load_refresh_per_speaker_ui_core(
228
+ script_text_for_refresh, current_configs_state_dict, selected_model, speaker_configs_state_comp
229
  )
230
+ # Wrap the list of components in gr.update(children=...) here
231
+ dynamic_ui_update_for_column = gr.update(children=dynamic_ui_components_list)
 
232
 
233
  except Exception as e:
234
+ print(f"Error in load_refresh_per_speaker_ui_core called from model_controls_visibility: {e}")
235
  error_markdown = gr.Markdown(f"Error refreshing per-speaker UI: {e}")
236
+ dynamic_ui_update_for_column = gr.update(children=[error_markdown])
237
+ updated_state_dict = current_configs_state_dict
238
 
239
  is_tts1_family = selected_model in ["tts-1", "tts-1-hd"]
240
  is_gpt_mini_tts = selected_model == "gpt-4o-mini-tts"
 
242
  return (
243
  gr.update(visible=is_tts1_family, interactive=is_tts1_family),
244
  gr.update(visible=is_gpt_mini_tts, interactive=is_gpt_mini_tts),
245
+ dynamic_ui_update_for_column,
246
  updated_state_dict
247
  )
248
 
 
249
  def update_speaker_config_method_visibility(method: str):
250
  # ... (no change) ...
251
  is_single = (method == "Single Voice (Global)")