zach committed on
Commit
ee8b196
·
1 Parent(s): 5d6d1ef

Simplify UI logic in app.py, add logic for handling empty character description inputs.

Browse files
src/app.py CHANGED
@@ -75,7 +75,7 @@ def generate_text(
75
  raise gr.Error("Failed to generate text. Please try again later.")
76
 
77
 
78
- def text_to_speech(
79
  character_description: str, text: str, generated_text_state: str
80
  ) -> Tuple[gr.update, gr.update, dict, str, ComparisonType, str, str, bool, str, str]:
81
  """
@@ -116,7 +116,9 @@ def text_to_speech(
116
 
117
  # Select 2 TTS providers based on whether the text has been modified.
118
  text_modified = text != generated_text_state
119
- comparison_type, provider_a, provider_b = choose_providers(text_modified)
 
 
120
 
121
  try:
122
  if provider_b == constants.HUME_AI:
@@ -288,25 +290,17 @@ def reset_ui() -> Tuple[gr.update, gr.update, gr.update, gr.update, None, None,
288
  )
289
 
290
 
291
- def build_input_section() -> Tuple[gr.Markdown, gr.Dropdown, gr.Textbox, gr.Button]:
292
- """Builds the input section including instructions, sample character description dropdown, character description input, and generate button"""
293
- instructions = gr.Markdown(
294
- """
295
- 1. **Enter or Generate Text:** Type directly in the text box—or enter a character description and click “Generate Text” to auto-populate. Edit as needed.
296
- 2. **Synthesize Speech:** Click “Synthesize Speech” to generate two audio outputs.
297
- 3. **Listen & Compare:** Play back both audio options to hear the differences.
298
- 4. **Vote for Your Favorite:** Click “Vote for Option A” or “Vote for Option B” to cast your vote.
299
- """
300
- )
301
  sample_character_description_dropdown = gr.Dropdown(
302
  choices=list(constants.SAMPLE_CHARACTER_DESCRIPTIONS.keys()),
303
- label="Choose a sample character description (or enter your own)",
304
  value=None,
305
  interactive=True,
306
  )
307
  character_description_input = gr.Textbox(
308
  label="Character description",
309
- placeholder="Enter your character description to be used to generate text and a novel voice...",
310
  lines=3,
311
  max_lines=8,
312
  max_length=constants.CHARACTER_DESCRIPTION_MAX_LENGTH,
@@ -314,7 +308,6 @@ def build_input_section() -> Tuple[gr.Markdown, gr.Dropdown, gr.Textbox, gr.Butt
314
  )
315
  generate_text_button = gr.Button("Generate text", variant="secondary")
316
  return (
317
- instructions,
318
  sample_character_description_dropdown,
319
  character_description_input,
320
  generate_text_button,
@@ -327,7 +320,7 @@ def build_output_section() -> (
327
  """Builds the output section including generated text, audio players, and vote buttons."""
328
  text_input = gr.Textbox(
329
  label="Text",
330
- placeholder="Enter text to synthesize speech...",
331
  interactive=True,
332
  autoscroll=False,
333
  lines=3,
@@ -370,12 +363,19 @@ def build_gradio_interface() -> gr.Blocks:
370
  fill_width=True,
371
  css_paths="src/assets/styles.css",
372
  ) as demo:
373
- # Title
374
  gr.Markdown("# Expressive TTS Arena")
 
 
 
 
 
 
 
 
375
 
376
  # Build generate text section
377
  (
378
- instructions,
379
  sample_character_description_dropdown,
380
  character_description_input,
381
  generate_text_button,
@@ -393,24 +393,26 @@ def build_gradio_interface() -> gr.Blocks:
393
 
394
  # --- UI state components ---
395
 
396
- # Track text used for speech synthesis
397
- text_state = gr.State("")
398
  # Track character description used for text and voice generation
399
  character_description_state = gr.State("")
400
- # Track comparison type (which set of providers are being compared)
401
- comparison_type_state = gr.State()
 
 
 
 
 
 
 
402
  # Track generation ID for Option A
403
  option_a_generation_id_state = gr.State()
404
  # Track generation ID for Option B
405
  option_b_generation_id_state = gr.State()
406
- # Track whether text that was used was generated or modified/custom
407
- text_modified_state = gr.State()
408
- # Track generated text state
409
- generated_text_state = gr.State("")
410
- # Track generated audio for option B for playing automatically after option 1 audio finishes
411
- option_b_audio_state = gr.State()
412
  # Track option map (option A and option B are randomized)
413
  option_map_state = gr.State()
 
414
  # Track whether the user has voted for an option
415
  vote_submitted_state = gr.State(False)
416
 
@@ -467,7 +469,7 @@ def build_gradio_interface() -> gr.Blocks:
467
  vote_submitted_state,
468
  ],
469
  ).then(
470
- fn=text_to_speech,
471
  inputs=[character_description_input, text_input, generated_text_state],
472
  outputs=[
473
  option_a_audio_player,
 
75
  raise gr.Error("Failed to generate text. Please try again later.")
76
 
77
 
78
+ def synthesize_speech(
79
  character_description: str, text: str, generated_text_state: str
80
  ) -> Tuple[gr.update, gr.update, dict, str, ComparisonType, str, str, bool, str, str]:
81
  """
 
116
 
117
  # Select 2 TTS providers based on whether the text has been modified.
118
  text_modified = text != generated_text_state
119
+ comparison_type, provider_a, provider_b = choose_providers(
120
+ text_modified, character_description
121
+ )
122
 
123
  try:
124
  if provider_b == constants.HUME_AI:
 
290
  )
291
 
292
 
293
+ def build_input_section() -> Tuple[gr.Dropdown, gr.Textbox, gr.Button]:
294
+ """Builds the input section including the sample character description dropdown, character description input, and generate text button"""
 
 
 
 
 
 
 
 
295
  sample_character_description_dropdown = gr.Dropdown(
296
  choices=list(constants.SAMPLE_CHARACTER_DESCRIPTIONS.keys()),
297
+ label="Choose a sample character description",
298
  value=None,
299
  interactive=True,
300
  )
301
  character_description_input = gr.Textbox(
302
  label="Character description",
303
+ placeholder="Enter a character description...",
304
  lines=3,
305
  max_lines=8,
306
  max_length=constants.CHARACTER_DESCRIPTION_MAX_LENGTH,
 
308
  )
309
  generate_text_button = gr.Button("Generate text", variant="secondary")
310
  return (
 
311
  sample_character_description_dropdown,
312
  character_description_input,
313
  generate_text_button,
 
320
  """Builds the output section including generated text, audio players, and vote buttons."""
321
  text_input = gr.Textbox(
322
  label="Text",
323
+ placeholder="Generate or enter text...",
324
  interactive=True,
325
  autoscroll=False,
326
  lines=3,
 
363
  fill_width=True,
364
  css_paths="src/assets/styles.css",
365
  ) as demo:
366
+ # Title & instructions
367
  gr.Markdown("# Expressive TTS Arena")
368
+ gr.Markdown(
369
+ """
370
+ 1. **Enter or Generate Text:** Type directly in the text box—or enter a character description and click “Generate Text” to auto-populate. Edit as needed.
371
+ 2. **Synthesize Speech:** Click “Synthesize Speech” to generate two audio outputs.
372
+ 3. **Listen & Compare:** Play back both audio options to hear the differences.
373
+ 4. **Vote for Your Favorite:** Click “Vote for Option A” or “Vote for Option B” to cast your vote.
374
+ """
375
+ )
376
 
377
  # Build generate text section
378
  (
 
379
  sample_character_description_dropdown,
380
  character_description_input,
381
  generate_text_button,
 
393
 
394
  # --- UI state components ---
395
 
 
 
396
  # Track character description used for text and voice generation
397
  character_description_state = gr.State("")
398
+ # Track text used for speech synthesis
399
+ text_state = gr.State("")
400
+ # Track generated text state
401
+ generated_text_state = gr.State("")
402
+ # Track whether text that was used was generated or modified/custom
403
+ text_modified_state = gr.State()
404
+
405
+ # Track generated audio for option B (for playing automatically after option A audio finishes)
406
+ option_b_audio_state = gr.State()
407
  # Track generation ID for Option A
408
  option_a_generation_id_state = gr.State()
409
  # Track generation ID for Option B
410
  option_b_generation_id_state = gr.State()
411
+ # Track comparison type (which set of providers are being compared)
412
+ comparison_type_state = gr.State()
 
 
 
 
413
  # Track option map (option A and option B are randomized)
414
  option_map_state = gr.State()
415
+
416
  # Track whether the user has voted for an option
417
  vote_submitted_state = gr.State(False)
418
 
 
469
  vote_submitted_state,
470
  ],
471
  ).then(
472
+ fn=synthesize_speech,
473
  inputs=[character_description_input, text_input, generated_text_state],
474
  outputs=[
475
  option_a_audio_player,
src/assets/styles.css CHANGED
@@ -1,3 +1,14 @@
1
- footer {
2
- display:none !important
 
 
 
 
 
 
 
 
 
 
 
3
  }
 
1
+ /* Remove Gradio footer from UI */
2
+ footer.svelte-sar7eh {
3
+ display: none !important;
4
+ }
5
+
6
+ /*
7
+ The copy buttons for Gradio Textinput components use the "button_secondary_text_color"
8
+ theme color which is currently #FFFFFF (white). This makes the copy svg icon white, causing
9
+ it to disappear into the background. Overriding the class color here to ensure it is visible
10
+ in the UI.
11
+ */
12
+ .copy-button {
13
+ color: #7E22CE;
14
  }
src/integrations/hume_api.py CHANGED
@@ -135,7 +135,7 @@ def text_to_speech_with_hume(
135
  raise ValueError("Invalid number of generations specified. Must be 1 or 2.")
136
 
137
  request_body = {
138
- "utterances": [{"text": text, "description": character_description}],
139
  "format": {
140
  "type": hume_config.file_format,
141
  },
@@ -173,7 +173,7 @@ def text_to_speech_with_hume(
173
  if isinstance(e, HTTPError):
174
  if e.response.status_code >= 400 and e.response.status_code < 500:
175
  raise UnretryableHumeError(
176
- message=f'"{e.response.text}"', original_exception=e
177
  ) from e
178
  raise HumeError(message=f"{e}", original_exception=e) from e
179
 
 
135
  raise ValueError("Invalid number of generations specified. Must be 1 or 2.")
136
 
137
  request_body = {
138
+ "utterances": [{"text": text, "description": character_description or None}],
139
  "format": {
140
  "type": hume_config.file_format,
141
  },
 
173
  if isinstance(e, HTTPError):
174
  if e.response.status_code >= 400 and e.response.status_code < 500:
175
  raise UnretryableHumeError(
176
+ message=f"{e.response.text}", original_exception=e
177
  ) from e
178
  raise HumeError(message=f"{e}", original_exception=e) from e
179
 
src/utils.py CHANGED
@@ -205,13 +205,14 @@ def save_base64_audio_to_file(base64_audio: str, filename: str) -> str:
205
 
206
  def choose_providers(
207
  text_modified: bool,
 
208
  ) -> Tuple[ComparisonType, TTSProviderName, TTSProviderName]:
209
  """
210
  Select two TTS providers based on whether the text has been modified.
211
 
212
  The first provider is always set to "Hume AI". For the second provider, the function
213
- selects "Hume AI" if the text has been modified; otherwise, it randomly chooses one from
214
- the TTS_PROVIDERS list.
215
 
216
  Args:
217
  text_modified (bool): A flag indicating whether the text has been modified.
@@ -223,9 +224,13 @@ def choose_providers(
223
  where the first is always "Hume AI" and the second is determined by the text_modified
224
  flag and random selection.
225
  """
 
 
226
  provider_a = constants.HUME_AI
227
  provider_b = (
228
- constants.HUME_AI if text_modified else random.choice(constants.TTS_PROVIDERS)
 
 
229
  )
230
 
231
  match provider_b:
 
205
 
206
  def choose_providers(
207
  text_modified: bool,
208
+ character_description: str,
209
  ) -> Tuple[ComparisonType, TTSProviderName, TTSProviderName]:
210
  """
211
  Select two TTS providers based on whether the text has been modified and whether a character description was provided.
212
 
213
  The first provider is always set to "Hume AI". For the second provider, the function
214
+ selects "Hume AI" if the text has been modified or if a character description was
215
+ not provided; otherwise, it randomly chooses one from the TTS_PROVIDERS list.
216
 
217
  Args:
218
  text_modified (bool): A flag indicating whether the text has been modified.
 
224
  where the first is always "Hume AI" and the second is determined by the text_modified
225
  flag and random selection.
226
  """
227
+ hume_comparison_only = text_modified or not character_description
228
+
229
  provider_a = constants.HUME_AI
230
  provider_b = (
231
+ constants.HUME_AI
232
+ if hume_comparison_only
233
+ else random.choice(constants.TTS_PROVIDERS)
234
  )
235
 
236
  match provider_b: