zach committed on
Commit 4ea25cd · 1 Parent(s): a3fdb3c

Update UI to disable the generate button during generation and anonymize TTS output options
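The change pairs two UI patterns: a generator-style click handler that yields an interim update (so the Generate button is disabled while generation runs and re-enabled when results arrive), and a random shuffle that hides which TTS provider produced which clip behind neutral 'Option 1' / 'Option 2' labels. The sketch below isolates the same idea outside the app; generate_outputs() and the .mp3 paths are placeholders, not the project's real pipeline or assets.

import random

import gradio as gr


def generate_outputs(prompt: str):
    # Placeholder for the real Claude + dual-TTS pipeline: returns text plus two audio file paths.
    return f"Echo: {prompt}", "provider_a.mp3", "provider_b.mp3"


def handle_generate(prompt: str):
    # Stage 1: disable the button and clear previous outputs while work runs.
    yield (
        gr.update(interactive=False),
        gr.update(value=None),
        gr.update(value=None),
        gr.update(value=None),
        None,
    )

    text, audio_a, audio_b = generate_outputs(prompt)

    # Shuffle so the neutral labels 'Option 1' / 'Option 2' do not reveal the provider.
    options = [(audio_a, "Provider A"), (audio_b, "Provider B")]
    random.shuffle(options)
    mapping = {"Option 1": options[0][1], "Option 2": options[1][1]}

    # Stage 2: re-enable the button and show the shuffled results.
    yield (
        gr.update(interactive=True),
        gr.update(value=text),
        gr.update(value=options[0][0]),
        gr.update(value=options[1][0]),
        mapping,
    )


with gr.Blocks() as demo:
    prompt_box = gr.Textbox(label="Prompt")
    generate = gr.Button("Generate")
    output = gr.Textbox(label="Generated Text", interactive=False)
    option1 = gr.Audio(label="Option 1", type="filepath")
    option2 = gr.Audio(label="Option 2", type="filepath")
    mapping_state = gr.State()

    generate.click(
        fn=handle_generate,
        inputs=[prompt_box],
        outputs=[generate, output, option1, option2, mapping_state],
    )

demo.launch()

Listing the button itself among the click outputs is what lets the first yield disable it and the second yield re-enable it once results are ready.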

Files changed (1)
  1. src/app.py +119 -52
src/app.py CHANGED
@@ -4,133 +4,200 @@ app.py
  This file defines the Gradio user interface for interacting with the Anthropic API, Hume TTS API, and ElevenLabs TTS API.
  Users can input prompts, which are processed to generate text using the Claude model via the Anthropic API.
  The generated text is then converted to audio using both Hume and ElevenLabs TTS APIs, allowing playback in the Gradio UI.
-
- Key Features:
- - Gradio interface for user interaction.
- - Input validation via prompt length constraints.
- - Integration with the Anthropic, Hume, and ElevenLabs APIs.
- - Playback support for TTS audio responses.
-
- Functions:
- - process_prompt: Handles user input, calls the Anthropic and Hume APIs, and returns generated text and audio.
- - build_gradio_interface: Constructs the Gradio Blocks-based interface.
  """
+
  # Standard Library Imports
  from concurrent.futures import ThreadPoolExecutor
+ import random
  # Third-Party Library Imports
  import gradio as gr
  # Local Application Imports
  from src.config import logger
+ from src.constants import PROMPT_MAX_LENGTH, PROMPT_MIN_LENGTH, SAMPLE_PROMPTS
  from src.integrations import generate_text_with_claude, text_to_speech_with_hume, text_to_speech_with_elevenlabs
- from src.sample_prompts import SAMPLE_PROMPTS
  from src.utils import truncate_text, validate_prompt_length


- # Constants
- PROMPT_MIN_LENGTH: int = 10
- PROMPT_MAX_LENGTH: int = 300
-
-
- def process_prompt(prompt: str) -> str:
+ def process_prompt(prompt: str):
      """
-     Process the user prompt and generate text using the Claude API.
-     Then convert the generated text to speech using both Hume and ElevenLabs TTS APIs.
+     Processes the user input by generating text using Claude API, then converting
+     the generated text to speech using both Hume and ElevenLabs TTS APIs.

      Args:
          prompt (str): The user's input prompt.

      Returns:
-         tuple: The generated text and audio data from both Hume and ElevenLabs.
+         tuple: Generated text, two audio paths (Hume & ElevenLabs), and a mapping
+             of audio options to their respective TTS providers.
      """
      logger.info(f'Processing prompt: {truncate_text(prompt, max_length=100)}')
+
      try:
-         # Validate prompt length before processing
+         # Validate prompt length
          validate_prompt_length(prompt, PROMPT_MAX_LENGTH, PROMPT_MIN_LENGTH)

-         # Generate text with Claude API
+         # Generate text
          generated_text = generate_text_with_claude(prompt)
-         logger.info(f'Generated text (length={len(generated_text)} characters).')
+         logger.info(f'Generated text successfully (length={len(generated_text)} characters).')

-         # Run TTS requests in parallel
+         # Run TTS generation in parallel
          with ThreadPoolExecutor(max_workers=2) as executor:
              hume_future = executor.submit(text_to_speech_with_hume, prompt, generated_text)
              elevenlabs_future = executor.submit(text_to_speech_with_elevenlabs, generated_text)

-         # Process TTS results
+         # Retrieve results
          hume_audio = hume_future.result()
          elevenlabs_audio = elevenlabs_future.result()

-         logger.info(f'TTS audio generated successfully: Hume={len(hume_audio)} bytes, ElevenLabs={len(elevenlabs_audio)} bytes')
-         return generated_text, hume_audio, elevenlabs_audio
+         logger.info(
+             f'TTS audio generated: Hume={len(hume_audio)} bytes, '
+             f'ElevenLabs={len(elevenlabs_audio)} bytes'
+         )
+
+         # Randomly assign audio options
+         audio_options = [
+             (hume_audio, 'Hume TTS'),
+             (elevenlabs_audio, 'ElevenLabs TTS'),
+         ]
+         random.shuffle(audio_options)
+
+         option1_audio, option1_provider = audio_options[0]
+         option2_audio, option2_provider = audio_options[1]
+
+         return generated_text, option1_audio, option2_audio, {
+             'Option 1': option1_provider,
+             'Option 2': option2_provider,
+         }

      except ValueError as ve:
          logger.warning(f'Validation error: {ve}')
-         return str(ve), None, None  # Return validation error directly to the UI
+         return str(ve), None, None, {}
+
      except Exception as e:
          logger.error(f'Unexpected error during processing: {e}')
-         return 'An unexpected error occurred. Please try again.', None, None
+         return 'An unexpected error occurred. Please try again.', None, None, {}
+
+
+ def run_process_prompt(prompt: str):
+     """
+     Handles the UI state transitions while processing a prompt.
+
+     Args:
+         prompt (str): The user's input prompt.
+
+     Yields:
+         tuple: Updates to the UI elements in three stages:
+             1. Disabling UI and clearing previous outputs.
+             2. Displaying generated content.
+             3. Re-enabling UI after generation completes.
+     """
+     # Stage 1: Disable UI and clear previous outputs
+     yield (
+         gr.update(interactive=False),  # Disable Generate Button
+         gr.update(value=None),  # Clear generated text
+         gr.update(value=None),  # Clear Option 1 audio
+         gr.update(value=None),  # Clear Option 2 audio
+         gr.update(value=None),  # Clear option mapping
+         None,  # Reset Option 2 audio state
+     )
+
+     # Process the prompt
+     generated_text, option1_audio, option2_audio, option_mapping = process_prompt(prompt)
+
+     # Stage 2: Display generated text and first audio (autoplay)
+     yield (
+         gr.update(interactive=True),  # Enable Generate Button
+         gr.update(value=generated_text),  # Show generated text
+         gr.update(value=option1_audio, autoplay=True),  # Set Option 1 audio
+         gr.update(value=option2_audio),  # Set Option 2 audio
+         gr.update(value=option_mapping),  # Store option mapping
+         option2_audio,  # Store Option 2 audio
+     )


  def build_gradio_interface() -> gr.Blocks:
      """
-     Build the Gradio user interface.
+     Constructs the Gradio user interface.

      Returns:
-         gr.Blocks: The Gradio Blocks object representing the interface.
+         gr.Blocks: The Gradio Blocks-based UI.
      """
      with gr.Blocks() as demo:
-         gr.Markdown("# TTS Arena")
+         # UI title & instructions
+         gr.Markdown('# TTS Arena')
          gr.Markdown(
              'Generate text from a prompt using **Claude by Anthropic**, '
-             'and listen to the generated text-to-speech using **Hume TTS API** '
-             'and **ElevenLabs TTS API** for comparison.'
+             'and compare text-to-speech outputs from **Hume TTS API** and **ElevenLabs TTS API**.'
          )

+         # Prompt selection
          with gr.Row():
-             # Dropdown for predefined prompts
              sample_prompt_dropdown = gr.Dropdown(
                  choices=list(SAMPLE_PROMPTS.keys()),
-                 label='Choose a Sample Prompt (or enter your own below)',
+                 label='Choose a sample prompt (or enter your own below)',
                  value=None,
-                 interactive=True
+                 interactive=True,
              )

+         # Prompt input
          with gr.Row():
-             # Custom prompt input
              prompt_input = gr.Textbox(
                  label='Enter your prompt',
                  placeholder='Or type your own prompt here...',
                  lines=2,
+                 max_lines=2
              )

+         # Generate button
          with gr.Row():
              generate_button = gr.Button('Generate')

-         # Display the generated text and audio side by side
-         with gr.Row():
+         # Output section
+         with gr.Column():
              output_text = gr.Textbox(
                  label='Generated Text',
                  interactive=False,
-                 lines=12,
-                 max_lines=24,
-                 scale=2,
+                 lines=8,
+                 max_lines=12,
              )
-             with gr.Column(scale=1):
-                 hume_audio_output = gr.Audio(label='Hume TTS Audio', type='filepath')
-                 elevenlabs_audio_output = gr.Audio(label='ElevenLabs TTS Audio', type='filepath')

-         # Auto-fill the text input when a sample is selected
+             with gr.Row():
+                 option1_audio_player = gr.Audio(label='Option 1', type='filepath', interactive=False)
+                 option2_audio_player = gr.Audio(label='Option 2', type='filepath', interactive=False)
+
+         # UI state components
+         option_mapping_state = gr.State()
+         option2_audio_state = gr.State()
+
+         # Event handlers
          sample_prompt_dropdown.change(
-             fn=lambda choice: SAMPLE_PROMPTS[choice] if choice else "",
+             fn=lambda choice: SAMPLE_PROMPTS.get(choice, ""),
              inputs=[sample_prompt_dropdown],
              outputs=[prompt_input],
          )

-         # Attach the validation, text generation, and TTS processing logic
          generate_button.click(
-             fn=process_prompt,
-             inputs=prompt_input,
-             outputs=[output_text, hume_audio_output, elevenlabs_audio_output],
+             fn=run_process_prompt,
+             inputs=[prompt_input],
+             outputs=[
+                 generate_button,
+                 output_text,
+                 option1_audio_player,
+                 option2_audio_player,
+                 option_mapping_state,
+                 option2_audio_state,
+             ],
+         )
+
+         # Auto-play second audio after first completes
+         option1_audio_player.stop(
+             fn=lambda _: gr.update(value=None),  # Reset first audio before playing second
+             inputs=[option1_audio_player],
+             outputs=[option2_audio_player],
+         ).then(
+             fn=lambda option2_audio: gr.update(value=option2_audio, autoplay=True),
+             inputs=[option2_audio_state],
+             outputs=[option2_audio_player],
          )

      logger.debug('Gradio interface built successfully')
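The .stop().then() chain added near the end of build_gradio_interface is what produces back-to-back playback: when the first clip stops, the second player is cleared and then reloaded with autoplay from a gr.State. A minimal sketch of just that chain follows; clip_one.mp3 and clip_two.mp3 are placeholder files (in the app, the generate handler fills the state).

import gradio as gr

with gr.Blocks() as demo:
    # Placeholder clips; the real app sets these from its generate handler.
    first = gr.Audio(label="Option 1", type="filepath", value="clip_one.mp3", interactive=False)
    second = gr.Audio(label="Option 2", type="filepath", interactive=False)
    second_path_state = gr.State("clip_two.mp3")  # path of the clip to play next

    # When the first clip stops, clear the second player, then load and autoplay the stored clip.
    first.stop(
        fn=lambda: gr.update(value=None),
        inputs=None,
        outputs=[second],
    ).then(
        fn=lambda path: gr.update(value=path, autoplay=True),
        inputs=[second_path_state],
        outputs=[second],
    )

demo.launch()

Clearing the second player before re-setting it appears intended to force a fresh value assignment so that autoplay reliably fires on the .then() step, mirroring the commit's own handler.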