zach committed on
Commit
f5237ec
·
1 Parent(s): 4ea25cd

Cleans up code in app.py

Browse files
Files changed (1) hide show
  1. src/app.py +65 -70
src/app.py CHANGED
@@ -1,13 +1,16 @@
1
  """
2
  app.py
3
 
4
- This file defines the Gradio user interface for interacting with the Anthropic API, Hume TTS API, and ElevenLabs TTS API.
5
- Users can input prompts, which are processed to generate text using the Claude model via the Anthropic API.
6
- The generated text is then converted to audio using both Hume and ElevenLabs TTS APIs, allowing playback in the Gradio UI.
 
 
7
  """
8
 
9
  # Standard Library Imports
10
  from concurrent.futures import ThreadPoolExecutor
 
11
  import random
12
  # Third-Party Library Imports
13
  import gradio as gr
@@ -20,15 +23,14 @@ from src.utils import truncate_text, validate_prompt_length
20
 
21
  def process_prompt(prompt: str):
22
  """
23
- Processes the user input by generating text using Claude API, then converting
24
- the generated text to speech using both Hume and ElevenLabs TTS APIs.
25
 
26
  Args:
27
- prompt (str): The user's input prompt.
28
 
29
  Returns:
30
- tuple: Generated text, two audio paths (Hume & ElevenLabs), and a mapping
31
- of audio options to their respective TTS providers.
32
  """
33
  logger.info(f'Processing prompt: {truncate_text(prompt, max_length=100)}')
34
 
@@ -38,80 +40,75 @@ def process_prompt(prompt: str):
38
 
39
  # Generate text
40
  generated_text = generate_text_with_claude(prompt)
41
- logger.info(f'Generated text successfully (length={len(generated_text)} characters).')
42
 
43
- # Run TTS generation in parallel
44
  with ThreadPoolExecutor(max_workers=2) as executor:
45
- hume_future = executor.submit(text_to_speech_with_hume, prompt, generated_text)
46
- elevenlabs_future = executor.submit(text_to_speech_with_elevenlabs, generated_text)
47
-
48
- # Retrieve results
49
- hume_audio = hume_future.result()
50
- elevenlabs_audio = elevenlabs_future.result()
51
 
52
  logger.info(
53
- f'TTS audio generated: Hume={len(hume_audio)} bytes, '
54
  f'ElevenLabs={len(elevenlabs_audio)} bytes'
55
  )
56
 
57
- # Randomly assign audio options
58
- audio_options = [
59
- (hume_audio, 'Hume TTS'),
60
- (elevenlabs_audio, 'ElevenLabs TTS'),
61
- ]
62
- random.shuffle(audio_options)
63
 
64
- option1_audio, option1_provider = audio_options[0]
65
- option2_audio, option2_provider = audio_options[1]
66
-
67
- return generated_text, option1_audio, option2_audio, {
68
- 'Option 1': option1_provider,
69
- 'Option 2': option2_provider,
70
- }
71
 
72
  except ValueError as ve:
73
  logger.warning(f'Validation error: {ve}')
74
  return str(ve), None, None, {}
75
 
76
  except Exception as e:
77
- logger.error(f'Unexpected error during processing: {e}')
78
- return 'An unexpected error occurred. Please try again.', None, None, {}
79
 
80
 
81
  def run_process_prompt(prompt: str):
82
  """
83
- Handles the UI state transitions while processing a prompt.
84
 
85
  Args:
86
- prompt (str): The user's input prompt.
87
 
88
  Yields:
89
- tuple: Updates to the UI elements in three stages:
90
- 1. Disabling UI and clearing previous outputs.
91
- 2. Displaying generated content.
92
- 3. Re-enabling UI after generation completes.
93
  """
94
- # Stage 1: Disable UI and clear previous outputs
95
  yield (
96
- gr.update(interactive=False), # Disable Generate Button
97
- gr.update(value=None), # Clear generated text
98
- gr.update(value=None), # Clear Option 1 audio
99
- gr.update(value=None), # Clear Option 2 audio
100
- gr.update(value=None), # Clear option mapping
101
- None, # Reset Option 2 audio state
102
  )
103
 
104
  # Process the prompt
105
  generated_text, option1_audio, option2_audio, option_mapping = process_prompt(prompt)
106
 
107
- # Stage 2: Display generated text and first audio (autoplay)
108
  yield (
109
- gr.update(interactive=True), # Enable Generate Button
110
- gr.update(value=generated_text), # Show generated text
111
- gr.update(value=option1_audio, autoplay=True), # Set Option 1 audio
112
- gr.update(value=option2_audio), # Set Option 2 audio
113
- gr.update(value=option_mapping), # Store option mapping
114
- option2_audio, # Store Option 2 audio
115
  )
116
 
117
 
@@ -120,39 +117,37 @@ def build_gradio_interface() -> gr.Blocks:
120
  Constructs the Gradio user interface.
121
 
122
  Returns:
123
- gr.Blocks: The Gradio Blocks-based UI.
124
  """
125
  with gr.Blocks() as demo:
126
- # UI title & instructions
127
  gr.Markdown('# TTS Arena')
128
  gr.Markdown(
129
- 'Generate text from a prompt using **Claude by Anthropic**, '
130
- 'and compare text-to-speech outputs from **Hume TTS API** and **ElevenLabs TTS API**.'
131
  )
132
 
133
- # Prompt selection
134
  with gr.Row():
135
  sample_prompt_dropdown = gr.Dropdown(
136
  choices=list(SAMPLE_PROMPTS.keys()),
137
- label='Choose a sample prompt (or enter your own below)',
138
  value=None,
139
  interactive=True,
140
  )
141
 
142
- # Prompt input
143
  with gr.Row():
144
  prompt_input = gr.Textbox(
145
  label='Enter your prompt',
146
- placeholder='Or type your own prompt here...',
147
  lines=2,
148
- max_lines=2
149
  )
150
 
151
- # Generate button
152
- with gr.Row():
153
- generate_button = gr.Button('Generate')
154
 
155
- # Output section
156
  with gr.Column():
157
  output_text = gr.Textbox(
158
  label='Generated Text',
@@ -163,7 +158,7 @@ def build_gradio_interface() -> gr.Blocks:
163
 
164
  with gr.Row():
165
  option1_audio_player = gr.Audio(label='Option 1', type='filepath', interactive=False)
166
- option2_audio_player = gr.Audio(label='Option 2', type='filepath', interactive=False)
167
 
168
  # UI state components
169
  option_mapping_state = gr.State()
@@ -189,13 +184,13 @@ def build_gradio_interface() -> gr.Blocks:
189
  ],
190
  )
191
 
192
- # Auto-play second audio after first completes
193
  option1_audio_player.stop(
194
- fn=lambda _: gr.update(value=None), # Reset first audio before playing second
195
- inputs=[option1_audio_player],
196
  outputs=[option2_audio_player],
197
  ).then(
198
- fn=lambda option2_audio: gr.update(value=option2_audio, autoplay=True),
199
  inputs=[option2_audio_state],
200
  outputs=[option2_audio_player],
201
  )
 
1
  """
2
  app.py
3
 
4
+ Gradio UI for interacting with the Anthropic API, Hume TTS API, and ElevenLabs TTS API.
5
+
6
+ Users enter a prompt, which is processed using Claude by Anthropic to generate text.
7
+ The text is then converted into speech using both Hume and ElevenLabs TTS APIs.
8
+ Users can compare the outputs in an interactive UI.
9
  """
10
 
11
  # Standard Library Imports
12
  from concurrent.futures import ThreadPoolExecutor
13
+ from functools import partial
14
  import random
15
  # Third-Party Library Imports
16
  import gradio as gr
 
23
 
24
  def process_prompt(prompt: str):
25
  """
26
+ Generates text from Claude API and converts it to speech using Hume and ElevenLabs.
 
27
 
28
  Args:
29
+ prompt (str): User-provided text prompt.
30
 
31
  Returns:
32
+ tuple: Generated text, two audio file paths (Hume & ElevenLabs), and
33
+ a dictionary mapping audio options to providers.
34
  """
35
  logger.info(f'Processing prompt: {truncate_text(prompt, max_length=100)}')
36
 
 
40
 
41
  # Generate text
42
  generated_text = generate_text_with_claude(prompt)
43
+ logger.info(f'Generated text ({len(generated_text)} characters).')
44
 
45
+ # Generate TTS output in parallel
46
  with ThreadPoolExecutor(max_workers=2) as executor:
47
+ hume_audio, elevenlabs_audio = executor.map(
48
+ lambda func: func(),
49
+ [partial(text_to_speech_with_hume, prompt, generated_text),
50
+ partial(text_to_speech_with_elevenlabs, generated_text)]
51
+ )
 
52
 
53
  logger.info(
54
+ f'TTS generated: Hume={len(hume_audio)} bytes, '
55
  f'ElevenLabs={len(elevenlabs_audio)} bytes'
56
  )
57
 
58
+ # Randomize audio order
59
+ options = [(hume_audio, 'Hume TTS'), (elevenlabs_audio, 'ElevenLabs TTS')]
60
+ random.shuffle(options)
 
 
 
61
 
62
+ return (
63
+ generated_text,
64
+ options[0][0], # Option 1 audio
65
+ options[1][0], # Option 2 audio
66
+ {'Option 1': options[0][1], 'Option 2': options[1][1]}, # Mapping
67
+ )
 
68
 
69
  except ValueError as ve:
70
  logger.warning(f'Validation error: {ve}')
71
  return str(ve), None, None, {}
72
 
73
  except Exception as e:
74
+ logger.error(f'Unexpected error: {e}')
75
+ return 'An error occurred. Please try again.', None, None, {}
76
 
77
 
78
def run_process_prompt(prompt: str):
    """
    Manages UI state while processing a prompt.

    Args:
        prompt (str): User input prompt.

    Yields:
        tuple: UI state updates in two stages:
            1. Disables the Generate button and clears previous outputs.
            2. Displays the generated content and re-enables the button.
    """
    # Stage 1: disable the Generate button and clear previous outputs so
    # stale text/audio from the last run never shows alongside new results.
    yield (
        gr.update(interactive=False),  # Disable Generate button
        gr.update(value=None),         # Clear generated text
        gr.update(value=None),         # Clear Option 1 audio
        gr.update(value=None),         # Clear Option 2 audio
        gr.update(value=None),         # Clear option mapping
        None,                          # Reset stored Option 2 audio state
    )

    # Process the prompt (text generation + both TTS conversions).
    generated_text, option1_audio, option2_audio, option_mapping = process_prompt(prompt)

    # Stage 2: display the generated text, autoplay the first audio option,
    # and re-enable the Generate button.
    yield (
        gr.update(interactive=True),                    # Re-enable Generate button
        gr.update(value=generated_text),                # Show generated text
        gr.update(value=option1_audio, autoplay=True),  # Set + autoplay Option 1
        gr.update(value=option2_audio),                 # Set Option 2 (played later)
        gr.update(value=option_mapping),                # Store option -> provider map
        option2_audio,                                  # Persist Option 2 audio in state
    )
113
 
114
 
 
117
  Constructs the Gradio user interface.
118
 
119
  Returns:
120
+ gr.Blocks: The Gradio UI layout.
121
  """
122
  with gr.Blocks() as demo:
123
+ # Title and instructions
124
  gr.Markdown('# TTS Arena')
125
  gr.Markdown(
126
+ 'Generate text using **Claude by Anthropic**, then compare text-to-speech outputs '
127
+ 'from **Hume TTS API** and **ElevenLabs TTS API**.'
128
  )
129
 
130
+ # Input: Sample prompt selection & textbox
131
  with gr.Row():
132
  sample_prompt_dropdown = gr.Dropdown(
133
  choices=list(SAMPLE_PROMPTS.keys()),
134
+ label='Choose a sample prompt (or enter your own)',
135
  value=None,
136
  interactive=True,
137
  )
138
 
 
139
  with gr.Row():
140
  prompt_input = gr.Textbox(
141
  label='Enter your prompt',
142
+ placeholder='Or type your own...',
143
  lines=2,
144
+ max_lines=2,
145
  )
146
 
147
+ # Generate Button
148
+ generate_button = gr.Button('Generate')
 
149
 
150
+ # Output: Text & audio
151
  with gr.Column():
152
  output_text = gr.Textbox(
153
  label='Generated Text',
 
158
 
159
  with gr.Row():
160
  option1_audio_player = gr.Audio(label='Option 1', type='filepath', interactive=False)
161
+ option2_audio_player = gr.Audio(label='Option 2', type='filepath', interactive=False)
162
 
163
  # UI state components
164
  option_mapping_state = gr.State()
 
184
  ],
185
  )
186
 
187
+ # Auto-play second audio after first finishes
188
  option1_audio_player.stop(
189
+ fn=lambda _: gr.update(value=None), # Reset first audio before playing second
190
+ inputs=[],
191
  outputs=[option2_audio_player],
192
  ).then(
193
+ fn=lambda audio: gr.update(value=audio, autoplay=True),
194
  inputs=[option2_audio_state],
195
  outputs=[option2_audio_player],
196
  )