Spaces:
Running
Running
zach
committed on
Commit
·
4ea25cd
1
Parent(s):
a3fdb3c
Update UI to disable generate button during generation and anonymize tts output options
Browse files — src/app.py (+119, −52)
src/app.py
CHANGED
@@ -4,133 +4,200 @@ app.py
|
|
4 |
This file defines the Gradio user interface for interacting with the Anthropic API, Hume TTS API, and ElevenLabs TTS API.
|
5 |
Users can input prompts, which are processed to generate text using the Claude model via the Anthropic API.
|
6 |
The generated text is then converted to audio using both Hume and ElevenLabs TTS APIs, allowing playback in the Gradio UI.
|
7 |
-
|
8 |
-
Key Features:
|
9 |
-
- Gradio interface for user interaction.
|
10 |
-
- Input validation via prompt length constraints.
|
11 |
-
- Integration with the Anthropic, Hume, and ElevenLabs APIs.
|
12 |
-
- Playback support for TTS audio responses.
|
13 |
-
|
14 |
-
Functions:
|
15 |
-
- process_prompt: Handles user input, calls the Anthropic and Hume APIs, and returns generated text and audio.
|
16 |
-
- build_gradio_interface: Constructs the Gradio Blocks-based interface.
|
17 |
"""
|
|
|
18 |
# Standard Library Imports
|
19 |
from concurrent.futures import ThreadPoolExecutor
|
|
|
20 |
# Third-Party Library Imports
|
21 |
import gradio as gr
|
22 |
# Local Application Imports
|
23 |
from src.config import logger
|
|
|
24 |
from src.integrations import generate_text_with_claude, text_to_speech_with_hume, text_to_speech_with_elevenlabs
|
25 |
-
from src.sample_prompts import SAMPLE_PROMPTS
|
26 |
from src.utils import truncate_text, validate_prompt_length
|
27 |
|
28 |
|
29 |
-
|
30 |
-
PROMPT_MIN_LENGTH: int = 10
|
31 |
-
PROMPT_MAX_LENGTH: int = 300
|
32 |
-
|
33 |
-
|
34 |
-
def process_prompt(prompt: str) -> str:
|
35 |
"""
|
36 |
-
|
37 |
-
|
38 |
|
39 |
Args:
|
40 |
prompt (str): The user's input prompt.
|
41 |
|
42 |
Returns:
|
43 |
-
tuple:
|
|
|
44 |
"""
|
45 |
logger.info(f'Processing prompt: {truncate_text(prompt, max_length=100)}')
|
|
|
46 |
try:
|
47 |
-
# Validate prompt length
|
48 |
validate_prompt_length(prompt, PROMPT_MAX_LENGTH, PROMPT_MIN_LENGTH)
|
49 |
|
50 |
-
# Generate text
|
51 |
generated_text = generate_text_with_claude(prompt)
|
52 |
-
logger.info(f'Generated text (length={len(generated_text)} characters).')
|
53 |
|
54 |
-
# Run TTS
|
55 |
with ThreadPoolExecutor(max_workers=2) as executor:
|
56 |
hume_future = executor.submit(text_to_speech_with_hume, prompt, generated_text)
|
57 |
elevenlabs_future = executor.submit(text_to_speech_with_elevenlabs, generated_text)
|
58 |
|
59 |
-
#
|
60 |
hume_audio = hume_future.result()
|
61 |
elevenlabs_audio = elevenlabs_future.result()
|
62 |
|
63 |
-
logger.info(
|
64 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
65 |
|
66 |
except ValueError as ve:
|
67 |
logger.warning(f'Validation error: {ve}')
|
68 |
-
return str(ve), None, None
|
|
|
69 |
except Exception as e:
|
70 |
logger.error(f'Unexpected error during processing: {e}')
|
71 |
-
return 'An unexpected error occurred. Please try again.', None, None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
72 |
|
73 |
|
74 |
def build_gradio_interface() -> gr.Blocks:
|
75 |
"""
|
76 |
-
|
77 |
|
78 |
Returns:
|
79 |
-
gr.Blocks: The Gradio Blocks
|
80 |
"""
|
81 |
with gr.Blocks() as demo:
|
82 |
-
|
|
|
83 |
gr.Markdown(
|
84 |
'Generate text from a prompt using **Claude by Anthropic**, '
|
85 |
-
'and
|
86 |
-
'and **ElevenLabs TTS API** for comparison.'
|
87 |
)
|
88 |
|
|
|
89 |
with gr.Row():
|
90 |
-
# Dropdown for predefined prompts
|
91 |
sample_prompt_dropdown = gr.Dropdown(
|
92 |
choices=list(SAMPLE_PROMPTS.keys()),
|
93 |
-
label='Choose a
|
94 |
value=None,
|
95 |
-
interactive=True
|
96 |
)
|
97 |
|
|
|
98 |
with gr.Row():
|
99 |
-
# Custom prompt input
|
100 |
prompt_input = gr.Textbox(
|
101 |
label='Enter your prompt',
|
102 |
placeholder='Or type your own prompt here...',
|
103 |
lines=2,
|
|
|
104 |
)
|
105 |
|
|
|
106 |
with gr.Row():
|
107 |
generate_button = gr.Button('Generate')
|
108 |
|
109 |
-
#
|
110 |
-
with gr.
|
111 |
output_text = gr.Textbox(
|
112 |
label='Generated Text',
|
113 |
interactive=False,
|
114 |
-
lines=
|
115 |
-
max_lines=
|
116 |
-
scale=2,
|
117 |
)
|
118 |
-
with gr.Column(scale=1):
|
119 |
-
hume_audio_output = gr.Audio(label='Hume TTS Audio', type='filepath')
|
120 |
-
elevenlabs_audio_output = gr.Audio(label='ElevenLabs TTS Audio', type='filepath')
|
121 |
|
122 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
123 |
sample_prompt_dropdown.change(
|
124 |
-
fn=lambda choice: SAMPLE_PROMPTS
|
125 |
inputs=[sample_prompt_dropdown],
|
126 |
outputs=[prompt_input],
|
127 |
)
|
128 |
|
129 |
-
# Attach the validation, text generation, and TTS processing logic
|
130 |
generate_button.click(
|
131 |
-
fn=
|
132 |
-
inputs=prompt_input,
|
133 |
-
outputs=[
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
134 |
)
|
135 |
|
136 |
logger.debug('Gradio interface built successfully')
|
|
|
4 |
This file defines the Gradio user interface for interacting with the Anthropic API, Hume TTS API, and ElevenLabs TTS API.
|
5 |
Users can input prompts, which are processed to generate text using the Claude model via the Anthropic API.
|
6 |
The generated text is then converted to audio using both Hume and ElevenLabs TTS APIs, allowing playback in the Gradio UI.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
"""
|
8 |
+
|
9 |
# Standard Library Imports
|
10 |
from concurrent.futures import ThreadPoolExecutor
|
11 |
+
import random
|
12 |
# Third-Party Library Imports
|
13 |
import gradio as gr
|
14 |
# Local Application Imports
|
15 |
from src.config import logger
|
16 |
+
from src.constants import PROMPT_MAX_LENGTH, PROMPT_MIN_LENGTH, SAMPLE_PROMPTS
|
17 |
from src.integrations import generate_text_with_claude, text_to_speech_with_hume, text_to_speech_with_elevenlabs
|
|
|
18 |
from src.utils import truncate_text, validate_prompt_length
|
19 |
|
20 |
|
21 |
+
def process_prompt(prompt: str):
    """
    Generate a Claude response for *prompt* and synthesize it with two TTS providers.

    The Hume and ElevenLabs syntheses run concurrently, and their outputs are
    shuffled so the caller cannot tell which provider produced which option.

    Args:
        prompt (str): The user's input prompt.

    Returns:
        tuple: Generated text, two audio outputs (Hume & ElevenLabs, in random
            order), and a dict mapping 'Option 1'/'Option 2' to the provider
            that produced each. On failure, an error-message string followed by
            (None, None, {}).
    """
    logger.info(f'Processing prompt: {truncate_text(prompt, max_length=100)}')

    try:
        # Reject prompts outside the allowed length range before spending API calls.
        validate_prompt_length(prompt, PROMPT_MAX_LENGTH, PROMPT_MIN_LENGTH)

        generated_text = generate_text_with_claude(prompt)
        logger.info(f'Generated text successfully (length={len(generated_text)} characters).')

        # Fan out both TTS requests so neither provider blocks the other.
        with ThreadPoolExecutor(max_workers=2) as executor:
            future_hume = executor.submit(text_to_speech_with_hume, prompt, generated_text)
            future_elevenlabs = executor.submit(text_to_speech_with_elevenlabs, generated_text)
            hume_audio = future_hume.result()
            elevenlabs_audio = future_elevenlabs.result()

        logger.info(
            f'TTS audio generated: Hume={len(hume_audio)} bytes, '
            f'ElevenLabs={len(elevenlabs_audio)} bytes'
        )

        # Shuffle the pair so the UI cannot reveal which provider is which option.
        options = [(hume_audio, 'Hume TTS'), (elevenlabs_audio, 'ElevenLabs TTS')]
        random.shuffle(options)
        (first_audio, first_provider), (second_audio, second_provider) = options

        provider_map = {'Option 1': first_provider, 'Option 2': second_provider}
        return generated_text, first_audio, second_audio, provider_map

    except ValueError as err:
        # Length-validation failures are expected user errors: surface the message.
        logger.warning(f'Validation error: {err}')
        return str(err), None, None, {}

    except Exception as err:
        # Top-level UI boundary: log and return a generic message rather than crash.
        logger.error(f'Unexpected error during processing: {err}')
        return 'An unexpected error occurred. Please try again.', None, None, {}
|
79 |
+
|
80 |
+
|
81 |
+
def run_process_prompt(prompt: str):
    """
    Drive the UI through one generation cycle for *prompt*.

    A generator used as a Gradio event handler: each ``yield`` pushes a batch
    of component updates, letting the interface lock itself while generation is
    in flight and unlock once results arrive.

    Args:
        prompt (str): The user's input prompt.

    Yields:
        tuple: Updates for (generate button, output text, option-1 audio,
            option-2 audio, option-mapping state, option-2 audio state) —
            first to disable the UI and clear stale outputs, then to display
            the results and re-enable the button.
    """
    # Batch 1: lock the Generate button and wipe the previous results.
    yield (
        gr.update(interactive=False),  # lock Generate button
        gr.update(value=None),         # clear generated text
        gr.update(value=None),         # clear Option 1 audio
        gr.update(value=None),         # clear Option 2 audio
        gr.update(value=None),         # clear option mapping
        None,                          # reset Option 2 audio state
    )

    # Do the actual work (text generation + both TTS calls).
    text, audio_one, audio_two, mapping = process_prompt(prompt)

    # Batch 2: show the text, autoplay the first clip, re-enable the button.
    yield (
        gr.update(interactive=True),               # unlock Generate button
        gr.update(value=text),                     # show generated text
        gr.update(value=audio_one, autoplay=True), # play Option 1 audio
        gr.update(value=audio_two),                # load Option 2 audio
        gr.update(value=mapping),                  # remember option -> provider
        audio_two,                                 # stash Option 2 audio for replay
    )
|
116 |
|
117 |
|
118 |
def build_gradio_interface() -> gr.Blocks:
|
119 |
"""
|
120 |
+
Constructs the Gradio user interface.
|
121 |
|
122 |
Returns:
|
123 |
+
gr.Blocks: The Gradio Blocks-based UI.
|
124 |
"""
|
125 |
with gr.Blocks() as demo:
|
126 |
+
# UI title & instructions
|
127 |
+
gr.Markdown('# TTS Arena')
|
128 |
gr.Markdown(
|
129 |
'Generate text from a prompt using **Claude by Anthropic**, '
|
130 |
+
'and compare text-to-speech outputs from **Hume TTS API** and **ElevenLabs TTS API**.'
|
|
|
131 |
)
|
132 |
|
133 |
+
# Prompt selection
|
134 |
with gr.Row():
|
|
|
135 |
sample_prompt_dropdown = gr.Dropdown(
|
136 |
choices=list(SAMPLE_PROMPTS.keys()),
|
137 |
+
label='Choose a sample prompt (or enter your own below)',
|
138 |
value=None,
|
139 |
+
interactive=True,
|
140 |
)
|
141 |
|
142 |
+
# Prompt input
|
143 |
with gr.Row():
|
|
|
144 |
prompt_input = gr.Textbox(
|
145 |
label='Enter your prompt',
|
146 |
placeholder='Or type your own prompt here...',
|
147 |
lines=2,
|
148 |
+
max_lines=2
|
149 |
)
|
150 |
|
151 |
+
# Generate button
|
152 |
with gr.Row():
|
153 |
generate_button = gr.Button('Generate')
|
154 |
|
155 |
+
# Output section
|
156 |
+
with gr.Column():
|
157 |
output_text = gr.Textbox(
|
158 |
label='Generated Text',
|
159 |
interactive=False,
|
160 |
+
lines=8,
|
161 |
+
max_lines=12,
|
|
|
162 |
)
|
|
|
|
|
|
|
163 |
|
164 |
+
with gr.Row():
|
165 |
+
option1_audio_player = gr.Audio(label='Option 1', type='filepath', interactive=False)
|
166 |
+
option2_audio_player = gr.Audio(label='Option 2', type='filepath', interactive=False)
|
167 |
+
|
168 |
+
# UI state components
|
169 |
+
option_mapping_state = gr.State()
|
170 |
+
option2_audio_state = gr.State()
|
171 |
+
|
172 |
+
# Event handlers
|
173 |
sample_prompt_dropdown.change(
|
174 |
+
fn=lambda choice: SAMPLE_PROMPTS.get(choice, ""),
|
175 |
inputs=[sample_prompt_dropdown],
|
176 |
outputs=[prompt_input],
|
177 |
)
|
178 |
|
|
|
179 |
generate_button.click(
|
180 |
+
fn=run_process_prompt,
|
181 |
+
inputs=[prompt_input],
|
182 |
+
outputs=[
|
183 |
+
generate_button,
|
184 |
+
output_text,
|
185 |
+
option1_audio_player,
|
186 |
+
option2_audio_player,
|
187 |
+
option_mapping_state,
|
188 |
+
option2_audio_state,
|
189 |
+
],
|
190 |
+
)
|
191 |
+
|
192 |
+
# Auto-play second audio after first completes
|
193 |
+
option1_audio_player.stop(
|
194 |
+
fn=lambda _: gr.update(value=None), # Reset first audio before playing second
|
195 |
+
inputs=[option1_audio_player],
|
196 |
+
outputs=[option2_audio_player],
|
197 |
+
).then(
|
198 |
+
fn=lambda option2_audio: gr.update(value=option2_audio, autoplay=True),
|
199 |
+
inputs=[option2_audio_state],
|
200 |
+
outputs=[option2_audio_player],
|
201 |
)
|
202 |
|
203 |
logger.debug('Gradio interface built successfully')
|