"""
app.py
Gradio UI for interacting with the Anthropic API, Hume TTS API, and ElevenLabs TTS API.
Users enter a prompt, which is processed using Claude by Anthropic to generate text.
The text is then converted into speech using both Hume and ElevenLabs TTS APIs.
Users can compare the outputs in an interactive UI.
"""

# Standard Library Imports
from concurrent.futures import ThreadPoolExecutor
from functools import partial
import random

# Third-Party Library Imports
import gradio as gr

# Local Application Imports
from src.config import logger
from src.constants import (
OPTION_ONE,
OPTION_TWO,
VOTE_FOR_OPTION_ONE,
VOTE_FOR_OPTION_TWO,
PROMPT_MAX_LENGTH,
PROMPT_MIN_LENGTH,
SAMPLE_PROMPTS
)
from src.integrations import (
generate_text_with_claude,
text_to_speech_with_hume,
text_to_speech_with_elevenlabs
)
from src.theme import CustomTheme
from src.utils import truncate_text, validate_prompt_length


def generate_text(prompt: str):
    """
    Generates text with the Claude API based on the user's prompt.

    Args:
        prompt (str): User-provided text prompt.

    Returns:
        gr.update: An update for the generated text component, or a validation
            error message string if the prompt length is invalid.
    """
logger.info(f'Generating text with prompt: {truncate_text(prompt, max_length=100)}')
try:
# Validate prompt length
validate_prompt_length(prompt, PROMPT_MAX_LENGTH, PROMPT_MIN_LENGTH)
# Generate text
generated_text = generate_text_with_claude(prompt)
logger.info(f'Generated text ({len(generated_text)} characters).')
return gr.update(value=generated_text)
except ValueError as ve:
logger.warning(f'Validation error: {ve}')
return str(ve)


def text_to_speech(prompt: str, generated_text: str):
    """
    Converts the generated text to speech with both the Hume and ElevenLabs TTS APIs in parallel.

    Args:
        prompt (str): The original user prompt (also passed to the Hume TTS integration).
        generated_text (str): The Claude-generated text to synthesize.

    Returns:
        tuple: Updates for both audio players, the option-to-provider mapping,
            and the second option's audio for deferred playback.
    """
    try:
# Generate TTS output in parallel
with ThreadPoolExecutor(max_workers=2) as executor:
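            # executor.map preserves input order: the first result is the Hume audio
            # and the second is ElevenLabs, regardless of which call finishes first.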
hume_audio, elevenlabs_audio = executor.map(
lambda func: func(),
[partial(text_to_speech_with_hume, prompt, generated_text),
partial(text_to_speech_with_elevenlabs, generated_text)]
)
logger.info(
f'TTS generated: Hume={len(hume_audio)} bytes, ElevenLabs={len(elevenlabs_audio)} bytes'
)
# Randomize audio order
options = [(hume_audio, 'Hume AI'), (elevenlabs_audio, 'ElevenLabs')]
random.shuffle(options)
option_1_audio = options[0][0]
option_2_audio = options[1][0]
option_1_provider = options[0][1]
option_2_provider = options[1][1]
options_map = { OPTION_ONE: option_1_provider, OPTION_TWO: option_2_provider }
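        # The provider behind each option is kept in state and only revealed
        # after the user votes, keeping the comparison blind.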
return (
gr.update(value=option_1_audio, autoplay=True), # Set option 1 audio
gr.update(value=option_2_audio), # Option 2 audio
options_map, # Set option mapping state
option_2_audio # Set option 2 audio state
)
    except Exception as e:
        logger.error(f'Unexpected error during TTS generation: {e}')
        # Return one value per output component so the UI resets cleanly on failure
        return None, None, {}, None


def vote(option_mapping: dict, selected_button: str):
    """
    Updates both vote buttons and the generate button to reflect the user's choice.

    Args:
        option_mapping (dict): Maps "Option 1" and "Option 2" to their TTS providers.
        selected_button (str): The label of the button that was clicked.

    Returns:
        tuple[gr.update, gr.update, gr.update]: Updated properties for both vote
            buttons and the generate button.
    """
    if not option_mapping:
        # No updates if the option mapping state is missing
        return gr.update(), gr.update(), gr.update()
# Determine selected option
is_option_1 = selected_button == VOTE_FOR_OPTION_ONE
selected_option, other_option = (OPTION_ONE, OPTION_TWO) if is_option_1 else (OPTION_TWO, OPTION_ONE)
# Get provider names
selected_provider = option_mapping.get(selected_option, "Unknown")
other_provider = option_mapping.get(other_option, "Unknown")
    # Style the selected option as primary with a checkmark and the other as secondary
    selected_update = gr.update(value=f'{selected_provider} ✔', interactive=False, variant='primary')
    other_update = gr.update(value=other_provider, interactive=False, variant='secondary')

    # Return updated vote button states and re-enable the generate button
    return (
        selected_update if is_option_1 else other_update,
        other_update if is_option_1 else selected_update,
        gr.update(interactive=True, variant='primary'),
    )


def build_gradio_interface() -> gr.Blocks:
    """
    Constructs the Gradio user interface.

    Returns:
        gr.Blocks: The Gradio UI layout.
    """
custom_theme = CustomTheme()
with gr.Blocks(title='Expressive TTS Arena', theme=custom_theme) as demo:
# Title
gr.Markdown('# Expressive TTS Arena')
with gr.Column(variant='compact'):
# Instructions
gr.Markdown(
'Generate text using **Claude by Anthropic**, then compare text-to-speech outputs '
'from **Hume AI** and **ElevenLabs**. Listen to both samples and vote for your favorite!'
)
# Sample prompt select
sample_prompt_dropdown = gr.Dropdown(
choices=list(SAMPLE_PROMPTS.keys()),
label='Choose a sample prompt (or enter your own)',
value=None,
interactive=True,
)
# Prompt input
prompt_input = gr.Textbox(
label='Enter your prompt',
placeholder='Or type your own...',
lines=2,
max_lines=2,
show_copy_button=True,
)
# Generate Button
generate_button = gr.Button('Generate', variant='primary')
with gr.Column(variant='compact'):
# Generated text
generated_text = gr.Textbox(
label='Generated Text',
interactive=False,
autoscroll=False,
lines=5,
max_lines=5,
show_copy_button=True,
)
# Audio players
with gr.Row():
option1_audio_player = gr.Audio(label=OPTION_ONE, type='filepath', interactive=False)
option2_audio_player = gr.Audio(label=OPTION_TWO, type='filepath', interactive=False)
# Vote buttons
with gr.Row():
vote_button_1 = gr.Button(VOTE_FOR_OPTION_ONE, interactive=False)
vote_button_2 = gr.Button(VOTE_FOR_OPTION_TWO, interactive=False)
# UI state components
option_mapping_state = gr.State()
option2_audio_state = gr.State()
generated_text_state = gr.State()
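        # gr.State holds per-session values that are passed between event
        # handlers without being rendered in the UI.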
# Event handlers
sample_prompt_dropdown.change(
fn=lambda choice: SAMPLE_PROMPTS.get(choice, ''),
inputs=[sample_prompt_dropdown],
outputs=[prompt_input],
)
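        # Generate button: first reset the UI, then generate text with Claude,
        # then synthesize speech with both TTS providers via chained .then() calls.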
generate_button.click(
            fn=lambda: (
gr.update(interactive=False),
gr.update(interactive=False, value=VOTE_FOR_OPTION_ONE, variant='secondary'),
gr.update(interactive=False, value=VOTE_FOR_OPTION_TWO, variant='secondary'),
None,
None,
),
inputs=[],
outputs=[generate_button, vote_button_1, vote_button_2, option_mapping_state, option2_audio_state]
).then(
fn=generate_text,
inputs=[prompt_input],
outputs=[generated_text]
).then(
fn=text_to_speech,
inputs=[prompt_input, generated_text],
outputs=[option1_audio_player, option2_audio_player, option_mapping_state, option2_audio_state]
)
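        # Both vote buttons share the same handler; Gradio passes the clicked
        # button's current label as the selected_button argument.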
vote_button_1.click(
fn=vote,
inputs=[option_mapping_state, vote_button_1],
outputs=[vote_button_1, vote_button_2, generate_button]
)
vote_button_2.click(
fn=vote,
inputs=[option_mapping_state, vote_button_2],
outputs=[vote_button_1, vote_button_2, generate_button]
)
# Auto-play second audio after first finishes
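        # The player is cleared and then re-assigned so the component reloads
        # and autoplay triggers for the second clip.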
option1_audio_player.stop(
            fn=lambda: gr.update(value=None),
inputs=[],
outputs=[option2_audio_player],
).then(
fn=lambda audio: gr.update(value=audio, autoplay=True),
inputs=[option2_audio_state],
outputs=[option2_audio_player],
)
# Enable voting after 2nd audio option playback finishes
option2_audio_player.stop(
            fn=lambda: (gr.update(interactive=True), gr.update(interactive=True)),
inputs=[],
outputs=[vote_button_1, vote_button_2],
)
logger.debug('Gradio interface built successfully')
return demo


if __name__ == '__main__':
logger.info('Launching TTS Arena Gradio app...')
demo = build_gradio_interface()
    demo.launch()