Spaces:
Running
Running
File size: 5,107 Bytes
3ce989d 7d79ca4 adecb62 7d79ca4 3ce989d 7d79ca4 adecb62 3ce989d adecb62 3ce989d a807c4d 7d79ca4 96154e7 a807c4d 3ce989d 7d79ca4 3ce989d 7d79ca4 3ce989d adecb62 3ce989d adecb62 7d79ca4 adecb62 7d79ca4 adecb62 3ce989d 7d79ca4 3ce989d adecb62 7d79ca4 3ce989d adecb62 7d79ca4 adecb62 3ce989d 96154e7 3ce989d 96154e7 3ce989d 96154e7 3ce989d adecb62 96154e7 7d79ca4 adecb62 7d79ca4 3ce989d 96154e7 adecb62 3ce989d 7d79ca4 3ce989d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 |
"""
app.py
This file defines the Gradio user interface for interacting with the Anthropic API, Hume TTS API, and ElevenLabs TTS API.
Users can input prompts, which are processed to generate text using the Claude model via the Anthropic API.
The generated text is then converted to audio using both Hume and ElevenLabs TTS APIs, allowing playback in the Gradio UI.
Key Features:
- Gradio interface for user interaction.
- Input validation via prompt length constraints.
- Integration with the Anthropic, Hume, and ElevenLabs APIs.
- Playback support for TTS audio responses.
Functions:
- process_prompt: Handles user input, calls the Anthropic and Hume APIs, and returns generated text and audio.
- build_gradio_interface: Constructs the Gradio Blocks-based interface.
"""
# Third-Party Library Imports
import gradio as gr
# Local Application Imports
from src.config import logger
from src.integrations import generate_text_with_claude, text_to_speech_with_hume, text_to_speech_with_elevenlabs
from src.sample_prompts import SAMPLE_PROMPTS
from src.utils import truncate_text, validate_prompt_length
# Constants
PROMPT_MIN_LENGTH: int = 10
PROMPT_MAX_LENGTH: int = 500
def process_prompt(prompt: str) -> str:
"""
Process the user prompt and generate text using the Claude API.
Then convert the generated text to speech using both Hume and ElevenLabs TTS APIs.
Args:
prompt (str): The user's input prompt.
Returns:
tuple: The generated text and audio data from both Hume and ElevenLabs.
"""
logger.debug(f"Entering process_prompt with prompt: {prompt}")
try:
# Validate prompt length before processing
validate_prompt_length(prompt, PROMPT_MAX_LENGTH, PROMPT_MIN_LENGTH)
# Generate text with Claude API
generated_text = generate_text_with_claude(prompt)
logger.debug(f"Generated text: {generated_text}")
# Convert text to speech with Hume TTS API
hume_audio = text_to_speech_with_hume(prompt, generated_text)
logger.debug(f"Hume audio data: {len(hume_audio)} bytes")
# Convert text to speech with ElevenLabs TTS API
elevenlabs_audio = text_to_speech_with_elevenlabs(generated_text)
logger.debug(f"ElevenLabs audio data: {len(elevenlabs_audio)} bytes")
logger.info("Successfully processed prompt.")
return generated_text, hume_audio, elevenlabs_audio
except ValueError as ve:
logger.warning(f"Validation error: {ve}")
return str(ve), None, None # Return validation error directly to the UI
except Exception as e:
logger.error(f"Unexpected error during processing: {e}")
return "An unexpected error occurred. Please try again.", None, None
def build_gradio_interface() -> gr.Blocks:
"""
Build the Gradio user interface.
Returns:
gr.Blocks: The Gradio Blocks object representing the interface.
"""
with gr.Blocks() as demo:
gr.Markdown("# TTS Arena")
gr.Markdown(
"Generate text from a prompt using **Claude by Anthropic**, "
"and listen to the generated text-to-speech using **Hume TTS API** "
"and **ElevenLabs TTS API** for comparison."
)
with gr.Row():
# Dropdown for predefined prompts
sample_prompt_dropdown = gr.Dropdown(
choices=list(SAMPLE_PROMPTS.keys()),
label="Choose a Sample Prompt (or enter your own below)",
interactive=True
)
with gr.Row():
# Custom prompt input
prompt_input = gr.Textbox(
label="Enter your custom prompt",
placeholder="Or type your own custom prompt here...",
lines=2,
)
with gr.Row():
generate_button = gr.Button("Generate")
# Display the generated text and audio side by side
with gr.Row():
output_text = gr.Textbox(
label="Generated Text",
interactive=False,
lines=16,
max_lines=24,
scale=2,
)
with gr.Column(scale=1):
hume_audio_output = gr.Audio(label="Hume TTS Audio", type="filepath")
elevenlabs_audio_output = gr.Audio(label="ElevenLabs TTS Audio", type="filepath")
# Auto-fill the text input when a sample is selected
sample_prompt_dropdown.change(
fn=lambda choice: SAMPLE_PROMPTS[choice] if choice else "",
inputs=[sample_prompt_dropdown],
outputs=[prompt_input],
)
# Attach the validation, text generation, and TTS processing logic
generate_button.click(
fn=process_prompt,
inputs=prompt_input,
outputs=[output_text, hume_audio_output, elevenlabs_audio_output],
)
logger.debug("Gradio interface built successfully")
return demo
if __name__ == "__main__":
logger.info("Launching TTS Arena Gradio app...")
demo = build_gradio_interface()
demo.launch() |