Spaces:
Running
Running
zach
committed on
Commit
·
7d79ca4
1
Parent(s):
a375dbf
Update UI to include ElevenLabs audio playback for comparison
Browse files- src/app.py +24 -14
src/app.py
CHANGED
@@ -1,14 +1,14 @@
|
|
1 |
"""
|
2 |
app.py
|
3 |
|
4 |
-
This file defines the Gradio user interface for interacting with the Anthropic API and Hume TTS API.
|
5 |
Users can input prompts, which are processed to generate text using the Claude model via the Anthropic API.
|
6 |
-
The generated text is then converted to audio using
|
7 |
|
8 |
Key Features:
|
9 |
- Gradio interface for user interaction.
|
10 |
- Input validation via prompt length constraints.
|
11 |
-
- Integration with the Anthropic and Hume APIs.
|
12 |
- Playback support for TTS audio responses.
|
13 |
|
14 |
Functions:
|
@@ -19,8 +19,8 @@ Functions:
|
|
19 |
# Third-Party Library Imports
|
20 |
import gradio as gr
|
21 |
# Local Application Imports
|
22 |
-
from src.integrations import generate_text_with_claude, text_to_speech_with_hume
|
23 |
from src.config import logger
|
|
|
24 |
from src.utils import truncate_text, validate_prompt_length
|
25 |
|
26 |
|
@@ -32,12 +32,13 @@ PROMPT_MAX_LENGTH: int = 500
|
|
32 |
def process_prompt(prompt: str) -> str:
|
33 |
"""
|
34 |
Process the user prompt and generate text using the Claude API.
|
|
|
35 |
|
36 |
Args:
|
37 |
prompt (str): The user's input prompt.
|
38 |
|
39 |
Returns:
|
40 |
-
|
41 |
"""
|
42 |
logger.debug(f"Entering process_prompt with prompt: {prompt}")
|
43 |
try:
|
@@ -49,18 +50,22 @@ def process_prompt(prompt: str) -> str:
|
|
49 |
logger.debug(f"Generated text: {generated_text}")
|
50 |
|
51 |
# Convert text to speech with Hume TTS API
|
52 |
-
|
53 |
-
logger.debug(f"
|
|
|
|
|
|
|
|
|
54 |
|
55 |
logger.info("Successfully processed prompt.")
|
56 |
-
return generated_text,
|
57 |
|
58 |
except ValueError as ve:
|
59 |
logger.warning(f"Validation error: {ve}")
|
60 |
-
return str(ve),
|
61 |
except Exception as e:
|
62 |
logger.error(f"Unexpected error during processing: {e}")
|
63 |
-
return "An unexpected error occurred. Please try again.",
|
64 |
|
65 |
|
66 |
def build_gradio_interface() -> gr.Blocks:
|
@@ -74,7 +79,8 @@ def build_gradio_interface() -> gr.Blocks:
|
|
74 |
gr.Markdown("# TTS Arena")
|
75 |
gr.Markdown(
|
76 |
"Generate text from a prompt using **Claude by Anthropic**, "
|
77 |
-
"and listen to the generated text-to-speech using **Hume TTS API
|
|
|
78 |
)
|
79 |
|
80 |
with gr.Row():
|
@@ -91,15 +97,19 @@ def build_gradio_interface() -> gr.Blocks:
|
|
91 |
output_text = gr.Textbox(
|
92 |
label="Generated Text",
|
93 |
interactive=False,
|
94 |
-
lines=
|
|
|
|
|
95 |
)
|
96 |
-
|
|
|
|
|
97 |
|
98 |
# Attach the validation, text generation, and TTS processing logic
|
99 |
generate_button.click(
|
100 |
fn=process_prompt,
|
101 |
inputs=prompt_input,
|
102 |
-
outputs=[output_text,
|
103 |
)
|
104 |
|
105 |
logger.debug("Gradio interface built successfully")
|
|
|
1 |
"""
|
2 |
app.py
|
3 |
|
4 |
+
This file defines the Gradio user interface for interacting with the Anthropic API, Hume TTS API, and ElevenLabs TTS API.
|
5 |
Users can input prompts, which are processed to generate text using the Claude model via the Anthropic API.
|
6 |
+
The generated text is then converted to audio using both Hume and ElevenLabs TTS APIs, allowing playback in the Gradio UI.
|
7 |
|
8 |
Key Features:
|
9 |
- Gradio interface for user interaction.
|
10 |
- Input validation via prompt length constraints.
|
11 |
+
- Integration with the Anthropic, Hume, and ElevenLabs APIs.
|
12 |
- Playback support for TTS audio responses.
|
13 |
|
14 |
Functions:
|
|
|
19 |
# Third-Party Library Imports
|
20 |
import gradio as gr
|
21 |
# Local Application Imports
|
|
|
22 |
from src.config import logger
|
23 |
+
from src.integrations import generate_text_with_claude, text_to_speech_with_hume, text_to_speech_with_elevenlabs
|
24 |
from src.utils import truncate_text, validate_prompt_length
|
25 |
|
26 |
|
|
|
32 |
def process_prompt(prompt: str) -> str:
|
33 |
"""
|
34 |
Process the user prompt and generate text using the Claude API.
|
35 |
+
Then convert the generated text to speech using both Hume and ElevenLabs TTS APIs.
|
36 |
|
37 |
Args:
|
38 |
prompt (str): The user's input prompt.
|
39 |
|
40 |
Returns:
|
41 |
+
tuple: The generated text and audio data from both Hume and ElevenLabs.
|
42 |
"""
|
43 |
logger.debug(f"Entering process_prompt with prompt: {prompt}")
|
44 |
try:
|
|
|
50 |
logger.debug(f"Generated text: {generated_text}")
|
51 |
|
52 |
# Convert text to speech with Hume TTS API
|
53 |
+
hume_audio = text_to_speech_with_hume(prompt, generated_text)
|
54 |
+
logger.debug(f"Hume audio data: {len(hume_audio)} bytes")
|
55 |
+
|
56 |
+
# Convert text to speech with ElevenLabs TTS API
|
57 |
+
elevenlabs_audio = text_to_speech_with_elevenlabs(generated_text)
|
58 |
+
logger.debug(f"ElevenLabs audio data: {len(elevenlabs_audio)} bytes")
|
59 |
|
60 |
logger.info("Successfully processed prompt.")
|
61 |
+
return generated_text, hume_audio, elevenlabs_audio
|
62 |
|
63 |
except ValueError as ve:
|
64 |
logger.warning(f"Validation error: {ve}")
|
65 |
+
return str(ve), None, None # Return validation error directly to the UI
|
66 |
except Exception as e:
|
67 |
logger.error(f"Unexpected error during processing: {e}")
|
68 |
+
return "An unexpected error occurred. Please try again.", None, None
|
69 |
|
70 |
|
71 |
def build_gradio_interface() -> gr.Blocks:
|
|
|
79 |
gr.Markdown("# TTS Arena")
|
80 |
gr.Markdown(
|
81 |
"Generate text from a prompt using **Claude by Anthropic**, "
|
82 |
+
"and listen to the generated text-to-speech using **Hume TTS API** "
|
83 |
+
"and **ElevenLabs TTS API** for comparison."
|
84 |
)
|
85 |
|
86 |
with gr.Row():
|
|
|
97 |
output_text = gr.Textbox(
|
98 |
label="Generated Text",
|
99 |
interactive=False,
|
100 |
+
lines=20,
|
101 |
+
max_lines=20,
|
102 |
+
scale=2,
|
103 |
)
|
104 |
+
with gr.Column(scale=1):
|
105 |
+
hume_audio_output = gr.Audio(label="Hume TTS Audio", type="filepath")
|
106 |
+
elevenlabs_audio_output = gr.Audio(label="ElevenLabs TTS Audio", type="filepath")
|
107 |
|
108 |
# Attach the validation, text generation, and TTS processing logic
|
109 |
generate_button.click(
|
110 |
fn=process_prompt,
|
111 |
inputs=prompt_input,
|
112 |
+
outputs=[output_text, hume_audio_output, elevenlabs_audio_output],
|
113 |
)
|
114 |
|
115 |
logger.debug("Gradio interface built successfully")
|