zach committed on
Commit
7d79ca4
·
1 Parent(s): a375dbf

Update UI to include ElevenLabs audio playback for comparison

Browse files
Files changed (1) hide show
  1. src/app.py +24 -14
src/app.py CHANGED
@@ -1,14 +1,14 @@
1
  """
2
  app.py
3
 
4
- This file defines the Gradio user interface for interacting with the Anthropic API and Hume TTS API.
5
  Users can input prompts, which are processed to generate text using the Claude model via the Anthropic API.
6
- The generated text is then converted to audio using the Hume TTS API, allowing playback in the Gradio UI.
7
 
8
  Key Features:
9
  - Gradio interface for user interaction.
10
  - Input validation via prompt length constraints.
11
- - Integration with the Anthropic and Hume APIs.
12
  - Playback support for TTS audio responses.
13
 
14
  Functions:
@@ -19,8 +19,8 @@ Functions:
19
  # Third-Party Library Imports
20
  import gradio as gr
21
  # Local Application Imports
22
- from src.integrations import generate_text_with_claude, text_to_speech_with_hume
23
  from src.config import logger
 
24
  from src.utils import truncate_text, validate_prompt_length
25
 
26
 
@@ -32,12 +32,13 @@ PROMPT_MAX_LENGTH: int = 500
32
  def process_prompt(prompt: str) -> str:
33
  """
34
  Process the user prompt and generate text using the Claude API.
 
35
 
36
  Args:
37
  prompt (str): The user's input prompt.
38
 
39
  Returns:
40
- str: The generated text or an error message.
41
  """
42
  logger.debug(f"Entering process_prompt with prompt: {prompt}")
43
  try:
@@ -49,18 +50,22 @@ def process_prompt(prompt: str) -> str:
49
  logger.debug(f"Generated text: {generated_text}")
50
 
51
  # Convert text to speech with Hume TTS API
52
- generated_hume_audio = text_to_speech_with_hume(prompt, generated_text)
53
- logger.debug(f"Generated audio data: {len(generated_hume_audio)} bytes")
 
 
 
 
54
 
55
  logger.info("Successfully processed prompt.")
56
- return generated_text, generated_hume_audio
57
 
58
  except ValueError as ve:
59
  logger.warning(f"Validation error: {ve}")
60
- return str(ve), b"" # Return validation error directly to the UI with no audio
61
  except Exception as e:
62
  logger.error(f"Unexpected error during processing: {e}")
63
- return "An unexpected error occurred. Please try again.", b""
64
 
65
 
66
  def build_gradio_interface() -> gr.Blocks:
@@ -74,7 +79,8 @@ def build_gradio_interface() -> gr.Blocks:
74
  gr.Markdown("# TTS Arena")
75
  gr.Markdown(
76
  "Generate text from a prompt using **Claude by Anthropic**, "
77
- "and listen to the generated text-to-speech using **Hume TTS API**."
 
78
  )
79
 
80
  with gr.Row():
@@ -91,15 +97,19 @@ def build_gradio_interface() -> gr.Blocks:
91
  output_text = gr.Textbox(
92
  label="Generated Text",
93
  interactive=False,
94
- lines=10,
 
 
95
  )
96
- audio_output = gr.Audio(label="Generated Audio", type="filepath") # Fix: type="filepath"
 
 
97
 
98
  # Attach the validation, text generation, and TTS processing logic
99
  generate_button.click(
100
  fn=process_prompt,
101
  inputs=prompt_input,
102
- outputs=[output_text, audio_output],
103
  )
104
 
105
  logger.debug("Gradio interface built successfully")
 
1
  """
2
  app.py
3
 
4
+ This file defines the Gradio user interface for interacting with the Anthropic API, Hume TTS API, and ElevenLabs TTS API.
5
  Users can input prompts, which are processed to generate text using the Claude model via the Anthropic API.
6
+ The generated text is then converted to audio using both Hume and ElevenLabs TTS APIs, allowing playback in the Gradio UI.
7
 
8
  Key Features:
9
  - Gradio interface for user interaction.
10
  - Input validation via prompt length constraints.
11
+ - Integration with the Anthropic, Hume, and ElevenLabs APIs.
12
  - Playback support for TTS audio responses.
13
 
14
  Functions:
 
19
  # Third-Party Library Imports
20
  import gradio as gr
21
  # Local Application Imports
 
22
  from src.config import logger
23
+ from src.integrations import generate_text_with_claude, text_to_speech_with_hume, text_to_speech_with_elevenlabs
24
  from src.utils import truncate_text, validate_prompt_length
25
 
26
 
 
32
  def process_prompt(prompt: str) -> str:
33
  """
34
  Process the user prompt and generate text using the Claude API.
35
+ Then convert the generated text to speech using both Hume and ElevenLabs TTS APIs.
36
 
37
  Args:
38
  prompt (str): The user's input prompt.
39
 
40
  Returns:
41
+ tuple: The generated text and audio data from both Hume and ElevenLabs.
42
  """
43
  logger.debug(f"Entering process_prompt with prompt: {prompt}")
44
  try:
 
50
  logger.debug(f"Generated text: {generated_text}")
51
 
52
  # Convert text to speech with Hume TTS API
53
+ hume_audio = text_to_speech_with_hume(prompt, generated_text)
54
+ logger.debug(f"Hume audio data: {len(hume_audio)} bytes")
55
+
56
+ # Convert text to speech with ElevenLabs TTS API
57
+ elevenlabs_audio = text_to_speech_with_elevenlabs(generated_text)
58
+ logger.debug(f"ElevenLabs audio data: {len(elevenlabs_audio)} bytes")
59
 
60
  logger.info("Successfully processed prompt.")
61
+ return generated_text, hume_audio, elevenlabs_audio
62
 
63
  except ValueError as ve:
64
  logger.warning(f"Validation error: {ve}")
65
+ return str(ve), None, None # Return validation error directly to the UI
66
  except Exception as e:
67
  logger.error(f"Unexpected error during processing: {e}")
68
+ return "An unexpected error occurred. Please try again.", None, None
69
 
70
 
71
  def build_gradio_interface() -> gr.Blocks:
 
79
  gr.Markdown("# TTS Arena")
80
  gr.Markdown(
81
  "Generate text from a prompt using **Claude by Anthropic**, "
82
+ "and listen to the generated text-to-speech using **Hume TTS API** "
83
+ "and **ElevenLabs TTS API** for comparison."
84
  )
85
 
86
  with gr.Row():
 
97
  output_text = gr.Textbox(
98
  label="Generated Text",
99
  interactive=False,
100
+ lines=20,
101
+ max_lines=20,
102
+ scale=2,
103
  )
104
+ with gr.Column(scale=1):
105
+ hume_audio_output = gr.Audio(label="Hume TTS Audio", type="filepath")
106
+ elevenlabs_audio_output = gr.Audio(label="ElevenLabs TTS Audio", type="filepath")
107
 
108
  # Attach the validation, text generation, and TTS processing logic
109
  generate_button.click(
110
  fn=process_prompt,
111
  inputs=prompt_input,
112
+ outputs=[output_text, hume_audio_output, elevenlabs_audio_output],
113
  )
114
 
115
  logger.debug("Gradio interface built successfully")