Spaces:
Running
Running
zach
committed on
Commit
·
e9bcee8
1
Parent(s):
e560bf3
Update Hume integration to randomly pick a voice from a predefined list of top voices
Browse files- src/app.py +19 -19
- src/config.py +8 -8
- src/integrations/anthropic_api.py +21 -21
- src/integrations/elevenlabs_api.py +21 -21
- src/integrations/hume_api.py +39 -20
- src/sample_prompts.py +17 -17
- src/utils.py +12 -12
src/app.py
CHANGED
@@ -42,14 +42,14 @@ def process_prompt(prompt: str) -> str:
|
|
42 |
Returns:
|
43 |
tuple: The generated text and audio data from both Hume and ElevenLabs.
|
44 |
"""
|
45 |
-
logger.info(f
|
46 |
try:
|
47 |
# Validate prompt length before processing
|
48 |
validate_prompt_length(prompt, PROMPT_MAX_LENGTH, PROMPT_MIN_LENGTH)
|
49 |
|
50 |
# Generate text with Claude API
|
51 |
generated_text = generate_text_with_claude(prompt)
|
52 |
-
logger.info(f
|
53 |
|
54 |
# Run TTS requests in parallel
|
55 |
with ThreadPoolExecutor(max_workers=2) as executor:
|
@@ -60,15 +60,15 @@ def process_prompt(prompt: str) -> str:
|
|
60 |
hume_audio = hume_future.result()
|
61 |
elevenlabs_audio = elevenlabs_future.result()
|
62 |
|
63 |
-
logger.info(f
|
64 |
return generated_text, hume_audio, elevenlabs_audio
|
65 |
|
66 |
except ValueError as ve:
|
67 |
-
logger.warning(f
|
68 |
return str(ve), None, None # Return validation error directly to the UI
|
69 |
except Exception as e:
|
70 |
-
logger.error(f
|
71 |
-
return
|
72 |
|
73 |
|
74 |
def build_gradio_interface() -> gr.Blocks:
|
@@ -81,16 +81,16 @@ def build_gradio_interface() -> gr.Blocks:
|
|
81 |
with gr.Blocks() as demo:
|
82 |
gr.Markdown("# TTS Arena")
|
83 |
gr.Markdown(
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
)
|
88 |
|
89 |
with gr.Row():
|
90 |
# Dropdown for predefined prompts
|
91 |
sample_prompt_dropdown = gr.Dropdown(
|
92 |
choices=list(SAMPLE_PROMPTS.keys()),
|
93 |
-
label=
|
94 |
value=None,
|
95 |
interactive=True
|
96 |
)
|
@@ -98,26 +98,26 @@ def build_gradio_interface() -> gr.Blocks:
|
|
98 |
with gr.Row():
|
99 |
# Custom prompt input
|
100 |
prompt_input = gr.Textbox(
|
101 |
-
label=
|
102 |
-
placeholder=
|
103 |
lines=2,
|
104 |
)
|
105 |
|
106 |
with gr.Row():
|
107 |
-
generate_button = gr.Button(
|
108 |
|
109 |
# Display the generated text and audio side by side
|
110 |
with gr.Row():
|
111 |
output_text = gr.Textbox(
|
112 |
-
label=
|
113 |
interactive=False,
|
114 |
lines=12,
|
115 |
max_lines=24,
|
116 |
scale=2,
|
117 |
)
|
118 |
with gr.Column(scale=1):
|
119 |
-
hume_audio_output = gr.Audio(label=
|
120 |
-
elevenlabs_audio_output = gr.Audio(label=
|
121 |
|
122 |
# Auto-fill the text input when a sample is selected
|
123 |
sample_prompt_dropdown.change(
|
@@ -133,11 +133,11 @@ def build_gradio_interface() -> gr.Blocks:
|
|
133 |
outputs=[output_text, hume_audio_output, elevenlabs_audio_output],
|
134 |
)
|
135 |
|
136 |
-
logger.debug(
|
137 |
return demo
|
138 |
|
139 |
|
140 |
-
if __name__ ==
|
141 |
-
logger.info(
|
142 |
demo = build_gradio_interface()
|
143 |
demo.launch()
|
|
|
42 |
Returns:
|
43 |
tuple: The generated text and audio data from both Hume and ElevenLabs.
|
44 |
"""
|
45 |
+
logger.info(f'Processing prompt: {truncate_text(prompt, max_length=100)}')
|
46 |
try:
|
47 |
# Validate prompt length before processing
|
48 |
validate_prompt_length(prompt, PROMPT_MAX_LENGTH, PROMPT_MIN_LENGTH)
|
49 |
|
50 |
# Generate text with Claude API
|
51 |
generated_text = generate_text_with_claude(prompt)
|
52 |
+
logger.info(f'Generated text (length={len(generated_text)} characters).')
|
53 |
|
54 |
# Run TTS requests in parallel
|
55 |
with ThreadPoolExecutor(max_workers=2) as executor:
|
|
|
60 |
hume_audio = hume_future.result()
|
61 |
elevenlabs_audio = elevenlabs_future.result()
|
62 |
|
63 |
+
logger.info(f'TTS audio generated successfully: Hume={len(hume_audio)} bytes, ElevenLabs={len(elevenlabs_audio)} bytes')
|
64 |
return generated_text, hume_audio, elevenlabs_audio
|
65 |
|
66 |
except ValueError as ve:
|
67 |
+
logger.warning(f'Validation error: {ve}')
|
68 |
return str(ve), None, None # Return validation error directly to the UI
|
69 |
except Exception as e:
|
70 |
+
logger.error(f'Unexpected error during processing: {e}')
|
71 |
+
return 'An unexpected error occurred. Please try again.', None, None
|
72 |
|
73 |
|
74 |
def build_gradio_interface() -> gr.Blocks:
|
|
|
81 |
with gr.Blocks() as demo:
|
82 |
gr.Markdown("# TTS Arena")
|
83 |
gr.Markdown(
|
84 |
+
'Generate text from a prompt using **Claude by Anthropic**, '
|
85 |
+
'and listen to the generated text-to-speech using **Hume TTS API** '
|
86 |
+
'and **ElevenLabs TTS API** for comparison.'
|
87 |
)
|
88 |
|
89 |
with gr.Row():
|
90 |
# Dropdown for predefined prompts
|
91 |
sample_prompt_dropdown = gr.Dropdown(
|
92 |
choices=list(SAMPLE_PROMPTS.keys()),
|
93 |
+
label='Choose a Sample Prompt (or enter your own below)',
|
94 |
value=None,
|
95 |
interactive=True
|
96 |
)
|
|
|
98 |
with gr.Row():
|
99 |
# Custom prompt input
|
100 |
prompt_input = gr.Textbox(
|
101 |
+
label='Enter your prompt',
|
102 |
+
placeholder='Or type your own prompt here...',
|
103 |
lines=2,
|
104 |
)
|
105 |
|
106 |
with gr.Row():
|
107 |
+
generate_button = gr.Button('Generate')
|
108 |
|
109 |
# Display the generated text and audio side by side
|
110 |
with gr.Row():
|
111 |
output_text = gr.Textbox(
|
112 |
+
label='Generated Text',
|
113 |
interactive=False,
|
114 |
lines=12,
|
115 |
max_lines=24,
|
116 |
scale=2,
|
117 |
)
|
118 |
with gr.Column(scale=1):
|
119 |
+
hume_audio_output = gr.Audio(label='Hume TTS Audio', type='filepath')
|
120 |
+
elevenlabs_audio_output = gr.Audio(label='ElevenLabs TTS Audio', type='filepath')
|
121 |
|
122 |
# Auto-fill the text input when a sample is selected
|
123 |
sample_prompt_dropdown.change(
|
|
|
133 |
outputs=[output_text, hume_audio_output, elevenlabs_audio_output],
|
134 |
)
|
135 |
|
136 |
+
logger.debug('Gradio interface built successfully')
|
137 |
return demo
|
138 |
|
139 |
|
140 |
+
if __name__ == '__main__':
|
141 |
+
logger.info('Launching TTS Arena Gradio app...')
|
142 |
demo = build_gradio_interface()
|
143 |
demo.launch()
|
src/config.py
CHANGED
@@ -25,10 +25,10 @@ load_dotenv()
|
|
25 |
|
26 |
|
27 |
# Enable debugging mode based on an environment variable
|
28 |
-
debug_raw = os.getenv(
|
29 |
-
if debug_raw not in {
|
30 |
-
print(f
|
31 |
-
DEBUG = debug_raw ==
|
32 |
|
33 |
|
34 |
# Configure the logger
|
@@ -36,8 +36,8 @@ logging.basicConfig(
|
|
36 |
level=logging.DEBUG if DEBUG else logging.INFO,
|
37 |
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
|
38 |
)
|
39 |
-
logger: logging.Logger = logging.getLogger(
|
40 |
-
logger.info(f
|
41 |
|
42 |
|
43 |
# Log environment variables
|
@@ -49,7 +49,7 @@ def log_env_variable(var_name: str, value: str) -> None:
|
|
49 |
var_name (str): The name of the environment variable.
|
50 |
value (str): The value of the environment variable.
|
51 |
"""
|
52 |
-
logger.debug(f
|
53 |
|
54 |
if DEBUG:
|
55 |
-
logger.debug(f
|
|
|
25 |
|
26 |
|
27 |
# Enable debugging mode based on an environment variable
|
28 |
+
debug_raw = os.getenv('DEBUG', 'false').lower()
|
29 |
+
if debug_raw not in {'true', 'false'}:
|
30 |
+
print(f'Warning: Invalid DEBUG value "{debug_raw}". Defaulting to "false".')
|
31 |
+
DEBUG = debug_raw == 'true'
|
32 |
|
33 |
|
34 |
# Configure the logger
|
|
|
36 |
level=logging.DEBUG if DEBUG else logging.INFO,
|
37 |
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
|
38 |
)
|
39 |
+
logger: logging.Logger = logging.getLogger('tts_arena')
|
40 |
+
logger.info(f'Debug mode is {"enabled" if DEBUG else "disabled"}.')
|
41 |
|
42 |
|
43 |
# Log environment variables
|
|
|
49 |
var_name (str): The name of the environment variable.
|
50 |
value (str): The value of the environment variable.
|
51 |
"""
|
52 |
+
logger.debug(f'Environment variable "{var_name}" validated with value: {value}')
|
53 |
|
54 |
if DEBUG:
|
55 |
+
logger.debug(f'DEBUG mode enabled.')
|
src/integrations/anthropic_api.py
CHANGED
@@ -38,8 +38,8 @@ class AnthropicConfig:
|
|
38 |
Immutable configuration for interacting with the Anthropic API.
|
39 |
Includes client initialization for encapsulation.
|
40 |
"""
|
41 |
-
api_key: str = validate_env_var(
|
42 |
-
model: ModelParam =
|
43 |
max_tokens: int = 300 # Max tokens for API response
|
44 |
system_prompt: str = """You are a highly creative and articulate assistant specialized in generating vivid, engaging, and well-written content.
|
45 |
|
@@ -62,13 +62,13 @@ Always keep your responses concise, unless explicitly instructed to elaborate.""
|
|
62 |
def __post_init__(self):
|
63 |
# Validate that required attributes are set
|
64 |
if not self.api_key:
|
65 |
-
raise ValueError(
|
66 |
if not self.model:
|
67 |
-
raise ValueError(
|
68 |
if not self.max_tokens:
|
69 |
-
raise ValueError(
|
70 |
if not self.system_prompt:
|
71 |
-
raise ValueError(
|
72 |
|
73 |
@property
|
74 |
def client(self) -> Anthropic:
|
@@ -119,44 +119,44 @@ def generate_text_with_claude(prompt: str) -> str:
|
|
119 |
>>> generate_text_with_claude("")
|
120 |
"The prompt exceeds the maximum allowed length of 500 characters. Your prompt contains 512 characters."
|
121 |
"""
|
122 |
-
logger.debug(f
|
123 |
|
124 |
try:
|
125 |
response: Message = anthropic_config.client.messages.create(
|
126 |
model=anthropic_config.model,
|
127 |
max_tokens=anthropic_config.max_tokens,
|
128 |
system=anthropic_config.system_prompt,
|
129 |
-
messages=[{
|
130 |
)
|
131 |
-
logger.debug(f
|
132 |
|
133 |
# Validate response content
|
134 |
-
if not hasattr(response,
|
135 |
logger.error("Response is missing 'content'. Response: %s", response)
|
136 |
-
raise AnthropicError(
|
137 |
|
138 |
# Process response content
|
139 |
blocks: Union[List[TextBlock], TextBlock, None] = response.content
|
140 |
|
141 |
if isinstance(blocks, list):
|
142 |
-
result =
|
143 |
-
logger.debug(f
|
144 |
return result
|
145 |
if isinstance(blocks, TextBlock):
|
146 |
-
logger.debug(f
|
147 |
return blocks.text
|
148 |
|
149 |
-
logger.warning(f
|
150 |
-
return str(blocks or
|
151 |
|
152 |
except Exception as e:
|
153 |
-
logger.exception(f
|
154 |
raise AnthropicError(
|
155 |
message=(
|
156 |
-
f
|
157 |
-
f
|
158 |
-
f
|
159 |
-
f
|
160 |
),
|
161 |
original_exception=e,
|
162 |
)
|
|
|
38 |
Immutable configuration for interacting with the Anthropic API.
|
39 |
Includes client initialization for encapsulation.
|
40 |
"""
|
41 |
+
api_key: str = validate_env_var('ANTHROPIC_API_KEY')
|
42 |
+
model: ModelParam = 'claude-3-5-sonnet-latest' # Valid predefined model
|
43 |
max_tokens: int = 300 # Max tokens for API response
|
44 |
system_prompt: str = """You are a highly creative and articulate assistant specialized in generating vivid, engaging, and well-written content.
|
45 |
|
|
|
62 |
def __post_init__(self):
|
63 |
# Validate that required attributes are set
|
64 |
if not self.api_key:
|
65 |
+
raise ValueError('Anthropic API key is not set.')
|
66 |
if not self.model:
|
67 |
+
raise ValueError('Anthropic Model is not set.')
|
68 |
if not self.max_tokens:
|
69 |
+
raise ValueError('Anthropic Max Tokens is not set.')
|
70 |
if not self.system_prompt:
|
71 |
+
raise ValueError('Anthropic System Prompt is not set.')
|
72 |
|
73 |
@property
|
74 |
def client(self) -> Anthropic:
|
|
|
119 |
>>> generate_text_with_claude("")
|
120 |
"The prompt exceeds the maximum allowed length of 500 characters. Your prompt contains 512 characters."
|
121 |
"""
|
122 |
+
logger.debug(f'Generating text with Claude. Prompt length: {len(prompt)} characters.')
|
123 |
|
124 |
try:
|
125 |
response: Message = anthropic_config.client.messages.create(
|
126 |
model=anthropic_config.model,
|
127 |
max_tokens=anthropic_config.max_tokens,
|
128 |
system=anthropic_config.system_prompt,
|
129 |
+
messages=[{'role': 'user', 'content': prompt}],
|
130 |
)
|
131 |
+
logger.debug(f'API response received: {truncate_text(str(response))}')
|
132 |
|
133 |
# Validate response content
|
134 |
+
if not hasattr(response, 'content'):
|
135 |
logger.error("Response is missing 'content'. Response: %s", response)
|
136 |
+
raise AnthropicError('Invalid API response: Missing "content".')
|
137 |
|
138 |
# Process response content
|
139 |
blocks: Union[List[TextBlock], TextBlock, None] = response.content
|
140 |
|
141 |
if isinstance(blocks, list):
|
142 |
+
result = '\n\n'.join(block.text for block in blocks if isinstance(block, TextBlock))
|
143 |
+
logger.debug(f'Processed response from list: {truncate_text(result)}')
|
144 |
return result
|
145 |
if isinstance(blocks, TextBlock):
|
146 |
+
logger.debug(f'Processed response from single TextBlock: {truncate_text(blocks.text)}')
|
147 |
return blocks.text
|
148 |
|
149 |
+
logger.warning(f'Unexpected response type: {type(blocks)}')
|
150 |
+
return str(blocks or 'No content generated.')
|
151 |
|
152 |
except Exception as e:
|
153 |
+
logger.exception(f'Error generating text with Claude: {e}')
|
154 |
raise AnthropicError(
|
155 |
message=(
|
156 |
+
f'Error generating text with Claude: {e}. '
|
157 |
+
f'HTTP Status: {getattr(response, "status", "N/A")}. '
|
158 |
+
f'Prompt (truncated): {truncate_text(prompt)}. '
|
159 |
+
f'Model: {anthropic_config.model}, Max tokens: {anthropic_config.max_tokens}'
|
160 |
),
|
161 |
original_exception=e,
|
162 |
)
|
src/integrations/elevenlabs_api.py
CHANGED
@@ -34,26 +34,26 @@ from src.utils import validate_env_var, truncate_text
|
|
34 |
@dataclass(frozen=True)
|
35 |
class ElevenLabsConfig:
|
36 |
"""Immutable configuration for interacting with the ElevenLabs TTS API."""
|
37 |
-
api_key: str = validate_env_var(
|
38 |
-
model_id: str =
|
39 |
-
output_format: str =
|
40 |
top_voices: list[str] = (
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
)
|
46 |
|
47 |
def __post_init__(self):
|
48 |
# Validate that required attributes are set
|
49 |
if not self.api_key:
|
50 |
-
raise ValueError(
|
51 |
if not self.model_id:
|
52 |
-
raise ValueError(
|
53 |
if not self.output_format:
|
54 |
-
raise ValueError(
|
55 |
if not self.top_voices:
|
56 |
-
raise ValueError(
|
57 |
|
58 |
@property
|
59 |
def client(self) -> ElevenLabs:
|
@@ -103,7 +103,7 @@ def text_to_speech_with_elevenlabs(text: str) -> bytes:
|
|
103 |
Raises:
|
104 |
ElevenLabsException: If there is an error communicating with the ElevenLabs API or processing the response.
|
105 |
"""
|
106 |
-
logger.debug(f
|
107 |
|
108 |
try:
|
109 |
# Generate audio using the ElevenLabs SDK
|
@@ -115,24 +115,24 @@ def text_to_speech_with_elevenlabs(text: str) -> bytes:
|
|
115 |
)
|
116 |
|
117 |
# Ensure the response is an iterator
|
118 |
-
if not hasattr(audio_iterator,
|
119 |
-
logger.error(
|
120 |
-
raise ElevenLabsException(
|
121 |
|
122 |
# Combine chunks into a single bytes object
|
123 |
-
audio = b
|
124 |
|
125 |
# Validate audio
|
126 |
if not audio:
|
127 |
-
logger.error(
|
128 |
-
raise ElevenLabsException(
|
129 |
|
130 |
-
logger.info(f
|
131 |
return audio
|
132 |
|
133 |
except Exception as e:
|
134 |
-
logger.exception(f
|
135 |
raise ElevenLabsException(
|
136 |
-
message=f
|
137 |
original_exception=e,
|
138 |
)
|
|
|
34 |
@dataclass(frozen=True)
|
35 |
class ElevenLabsConfig:
|
36 |
"""Immutable configuration for interacting with the ElevenLabs TTS API."""
|
37 |
+
api_key: str = validate_env_var('ELEVENLABS_API_KEY')
|
38 |
+
model_id: str = 'eleven_multilingual_v2' # ElevenLabs' most emotionally expressive model
|
39 |
+
output_format: str = 'mp3_44100_128' # Output format of the generated audio.
|
40 |
top_voices: list[str] = (
|
41 |
+
'pNInz6obpgDQGcFmaJgB', # Adam
|
42 |
+
'ErXwobaYiN019PkySvjV', # Antoni
|
43 |
+
'21m00Tcm4TlvDq8ikWAM', # Rachel
|
44 |
+
'XrExE9yKIg1WjnnlVkGX', # Matilda
|
45 |
)
|
46 |
|
47 |
def __post_init__(self):
|
48 |
# Validate that required attributes are set
|
49 |
if not self.api_key:
|
50 |
+
raise ValueError('ElevenLabs API key is not set.')
|
51 |
if not self.model_id:
|
52 |
+
raise ValueError('ElevenLabs Model ID is not set.')
|
53 |
if not self.output_format:
|
54 |
+
raise ValueError('ElevenLabs Output Format is not set.')
|
55 |
if not self.top_voices:
|
56 |
+
raise ValueError('ElevenLabs Top Voices are not set.')
|
57 |
|
58 |
@property
|
59 |
def client(self) -> ElevenLabs:
|
|
|
103 |
Raises:
|
104 |
ElevenLabsException: If there is an error communicating with the ElevenLabs API or processing the response.
|
105 |
"""
|
106 |
+
logger.debug(f'Generating speech with ElevenLabs. Text length: {len(text)} characters.')
|
107 |
|
108 |
try:
|
109 |
# Generate audio using the ElevenLabs SDK
|
|
|
115 |
)
|
116 |
|
117 |
# Ensure the response is an iterator
|
118 |
+
if not hasattr(audio_iterator, '__iter__') or not hasattr(audio_iterator, '__next__'):
|
119 |
+
logger.error('Invalid audio iterator response.')
|
120 |
+
raise ElevenLabsException('Invalid audio iterator received from ElevenLabs API.')
|
121 |
|
122 |
# Combine chunks into a single bytes object
|
123 |
+
audio = b''.join(chunk for chunk in audio_iterator)
|
124 |
|
125 |
# Validate audio
|
126 |
if not audio:
|
127 |
+
logger.error('No audio data received from ElevenLabs API.')
|
128 |
+
raise ElevenLabsException('Empty audio data received from ElevenLabs API.')
|
129 |
|
130 |
+
logger.info(f'Received ElevenLabs audio ({len(audio)} bytes).')
|
131 |
return audio
|
132 |
|
133 |
except Exception as e:
|
134 |
+
logger.exception(f'Error generating speech: {e}')
|
135 |
raise ElevenLabsException(
|
136 |
+
message=f'Failed to generate audio with ElevenLabs: {e}',
|
137 |
original_exception=e,
|
138 |
)
|
src/integrations/hume_api.py
CHANGED
@@ -21,7 +21,8 @@ Functions:
|
|
21 |
# Standard Library Imports
|
22 |
from dataclasses import dataclass
|
23 |
import logging
|
24 |
-
|
|
|
25 |
# Third-Party Library Imports
|
26 |
import requests
|
27 |
from tenacity import retry, stop_after_attempt, wait_fixed, before_log, after_log
|
@@ -32,20 +33,38 @@ from src.utils import validate_env_var, truncate_text
|
|
32 |
|
33 |
@dataclass(frozen=True)
|
34 |
class HumeConfig:
|
35 |
-
"""Immutable configuration for interacting with the TTS API."""
|
36 |
-
tts_endpoint_url: str =
|
37 |
-
api_key: str = validate_env_var(
|
38 |
-
|
39 |
audio_format: str = 'wav'
|
40 |
-
headers: dict = None
|
41 |
|
42 |
def __post_init__(self):
|
43 |
-
#
|
44 |
-
|
45 |
-
'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
'Content-Type': 'application/json',
|
47 |
})
|
48 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
|
50 |
class HumeException(Exception):
|
51 |
"""Custom exception for errors related to the Hume TTS API."""
|
@@ -78,11 +97,11 @@ def text_to_speech_with_hume(prompt: str, text: str) -> bytes:
|
|
78 |
Raises:
|
79 |
HumeException: If there is an error communicating with the Hume TTS API.
|
80 |
"""
|
81 |
-
logger.debug(f
|
82 |
|
83 |
request_body = {
|
84 |
-
|
85 |
-
|
86 |
# "voice_description": prompt, # <-- breaking request!?
|
87 |
# "format": hume_config.audio_format, # <-- breaking request!?
|
88 |
}
|
@@ -96,26 +115,26 @@ def text_to_speech_with_hume(prompt: str, text: str) -> bytes:
|
|
96 |
|
97 |
# Validate response
|
98 |
if response.status_code != 200:
|
99 |
-
logger.error(f
|
100 |
-
raise HumeException(f
|
101 |
|
102 |
# Process audio response
|
103 |
-
if response.headers.get(
|
104 |
audio_data = response.content # Raw binary audio data
|
105 |
-
logger.info(f
|
106 |
return audio_data
|
107 |
|
108 |
# Unexpected content type
|
109 |
-
raise HumeException(f
|
110 |
|
111 |
except requests.exceptions.RequestException as e:
|
112 |
-
logger.exception(
|
113 |
raise HumeException(
|
114 |
-
message=f
|
115 |
original_exception=e,
|
116 |
)
|
117 |
except Exception as e:
|
118 |
-
logger.exception(
|
119 |
raise HumeException(
|
120 |
message=f"Unexpected error while processing the Hume TTS response: {e}",
|
121 |
original_exception=e,
|
|
|
21 |
# Standard Library Imports
|
22 |
from dataclasses import dataclass
|
23 |
import logging
|
24 |
+
import random
|
25 |
+
from typing import List, Optional
|
26 |
# Third-Party Library Imports
|
27 |
import requests
|
28 |
from tenacity import retry, stop_after_attempt, wait_fixed, before_log, after_log
|
|
|
33 |
|
34 |
@dataclass(frozen=True)
|
35 |
class HumeConfig:
|
36 |
+
"""Immutable configuration for interacting with the Hume TTS API."""
|
37 |
+
tts_endpoint_url: str = 'https://api.hume.ai/v0/tts'
|
38 |
+
api_key: str = validate_env_var('HUME_API_KEY')
|
39 |
+
voices: List[str] = ('ITO', 'KORA', 'DACHER') # List of available Hume voices
|
40 |
audio_format: str = 'wav'
|
41 |
+
headers: dict = None # Headers for the API requests
|
42 |
|
43 |
def __post_init__(self):
|
44 |
+
# Validate required attributes
|
45 |
+
if not self.api_key:
|
46 |
+
raise ValueError('Hume API key is not set.')
|
47 |
+
if not self.voices:
|
48 |
+
raise ValueError('Hume voices list is empty. Please provide at least one voice.')
|
49 |
+
if not self.audio_format:
|
50 |
+
raise ValueError('Hume audio format is not set.')
|
51 |
+
|
52 |
+
# Set headers dynamically after validation
|
53 |
+
object.__setattr__(self, 'headers', {
|
54 |
+
'X-Hume-Api-Key': f'{self.api_key}',
|
55 |
'Content-Type': 'application/json',
|
56 |
})
|
57 |
|
58 |
+
@property
|
59 |
+
def random_voice(self) -> str:
|
60 |
+
"""
|
61 |
+
Randomly selects a voice from the available voices.
|
62 |
+
|
63 |
+
Returns:
|
64 |
+
str: A randomly chosen voice name.
|
65 |
+
"""
|
66 |
+
return random.choice(self.voices)
|
67 |
+
|
68 |
|
69 |
class HumeException(Exception):
|
70 |
"""Custom exception for errors related to the Hume TTS API."""
|
|
|
97 |
Raises:
|
98 |
HumeException: If there is an error communicating with the Hume TTS API.
|
99 |
"""
|
100 |
+
logger.debug(f'Processing TTS with Hume. Prompt length: {len(prompt)} characters. Text length: {len(text)} characters.')
|
101 |
|
102 |
request_body = {
|
103 |
+
'text': text,
|
104 |
+
'voice': {'name': hume_config.random_voice},
|
105 |
# "voice_description": prompt, # <-- breaking request!?
|
106 |
# "format": hume_config.audio_format, # <-- breaking request!?
|
107 |
}
|
|
|
115 |
|
116 |
# Validate response
|
117 |
if response.status_code != 200:
|
118 |
+
logger.error(f'Hume TTS API Error: {response.status_code} - {response.text[:200]}... (truncated)')
|
119 |
+
raise HumeException(f'Hume TTS API responded with status {response.status_code}: {response.text}')
|
120 |
|
121 |
# Process audio response
|
122 |
+
if response.headers.get('Content-Type', '').startswith('audio/'):
|
123 |
audio_data = response.content # Raw binary audio data
|
124 |
+
logger.info(f'Received audio data from Hume ({len(response.content)} bytes).')
|
125 |
return audio_data
|
126 |
|
127 |
# Unexpected content type
|
128 |
+
raise HumeException(f'Unexpected Content-Type: {response.headers.get("Content-Type", "Unknown")}')
|
129 |
|
130 |
except requests.exceptions.RequestException as e:
|
131 |
+
logger.exception('Request to Hume TTS API failed.')
|
132 |
raise HumeException(
|
133 |
+
message=f'Failed to communicate with Hume TTS API: {e}',
|
134 |
original_exception=e,
|
135 |
)
|
136 |
except Exception as e:
|
137 |
+
logger.exception('Request to Hume TTS API failed.')
|
138 |
raise HumeException(
|
139 |
message=f"Unexpected error while processing the Hume TTS response: {e}",
|
140 |
original_exception=e,
|
src/sample_prompts.py
CHANGED
@@ -6,26 +6,26 @@ These prompts are structured to highlight different aspects of emotional tone, p
|
|
6 |
"""
|
7 |
|
8 |
SAMPLE_PROMPTS = {
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
|
18 |
"🐱 Whimsical Children's Story (Talking Cat)":
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
}
|
|
|
6 |
"""
|
7 |
|
8 |
SAMPLE_PROMPTS = {
|
9 |
+
'🚀 Dramatic Monologue (Stranded Astronaut)':
|
10 |
+
'Write a short dramatic monologue from a lone astronaut stranded on Mars, '
|
11 |
+
'speaking to mission control for the last time. The tone should be reflective, '
|
12 |
+
'filled with awe and resignation as they describe the Martian landscape and their final thoughts.',
|
13 |
|
14 |
+
'📜 Poetic Sonnet (The Passage of Time)':
|
15 |
+
'Compose a sonnet about the passage of time, using vivid imagery and a flowing, melodic rhythm. '
|
16 |
+
'The poem should contrast fleeting moments with eternity, capturing both beauty and melancholy.',
|
17 |
|
18 |
"🐱 Whimsical Children's Story (Talking Cat)":
|
19 |
+
'Tell a short bedtime story about a mischievous talking cat who sneaks into a grand wizard’s library '
|
20 |
+
'at night and accidentally casts a spell that brings the books to life. '
|
21 |
+
'Make the tone playful, whimsical, and filled with wonder.',
|
22 |
|
23 |
+
'🔥 Intense Speech (Freedom & Justice)':
|
24 |
+
'Write a powerful speech delivered by a rebel leader rallying their people against a tyrant. '
|
25 |
+
'The speech should be passionate, filled with urgency and conviction, calling for freedom and justice.',
|
26 |
|
27 |
+
'👻 Mysterious Horror Scene (Haunted Lighthouse)':
|
28 |
+
'Describe a chilling ghostly encounter in an abandoned lighthouse on a foggy night. '
|
29 |
+
'The protagonist, alone and cold, begins hearing whispers from the shadows, '
|
30 |
+
'telling them secrets they were never meant to know.'
|
31 |
}
|
src/utils.py
CHANGED
@@ -40,14 +40,14 @@ def truncate_text(text: str, max_length: int = 50) -> str:
|
|
40 |
''
|
41 |
"""
|
42 |
if max_length <= 0:
|
43 |
-
logger.warning(f
|
44 |
-
return
|
45 |
|
46 |
is_truncated = len(text) > max_length
|
47 |
if is_truncated:
|
48 |
-
logger.debug(f
|
49 |
|
50 |
-
return text[:max_length] + (
|
51 |
|
52 |
|
53 |
def validate_env_var(var_name: str) -> str:
|
@@ -74,9 +74,9 @@ def validate_env_var(var_name: str) -> str:
|
|
74 |
...
|
75 |
ValueError: MISSING_VAR is not set. Please ensure it is defined in your environment variables.
|
76 |
"""
|
77 |
-
value = os.environ.get(var_name,
|
78 |
if not value:
|
79 |
-
raise ValueError(f
|
80 |
return value
|
81 |
|
82 |
|
@@ -99,21 +99,21 @@ def validate_prompt_length(prompt: str, max_length: int, min_length: int) -> Non
|
|
99 |
>>> validate_prompt_length("", max_length=500, min_length=1)
|
100 |
# Raises ValueError: "Prompt must be at least 1 character(s) long."
|
101 |
"""
|
102 |
-
logger.debug(f
|
103 |
|
104 |
# Check if prompt is empty or too short
|
105 |
stripped_prompt = prompt.strip()
|
106 |
if len(stripped_prompt) < min_length:
|
107 |
raise ValueError(
|
108 |
-
f
|
109 |
-
f
|
110 |
)
|
111 |
|
112 |
# Check if prompt is too long
|
113 |
if len(stripped_prompt) > max_length:
|
114 |
raise ValueError(
|
115 |
-
f
|
116 |
-
f
|
117 |
)
|
118 |
|
119 |
-
logger.debug(f
|
|
|
40 |
''
|
41 |
"""
|
42 |
if max_length <= 0:
|
43 |
+
logger.warning(f'Invalid max_length={max_length}. Returning empty string.')
|
44 |
+
return ''
|
45 |
|
46 |
is_truncated = len(text) > max_length
|
47 |
if is_truncated:
|
48 |
+
logger.debug(f'Truncated text to {max_length} characters.')
|
49 |
|
50 |
+
return text[:max_length] + ('...' if is_truncated else '')
|
51 |
|
52 |
|
53 |
def validate_env_var(var_name: str) -> str:
|
|
|
74 |
...
|
75 |
ValueError: MISSING_VAR is not set. Please ensure it is defined in your environment variables.
|
76 |
"""
|
77 |
+
value = os.environ.get(var_name, '')
|
78 |
if not value:
|
79 |
+
raise ValueError(f'{var_name} is not set. Please ensure it is defined in your environment variables.')
|
80 |
return value
|
81 |
|
82 |
|
|
|
99 |
>>> validate_prompt_length("", max_length=500, min_length=1)
|
100 |
# Raises ValueError: "Prompt must be at least 1 character(s) long."
|
101 |
"""
|
102 |
+
logger.debug(f'Prompt length being validated: {len(prompt)} characters')
|
103 |
|
104 |
# Check if prompt is empty or too short
|
105 |
stripped_prompt = prompt.strip()
|
106 |
if len(stripped_prompt) < min_length:
|
107 |
raise ValueError(
|
108 |
+
f'Prompt must be at least {min_length} character(s) long. '
|
109 |
+
f'Received only {len(stripped_prompt)}.'
|
110 |
)
|
111 |
|
112 |
# Check if prompt is too long
|
113 |
if len(stripped_prompt) > max_length:
|
114 |
raise ValueError(
|
115 |
+
f'The prompt exceeds the maximum allowed length of {max_length} characters. '
|
116 |
+
f'Your prompt contains {len(stripped_prompt)} characters.'
|
117 |
)
|
118 |
|
119 |
+
logger.debug(f'Prompt length validation passed for prompt: {truncate_text(stripped_prompt)}')
|