zach committed on
Commit e9bcee8 · 1 Parent(s): e560bf3

Update Hume integration to randomly pick a voice from a predefined list of top voices

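The gist of the change, for quick reference: the Hume config now carries a small list of preferred voices and exposes a property that picks one at random for each TTS request, instead of always using a single hard-coded voice. Below is a minimal, self-contained Python sketch of that approach; the voice names and field names mirror the diff, but the class shown here is illustrative rather than the full HumeConfig.

import random
from dataclasses import dataclass, field
from typing import List


@dataclass(frozen=True)
class HumeConfig:
    """Illustrative subset of the Hume TTS configuration introduced in this commit."""
    # Predefined list of top voices to choose from (names taken from the diff below).
    voices: List[str] = field(default_factory=lambda: ['ITO', 'KORA', 'DACHER'])

    @property
    def random_voice(self) -> str:
        """Randomly select one of the configured voices."""
        return random.choice(self.voices)


config = HumeConfig()
# Each request body now names a randomly chosen voice rather than a fixed one.
request_body = {'text': 'Hello, world!', 'voice': {'name': config.random_voice}}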
src/app.py CHANGED
@@ -42,14 +42,14 @@ def process_prompt(prompt: str) -> str:
42
  Returns:
43
  tuple: The generated text and audio data from both Hume and ElevenLabs.
44
  """
45
- logger.info(f"Processing prompt: {truncate_text(prompt, max_length=100)}")
46
  try:
47
  # Validate prompt length before processing
48
  validate_prompt_length(prompt, PROMPT_MAX_LENGTH, PROMPT_MIN_LENGTH)
49
 
50
  # Generate text with Claude API
51
  generated_text = generate_text_with_claude(prompt)
52
- logger.info(f"Generated text (length={len(generated_text)} characters).")
53
 
54
  # Run TTS requests in parallel
55
  with ThreadPoolExecutor(max_workers=2) as executor:
@@ -60,15 +60,15 @@ def process_prompt(prompt: str) -> str:
60
  hume_audio = hume_future.result()
61
  elevenlabs_audio = elevenlabs_future.result()
62
 
63
- logger.info(f"TTS audio generated successfully: Hume={len(hume_audio)} bytes, ElevenLabs={len(elevenlabs_audio)} bytes")
64
  return generated_text, hume_audio, elevenlabs_audio
65
 
66
  except ValueError as ve:
67
- logger.warning(f"Validation error: {ve}")
68
  return str(ve), None, None # Return validation error directly to the UI
69
  except Exception as e:
70
- logger.error(f"Unexpected error during processing: {e}")
71
- return "An unexpected error occurred. Please try again.", None, None
72
 
73
 
74
  def build_gradio_interface() -> gr.Blocks:
@@ -81,16 +81,16 @@ def build_gradio_interface() -> gr.Blocks:
81
  with gr.Blocks() as demo:
82
  gr.Markdown("# TTS Arena")
83
  gr.Markdown(
84
- "Generate text from a prompt using **Claude by Anthropic**, "
85
- "and listen to the generated text-to-speech using **Hume TTS API** "
86
- "and **ElevenLabs TTS API** for comparison."
87
  )
88
 
89
  with gr.Row():
90
  # Dropdown for predefined prompts
91
  sample_prompt_dropdown = gr.Dropdown(
92
  choices=list(SAMPLE_PROMPTS.keys()),
93
- label="Choose a Sample Prompt (or enter your own below)",
94
  value=None,
95
  interactive=True
96
  )
@@ -98,26 +98,26 @@ def build_gradio_interface() -> gr.Blocks:
98
  with gr.Row():
99
  # Custom prompt input
100
  prompt_input = gr.Textbox(
101
- label="Enter your prompt",
102
- placeholder="Or type your own prompt here...",
103
  lines=2,
104
  )
105
 
106
  with gr.Row():
107
- generate_button = gr.Button("Generate")
108
 
109
  # Display the generated text and audio side by side
110
  with gr.Row():
111
  output_text = gr.Textbox(
112
- label="Generated Text",
113
  interactive=False,
114
  lines=12,
115
  max_lines=24,
116
  scale=2,
117
  )
118
  with gr.Column(scale=1):
119
- hume_audio_output = gr.Audio(label="Hume TTS Audio", type="filepath")
120
- elevenlabs_audio_output = gr.Audio(label="ElevenLabs TTS Audio", type="filepath")
121
 
122
  # Auto-fill the text input when a sample is selected
123
  sample_prompt_dropdown.change(
@@ -133,11 +133,11 @@ def build_gradio_interface() -> gr.Blocks:
133
  outputs=[output_text, hume_audio_output, elevenlabs_audio_output],
134
  )
135
 
136
- logger.debug("Gradio interface built successfully")
137
  return demo
138
 
139
 
140
- if __name__ == "__main__":
141
- logger.info("Launching TTS Arena Gradio app...")
142
  demo = build_gradio_interface()
143
  demo.launch()
 
42
  Returns:
43
  tuple: The generated text and audio data from both Hume and ElevenLabs.
44
  """
45
+ logger.info(f'Processing prompt: {truncate_text(prompt, max_length=100)}')
46
  try:
47
  # Validate prompt length before processing
48
  validate_prompt_length(prompt, PROMPT_MAX_LENGTH, PROMPT_MIN_LENGTH)
49
 
50
  # Generate text with Claude API
51
  generated_text = generate_text_with_claude(prompt)
52
+ logger.info(f'Generated text (length={len(generated_text)} characters).')
53
 
54
  # Run TTS requests in parallel
55
  with ThreadPoolExecutor(max_workers=2) as executor:
 
60
  hume_audio = hume_future.result()
61
  elevenlabs_audio = elevenlabs_future.result()
62
 
63
+ logger.info(f'TTS audio generated successfully: Hume={len(hume_audio)} bytes, ElevenLabs={len(elevenlabs_audio)} bytes')
64
  return generated_text, hume_audio, elevenlabs_audio
65
 
66
  except ValueError as ve:
67
+ logger.warning(f'Validation error: {ve}')
68
  return str(ve), None, None # Return validation error directly to the UI
69
  except Exception as e:
70
+ logger.error(f'Unexpected error during processing: {e}')
71
+ return 'An unexpected error occurred. Please try again.', None, None
72
 
73
 
74
  def build_gradio_interface() -> gr.Blocks:
 
81
  with gr.Blocks() as demo:
82
  gr.Markdown("# TTS Arena")
83
  gr.Markdown(
84
+ 'Generate text from a prompt using **Claude by Anthropic**, '
85
+ 'and listen to the generated text-to-speech using **Hume TTS API** '
86
+ 'and **ElevenLabs TTS API** for comparison.'
87
  )
88
 
89
  with gr.Row():
90
  # Dropdown for predefined prompts
91
  sample_prompt_dropdown = gr.Dropdown(
92
  choices=list(SAMPLE_PROMPTS.keys()),
93
+ label='Choose a Sample Prompt (or enter your own below)',
94
  value=None,
95
  interactive=True
96
  )
 
98
  with gr.Row():
99
  # Custom prompt input
100
  prompt_input = gr.Textbox(
101
+ label='Enter your prompt',
102
+ placeholder='Or type your own prompt here...',
103
  lines=2,
104
  )
105
 
106
  with gr.Row():
107
+ generate_button = gr.Button('Generate')
108
 
109
  # Display the generated text and audio side by side
110
  with gr.Row():
111
  output_text = gr.Textbox(
112
+ label='Generated Text',
113
  interactive=False,
114
  lines=12,
115
  max_lines=24,
116
  scale=2,
117
  )
118
  with gr.Column(scale=1):
119
+ hume_audio_output = gr.Audio(label='Hume TTS Audio', type='filepath')
120
+ elevenlabs_audio_output = gr.Audio(label='ElevenLabs TTS Audio', type='filepath')
121
 
122
  # Auto-fill the text input when a sample is selected
123
  sample_prompt_dropdown.change(
 
133
  outputs=[output_text, hume_audio_output, elevenlabs_audio_output],
134
  )
135
 
136
+ logger.debug('Gradio interface built successfully')
137
  return demo
138
 
139
 
140
+ if __name__ == '__main__':
141
+ logger.info('Launching TTS Arena Gradio app...')
142
  demo = build_gradio_interface()
143
  demo.launch()
src/config.py CHANGED
@@ -25,10 +25,10 @@ load_dotenv()
25
 
26
 
27
  # Enable debugging mode based on an environment variable
28
- debug_raw = os.getenv("DEBUG", "false").lower()
29
- if debug_raw not in {"true", "false"}:
30
- print(f"Warning: Invalid DEBUG value '{debug_raw}'. Defaulting to 'false'.")
31
- DEBUG = debug_raw == "true"
32
 
33
 
34
  # Configure the logger
@@ -36,8 +36,8 @@ logging.basicConfig(
36
  level=logging.DEBUG if DEBUG else logging.INFO,
37
  format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
38
  )
39
- logger: logging.Logger = logging.getLogger("tts_arena")
40
- logger.info(f"Debug mode is {'enabled' if DEBUG else 'disabled'}.")
41
 
42
 
43
  # Log environment variables
@@ -49,7 +49,7 @@ def log_env_variable(var_name: str, value: str) -> None:
49
  var_name (str): The name of the environment variable.
50
  value (str): The value of the environment variable.
51
  """
52
- logger.debug(f"Environment variable '{var_name}' validated with value: {value}")
53
 
54
  if DEBUG:
55
- logger.debug(f"DEBUG mode enabled.")
 
25
 
26
 
27
  # Enable debugging mode based on an environment variable
28
+ debug_raw = os.getenv('DEBUG', 'false').lower()
29
+ if debug_raw not in {'true', 'false'}:
30
+ print(f'Warning: Invalid DEBUG value "{debug_raw}". Defaulting to "false".')
31
+ DEBUG = debug_raw == 'true'
32
 
33
 
34
  # Configure the logger
 
36
  level=logging.DEBUG if DEBUG else logging.INFO,
37
  format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
38
  )
39
+ logger: logging.Logger = logging.getLogger('tts_arena')
40
+ logger.info(f'Debug mode is {"enabled" if DEBUG else "disabled"}.')
41
 
42
 
43
  # Log environment variables
 
49
  var_name (str): The name of the environment variable.
50
  value (str): The value of the environment variable.
51
  """
52
+ logger.debug(f'Environment variable "{var_name}" validated with value: {value}')
53
 
54
  if DEBUG:
55
+ logger.debug(f'DEBUG mode enabled.')
src/integrations/anthropic_api.py CHANGED
@@ -38,8 +38,8 @@ class AnthropicConfig:
38
  Immutable configuration for interacting with the Anthropic API.
39
  Includes client initialization for encapsulation.
40
  """
41
- api_key: str = validate_env_var("ANTHROPIC_API_KEY")
42
- model: ModelParam = "claude-3-5-sonnet-latest" # Valid predefined model
43
  max_tokens: int = 300 # Max tokens for API response
44
  system_prompt: str = """You are a highly creative and articulate assistant specialized in generating vivid, engaging, and well-written content.
45
 
@@ -62,13 +62,13 @@ Always keep your responses concise, unless explicitly instructed to elaborate.""
62
  def __post_init__(self):
63
  # Validate that required attributes are set
64
  if not self.api_key:
65
- raise ValueError("Anthropic API key is not set.")
66
  if not self.model:
67
- raise ValueError("Anthropic Model is not set.")
68
  if not self.max_tokens:
69
- raise ValueError("Anthropic Max Tokens is not set.")
70
  if not self.system_prompt:
71
- raise ValueError("Anthropic System Prompt is not set.")
72
 
73
  @property
74
  def client(self) -> Anthropic:
@@ -119,44 +119,44 @@ def generate_text_with_claude(prompt: str) -> str:
119
  >>> generate_text_with_claude("")
120
  "The prompt exceeds the maximum allowed length of 500 characters. Your prompt contains 512 characters."
121
  """
122
- logger.debug(f"Generating text with Claude. Prompt length: {len(prompt)} characters.")
123
 
124
  try:
125
  response: Message = anthropic_config.client.messages.create(
126
  model=anthropic_config.model,
127
  max_tokens=anthropic_config.max_tokens,
128
  system=anthropic_config.system_prompt,
129
- messages=[{"role": "user", "content": prompt}],
130
  )
131
- logger.debug(f"API response received: {truncate_text(str(response))}")
132
 
133
  # Validate response content
134
- if not hasattr(response, "content"):
135
  logger.error("Response is missing 'content'. Response: %s", response)
136
- raise AnthropicError("Invalid API response: Missing 'content'.")
137
 
138
  # Process response content
139
  blocks: Union[List[TextBlock], TextBlock, None] = response.content
140
 
141
  if isinstance(blocks, list):
142
- result = "\n\n".join(block.text for block in blocks if isinstance(block, TextBlock))
143
- logger.debug(f"Processed response from list: {truncate_text(result)}")
144
  return result
145
  if isinstance(blocks, TextBlock):
146
- logger.debug(f"Processed response from single TextBlock: {truncate_text(blocks.text)}")
147
  return blocks.text
148
 
149
- logger.warning(f"Unexpected response type: {type(blocks)}")
150
- return str(blocks or "No content generated.")
151
 
152
  except Exception as e:
153
- logger.exception(f"Error generating text with Claude: {e}")
154
  raise AnthropicError(
155
  message=(
156
- f"Error generating text with Claude: {e}. "
157
- f"HTTP Status: {getattr(response, 'status', 'N/A')}. "
158
- f"Prompt (truncated): {truncate_text(prompt)}. "
159
- f"Model: {anthropic_config.model}, Max tokens: {anthropic_config.max_tokens}"
160
  ),
161
  original_exception=e,
162
  )
 
38
  Immutable configuration for interacting with the Anthropic API.
39
  Includes client initialization for encapsulation.
40
  """
41
+ api_key: str = validate_env_var('ANTHROPIC_API_KEY')
42
+ model: ModelParam = 'claude-3-5-sonnet-latest' # Valid predefined model
43
  max_tokens: int = 300 # Max tokens for API response
44
  system_prompt: str = """You are a highly creative and articulate assistant specialized in generating vivid, engaging, and well-written content.
45
 
 
62
  def __post_init__(self):
63
  # Validate that required attributes are set
64
  if not self.api_key:
65
+ raise ValueError('Anthropic API key is not set.')
66
  if not self.model:
67
+ raise ValueError('Anthropic Model is not set.')
68
  if not self.max_tokens:
69
+ raise ValueError('Anthropic Max Tokens is not set.')
70
  if not self.system_prompt:
71
+ raise ValueError('Anthropic System Prompt is not set.')
72
 
73
  @property
74
  def client(self) -> Anthropic:
 
119
  >>> generate_text_with_claude("")
120
  "The prompt exceeds the maximum allowed length of 500 characters. Your prompt contains 512 characters."
121
  """
122
+ logger.debug(f'Generating text with Claude. Prompt length: {len(prompt)} characters.')
123
 
124
  try:
125
  response: Message = anthropic_config.client.messages.create(
126
  model=anthropic_config.model,
127
  max_tokens=anthropic_config.max_tokens,
128
  system=anthropic_config.system_prompt,
129
+ messages=[{'role': 'user', 'content': prompt}],
130
  )
131
+ logger.debug(f'API response received: {truncate_text(str(response))}')
132
 
133
  # Validate response content
134
+ if not hasattr(response, 'content'):
135
  logger.error("Response is missing 'content'. Response: %s", response)
136
+ raise AnthropicError('Invalid API response: Missing "content".')
137
 
138
  # Process response content
139
  blocks: Union[List[TextBlock], TextBlock, None] = response.content
140
 
141
  if isinstance(blocks, list):
142
+ result = '\n\n'.join(block.text for block in blocks if isinstance(block, TextBlock))
143
+ logger.debug(f'Processed response from list: {truncate_text(result)}')
144
  return result
145
  if isinstance(blocks, TextBlock):
146
+ logger.debug(f'Processed response from single TextBlock: {truncate_text(blocks.text)}')
147
  return blocks.text
148
 
149
+ logger.warning(f'Unexpected response type: {type(blocks)}')
150
+ return str(blocks or 'No content generated.')
151
 
152
  except Exception as e:
153
+ logger.exception(f'Error generating text with Claude: {e}')
154
  raise AnthropicError(
155
  message=(
156
+ f'Error generating text with Claude: {e}. '
157
+ f'HTTP Status: {getattr(response, "status", "N/A")}. '
158
+ f'Prompt (truncated): {truncate_text(prompt)}. '
159
+ f'Model: {anthropic_config.model}, Max tokens: {anthropic_config.max_tokens}'
160
  ),
161
  original_exception=e,
162
  )
src/integrations/elevenlabs_api.py CHANGED
@@ -34,26 +34,26 @@ from src.utils import validate_env_var, truncate_text
34
  @dataclass(frozen=True)
35
  class ElevenLabsConfig:
36
  """Immutable configuration for interacting with the ElevenLabs TTS API."""
37
- api_key: str = validate_env_var("ELEVENLABS_API_KEY")
38
- model_id: str = "eleven_multilingual_v2" # ElevenLab's most emotionally expressive model
39
- output_format: str = "mp3_44100_128" # Output format of the generated audio.
40
  top_voices: list[str] = (
41
- "pNInz6obpgDQGcFmaJgB", # Adam
42
- "ErXwobaYiN019PkySvjV", # Antoni
43
- "21m00Tcm4TlvDq8ikWAM", # Rachel
44
- "XrExE9yKIg1WjnnlVkGX", # Matilda
45
  )
46
 
47
  def __post_init__(self):
48
  # Validate that required attributes are set
49
  if not self.api_key:
50
- raise ValueError("ElevenLabs API key is not set.")
51
  if not self.model_id:
52
- raise ValueError("ElevenLabs Model ID is not set.")
53
  if not self.output_format:
54
- raise ValueError("ElevenLabs Output Format is not set.")
55
  if not self.top_voices:
56
- raise ValueError("ElevenLabs Top Voices are not set.")
57
 
58
  @property
59
  def client(self) -> ElevenLabs:
@@ -103,7 +103,7 @@ def text_to_speech_with_elevenlabs(text: str) -> bytes:
103
  Raises:
104
  ElevenLabsException: If there is an error communicating with the ElevenLabs API or processing the response.
105
  """
106
- logger.debug(f"Generating speech with ElevenLabs. Text length: {len(text)} characters.")
107
 
108
  try:
109
  # Generate audio using the ElevenLabs SDK
@@ -115,24 +115,24 @@ def text_to_speech_with_elevenlabs(text: str) -> bytes:
115
  )
116
 
117
  # Ensure the response is an iterator
118
- if not hasattr(audio_iterator, "__iter__") or not hasattr(audio_iterator, "__next__"):
119
- logger.error("Invalid audio iterator response.")
120
- raise ElevenLabsException("Invalid audio iterator received from ElevenLabs API.")
121
 
122
  # Combine chunks into a single bytes object
123
- audio = b"".join(chunk for chunk in audio_iterator)
124
 
125
  # Validate audio
126
  if not audio:
127
- logger.error("No audio data received from ElevenLabs API.")
128
- raise ElevenLabsException("Empty audio data received from ElevenLabs API.")
129
 
130
- logger.info(f"Received ElevenLabs audio ({len(audio)} bytes).")
131
  return audio
132
 
133
  except Exception as e:
134
- logger.exception(f"Error generating speech: {e}")
135
  raise ElevenLabsException(
136
- message=f"Failed to generate audio with ElevenLabs: {e}",
137
  original_exception=e,
138
  )
 
34
  @dataclass(frozen=True)
35
  class ElevenLabsConfig:
36
  """Immutable configuration for interacting with the ElevenLabs TTS API."""
37
+ api_key: str = validate_env_var('ELEVENLABS_API_KEY')
38
+ model_id: str = 'eleven_multilingual_v2' # ElevenLab's most emotionally expressive model
39
+ output_format: str = 'mp3_44100_128' # Output format of the generated audio.
40
  top_voices: list[str] = (
41
+ 'pNInz6obpgDQGcFmaJgB', # Adam
42
+ 'ErXwobaYiN019PkySvjV', # Antoni
43
+ '21m00Tcm4TlvDq8ikWAM', # Rachel
44
+ 'XrExE9yKIg1WjnnlVkGX', # Matilda
45
  )
46
 
47
  def __post_init__(self):
48
  # Validate that required attributes are set
49
  if not self.api_key:
50
+ raise ValueError('ElevenLabs API key is not set.')
51
  if not self.model_id:
52
+ raise ValueError('ElevenLabs Model ID is not set.')
53
  if not self.output_format:
54
+ raise ValueError('ElevenLabs Output Format is not set.')
55
  if not self.top_voices:
56
+ raise ValueError('ElevenLabs Top Voices are not set.')
57
 
58
  @property
59
  def client(self) -> ElevenLabs:
 
103
  Raises:
104
  ElevenLabsException: If there is an error communicating with the ElevenLabs API or processing the response.
105
  """
106
+ logger.debug(f'Generating speech with ElevenLabs. Text length: {len(text)} characters.')
107
 
108
  try:
109
  # Generate audio using the ElevenLabs SDK
 
115
  )
116
 
117
  # Ensure the response is an iterator
118
+ if not hasattr(audio_iterator, '__iter__') or not hasattr(audio_iterator, '__next__'):
119
+ logger.error('Invalid audio iterator response.')
120
+ raise ElevenLabsException('Invalid audio iterator received from ElevenLabs API.')
121
 
122
  # Combine chunks into a single bytes object
123
+ audio = b''.join(chunk for chunk in audio_iterator)
124
 
125
  # Validate audio
126
  if not audio:
127
+ logger.error('No audio data received from ElevenLabs API.')
128
+ raise ElevenLabsException('Empty audio data received from ElevenLabs API.')
129
 
130
+ logger.info(f'Received ElevenLabs audio ({len(audio)} bytes).')
131
  return audio
132
 
133
  except Exception as e:
134
+ logger.exception(f'Error generating speech: {e}')
135
  raise ElevenLabsException(
136
+ message=f'Failed to generate audio with ElevenLabs: {e}',
137
  original_exception=e,
138
  )
src/integrations/hume_api.py CHANGED
@@ -21,7 +21,8 @@ Functions:
21
  # Standard Library Imports
22
  from dataclasses import dataclass
23
  import logging
24
- from typing import Optional
 
25
  # Third-Party Library Imports
26
  import requests
27
  from tenacity import retry, stop_after_attempt, wait_fixed, before_log, after_log
@@ -32,20 +33,38 @@ from src.utils import validate_env_var, truncate_text
32
 
33
  @dataclass(frozen=True)
34
  class HumeConfig:
35
- """Immutable configuration for interacting with the TTS API."""
36
- tts_endpoint_url: str = "https://api.hume.ai/v0/tts"
37
- api_key: str = validate_env_var("HUME_API_KEY")
38
- voice: str = "KORA"
39
  audio_format: str = 'wav'
40
- headers: dict = None
41
 
42
  def __post_init__(self):
43
- # Dynamically set headers after initialization
44
- object.__setattr__(self, "headers", {
45
- 'X-Hume-Api-Key': f"{self.api_key}",
46
  'Content-Type': 'application/json',
47
  })
48
 
49
 
50
  class HumeException(Exception):
51
  """Custom exception for errors related to the Hume TTS API."""
@@ -78,11 +97,11 @@ def text_to_speech_with_hume(prompt: str, text: str) -> bytes:
78
  Raises:
79
  HumeException: If there is an error communicating with the Hume TTS API.
80
  """
81
- logger.debug(f"Processing TTS with Hume. Prompt length: {len(prompt)} characters. Text length: {len(text)} characters.")
82
 
83
  request_body = {
84
- "text": text,
85
- "voice": {"name": hume_config.voice},
86
  # "voice_description": prompt, # <-- breaking request!?
87
  # "format": hume_config.audio_format, # <-- breaking request!?
88
  }
@@ -96,26 +115,26 @@ def text_to_speech_with_hume(prompt: str, text: str) -> bytes:
96
 
97
  # Validate response
98
  if response.status_code != 200:
99
- logger.error(f"Hume TTS API Error: {response.status_code} - {response.text[:200]}... (truncated)")
100
- raise HumeException(f"Hume TTS API responded with status {response.status_code}: {response.text}")
101
 
102
  # Process audio response
103
- if response.headers.get("Content-Type", "").startswith("audio/"):
104
  audio_data = response.content # Raw binary audio data
105
- logger.info(f"Received audio data from Hume ({len(response.content)} bytes).")
106
  return audio_data
107
 
108
  # Unexpected content type
109
- raise HumeException(f"Unexpected Content-Type: {response.headers.get('Content-Type', 'Unknown')}")
110
 
111
  except requests.exceptions.RequestException as e:
112
- logger.exception("Request to Hume TTS API failed.")
113
  raise HumeException(
114
- message=f"Failed to communicate with Hume TTS API: {e}",
115
  original_exception=e,
116
  )
117
  except Exception as e:
118
- logger.exception("Request to Hume TTS API failed.")
119
  raise HumeException(
120
  message=f"Unexpected error while processing the Hume TTS response: {e}",
121
  original_exception=e,
 
21
  # Standard Library Imports
22
  from dataclasses import dataclass
23
  import logging
24
+ import random
25
+ from typing import List, Optional
26
  # Third-Party Library Imports
27
  import requests
28
  from tenacity import retry, stop_after_attempt, wait_fixed, before_log, after_log
 
33
 
34
  @dataclass(frozen=True)
35
  class HumeConfig:
36
+ """Immutable configuration for interacting with the Hume TTS API."""
37
+ tts_endpoint_url: str = 'https://api.hume.ai/v0/tts'
38
+ api_key: str = validate_env_var('HUME_API_KEY')
39
+ voices: List[str] = ('ITO', 'KORA', 'DACHER') # List of available Hume voices
40
  audio_format: str = 'wav'
41
+ headers: dict = None # Headers for the API requests
42
 
43
  def __post_init__(self):
44
+ # Validate required attributes
45
+ if not self.api_key:
46
+ raise ValueError('Hume API key is not set.')
47
+ if not self.voices:
48
+ raise ValueError('Hume voices list is empty. Please provide at least one voice.')
49
+ if not self.audio_format:
50
+ raise ValueError('Hume audio format is not set.')
51
+
52
+ # Set headers dynamically after validation
53
+ object.__setattr__(self, 'headers', {
54
+ 'X-Hume-Api-Key': f'{self.api_key}',
55
  'Content-Type': 'application/json',
56
  })
57
 
58
+ @property
59
+ def random_voice(self) -> str:
60
+ """
61
+ Randomly selects a voice from the available voices.
62
+
63
+ Returns:
64
+ str: A randomly chosen voice name.
65
+ """
66
+ return random.choice(self.voices)
67
+
68
 
69
  class HumeException(Exception):
70
  """Custom exception for errors related to the Hume TTS API."""
 
97
  Raises:
98
  HumeException: If there is an error communicating with the Hume TTS API.
99
  """
100
+ logger.debug(f'Processing TTS with Hume. Prompt length: {len(prompt)} characters. Text length: {len(text)} characters.')
101
 
102
  request_body = {
103
+ 'text': text,
104
+ 'voice': {'name': hume_config.random_voice},
105
  # "voice_description": prompt, # <-- breaking request!?
106
  # "format": hume_config.audio_format, # <-- breaking request!?
107
  }
 
115
 
116
  # Validate response
117
  if response.status_code != 200:
118
+ logger.error(f'Hume TTS API Error: {response.status_code} - {response.text[:200]}... (truncated)')
119
+ raise HumeException(f'Hume TTS API responded with status {response.status_code}: {response.text}')
120
 
121
  # Process audio response
122
+ if response.headers.get('Content-Type', '').startswith('audio/'):
123
  audio_data = response.content # Raw binary audio data
124
+ logger.info(f'Received audio data from Hume ({len(response.content)} bytes).')
125
  return audio_data
126
 
127
  # Unexpected content type
128
+ raise HumeException(f'Unexpected Content-Type: {response.headers.get("Content-Type", "Unknown")}')
129
 
130
  except requests.exceptions.RequestException as e:
131
+ logger.exception('Request to Hume TTS API failed.')
132
  raise HumeException(
133
+ message=f'Failed to communicate with Hume TTS API: {e}',
134
  original_exception=e,
135
  )
136
  except Exception as e:
137
+ logger.exception('Request to Hume TTS API failed.')
138
  raise HumeException(
139
  message=f"Unexpected error while processing the Hume TTS response: {e}",
140
  original_exception=e,
src/sample_prompts.py CHANGED
@@ -6,26 +6,26 @@ These prompts are structured to highlight different aspects of emotional tone, p
6
  """
7
 
8
  SAMPLE_PROMPTS = {
9
- "🚀 Dramatic Monologue (Stranded Astronaut)":
10
- "Write a short dramatic monologue from a lone astronaut stranded on Mars, "
11
- "speaking to mission control for the last time. The tone should be reflective, "
12
- "filled with awe and resignation as they describe the Martian landscape and their final thoughts.",
13
 
14
- "📜 Poetic Sonnet (The Passage of Time)":
15
- "Compose a sonnet about the passage of time, using vivid imagery and a flowing, melodic rhythm. "
16
- "The poem should contrast fleeting moments with eternity, capturing both beauty and melancholy.",
17
 
18
  "🐱 Whimsical Children's Story (Talking Cat)":
19
- "Tell a short bedtime story about a mischievous talking cat who sneaks into a grand wizard’s library "
20
- "at night and accidentally casts a spell that brings the books to life. "
21
- "Make the tone playful, whimsical, and filled with wonder.",
22
 
23
- "🔥 Intense Speech (Freedom & Justice)":
24
- "Write a powerful speech delivered by a rebel leader rallying their people against a tyrant. "
25
- "The speech should be passionate, filled with urgency and conviction, calling for freedom and justice.",
26
 
27
- "👻 Mysterious Horror Scene (Haunted Lighthouse)":
28
- "Describe a chilling ghostly encounter in an abandoned lighthouse on a foggy night. "
29
- "The protagonist, alone and cold, begins hearing whispers from the shadows, "
30
- "telling them secrets they were never meant to know."
31
  }
 
6
  """
7
 
8
  SAMPLE_PROMPTS = {
9
+ '🚀 Dramatic Monologue (Stranded Astronaut)':
10
+ 'Write a short dramatic monologue from a lone astronaut stranded on Mars, '
11
+ 'speaking to mission control for the last time. The tone should be reflective, '
12
+ 'filled with awe and resignation as they describe the Martian landscape and their final thoughts.',
13
 
14
+ '📜 Poetic Sonnet (The Passage of Time)':
15
+ 'Compose a sonnet about the passage of time, using vivid imagery and a flowing, melodic rhythm. '
16
+ 'The poem should contrast fleeting moments with eternity, capturing both beauty and melancholy.',
17
 
18
  "🐱 Whimsical Children's Story (Talking Cat)":
19
+ 'Tell a short bedtime story about a mischievous talking cat who sneaks into a grand wizard’s library '
20
+ 'at night and accidentally casts a spell that brings the books to life. '
21
+ 'Make the tone playful, whimsical, and filled with wonder.',
22
 
23
+ '🔥 Intense Speech (Freedom & Justice)':
24
+ 'Write a powerful speech delivered by a rebel leader rallying their people against a tyrant. '
25
+ 'The speech should be passionate, filled with urgency and conviction, calling for freedom and justice.',
26
 
27
+ '👻 Mysterious Horror Scene (Haunted Lighthouse)':
28
+ 'Describe a chilling ghostly encounter in an abandoned lighthouse on a foggy night. '
29
+ 'The protagonist, alone and cold, begins hearing whispers from the shadows, '
30
+ 'telling them secrets they were never meant to know.'
31
  }
src/utils.py CHANGED
@@ -40,14 +40,14 @@ def truncate_text(text: str, max_length: int = 50) -> str:
40
  ''
41
  """
42
  if max_length <= 0:
43
- logger.warning(f"Invalid max_length={max_length}. Returning empty string.")
44
- return ""
45
 
46
  is_truncated = len(text) > max_length
47
  if is_truncated:
48
- logger.debug(f"Truncated text to {max_length} characters.")
49
 
50
- return text[:max_length] + ("..." if is_truncated else "")
51
 
52
 
53
  def validate_env_var(var_name: str) -> str:
@@ -74,9 +74,9 @@ def validate_env_var(var_name: str) -> str:
74
  ...
75
  ValueError: MISSING_VAR is not set. Please ensure it is defined in your environment variables.
76
  """
77
- value = os.environ.get(var_name, "")
78
  if not value:
79
- raise ValueError(f"{var_name} is not set. Please ensure it is defined in your environment variables.")
80
  return value
81
 
82
 
@@ -99,21 +99,21 @@ def validate_prompt_length(prompt: str, max_length: int, min_length: int) -> Non
99
  >>> validate_prompt_length("", max_length=500, min_length=1)
100
  # Raises ValueError: "Prompt must be at least 1 character(s) long."
101
  """
102
- logger.debug(f"Prompt length being validated: {len(prompt)} characters")
103
 
104
  # Check if prompt is empty or too short
105
  stripped_prompt = prompt.strip()
106
  if len(stripped_prompt) < min_length:
107
  raise ValueError(
108
- f"Prompt must be at least {min_length} character(s) long. "
109
- f"Received only {len(stripped_prompt)}."
110
  )
111
 
112
  # Check if prompt is too long
113
  if len(stripped_prompt) > max_length:
114
  raise ValueError(
115
- f"The prompt exceeds the maximum allowed length of {max_length} characters. "
116
- f"Your prompt contains {len(stripped_prompt)} characters."
117
  )
118
 
119
- logger.debug(f"Prompt length validation passed for prompt: {truncate_text(stripped_prompt)}")
 
40
  ''
41
  """
42
  if max_length <= 0:
43
+ logger.warning(f'Invalid max_length={max_length}. Returning empty string.')
44
+ return ''
45
 
46
  is_truncated = len(text) > max_length
47
  if is_truncated:
48
+ logger.debug(f'Truncated text to {max_length} characters.')
49
 
50
+ return text[:max_length] + ('...' if is_truncated else '')
51
 
52
 
53
  def validate_env_var(var_name: str) -> str:
 
74
  ...
75
  ValueError: MISSING_VAR is not set. Please ensure it is defined in your environment variables.
76
  """
77
+ value = os.environ.get(var_name, '')
78
  if not value:
79
+ raise ValueError(f'{var_name} is not set. Please ensure it is defined in your environment variables.')
80
  return value
81
 
82
 
 
99
  >>> validate_prompt_length("", max_length=500, min_length=1)
100
  # Raises ValueError: "Prompt must be at least 1 character(s) long."
101
  """
102
+ logger.debug(f'Prompt length being validated: {len(prompt)} characters')
103
 
104
  # Check if prompt is empty or too short
105
  stripped_prompt = prompt.strip()
106
  if len(stripped_prompt) < min_length:
107
  raise ValueError(
108
+ f'Prompt must be at least {min_length} character(s) long. '
109
+ f'Received only {len(stripped_prompt)}.'
110
  )
111
 
112
  # Check if prompt is too long
113
  if len(stripped_prompt) > max_length:
114
  raise ValueError(
115
+ f'The prompt exceeds the maximum allowed length of {max_length} characters. '
116
+ f'Your prompt contains {len(stripped_prompt)} characters.'
117
  )
118
 
119
+ logger.debug(f'Prompt length validation passed for prompt: {truncate_text(stripped_prompt)}')