zach committed on
Commit
a375dbf
·
1 Parent(s): fa43e81

Adds ElevenLabs integration

Browse files
src/integrations/__init__.py CHANGED
@@ -1,2 +1,3 @@
1
  from .anthropic_api import generate_text_with_claude
2
- from .hume_api import text_to_speech_with_hume
 
 
1
  from .anthropic_api import generate_text_with_claude
2
+ from .hume_api import text_to_speech_with_hume
3
+ from .elevenlabs_api import text_to_speech_with_elevenlabs
src/integrations/elevenlabs_api.py CHANGED
@@ -1 +1,129 @@
1
- # coming soon...
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ elevenlabs_api.py
3
+
4
+ This file defines the interaction with the ElevenLabs TTS API using the ElevenLabs Python SDK.
5
+ It includes functionality for API request handling and processing API responses.
6
+
7
+ Key Features:
8
+ - Encapsulates all logic related to the ElevenLabs TTS API.
9
+ - Implements retry logic for handling transient API errors.
10
+ - Handles received audio and processes it for playback on the web.
11
+ - Provides detailed logging for debugging and error tracking.
12
+
13
+ Classes:
14
+ - ElevenLabsException: Custom exception for TTS API-related errors.
15
+ - ElevenLabsConfig: Immutable configuration for interacting with the TTS API.
16
+
17
+ Functions:
18
+ - text_to_speech_with_elevenlabs: Converts text to speech using the ElevenLabs TTS API.
19
+ """
20
+
21
+ # Standard Library Imports
22
+ from dataclasses import dataclass
23
+ import logging
24
+ from typing import Optional
25
+ # Third-Party Library Imports
26
+ from elevenlabs import ElevenLabs
27
+ from tenacity import retry, stop_after_attempt, wait_fixed, before_log, after_log
28
+ # Local Application Imports
29
+ from src.config import logger
30
+ from src.utils import validate_env_var, truncate_text
31
+
32
+
33
@dataclass(frozen=True)
class ElevenLabsConfig:
    """
    Immutable configuration for interacting with the ElevenLabs TTS API.

    Attributes:
        api_key (str): API key passed to the ElevenLabs client.
        voice_id (str): ID of the voice used for synthesis.
        model_id (str): ID of the TTS model used for synthesis.
        output_format (str): Output format requested for the generated audio.
    """
    # NOTE: this default is evaluated once, when the class body runs (i.e. at import time).
    # Presumably validate_env_var reads ELEVENLABS_API_KEY from the environment -- confirm in src.utils.
    api_key: str = validate_env_var("ELEVENLABS_API_KEY")
    voice_id: str = "pNInz6obpgDQGcFmaJgB"  # Adam (popular ElevenLabs pre-made voice)
    model_id: str = "eleven_multilingual_v2"  # ElevenLabs' most emotionally expressive model
    output_format: str = "mp3_44100_128"  # Output format of the generated audio.

    def __post_init__(self):
        """
        Validate that the required attributes are set.

        Raises:
            ValueError: If the API key, voice ID, or model ID is empty.
        """
        if not self.api_key:
            raise ValueError("ElevenLabs API key is not set.")
        if not self.voice_id:
            raise ValueError("ElevenLabs Voice ID is not set.")
        if not self.model_id:
            raise ValueError("ElevenLabs Model ID is not set.")

    @property
    def client(self) -> ElevenLabs:
        """
        Lazy initialization of the ElevenLabs client.

        The client is created on first access and then reused, so repeated
        calls (including retries) do not construct a new client each time.

        Returns:
            ElevenLabs: Configured client instance.
        """
        # The dataclass is frozen, so normal attribute assignment raises
        # FrozenInstanceError. Writing to the instance __dict__ bypasses
        # __setattr__ and lets us cache without making the config mutable.
        # "_client" is not a dataclass field, so eq/hash/repr are unaffected.
        instance_state = self.__dict__
        cached_client = instance_state.get("_client")
        if cached_client is None:
            cached_client = ElevenLabs(api_key=self.api_key)
            instance_state["_client"] = cached_client
        return cached_client
59
+
60
+
61
class ElevenLabsException(Exception):
    """
    Error type raised for failures related to the ElevenLabs TTS API.

    Attributes:
        original_exception (Optional[Exception]): The underlying exception
            that triggered this error, or None when there is none.
    """

    def __init__(self, message: str, original_exception: Optional[Exception] = None):
        # Keep a handle on the underlying cause so callers can inspect it.
        self.original_exception = original_exception
        super().__init__(message)
66
+
67
+
68
+ # Initialize the ElevenLabs configuration
69
+ elevenlabs_config = ElevenLabsConfig()
70
+
71
+
72
@retry(
    stop=stop_after_attempt(3),
    wait=wait_fixed(2),
    before=before_log(logger, logging.DEBUG),
    after=after_log(logger, logging.DEBUG),
    # Re-raise the last attempt's exception once retries are exhausted, so
    # callers see ElevenLabsException (as documented) instead of tenacity.RetryError.
    reraise=True,
)
def text_to_speech_with_elevenlabs(text: str) -> bytes:
    """
    Converts text to speech using the ElevenLabs TTS API.

    The call is attempted up to 3 times with a fixed 2-second wait between
    attempts; each attempt is logged at DEBUG level before and after it runs.

    Args:
        text (str): The text to be converted to speech.

    Returns:
        bytes: The raw binary audio data for playback.

    Raises:
        ElevenLabsException: If there is an error communicating with the ElevenLabs API or processing the response.
            When the failure comes from an underlying exception, it is available as ``original_exception``
            and as ``__cause__``.
    """
    logger.debug(f"Generated text for TTS: {truncate_text(text)}")
    logger.debug(f"Using Voice ID: {elevenlabs_config.voice_id}")
    logger.debug(f"Using Model ID: {elevenlabs_config.model_id}")
    logger.debug(f"Using Output Format: {elevenlabs_config.output_format}")

    try:
        # Generate audio using the ElevenLabs SDK
        audio_iterator = elevenlabs_config.client.text_to_speech.convert(
            text=text,
            voice_id=elevenlabs_config.voice_id,
            model_id=elevenlabs_config.model_id,
            output_format=elevenlabs_config.output_format,
        )

        # Ensure the response is an iterator
        if not hasattr(audio_iterator, "__iter__") or not hasattr(audio_iterator, "__next__"):
            logger.error(f"Invalid audio iterator response: {audio_iterator}")
            raise ElevenLabsException("Invalid audio iterator received from ElevenLabs API.")

        # Combine chunks into a single bytes object
        audio = b"".join(audio_iterator)

        # Validate audio
        if not audio:
            logger.error("No audio data received from ElevenLabs API.")
            raise ElevenLabsException("Empty audio data received from ElevenLabs API.")

        logger.debug(f"Received binary audio data: {len(audio)} bytes")
        return audio

    except ElevenLabsException:
        # Raised by the validation above and already logged there; propagate
        # as-is rather than logging and wrapping it a second time.
        raise
    except Exception as e:
        logger.exception(
            f"Error generating text-to-speech with ElevenLabs: {e}. "
            f"Text: {truncate_text(text)}, Voice ID: {elevenlabs_config.voice_id}"
        )
        # Chain the cause so the original traceback is preserved.
        raise ElevenLabsException(
            message=f"Failed to generate audio with ElevenLabs: {e}",
            original_exception=e,
        ) from e
src/integrations/hume_api.py CHANGED
@@ -19,8 +19,8 @@ Functions:
19
  """
20
 
21
  # Standard Library Imports
22
- import logging
23
  from dataclasses import dataclass
 
24
  from typing import Optional
25
  # Third-Party Library Imports
26
  import requests
 
19
  """
20
 
21
  # Standard Library Imports
 
22
  from dataclasses import dataclass
23
+ import logging
24
  from typing import Optional
25
  # Third-Party Library Imports
26
  import requests