File size: 4,749 Bytes
a375dbf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
"""
elevenlabs_api.py

This file defines the interaction with the ElevenLabs TTS API using the ElevenLabs Python SDK.
It includes functionality for API request handling and processing API responses.

Key Features:
- Encapsulates all logic related to the ElevenLabs TTS API.
- Implements retry logic for handling transient API errors.
- Handles received audio and processes it for playback on the web.
- Provides detailed logging for debugging and error tracking.

Classes:
- ElevenLabsException: Custom exception for TTS API-related errors.
- ElevenLabsConfig: Immutable configuration for interacting with the TTS API.

Functions:
- text_to_speech_with_elevenlabs: Converts text to speech using the ElevenLabs TTS API.
"""

# Standard Library Imports
from dataclasses import dataclass
import logging
from typing import Optional
# Third-Party Library Imports
from elevenlabs import ElevenLabs
from tenacity import retry, stop_after_attempt, wait_fixed, before_log, after_log
# Local Application Imports
from src.config import logger
from src.utils import validate_env_var, truncate_text


@dataclass(frozen=True)
class ElevenLabsConfig:
    """
    Immutable configuration for interacting with the ElevenLabs TTS API.

    The field values are validated in ``__post_init__``. The SDK client is
    created on first access of ``client`` and reused on later accesses.
    """
    # NOTE: this default is evaluated once, when the class body executes.
    api_key: str = validate_env_var("ELEVENLABS_API_KEY")
    voice_id: str = "pNInz6obpgDQGcFmaJgB" # Adam (popular ElevenLabs pre-made voice)
    model_id: str = "eleven_multilingual_v2" # ElevenLabs' most emotionally expressive model
    output_format: str = "mp3_44100_128" # Output format of the generated audio.

    def __post_init__(self) -> None:
        """
        Validate that the required attributes are set.

        Raises:
            ValueError: If the API key, voice ID, or model ID is empty.
        """
        if not self.api_key:
            raise ValueError("ElevenLabs API key is not set.")
        if not self.voice_id:
            raise ValueError("ElevenLabs Voice ID is not set.")
        if not self.model_id:
            raise ValueError("ElevenLabs Model ID is not set.")

    @property
    def client(self) -> ElevenLabs:
        """
        Lazy initialization of the ElevenLabs client.

        The client is built on first access and cached on the instance, so
        repeated accesses (e.g. one per TTS request) share a single client
        instead of constructing a new one each time.

        Returns:
            ElevenLabs: Configured client instance.
        """
        cached_client = self.__dict__.get("_client")
        if cached_client is None:
            cached_client = ElevenLabs(api_key=self.api_key)
            # The dataclass is frozen, so regular attribute assignment raises
            # FrozenInstanceError; write the cache through object.__setattr__.
            # "_client" is not a dataclass field, so eq/hash/repr are unaffected.
            object.__setattr__(self, "_client", cached_client)
        return cached_client


class ElevenLabsException(Exception):
    """
    Error raised for failures while talking to the ElevenLabs TTS API.

    Attributes:
        original_exception: The underlying exception that triggered this
            error, or None when there is no underlying exception.
    """

    def __init__(self, message: str, original_exception: Optional[Exception] = None):
        # Keep a handle on the root cause so callers can inspect it.
        self.original_exception = original_exception
        super().__init__(message)


# Module-level ElevenLabs configuration instance, created with the default
# field values; the SDK client itself is obtained through its `client` property.
elevenlabs_config = ElevenLabsConfig()


@retry(
    stop=stop_after_attempt(3),
    wait=wait_fixed(2),
    before=before_log(logger, logging.DEBUG),
    after=after_log(logger, logging.DEBUG),
    # Re-raise the last attempt's exception so callers receive the documented
    # ElevenLabsException rather than tenacity's RetryError wrapper.
    reraise=True,
)
def text_to_speech_with_elevenlabs(text: str) -> bytes:
    """
    Converts text to speech using the ElevenLabs TTS API.

    The call is attempted up to 3 times with a fixed 2 second wait between
    attempts; if every attempt fails, the exception from the last attempt
    is raised.

    Args:
        text (str): The text to be converted to speech.

    Returns:
        bytes: The raw binary audio data for playback.

    Raises:
        ElevenLabsException: If there is an error communicating with the ElevenLabs API or processing the response.
    """
    logger.debug(f"Generated text for TTS: {truncate_text(text)}")
    logger.debug(f"Using Voice ID: {elevenlabs_config.voice_id}")
    logger.debug(f"Using Model ID: {elevenlabs_config.model_id}")
    logger.debug(f"Using Output Format: {elevenlabs_config.output_format}")

    try:
        # Generate audio using the ElevenLabs SDK
        audio_iterator = elevenlabs_config.client.text_to_speech.convert(
            text=text,
            voice_id=elevenlabs_config.voice_id,
            model_id=elevenlabs_config.model_id,
            output_format=elevenlabs_config.output_format,
        )

        # Ensure the response is an iterator
        if not hasattr(audio_iterator, "__iter__") or not hasattr(audio_iterator, "__next__"):
            logger.error(f"Invalid audio iterator response: {audio_iterator}")
            raise ElevenLabsException("Invalid audio iterator received from ElevenLabs API.")

        # Combine chunks into a single bytes object
        audio = b"".join(audio_iterator)

        # Validate audio
        if not audio:
            logger.error("No audio data received from ElevenLabs API.")
            raise ElevenLabsException("Empty audio data received from ElevenLabs API.")

        logger.debug(f"Received binary audio data: {len(audio)} bytes")
        return audio

    except ElevenLabsException:
        # Raised (and already logged) by the validation above; propagate it
        # unchanged instead of logging it again and wrapping it a second time.
        raise

    except Exception as e:
        logger.exception(
            f"Error generating text-to-speech with ElevenLabs: {e}. "
            f"Text: {truncate_text(text)}, Voice ID: {elevenlabs_config.voice_id}"
        )
        # Chain explicitly so the traceback marks the SDK error as the cause.
        raise ElevenLabsException(
            message=f"Failed to generate audio with ElevenLabs: {e}",
            original_exception=e,
        ) from e