Spaces:
Running
Running
File size: 5,473 Bytes
a375dbf 048c3fc a375dbf f8ddf74 a375dbf f8ddf74 a375dbf f8ddf74 5a007ca a375dbf f8ddf74 a375dbf 681c05f fc85b67 7e7e83a 5a007ca a375dbf 0e508c8 234af57 048c3fc 5a007ca a375dbf 1ed6720 048c3fc 1ed6720 36b195f a375dbf d1ed6b1 fc85b67 0e508c8 a375dbf fc85b67 d1ed6b1 a375dbf 63ef86b a375dbf d1ed6b1 a375dbf 234af57 a375dbf d1ed6b1 a375dbf fc85b67 a375dbf ba3994f a375dbf ba3994f a375dbf 5bf19b3 048c3fc fc85b67 ba3994f a375dbf 63ef86b a375dbf 1ed6720 a375dbf bc5091e 7f25817 5bf19b3 a375dbf 0e508c8 a375dbf 7f25817 a375dbf 0e508c8 d4b2b49 0e508c8 d4b2b49 0e508c8 1ed6720 0e508c8 ba3994f a375dbf fc85b67 048c3fc 63ef86b 2f050a8 a375dbf 7f25817 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 |
"""
elevenlabs_api.py
This file defines the interaction with the ElevenLabs text-to-speech (TTS) API using the
ElevenLabs Python SDK. It includes functionality for API request handling and processing API responses.
Key Features:
- Encapsulates all logic related to the ElevenLabs TTS API.
- Implements retry logic using Tenacity for handling transient API errors.
- Handles received audio and processes it for playback on the web.
- Provides detailed logging for debugging and error tracking.
- Utilizes robust error handling (EAFP) to validate API responses.
Classes:
- ElevenLabsConfig: Immutable configuration for interacting with ElevenLabs' TTS API.
- ElevenLabsError: Custom exception for ElevenLabs API-related errors.
- UnretryableElevenLabsError: ElevenLabsError subclass for API errors that should not be retried.
Functions:
- text_to_speech_with_elevenlabs: Synthesizes speech from text using ElevenLabs' TTS API.
"""
# Standard Library Imports
import logging
import random
from dataclasses import dataclass, field
from typing import Optional, Tuple

# Third-Party Library Imports
from elevenlabs import ElevenLabs, TextToVoiceCreatePreviewsRequestOutputFormat
from elevenlabs.core import ApiError
from tenacity import (
    after_log,
    before_log,
    retry,
    retry_if_exception,
    stop_after_attempt,
    wait_fixed,
)

# Local Application Imports
from src.config import Config, logger
from src.constants import CLIENT_ERROR_CODE, SERVER_ERROR_CODE
from src.utils import save_base64_audio_to_file, validate_env_var
@dataclass(frozen=True)
class ElevenLabsConfig:
    """
    Frozen settings object for the ElevenLabs TTS integration.

    Attributes:
        api_key (str): Not accepted by the constructor; filled in during ``__post_init__`` with
            the value returned by ``validate_env_var("ELEVENLABS_API_KEY")``.
        output_format (TextToVoiceCreatePreviewsRequestOutputFormat): Audio format requested
            from the API. Defaults to ``"mp3_44100_128"``.
    """

    api_key: str = field(init=False)
    output_format: TextToVoiceCreatePreviewsRequestOutputFormat = "mp3_44100_128"

    def __post_init__(self) -> None:
        """
        Reject an empty output format, then populate ``api_key``.

        Raises:
            ValueError: If ``output_format`` is falsy.
        """
        if not self.output_format:
            raise ValueError("ElevenLabs TTS API output format is not set.")

        # The dataclass is frozen, so plain attribute assignment is blocked;
        # object.__setattr__ is used to fill in the init=False field.
        object.__setattr__(self, "api_key", validate_env_var("ELEVENLABS_API_KEY"))

    @property
    def client(self) -> ElevenLabs:
        """
        Build an ElevenLabs SDK client authenticated with ``api_key``.

        A new client object is constructed on every access; nothing is cached on the instance.

        Returns:
            ElevenLabs: Configured client instance.
        """
        return ElevenLabs(api_key=self.api_key)
class ElevenLabsError(Exception):
    """
    Exception type for failures in the ElevenLabs TTS integration.

    Attributes:
        message (str): Human-readable description; also passed to ``Exception`` so that
            ``str(error)`` yields the same text.
        original_exception (Optional[Exception]): The underlying exception that triggered this
            error, or None when there is none.
    """

    def __init__(self, message: str, original_exception: Optional[Exception] = None):
        self.message = message
        self.original_exception = original_exception
        super().__init__(message)
class UnretryableElevenLabsError(ElevenLabsError):
    """
    ElevenLabsError variant marking failures that should not be retried.

    It carries the same ``message`` and ``original_exception`` attributes as its parent; the
    distinct type exists so that callers can tell these failures apart.
    """

    def __init__(self, message: str, original_exception: Optional[Exception] = None):
        # Pure pass-through to the parent constructor; no extra state is kept here.
        super().__init__(message=message, original_exception=original_exception)
def _extract_api_error_message(error: ApiError) -> str:
    """
    Pull the most specific human-readable message out of an ElevenLabs ``ApiError``.

    Prefers ``error.body["detail"]["message"]``. When the body does not have that shape (not a
    dict, ``detail`` missing or not a dict, or an empty message) it falls back to ``detail``
    itself if that is a non-empty string, and finally to ``str(error)``. It never raises on an
    unexpected body shape, so the caller's error handling cannot be derailed by a lookup failure.
    """
    body = getattr(error, "body", None)
    detail = body.get("detail") if isinstance(body, dict) else None
    if isinstance(detail, dict) and detail.get("message"):
        return f"{detail['message']}"
    if isinstance(detail, str) and detail:
        return detail
    return f"{error}"


@retry(
    # Errors flagged as unretryable are re-raised immediately instead of consuming
    # the remaining attempts; every other exception is retried.
    retry=retry_if_exception(lambda exc: not isinstance(exc, UnretryableElevenLabsError)),
    stop=stop_after_attempt(3),
    wait=wait_fixed(2),
    before=before_log(logger, logging.DEBUG),
    after=after_log(logger, logging.DEBUG),
    reraise=True,
)
def text_to_speech_with_elevenlabs(
    character_description: str, text: str, config: Config
) -> Tuple[None, str]:
    """
    Synthesizes text to speech using the ElevenLabs TTS API, processes the audio data, and writes it to a file.

    The call is attempted up to 3 times with a fixed 2-second wait between attempts, except when
    an UnretryableElevenLabsError is raised, which is propagated immediately.

    Args:
        character_description (str): The character description used as the voice description.
        text (str): The text to be synthesized into speech.
        config (Config): Application configuration; provides ``elevenlabs_config`` (client and
            output format) and is passed through to ``save_base64_audio_to_file``.

    Returns:
        Tuple[None, str]: A tuple containing:
            - generation_id (None): We do not record the generation ID for ElevenLabs, but return None for uniformity
              across TTS integrations.
            - file_path (str): The file path returned by ``save_base64_audio_to_file`` for the saved audio.

    Raises:
        UnretryableElevenLabsError: If the API responds with a status code in the range
            [CLIENT_ERROR_CODE, SERVER_ERROR_CODE).
        ElevenLabsError: If there is any other error communicating with the ElevenLabs API or
            processing the response, including when no previews are returned.
    """
    logger.debug("Synthesizing speech with ElevenLabs. Text length: %d characters.", len(text))

    elevenlabs_config = config.elevenlabs_config

    try:
        # Synthesize speech using the ElevenLabs SDK
        response = elevenlabs_config.client.text_to_voice.create_previews(
            voice_description=character_description,
            text=text,
            output_format=elevenlabs_config.output_format,
        )

        previews = response.previews
        if not previews:
            msg = "No previews returned by ElevenLabs API."
            logger.error(msg)
            raise ElevenLabsError(message=msg)

        # Pick one of the returned previews at random and extract its
        # base64 encoded audio and generated voice ID.
        preview = random.choice(previews)
        generated_voice_id = preview.generated_voice_id
        base64_audio = preview.audio_base_64

        # Write audio to file and return the resulting path
        filename = f"{generated_voice_id}.mp3"
        audio_file_path = save_base64_audio_to_file(base64_audio, filename, config)
        return None, audio_file_path

    except ElevenLabsError:
        # Already a domain error (e.g. the "no previews" case above); wrapping it
        # again would only bury it inside a second, identical ElevenLabsError.
        raise

    except ApiError as e:
        if e.status_code is not None and CLIENT_ERROR_CODE <= e.status_code < SERVER_ERROR_CODE:
            raise UnretryableElevenLabsError(
                message=_extract_api_error_message(e),
                original_exception=e,
            ) from e

        raise ElevenLabsError(
            message=f"{e}",
            original_exception=e,
        ) from e

    except Exception as e:
        raise ElevenLabsError(
            message=f"{e}",
            original_exception=e,
        ) from e
|