Spaces:
Paused
Paused
| """ | |
| ElevenLabs TTS Implementation | |
| """ | |
| import httpx | |
| from typing import Optional, Dict | |
| from tts_interface import TTSInterface | |
| from logger import log_info, log_error, log_debug, log_warning | |
class ElevenLabsTTS(TTSInterface):
    """Text-to-speech provider backed by the ElevenLabs HTTP API."""

    def __init__(self, api_key: str):
        """Store credentials and provider defaults.

        Args:
            api_key: ElevenLabs API key; surrounding whitespace is stripped
                before it is stored.
        """
        super().__init__()
        self.api_key = api_key.strip()
        self.base_url = "https://api.elevenlabs.io/v1"
        self.default_voice_id = "2thYbn2sOGtiTwd9QwWH"  # Avencia

        # Text preprocessing steps requested for this provider.
        self.preprocessing_flags = {
            "PREPROCESS_NUMBERS",   # Large numbers
            "PREPROCESS_CURRENCY",  # Currency amounts
            "PREPROCESS_TIME",      # Time format
            "PREPROCESS_CODES",     # PNR/codes
            "PREPROCESS_PHONE",     # Phone numbers
        }

        # Mask the key that is actually sent (the stripped one), so the debug
        # line never shows stray whitespace or a differently sized key.
        key = self.api_key
        masked_key = f"{key[:4]}...{key[-4:]}" if len(key) > 8 else "***"
        log_debug(f"π ElevenLabsTTS initialized with key: {masked_key}")

    async def synthesize(self, text: str, voice_id: Optional[str] = None, **kwargs) -> bytes:
        """Convert text to speech using the ElevenLabs API.

        Args:
            text: Text to synthesize.
            voice_id: ElevenLabs voice id; falls back to ``default_voice_id``
                when omitted or empty.
            **kwargs: Optional overrides:
                ``model_id`` (default ``"eleven_multilingual_v2"``),
                ``voice_settings`` (dict replacing the defaults below),
                ``output_format`` (default ``"mp3_44100_128"``).

        Returns:
            The response body returned by the API (audio bytes).

        Raises:
            httpx.HTTPStatusError: If the API answers with an error status.
            Exception: Any other failure is logged and re-raised unchanged.
        """
        # Imported locally: the module's import block does not bring these in,
        # and the defensive decode below needs them.
        import base64
        import binascii

        try:
            voice = voice_id or self.default_voice_id
            url = f"{self.base_url}/text-to-speech/{voice}"

            headers = {
                "xi-api-key": self.api_key,
                "Content-Type": "application/json",
            }

            # Request body; callers may override the model and voice settings.
            data = {
                "text": text,
                "model_id": kwargs.get("model_id", "eleven_multilingual_v2"),
                "voice_settings": kwargs.get("voice_settings", {
                    "stability": 1,
                    "similarity_boost": 0.85,
                    "style": 0.7,
                    "speed": 1.14,
                    "use_speaker_boost": True,
                }),
            }

            # The output format is sent as a query parameter, not in the body.
            params = {"output_format": kwargs.get("output_format", "mp3_44100_128")}

            log_debug(f"π€ Calling ElevenLabs TTS for {len(text)} characters")

            async with httpx.AsyncClient(timeout=30.0) as client:
                response = await client.post(
                    url,
                    headers=headers,
                    json=data,
                    params=params,
                )
                response.raise_for_status()
                audio_data = response.content  # This should be bytes

            # Defensive: if a str ever shows up here, try to treat it as base64.
            if isinstance(audio_data, str):
                log_warning("ElevenLabs returned string instead of bytes")
                try:
                    audio_data = base64.b64decode(audio_data)
                except (binascii.Error, ValueError) as decode_error:
                    # Best effort only: keep the payload as-is, but say so
                    # instead of failing silently.
                    log_warning(f"Could not base64-decode ElevenLabs response: {decode_error}")

            log_debug(f"β ElevenLabs TTS returned {len(audio_data)} bytes")
            log_debug(f"Audio data type: {type(audio_data)}")
            return audio_data

        except httpx.HTTPStatusError as e:
            log_error(f"β ElevenLabs API error: {e.response.status_code} - {e.response.text}")
            raise
        except Exception as e:
            log_error("β TTS synthesis error", e)
            raise

    def get_supported_voices(self) -> Dict[str, str]:
        """Return a static map of voice id to display name.

        This is a hard-coded default set; the full list can be fetched from
        the API.
        """
        return {
            "2thYbn2sOGtiTwd9QwWH": "Avencia (Female - Turkish)",
            "21m00Tcm4TlvDq8ikWAM": "Rachel (Female)",
            "EXAVITQu4vr4xnSDxMaL": "Bella (Female)",
            "ErXwobaYiN019PkySvjV": "Antoni (Male)",
            "VR6AewLTigWG4xSOukaG": "Arnold (Male)",
            "pNInz6obpgDQGcFmaJgB": "Adam (Male)",
            "yoZ06aMxZJJ28mfd3POQ": "Sam (Male)",
        }

    def get_provider_name(self) -> str:
        """Return the provider identifier string."""
        return "elevenlabs"