Spaces:

UcsTurkey
/

flare

Paused

App Files Files Community

flare / tts_elevenlabs.py

ciyidogan

Update tts_elevenlabs.py

387abc2 verified 4 months ago

raw

history blame

4.22 kB

	"""
	ElevenLabs TTS Implementation
	"""
	import httpx
	from typing import Optional, Dict
	from tts_interface import TTSInterface
	from logger import log_info, log_error, log_debug, log_warning

	class ElevenLabsTTS(TTSInterface):
	    """Text-to-speech provider backed by the ElevenLabs HTTP API.

	    Each call to :meth:`synthesize` opens a short-lived ``httpx.AsyncClient``
	    and POSTs to ``{base_url}/text-to-speech/{voice_id}``.
	    """

	    def __init__(self, api_key: str):
	        """Store the API key and provider defaults.

	        Args:
	            api_key: ElevenLabs API key. Surrounding whitespace is stripped
	                before it is stored and sent in the ``xi-api-key`` header.
	        """
	        super().__init__()
	        self.api_key = api_key.strip()
	        self.base_url = "https://api.elevenlabs.io/v1"
	        self.default_voice_id = "2thYbn2sOGtiTwd9QwWH"  # Avencia

	        # Text preprocessing steps this provider asks for.
	        # NOTE(review): presumably consumed by TTSInterface or its callers;
	        # the consumer is not visible in this file.
	        self.preprocessing_flags = {
	            "PREPROCESS_NUMBERS",   # Large numbers
	            "PREPROCESS_CURRENCY",  # Currency amounts
	            "PREPROCESS_TIME",      # Time format
	            "PREPROCESS_CODES",     # PNR/codes
	            "PREPROCESS_PHONE",     # Phone numbers
	        }

	        # Mask the *stored* (stripped) key so stray whitespace neither leaks
	        # into the log line nor skews the length check.
	        key = self.api_key
	        masked_key = f"{key[:4]}...{key[-4:]}" if len(key) > 8 else "***"
	        log_debug(f"🔑 ElevenLabsTTS initialized with key: {masked_key}")

	    async def synthesize(self, text: str, voice_id: Optional[str] = None, **kwargs) -> bytes:
	        """Convert text to speech using the ElevenLabs API.

	        Args:
	            text: Text to synthesize.
	            voice_id: ElevenLabs voice ID; falls back to
	                ``self.default_voice_id`` when falsy.
	            **kwargs: Optional overrides:
	                ``model_id`` (default ``"eleven_multilingual_v2"``),
	                ``voice_settings`` (dict replacing the defaults below as a
	                whole), and ``output_format`` (default ``"mp3_44100_128"``,
	                sent as a query parameter).

	        Returns:
	            The response body (audio data) as returned by the API.

	        Raises:
	            httpx.HTTPStatusError: The API answered with a 4xx/5xx status.
	            Exception: Any other failure (e.g. transport errors); it is
	                logged and re-raised unchanged.
	        """
	        # Imported locally: the module's import block does not provide it.
	        import base64

	        try:
	            voice = voice_id or self.default_voice_id
	            url = f"{self.base_url}/text-to-speech/{voice}"

	            headers = {
	                "xi-api-key": self.api_key,
	                "Content-Type": "application/json",
	            }

	            # Request payload; caller-supplied kwargs win over the defaults.
	            data = {
	                "text": text,
	                "model_id": kwargs.get("model_id", "eleven_multilingual_v2"),
	                "voice_settings": kwargs.get("voice_settings", {
	                    "stability": 1,
	                    "similarity_boost": 0.85,
	                    "style": 0.7,
	                    "speed": 1.14,
	                    "use_speaker_boost": True,
	                }),
	            }

	            # The output format travels in the query string, not the body.
	            params = {"output_format": kwargs.get("output_format", "mp3_44100_128")}

	            log_debug(f"🎤 Calling ElevenLabs TTS for {len(text)} characters")

	            async with httpx.AsyncClient(timeout=30.0) as client:
	                response = await client.post(
	                    url,
	                    headers=headers,
	                    json=data,
	                    params=params,
	                )

	            response.raise_for_status()
	            audio_data = response.content  # This should be bytes

	            # Defensive fallback: if a string ever arrives, try to treat it
	            # as base64-encoded audio.
	            if isinstance(audio_data, str):
	                log_warning("ElevenLabs returned string instead of bytes")
	                try:
	                    audio_data = base64.b64decode(audio_data)
	                except ValueError:
	                    # binascii.Error is a ValueError subclass. Stay
	                    # best-effort: keep the original value, but say so
	                    # instead of failing silently.
	                    log_warning("ElevenLabs string payload is not valid base64; returning it undecoded")

	            log_debug(f"✅ ElevenLabs TTS returned {len(audio_data)} bytes")
	            log_debug(f"Audio data type: {type(audio_data)}")

	            return audio_data

	        except httpx.HTTPStatusError as e:
	            log_error(f"❌ ElevenLabs API error: {e.response.status_code} - {e.response.text}")
	            raise
	        except Exception as e:
	            log_error("❌ TTS synthesis error", e)
	            raise

	    def get_supported_voices(self) -> Dict[str, str]:
	        """Return a built-in mapping of voice ID to display name.

	        This is a hard-coded default list; the full list can be fetched
	        from the API.
	        """
	        return {
	            "2thYbn2sOGtiTwd9QwWH": "Avencia (Female - Turkish)",
	            "21m00Tcm4TlvDq8ikWAM": "Rachel (Female)",
	            "EXAVITQu4vr4xnSDxMaL": "Bella (Female)",
	            "ErXwobaYiN019PkySvjV": "Antoni (Male)",
	            "VR6AewLTigWG4xSOukaG": "Arnold (Male)",
	            "pNInz6obpgDQGcFmaJgB": "Adam (Male)",
	            "yoZ06aMxZJJ28mfd3POQ": "Sam (Male)",
	        }

	    def get_provider_name(self) -> str:
	        """Return the provider identifier string, ``"elevenlabs"``."""
	        return "elevenlabs"