Spaces:
Paused
Paused
# tts_google.py
import asyncio
import functools
import os
from typing import Optional

from google.cloud import texttospeech

from ssml_converter import SSMLConverter
from logger import log_info, log_error, log_debug, log_warning
# NOTE(review): TTSInterface (base class of GoogleCloudTTS) is not imported
# anywhere in the visible file - confirm its module and import it here.
class GoogleCloudTTS(TTSInterface):
    """Google Cloud Text-to-Speech implementation.

    Wraps a ``texttospeech.TextToSpeechClient`` and, by default, converts
    plain text to SSML with ``SSMLConverter`` before synthesis.
    """

    def __init__(self, credentials_path: str):
        """Create the Google Cloud TTS client and the SSML converter.

        Args:
            credentials_path: Path to the credentials file. It is exported as
                the ``GOOGLE_APPLICATION_CREDENTIALS`` environment variable
                (a process-wide setting) before the client is constructed.
        """
        super().__init__()
        self.supports_ssml = True
        self.credentials_path = credentials_path

        # Google TTS doesn't need preprocessing with SSML
        self.preprocessing_flags = set()

        # Initialize client
        os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = credentials_path
        self.client = texttospeech.TextToSpeechClient()

        # SSML converter
        self.ssml_converter = SSMLConverter(language="tr-TR")

    async def synthesize(self, text: str, voice_id: Optional[str] = None, **kwargs) -> bytes:
        """Convert text to speech using Google Cloud TTS.

        Args:
            text: Plain text, or an SSML document (starting with ``<speak``).
            voice_id: Google voice name; ``"tr-TR-Wavenet-B"`` when omitted.
            **kwargs: Optional settings:
                use_ssml (bool, default True): send the request as SSML,
                    converting plain text first when needed.
                language_code (str, default "tr-TR").
                ssml_gender: gender preference forwarded to the API; not sent
                    when omitted.
                speaking_rate (float, default 1.0).
                pitch (float, default 0.0).
                volume_gain_db (float, default 0.0).

        Returns:
            The ``audio_content`` bytes of the response (MP3 encoding is
            requested).

        Raises:
            Exception: Any failure during conversion or synthesis is logged
                and re-raised unchanged.
        """
        try:
            # Check if SSML should be used
            use_ssml = kwargs.get("use_ssml", True)
            # Tolerate leading whitespace and <speak ...> with attributes so
            # that existing SSML is not wrapped a second time.
            already_ssml = text.lstrip().startswith("<speak")

            if use_ssml:
                if not already_ssml:
                    # Convert to SSML
                    text = self.ssml_converter.convert_to_ssml(text)
                    log_info(f"π Converted to SSML: {text[:200]}...")
                # Text that is already SSML must go through the ssml field
                # too; sending it as plain text would not interpret the markup.
                input_text = texttospeech.SynthesisInput(ssml=text)
            else:
                input_text = texttospeech.SynthesisInput(text=text)

            # Voice selection
            voice_params = {
                "language_code": kwargs.get("language_code", "tr-TR"),
                "name": voice_id or "tr-TR-Wavenet-B",
            }
            # The voice is always selected by name, so a hard-coded gender is
            # redundant and can contradict the named voice. Only forward a
            # gender the caller asked for explicitly.
            # NOTE(review): the service may reject a gender that conflicts
            # with the named voice - confirm against the API reference.
            gender = kwargs.get("ssml_gender")
            if gender is not None:
                voice_params["ssml_gender"] = gender
            voice = texttospeech.VoiceSelectionParams(**voice_params)

            # Audio config
            audio_config = texttospeech.AudioConfig(
                audio_encoding=texttospeech.AudioEncoding.MP3,
                speaking_rate=kwargs.get("speaking_rate", 1.0),
                pitch=kwargs.get("pitch", 0.0),
                volume_gain_db=kwargs.get("volume_gain_db", 0.0),
            )

            # Perform synthesis. The client call is synchronous, so run it in
            # the default executor instead of blocking the event loop.
            request = functools.partial(
                self.client.synthesize_speech,
                input=input_text,
                voice=voice,
                audio_config=audio_config,
            )
            loop = asyncio.get_running_loop()
            response = await loop.run_in_executor(None, request)

            log_info(f"β Google TTS returned {len(response.audio_content)} bytes")
            return response.audio_content
        except Exception as e:
            log_error("β Google TTS error", e)
            raise