ciyidogan committed on
Commit
c598435
·
verified ·
1 Parent(s): d6da344

Create tts_google.py

Browse files
Files changed (1) hide show
  1. tts_google.py +64 -0
tts_google.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # tts_google.py
2
import asyncio
import functools
import os
from typing import Optional

from google.cloud import texttospeech
from ssml_converter import SSMLConverter

# NOTE(review): `TTSInterface` and `log` are used below but are not imported
# anywhere in the visible file; add their project imports here — confirm the
# modules they live in.
4
+
5
class GoogleCloudTTS(TTSInterface):
    """Google Cloud Text-to-Speech implementation.

    Wraps ``texttospeech.TextToSpeechClient`` and, by default, converts plain
    text to SSML with ``SSMLConverter`` before synthesis.
    """

    # Defaults used when the caller does not override them.
    DEFAULT_LANGUAGE_CODE = "tr-TR"
    DEFAULT_VOICE_NAME = "tr-TR-Wavenet-B"

    def __init__(self, credentials_path: str):
        """Create the Google client and the SSML converter.

        Args:
            credentials_path: Path to the credentials file; it is exported as
                ``GOOGLE_APPLICATION_CREDENTIALS`` before the client is built.
        """
        super().__init__()
        self.supports_ssml = True
        self.credentials_path = credentials_path

        # Google TTS doesn't need preprocessing with SSML
        self.preprocessing_flags = set()

        # Initialize client. The environment variable is process-wide, so any
        # other Google client created afterwards picks up the same value.
        os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = credentials_path
        self.client = texttospeech.TextToSpeechClient()

        # SSML converter
        self.ssml_converter = SSMLConverter(language=self.DEFAULT_LANGUAGE_CODE)

    async def synthesize(self, text: str, voice_id: Optional[str] = None, **kwargs) -> bytes:
        """Convert text to speech using Google Cloud TTS.

        Args:
            text: Plain text, or SSML markup starting with ``<speak``.
            voice_id: Google voice name; defaults to ``DEFAULT_VOICE_NAME``.
            **kwargs: Optional settings:
                use_ssml (bool, default True): send the input as SSML,
                    converting plain text first when needed.
                language_code (str, default "tr-TR").
                ssml_gender: preferred ``texttospeech.SsmlVoiceGender``; sent
                    only when given.
                speaking_rate (float, default 1.0).
                pitch (float, default 0.0).
                volume_gain_db (float, default 0.0).

        Returns:
            The MP3-encoded audio bytes returned by the service.

        Raises:
            Exception: any error from conversion or the API call is logged
                and re-raised unchanged.
        """
        try:
            use_ssml = kwargs.get("use_ssml", True)

            if use_ssml:
                # Input that is already SSML must still be sent as SSML;
                # sending it as plain text would not interpret the markup.
                if not text.lstrip().startswith("<speak"):
                    text = self.ssml_converter.convert_to_ssml(text)
                    log(f"📝 Converted to SSML: {text[:200]}...")
                input_text = texttospeech.SynthesisInput(ssml=text)
            else:
                input_text = texttospeech.SynthesisInput(text=text)

            # Voice selection. A named voice already determines the voice, so
            # a gender preference is only forwarded when the caller asks for
            # one; a hard-coded gender can contradict the named voice.
            voice_options = {
                "language_code": kwargs.get("language_code", self.DEFAULT_LANGUAGE_CODE),
                "name": voice_id or self.DEFAULT_VOICE_NAME,
            }
            requested_gender = kwargs.get("ssml_gender")
            if requested_gender is not None:
                voice_options["ssml_gender"] = requested_gender
            voice = texttospeech.VoiceSelectionParams(**voice_options)

            # Audio config
            audio_config = texttospeech.AudioConfig(
                audio_encoding=texttospeech.AudioEncoding.MP3,
                speaking_rate=kwargs.get("speaking_rate", 1.0),
                pitch=kwargs.get("pitch", 0.0),
                volume_gain_db=kwargs.get("volume_gain_db", 0.0),
            )

            # The client call is blocking; run it in the default executor so
            # this coroutine does not stall the event loop while waiting.
            blocking_call = functools.partial(
                self.client.synthesize_speech,
                input=input_text,
                voice=voice,
                audio_config=audio_config,
            )
            loop = asyncio.get_running_loop()
            response = await loop.run_in_executor(None, blocking_call)

            log(f"✅ Google TTS returned {len(response.audio_content)} bytes")
            return response.audio_content

        except Exception as e:
            log(f"❌ Google TTS error: {e}")
            raise