Michael Hu committed on
Commit
22bd0b9
·
1 Parent(s): 5a72681

Use Kokoro FastAPI server to generate voice

Browse files
Files changed (1) hide show
  1. utils/tts.py +34 -3
utils/tts.py CHANGED
@@ -9,6 +9,7 @@ logger = logging.getLogger(__name__)
9
 
10
  # Flag to track TTS engine availability
11
  KOKORO_AVAILABLE = False
 
12
  DIA_AVAILABLE = False
13
 
14
  # Try to import Kokoro first
@@ -25,7 +26,9 @@ except AttributeError as e:
25
  result = client.predict(
26
  api_name="/lambda"
27
  )
28
- print(f"result get back from Kokora FastAPI server: {result}")
 
 
29
  else:
30
  # Re-raise if it's a different error
31
  logger.error(f"Kokoro import failed with unexpected error: {str(e)}")
@@ -97,14 +100,32 @@ class TTSEngine:
97
  logger.error(f"Failed to initialize Kokoro pipeline: {str(kokoro_err)}")
98
  logger.error(f"Error type: {type(kokoro_err).__name__}")
99
  logger.info("Will try to fall back to Dia TTS engine")
100
- # Fall through to try Dia
101
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
  # Try Dia if Kokoro is not available or failed to initialize
103
  if self.engine_type is None and DIA_AVAILABLE:
104
  logger.info("Using Dia as fallback TTS engine")
105
  # For Dia, we don't need to initialize anything here
106
  # The model will be lazy-loaded when needed
107
  self.pipeline = None
 
108
  self.engine_type = "dia"
109
  logger.info("TTS engine initialized with Dia (lazy loading)")
110
 
@@ -113,6 +134,7 @@ class TTSEngine:
113
  logger.warning("Using dummy TTS implementation as no TTS engines are available")
114
  logger.warning("Check logs above for specific errors that prevented Kokoro or Dia initialization")
115
  self.pipeline = None
 
116
  self.engine_type = "dummy"
117
 
118
  def generate_speech(self, text: str, voice: str = 'af_heart', speed: float = 1.0) -> str:
@@ -145,6 +167,15 @@ class TTSEngine:
145
  logger.info(f"Saving Kokoro audio to {output_path}")
146
  sf.write(output_path, audio, 24000)
147
  break
 
 
 
 
 
 
 
 
 
148
  elif self.engine_type == "dia":
149
  # Use Dia for TTS generation
150
  try:
 
9
 
10
  # Flag to track TTS engine availability
11
  KOKORO_AVAILABLE = False
12
+ KOKORO_SPACE_AVAILABLE = False
13
  DIA_AVAILABLE = False
14
 
15
  # Try to import Kokoro first
 
26
  result = client.predict(
27
  api_name="/lambda"
28
  )
29
+ logger.debug(f"result get back from Kokora FastAPI server: {result}")
30
+ if result:
31
+ KOKORO_SPACE_AVAILABLE = True
32
  else:
33
  # Re-raise if it's a different error
34
  logger.error(f"Kokoro import failed with unexpected error: {str(e)}")
 
100
  logger.error(f"Failed to initialize Kokoro pipeline: {str(kokoro_err)}")
101
  logger.error(f"Error type: {type(kokoro_err).__name__}")
102
  logger.info("Will try to fall back to Dia TTS engine")
103
+
104
+ if KOKORO_SPACE_AVAILABLE:
105
+ logger.info(f"Using Kokoro FastAPI server as primary TTS engine with language code: {lang_code}")
106
+ try:
107
+ self.client = Client("Remsky/Kokoro-TTS-Zero")
108
+ self.engine_type = "kokoro_space"
109
+ logger.info("TTS engine successfully initialized with Kokoro FastAPI server")
110
+ result = client.predict(
111
+ text="The studio was filled with the rich odour of roses, and when the light",
112
+ voice_names=None,
113
+ speed=1,
114
+ api_name="/generate_speech_from_ui"
115
+ )
116
+ logger.info(result)
117
+ except Exception as kokoro_err:
118
+ logger.error(f"Failed to initialize Kokoro pipeline: {str(kokoro_err)}")
119
+ logger.error(f"Error type: {type(kokoro_err).__name__}")
120
+ logger.info("Will try to fall back to Dia TTS engine")
121
+
122
  # Try Dia if Kokoro is not available or failed to initialize
123
  if self.engine_type is None and DIA_AVAILABLE:
124
  logger.info("Using Dia as fallback TTS engine")
125
  # For Dia, we don't need to initialize anything here
126
  # The model will be lazy-loaded when needed
127
  self.pipeline = None
128
+ self.client = None
129
  self.engine_type = "dia"
130
  logger.info("TTS engine initialized with Dia (lazy loading)")
131
 
 
134
  logger.warning("Using dummy TTS implementation as no TTS engines are available")
135
  logger.warning("Check logs above for specific errors that prevented Kokoro or Dia initialization")
136
  self.pipeline = None
137
+ self.client = None
138
  self.engine_type = "dummy"
139
 
140
  def generate_speech(self, text: str, voice: str = 'af_heart', speed: float = 1.0) -> str:
 
167
  logger.info(f"Saving Kokoro audio to {output_path}")
168
  sf.write(output_path, audio, 24000)
169
  break
170
+ elif self.engine_type == "kokoro_space":
171
+ # Use Kokoro FastAPI server for TTS generation
172
+ logger.info("Generating speech using Kokoro FastAPI server")
173
+ result = self.client.predict(
174
+ text=text,
175
+ voice_names=None,
176
+ speed=speed,
177
+ api_name="/generate_speech_from_ui"
178
+ )
179
  elif self.engine_type == "dia":
180
  # Use Dia for TTS generation
181
  try: