Spaces:

DroolingPanda
/

teachingAssistant

Running

App Files Community

Michael Hu committed on Apr 28

Commit

22bd0b9

1 Parent(s): 5a72681

Use Kokoro FastAPI server to generate voice

Browse files

Files changed (1) hide show

utils/tts.py +34 -3

utils/tts.py CHANGED Viewed

@@ -9,6 +9,7 @@ logger = logging.getLogger(__name__)
 # Flag to track TTS engine availability
 KOKORO_AVAILABLE = False
 DIA_AVAILABLE = False
 # Try to import Kokoro first
@@ -25,7 +26,9 @@ except AttributeError as e:
         result = client.predict(
                 api_name="/lambda"
         )
-        print(f"result get back from Kokora FastAPI server: {result}")
     else:
         # Re-raise if it's a different error
         logger.error(f"Kokoro import failed with unexpected error: {str(e)}")
@@ -97,14 +100,32 @@ class TTSEngine:
                 logger.error(f"Failed to initialize Kokoro pipeline: {str(kokoro_err)}")
                 logger.error(f"Error type: {type(kokoro_err).__name__}")
                 logger.info("Will try to fall back to Dia TTS engine")
-                # Fall through to try Dia
         # Try Dia if Kokoro is not available or failed to initialize
         if self.engine_type is None and DIA_AVAILABLE:
             logger.info("Using Dia as fallback TTS engine")
             # For Dia, we don't need to initialize anything here
             # The model will be lazy-loaded when needed
             self.pipeline = None
             self.engine_type = "dia"
             logger.info("TTS engine initialized with Dia (lazy loading)")
@@ -113,6 +134,7 @@ class TTSEngine:
             logger.warning("Using dummy TTS implementation as no TTS engines are available")
             logger.warning("Check logs above for specific errors that prevented Kokoro or Dia initialization")
             self.pipeline = None
             self.engine_type = "dummy"
     def generate_speech(self, text: str, voice: str = 'af_heart', speed: float = 1.0) -> str:
@@ -145,6 +167,15 @@ class TTSEngine:
                     logger.info(f"Saving Kokoro audio to {output_path}")
                     sf.write(output_path, audio, 24000)
                     break
             elif self.engine_type == "dia":
                 # Use Dia for TTS generation
                 try:

 # Flag to track TTS engine availability
 KOKORO_AVAILABLE = False
+KOKORO_SPACE_AVAILABLE = False
 DIA_AVAILABLE = False
 # Try to import Kokoro first
         result = client.predict(
                 api_name="/lambda"
         )
+        logger.debug(f"result get back from Kokora FastAPI server: {result}")
+        if result:
+            KOKORO_SPACE_AVAILABLE = True
     else:
         # Re-raise if it's a different error
         logger.error(f"Kokoro import failed with unexpected error: {str(e)}")
                 logger.error(f"Failed to initialize Kokoro pipeline: {str(kokoro_err)}")
                 logger.error(f"Error type: {type(kokoro_err).__name__}")
                 logger.info("Will try to fall back to Dia TTS engine")
+        if KOKORO_SPACE_AVAILABLE:
+            logger.info(f"Using Kokoro FastAPI server as primary TTS engine with language code: {lang_code}")
+            try:
+                self.client = Client("Remsky/Kokoro-TTS-Zero")
+                self.engine_type = "kokoro_space"
+                logger.info("TTS engine successfully initialized with Kokoro FastAPI server")
+                result = client.predict(
+                        text="The studio was filled with the rich odour of roses, and when the light",
+                        voice_names=None,
+                        speed=1,
+                        api_name="/generate_speech_from_ui"
+                )
+                logger.info(result)
+            except Exception as kokoro_err:
+                logger.error(f"Failed to initialize Kokoro pipeline: {str(kokoro_err)}")
+                logger.error(f"Error type: {type(kokoro_err).__name__}")
+                logger.info("Will try to fall back to Dia TTS engine")
         # Try Dia if Kokoro is not available or failed to initialize
         if self.engine_type is None and DIA_AVAILABLE:
             logger.info("Using Dia as fallback TTS engine")
             # For Dia, we don't need to initialize anything here
             # The model will be lazy-loaded when needed
             self.pipeline = None
+            self.client = None
             self.engine_type = "dia"
             logger.info("TTS engine initialized with Dia (lazy loading)")
             logger.warning("Using dummy TTS implementation as no TTS engines are available")
             logger.warning("Check logs above for specific errors that prevented Kokoro or Dia initialization")
             self.pipeline = None
+            self.client = None
             self.engine_type = "dummy"
     def generate_speech(self, text: str, voice: str = 'af_heart', speed: float = 1.0) -> str:
                     logger.info(f"Saving Kokoro audio to {output_path}")
                     sf.write(output_path, audio, 24000)
                     break
+            elif self.engine_type == "kokoro_space":
+                # Use Kokoro FastAPI server for TTS generation
+                logger.info("Generating speech using Kokoro FastAPI server")
+                result = self.client.predict(
+                        text=text,
+                        voice_names=None,
+                        speed=speed,
+                        api_name="/generate_speech_from_ui"
+                )
             elif self.engine_type == "dia":
                 # Use Dia for TTS generation
                 try: