Spaces:
Running
Running
Michael Hu
committed on
Commit
·
22bd0b9
1
Parent(s):
5a72681
use kokoro fastAPI server to generate voice
Browse files: utils/tts.py +34 -3
utils/tts.py
CHANGED
@@ -9,6 +9,7 @@ logger = logging.getLogger(__name__)
|
|
9 |
|
10 |
# Flag to track TTS engine availability
|
11 |
KOKORO_AVAILABLE = False
|
|
|
12 |
DIA_AVAILABLE = False
|
13 |
|
14 |
# Try to import Kokoro first
|
@@ -25,7 +26,9 @@ except AttributeError as e:
|
|
25 |
result = client.predict(
|
26 |
api_name="/lambda"
|
27 |
)
|
28 |
-
|
|
|
|
|
29 |
else:
|
30 |
# Re-raise if it's a different error
|
31 |
logger.error(f"Kokoro import failed with unexpected error: {str(e)}")
|
@@ -97,14 +100,32 @@ class TTSEngine:
|
|
97 |
logger.error(f"Failed to initialize Kokoro pipeline: {str(kokoro_err)}")
|
98 |
logger.error(f"Error type: {type(kokoro_err).__name__}")
|
99 |
logger.info("Will try to fall back to Dia TTS engine")
|
100 |
-
|
101 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
102 |
# Try Dia if Kokoro is not available or failed to initialize
|
103 |
if self.engine_type is None and DIA_AVAILABLE:
|
104 |
logger.info("Using Dia as fallback TTS engine")
|
105 |
# For Dia, we don't need to initialize anything here
|
106 |
# The model will be lazy-loaded when needed
|
107 |
self.pipeline = None
|
|
|
108 |
self.engine_type = "dia"
|
109 |
logger.info("TTS engine initialized with Dia (lazy loading)")
|
110 |
|
@@ -113,6 +134,7 @@ class TTSEngine:
|
|
113 |
logger.warning("Using dummy TTS implementation as no TTS engines are available")
|
114 |
logger.warning("Check logs above for specific errors that prevented Kokoro or Dia initialization")
|
115 |
self.pipeline = None
|
|
|
116 |
self.engine_type = "dummy"
|
117 |
|
118 |
def generate_speech(self, text: str, voice: str = 'af_heart', speed: float = 1.0) -> str:
|
@@ -145,6 +167,15 @@ class TTSEngine:
|
|
145 |
logger.info(f"Saving Kokoro audio to {output_path}")
|
146 |
sf.write(output_path, audio, 24000)
|
147 |
break
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
148 |
elif self.engine_type == "dia":
|
149 |
# Use Dia for TTS generation
|
150 |
try:
|
|
|
9 |
|
10 |
# Flag to track TTS engine availability
|
11 |
KOKORO_AVAILABLE = False
|
12 |
+
KOKORO_SPACE_AVAILABLE = False
|
13 |
DIA_AVAILABLE = False
|
14 |
|
15 |
# Try to import Kokoro first
|
|
|
26 |
result = client.predict(
|
27 |
api_name="/lambda"
|
28 |
)
|
29 |
+
logger.debug(f"result get back from Kokora FastAPI server: {result}")
|
30 |
+
if result:
|
31 |
+
KOKORO_SPACE_AVAILABLE = True
|
32 |
else:
|
33 |
# Re-raise if it's a different error
|
34 |
logger.error(f"Kokoro import failed with unexpected error: {str(e)}")
|
|
|
100 |
logger.error(f"Failed to initialize Kokoro pipeline: {str(kokoro_err)}")
|
101 |
logger.error(f"Error type: {type(kokoro_err).__name__}")
|
102 |
logger.info("Will try to fall back to Dia TTS engine")
|
103 |
+
|
104 |
+
if KOKORO_SPACE_AVAILABLE:
|
105 |
+
logger.info(f"Using Kokoro FastAPI server as primary TTS engine with language code: {lang_code}")
|
106 |
+
try:
|
107 |
+
self.client = Client("Remsky/Kokoro-TTS-Zero")
|
108 |
+
self.engine_type = "kokoro_space"
|
109 |
+
logger.info("TTS engine successfully initialized with Kokoro FastAPI server")
|
110 |
+
result = client.predict(
|
111 |
+
text="The studio was filled with the rich odour of roses, and when the light",
|
112 |
+
voice_names=None,
|
113 |
+
speed=1,
|
114 |
+
api_name="/generate_speech_from_ui"
|
115 |
+
)
|
116 |
+
logger.info(result)
|
117 |
+
except Exception as kokoro_err:
|
118 |
+
logger.error(f"Failed to initialize Kokoro pipeline: {str(kokoro_err)}")
|
119 |
+
logger.error(f"Error type: {type(kokoro_err).__name__}")
|
120 |
+
logger.info("Will try to fall back to Dia TTS engine")
|
121 |
+
|
122 |
# Try Dia if Kokoro is not available or failed to initialize
|
123 |
if self.engine_type is None and DIA_AVAILABLE:
|
124 |
logger.info("Using Dia as fallback TTS engine")
|
125 |
# For Dia, we don't need to initialize anything here
|
126 |
# The model will be lazy-loaded when needed
|
127 |
self.pipeline = None
|
128 |
+
self.client = None
|
129 |
self.engine_type = "dia"
|
130 |
logger.info("TTS engine initialized with Dia (lazy loading)")
|
131 |
|
|
|
134 |
logger.warning("Using dummy TTS implementation as no TTS engines are available")
|
135 |
logger.warning("Check logs above for specific errors that prevented Kokoro or Dia initialization")
|
136 |
self.pipeline = None
|
137 |
+
self.client = None
|
138 |
self.engine_type = "dummy"
|
139 |
|
140 |
def generate_speech(self, text: str, voice: str = 'af_heart', speed: float = 1.0) -> str:
|
|
|
167 |
logger.info(f"Saving Kokoro audio to {output_path}")
|
168 |
sf.write(output_path, audio, 24000)
|
169 |
break
|
170 |
+
elif self.engine_type == "kokoro_space":
|
171 |
+
# Use Kokoro FastAPI server for TTS generation
|
172 |
+
logger.info("Generating speech using Kokoro FastAPI server")
|
173 |
+
result = self.client.predict(
|
174 |
+
text=text,
|
175 |
+
voice_names=None,
|
176 |
+
speed=speed,
|
177 |
+
api_name="/generate_speech_from_ui"
|
178 |
+
)
|
179 |
elif self.engine_type == "dia":
|
180 |
# Use Dia for TTS generation
|
181 |
try:
|