Spaces:

thunnai
/

SparkTTS

Running on Zero

thunnai committed on Feb 28

Commit

9f28641

1 Parent(s): 66f7114

Test zero gpu

Files changed (1) hide show

webui.py CHANGED Viewed

@@ -23,7 +23,7 @@ from datetime import datetime
 from cli.SparkTTS import SparkTTS
 from sparktts.utils.token_parser import LEVELS_MAP_UI
 from huggingface_hub import snapshot_download
 def initialize_model(model_dir=None, device="cpu"):
     """Load the model once at the beginning."""
@@ -37,6 +37,32 @@ def initialize_model(model_dir=None, device="cpu"):
     model = SparkTTS(model_dir, device)
     return model
 def run_tts(
     text,
@@ -64,17 +90,15 @@ def run_tts(
     logging.info("Starting inference...")
     # Perform inference and save the output audio
-    with torch.no_grad():
-        wav = model.inference(
-            text,
             prompt_speech,
             prompt_text,
             gender,
             pitch,
-            speed,
-        )
-        sf.write(save_path, wav, samplerate=16000)
     logging.info(f"Audio saved at: {save_path}")

 from cli.SparkTTS import SparkTTS
 from sparktts.utils.token_parser import LEVELS_MAP_UI
 from huggingface_hub import snapshot_download
+import spaces
 def initialize_model(model_dir=None, device="cpu"):
     """Load the model once at the beginning."""
     model = SparkTTS(model_dir, device)
     return model
+# The ZeroGPU decorator is exposed as `spaces.GPU` (uppercase); it requests a
+# GPU for the duration of each call to the decorated function.
+@spaces.GPU
+def generate(model,
+            text,
+            prompt_speech,
+            prompt_text,
+            gender,
+            pitch,
+            speed,
+):
+    """Run a single inference call and return the generated waveform.
+
+    The arguments after ``model`` are forwarded positionally, in the same
+    order, to ``model.inference``.
+
+    Args:
+        model: Object providing ``inference(...)``; the caller passes the
+            value produced by ``initialize_model``.
+        text: Forwarded to ``model.inference`` as the first argument.
+        prompt_speech: Forwarded to ``model.inference``.
+        prompt_text: Forwarded to ``model.inference``.
+        gender: Forwarded to ``model.inference``.
+        pitch: Forwarded to ``model.inference``.
+        speed: Forwarded to ``model.inference``.
+
+    Returns:
+        Whatever ``model.inference`` returns; the caller writes it to disk
+        with ``sf.write`` at a 16000 Hz sample rate.
+    """
+    # if gpu available, move model to gpu
+    # NOTE(review): assumes `model` exposes a torch-style `.to()` that returns
+    # the moved object -- confirm against cli.SparkTTS.
+    if torch.cuda.is_available():
+        model = model.to("cuda")
+    # Inference only: disable autograd bookkeeping for the forward pass.
+    with torch.no_grad():
+        wav = model.inference(
+            text,
+            prompt_speech,
+            prompt_text,
+            gender,
+            pitch,
+            speed,
+        )
+    return wav
 def run_tts(
     text,
     logging.info("Starting inference...")
     # Perform inference and save the output audio
+    wav = generate(model, text,
             prompt_speech,
             prompt_text,
             gender,
             pitch,
+            speed,)
+    sf.write(save_path, wav, samplerate=16000)
     logging.info(f"Audio saved at: {save_path}")