Spaces:
Running
on
Zero
Running
on
Zero
Commit
·
b5b5082
1
Parent(s):
7890f41
first attempt
Browse files- app.py +37 -4
- neutts-air +1 -0
- packages.txt +1 -0
- requirements.txt +19 -0
app.py
CHANGED
|
@@ -1,7 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import gradio as gr
|
|
|
|
| 2 |
|
| 3 |
-
|
| 4 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
-
|
| 7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sys
|
| 2 |
+
sys.path.append("neutts-air")
|
| 3 |
+
from neuttsair.neutts import NeuTTSAir
|
| 4 |
import gradio as gr
|
| 5 |
+
import spaces
|
| 6 |
|
| 7 |
+
# load model
|
| 8 |
+
# Build the TTS model once at import time so every request reuses it.
# The original passed an undefined name `backbone` (NameError on startup), so
# the backbone repo id is spelled out here.
# NOTE(review): "neuphonic/neutts-air" is assumed to be the intended backbone
# checkpoint — confirm against the neutts-air submodule's README.
# Devices use "cuda": "gpu" is not a device string torch accepts.
# NOTE(review): presumably both devices are forwarded to torch — verify in
# neuttsair/neutts.py.
tts = NeuTTSAir(
    backbone_repo="neuphonic/neutts-air",
    backbone_device="cuda",
    codec_repo="neuphonic/neucodec",
    codec_device="cuda",
)
|
| 14 |
|
| 15 |
+
@spaces.GPU()
def infer(ref_text, ref_audio_path, gen_text):
    """Synthesize ``gen_text`` using a reference recording as the voice prompt.

    Args:
        ref_text: Reference text, passed to ``tts.infer`` together with the
            encoded reference.
        ref_audio_path: Path of the reference audio file, handed to
            ``tts.encode_reference``.
        gen_text: Text to synthesize.

    Returns:
        A ``(sample_rate, waveform)`` tuple. The sample rate is fixed at
        24 000 and the waveform is whatever ``tts.infer`` returns.
    """
    gr.Info("Starting inference request!")
    gr.Info("Encoding reference...")
    ref_codes = tts.encode_reference(ref_audio_path)

    # The original used an undefined name ``input_text`` on the next two
    # lines, raising NameError on every request; the parameter is ``gen_text``.
    gr.Info(f"Generating audio for input text: {gen_text}")
    wav = tts.infer(gen_text, ref_codes, ref_text)

    return (24_000, wav)
|
| 26 |
+
|
| 27 |
+
# Gradio UI: the three inputs map positionally onto
# infer(ref_text, ref_audio_path, gen_text).
demo = gr.Interface(
    fn=infer,
    inputs=[
        gr.Textbox(label="Reference Text"),
        # Gradio 4+ replaced the ``source`` keyword with ``sources`` (a list);
        # passing ``source=`` raises TypeError when the component is built.
        gr.Audio(sources=["upload"], type="filepath", label="Reference Audio"),
        gr.Textbox(label="Text to Generate"),
    ],
    outputs=gr.Audio(type="numpy", label="Generated Speech"),
    title="NeuTTS-Air",
    description="Upload a reference audio sample, provide the reference text, and enter new text to synthesize.",
)

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|
neutts-air
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
Subproject commit ededc7d354b05cb6d245c2a8563e04c5f8ac12a2
|
packages.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
espeak
|
requirements.txt
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
datasets==4.0.0
|
| 2 |
+
librosa==0.11.0
|
| 3 |
+
neucodec>=0.0.3
|
| 4 |
+
numpy==2.2.6
|
| 5 |
+
pandas==2.3.2
|
| 6 |
+
phonemizer==3.3.0
|
| 7 |
+
requests==2.32.5
|
| 8 |
+
scipy>=1.15
|
| 9 |
+
soundfile==0.13.1
|
| 10 |
+
torch==2.8.0
|
| 11 |
+
torchao==0.13.0
|
| 12 |
+
torchaudio==2.8.0
|
| 13 |
+
torchtune==0.6.1
|
| 14 |
+
tqdm==4.67.1
|
| 15 |
+
transformers==4.56.1
|
| 16 |
+
vector-quantize-pytorch==1.17.8
|
| 17 |
+
resemble-perth==1.0.1
|
| 18 |
+
accelerate==1.10.1
|
| 19 |
+
gradio
|