Spaces:
Sleeping
Sleeping
xlr8
committed on
Commit
·
1cb8653
1
Parent(s):
1278f47
update default text
Browse files
app.py
CHANGED
@@ -11,7 +11,7 @@ CONTEXT_WINDOW = 3
|
|
11 |
FADE_MS = 200
|
12 |
PAUSE_MS = 300
|
13 |
|
14 |
-
generator = load_csm_1b(device="cuda")
|
15 |
|
16 |
def make_silence(duration_ms=300):
|
17 |
num_samples = int((SAMPLE_RATE * duration_ms) / 1000)
|
@@ -25,7 +25,7 @@ def fade(audio_np, fade_duration_ms=200):
|
|
25 |
audio_np[-fade_len:] *= fade_out
|
26 |
return audio_np
|
27 |
|
28 |
-
@spaces.GPU(duration=
|
29 |
def infer(input_text, temp, top_k, top_p):
|
30 |
lines = [line.strip() for line in input_text.strip().split("\n") if line.strip()]
|
31 |
all_audio = []
|
@@ -45,7 +45,7 @@ def infer(input_text, temp, top_k, top_p):
|
|
45 |
max_audio_length_ms=8000,
|
46 |
temperature=temp,
|
47 |
topk=top_k,
|
48 |
-
#top_p
|
49 |
)
|
50 |
|
51 |
audio_np = audio.cpu().numpy()
|
@@ -61,20 +61,26 @@ def infer(input_text, temp, top_k, top_p):
|
|
61 |
audio_int16 = (full_audio * 32768).astype(np.int16)
|
62 |
return SAMPLE_RATE, audio_int16
|
63 |
|
|
|
|
|
|
|
|
|
64 |
|
65 |
with gr.Blocks() as app:
|
66 |
gr.Markdown("""
|
67 |
# 🐸 Tahm Kench Voice Synth
|
68 |
Enter lines of dialogue for Tahm Kench.
|
69 |
-
|
|
|
|
|
70 |
- Use shorter sentences and conservative sampling parameters to avoid hallucinations and degenerate output.
|
71 |
""")
|
72 |
with gr.Row():
|
73 |
-
input_text = gr.TextArea(lines=10, label="Input (multi-line)")
|
74 |
with gr.Row():
|
75 |
temp = gr.Slider(0.1, 1.5, value=0.3, step=0.05, label="Temperature")
|
76 |
top_k = gr.Slider(1, 100, value=10, step=1, label="Top-K")
|
77 |
-
top_p = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-P")
|
78 |
output_audio = gr.Audio(label="Generated Audio", type="numpy")
|
79 |
run_button = gr.Button("🎤 Synthesize")
|
80 |
|
|
|
11 |
FADE_MS = 200
|
12 |
PAUSE_MS = 300
|
13 |
|
14 |
+
generator = load_csm_1b(device="cuda")
|
15 |
|
16 |
def make_silence(duration_ms=300):
|
17 |
num_samples = int((SAMPLE_RATE * duration_ms) / 1000)
|
|
|
25 |
audio_np[-fade_len:] *= fade_out
|
26 |
return audio_np
|
27 |
|
28 |
+
@spaces.GPU(duration=180)
|
29 |
def infer(input_text, temp, top_k, top_p):
|
30 |
lines = [line.strip() for line in input_text.strip().split("\n") if line.strip()]
|
31 |
all_audio = []
|
|
|
45 |
max_audio_length_ms=8000,
|
46 |
temperature=temp,
|
47 |
topk=top_k,
|
48 |
+
# top_p is not currently used
|
49 |
)
|
50 |
|
51 |
audio_np = audio.cpu().numpy()
|
|
|
61 |
audio_int16 = (full_audio * 32768).astype(np.int16)
|
62 |
return SAMPLE_RATE, audio_int16
|
63 |
|
64 |
+
# Default script with dramatic flair
|
65 |
+
DEFAULT_TEXT = """I hunger...
|
66 |
+
__PAUSE__
|
67 |
+
For love!"""
|
68 |
|
69 |
with gr.Blocks() as app:
|
70 |
gr.Markdown("""
|
71 |
# 🐸 Tahm Kench Voice Synth
|
72 |
Enter lines of dialogue for Tahm Kench.
|
73 |
+
|
74 |
+
- ⚠️ **Note:** This model may become confused by apostrophes (`'`) — avoid them if possible.
|
75 |
+
- Use `__PAUSE__` on a line to insert a 300ms silent break.
|
76 |
- Use shorter sentences and conservative sampling parameters to avoid hallucinations and degenerate output.
|
77 |
""")
|
78 |
with gr.Row():
|
79 |
+
input_text = gr.TextArea(lines=10, label="Input (multi-line)", value=DEFAULT_TEXT)
|
80 |
with gr.Row():
|
81 |
temp = gr.Slider(0.1, 1.5, value=0.3, step=0.05, label="Temperature")
|
82 |
top_k = gr.Slider(1, 100, value=10, step=1, label="Top-K")
|
83 |
+
top_p = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-P (not used)")
|
84 |
output_audio = gr.Audio(label="Generated Audio", type="numpy")
|
85 |
run_button = gr.Button("🎤 Synthesize")
|
86 |
|