xlr8 committed on
Commit
1cb8653
·
1 Parent(s): 1278f47

update default text

Browse files
Files changed (1) hide show
  1. app.py +12 -6
app.py CHANGED
@@ -11,7 +11,7 @@ CONTEXT_WINDOW = 3
11
  FADE_MS = 200
12
  PAUSE_MS = 300
13
 
14
- generator = load_csm_1b(device="cuda") # XXX FIXME
15
 
16
  def make_silence(duration_ms=300):
17
  num_samples = int((SAMPLE_RATE * duration_ms) / 1000)
@@ -25,7 +25,7 @@ def fade(audio_np, fade_duration_ms=200):
25
  audio_np[-fade_len:] *= fade_out
26
  return audio_np
27
 
28
- @spaces.GPU(duration=90)
29
  def infer(input_text, temp, top_k, top_p):
30
  lines = [line.strip() for line in input_text.strip().split("\n") if line.strip()]
31
  all_audio = []
@@ -45,7 +45,7 @@ def infer(input_text, temp, top_k, top_p):
45
  max_audio_length_ms=8000,
46
  temperature=temp,
47
  topk=top_k,
48
- #top_p=top_p,
49
  )
50
 
51
  audio_np = audio.cpu().numpy()
@@ -61,20 +61,26 @@ def infer(input_text, temp, top_k, top_p):
61
  audio_int16 = (full_audio * 32768).astype(np.int16)
62
  return SAMPLE_RATE, audio_int16
63
 
 
 
 
 
64
 
65
  with gr.Blocks() as app:
66
  gr.Markdown("""
67
  # 🐸 Tahm Kench Voice Synth
68
  Enter lines of dialogue for Tahm Kench.
69
- - Use `__PAUSE__` on a line to insert 300ms silence.
 
 
70
  - Use shorter sentences and conservative sampling parameters to avoid hallucinations and degenerate output.
71
  """)
72
  with gr.Row():
73
- input_text = gr.TextArea(lines=10, label="Input (multi-line)")
74
  with gr.Row():
75
  temp = gr.Slider(0.1, 1.5, value=0.3, step=0.05, label="Temperature")
76
  top_k = gr.Slider(1, 100, value=10, step=1, label="Top-K")
77
- top_p = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-P")
78
  output_audio = gr.Audio(label="Generated Audio", type="numpy")
79
  run_button = gr.Button("🎤 Synthesize")
80
 
 
11
  FADE_MS = 200
12
  PAUSE_MS = 300
13
 
14
+ generator = load_csm_1b(device="cuda")
15
 
16
  def make_silence(duration_ms=300):
17
  num_samples = int((SAMPLE_RATE * duration_ms) / 1000)
 
25
  audio_np[-fade_len:] *= fade_out
26
  return audio_np
27
 
28
+ @spaces.GPU(duration=180)
29
  def infer(input_text, temp, top_k, top_p):
30
  lines = [line.strip() for line in input_text.strip().split("\n") if line.strip()]
31
  all_audio = []
 
45
  max_audio_length_ms=8000,
46
  temperature=temp,
47
  topk=top_k,
48
+ # top_p is not currently used
49
  )
50
 
51
  audio_np = audio.cpu().numpy()
 
61
  audio_int16 = (full_audio * 32768).astype(np.int16)
62
  return SAMPLE_RATE, audio_int16
63
 
64
+ # Default script with dramatic flair
65
+ DEFAULT_TEXT = """I hunger...
66
+ __PAUSE__
67
+ For love!"""
68
 
69
  with gr.Blocks() as app:
70
  gr.Markdown("""
71
  # 🐸 Tahm Kench Voice Synth
72
  Enter lines of dialogue for Tahm Kench.
73
+
74
+ - ⚠️ **Note:** This model may become confused by apostrophes (`'`) — avoid them if possible.
75
+ - Use `__PAUSE__` on a line to insert a 300ms silent break.
76
  - Use shorter sentences and conservative sampling parameters to avoid hallucinations and degenerate output.
77
  """)
78
  with gr.Row():
79
+ input_text = gr.TextArea(lines=10, label="Input (multi-line)", value=DEFAULT_TEXT)
80
  with gr.Row():
81
  temp = gr.Slider(0.1, 1.5, value=0.3, step=0.05, label="Temperature")
82
  top_k = gr.Slider(1, 100, value=10, step=1, label="Top-K")
83
+ top_p = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-P (not used)")
84
  output_audio = gr.Audio(label="Generated Audio", type="numpy")
85
  run_button = gr.Button("🎤 Synthesize")
86