m-ric HF Staff committed
Commit 8569025 · 1 Parent(s): 4af8987

This works well

Files changed (1)
  1. app.py +24 -25
app.py CHANGED
@@ -25,8 +25,26 @@ client = InferenceClient(
     token=os.getenv("HF_TOKEN"),
 )
 
+
+def generate_podcast_text(subject: str) -> str:
+    """Ask the LLM for a script of a podcast given by two hosts."""
+    prompt = f"""Generate the script of "Open Paper review", a podcast told by 2 hosts about {subject}.
+The podcast should be an insightful discussion, with some amount of playful banter.
+Separate dialog as follows, using [S1] for the male host and [S2] for the female host, for instance:
+[S1] Hello, how are you?
+[S2] I'm good, thank you. How are you?
+[S1] I'm good, thank you.
+[S2] Great.
+Now go on, make 5 minutes of podcast.
+"""
+    response = client.chat_completion(
+        [{"role": "user", "content": prompt}],
+        max_tokens=8156,
+    )
+    return response.choices[0].message.content
+
 # -----------------------------------------------------------------------------
-# Kokoro TTS setup (replaces Dia)
+# Kokoro TTS
 # -----------------------------------------------------------------------------
 CUDA_AVAILABLE = torch.cuda.is_available()
 
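Note: the [S1]/[S2] tags requested in this prompt are what process_audio_chunks later keys on to pick a voice. A minimal sketch of that routing, assuming the tag format above (split_dialog_lines is illustrative; only the "af_heart" voice id is confirmed by this file, the male id is assumed):

    import re

    # Voice ids: "af_heart" appears in this diff; the male id here is an assumption.
    TAG_TO_VOICE = {"S1": "am_adam", "S2": "af_heart"}

    def split_dialog_lines(script: str) -> list[tuple[str, str]]:
        """Turn an [S1]/[S2]-tagged script into (voice, text) pairs."""
        pairs = []
        for line in script.splitlines():
            m = re.match(r"\[(S[12])\]\s*(.+)", line.strip())
            if m:
                pairs.append((TAG_TO_VOICE[m.group(1)], m.group(2)))
        return pairs

    print(split_dialog_lines("[S1] Hello, how are you?\n[S2] I'm good, thank you."))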
@@ -40,29 +58,13 @@ FEMALE_VOICE = "af_heart" # [S2]
 for v in (MALE_VOICE, FEMALE_VOICE):
     kpipeline.load_voice(v)
 
+# -----------------------------------------------------------------------------
+# Audio generation system with queue
+# -----------------------------------------------------------------------------
 
 audio_queue: queue.Queue[tuple[int, np.ndarray] | None] = queue.Queue()
 stop_signal = threading.Event()
 
-
-
-def generate_podcast_text(subject: str) -> str:
-    """Ask the LLM for a ~5-minute two-host script."""
-    prompt = f"""Generate a podcast told by 2 hosts about {subject}.
-The podcast should be an insightful discussion, with some amount of playful banter.
-Separate dialog as follows using [S1] for the male host and [S2] for the female host, for instance:
-[S1] Hello, how are you?
-[S2] I'm good, thank you. How are you?
-[S1] I'm good, thank you. (laughs)
-[S2] Great.
-Now go on, make 5 minutes of podcast.
-"""
-    response = client.chat_completion(
-        [{"role": "user", "content": prompt}],
-        max_tokens=1000,
-    )
-    return response.choices[0].message.content
-
 @spaces.GPU
 def process_audio_chunks(podcast_text: str, speed: float = 1.0) -> None:
     """Read each line, pick voice via tag, send chunks to the queue."""
@@ -96,7 +98,6 @@ def process_audio_chunks(podcast_text: str, speed: float = 1.0) -> None:
         audio = kmodel(ps, ref_s, speed)
         audio_queue.put((24000, audio.numpy()))
         audio_numpy = audio.numpy()
-        print("GENERATED AUDIO", audio_numpy[-100:], audio_numpy.max())
         if first:
             first = False
             audio_queue.put((24000, torch.zeros(1).numpy()))
@@ -111,14 +112,12 @@ def stream_audio_generator(podcast_text: str):
         chunk = audio_queue.get()
         if chunk is None:
             break
-        print("CHUNK", chunk, type(chunk))
         sr, data = chunk
 
         buf = io.BytesIO()
         sf.write(buf, data, sr, format="wav")
         buf.seek(0)
-        yield buf.getvalue()
-
+        yield buf.getvalue(), "Generating podcast..."
 
 
 def stop_generation():
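Each queue chunk is re-encoded as a complete in-memory WAV before being yielded, so the front end receives self-describing audio rather than raw samples. The serialization step in isolation, with a test tone as placeholder data:

    import io

    import numpy as np
    import soundfile as sf

    sr = 24000
    t = np.arange(sr) / sr
    data = np.sin(2 * np.pi * 440 * t).astype(np.float32)  # 1 s test tone

    buf = io.BytesIO()
    sf.write(buf, data, sr, format="wav")  # soundfile writes to file-like objects
    buf.seek(0)
    wav_bytes = buf.getvalue()  # what stream_audio_generator yields to the Audio component
    print(len(wav_bytes))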
@@ -155,7 +154,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
 
     generate_btn.click(fn=generate_podcast, outputs=podcast_output)
 
-    start_audio_btn.click(fn=stream_audio_generator, inputs=podcast_output, outputs=audio_output)
+    start_audio_btn.click(fn=stream_audio_generator, inputs=podcast_output, outputs=[audio_output, status_text])
     stop_btn.click(fn=stop_generation, outputs=status_text)
 
 if __name__ == "__main__":
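Since stream_audio_generator now yields (wav_bytes, status) tuples, the click handler needs both components in its outputs list; Gradio maps each yielded tuple positionally on every iteration. A stripped-down sketch of the wiring (the component setup is assumed, not shown in this diff):

    import gradio as gr

    def stream_audio_generator(podcast_text: str):
        # Stand-in generator: the app yields (wav_bytes, status_message) pairs.
        for i in range(3):
            yield b"", f"Generating podcast... ({i + 1}/3)"  # placeholder audio bytes

    with gr.Blocks() as demo:
        podcast_output = gr.Textbox(value="[S1] Hello")
        audio_output = gr.Audio(streaming=True, autoplay=True)
        status_text = gr.Textbox(label="Status")
        start_audio_btn = gr.Button("Generate audio")
        start_audio_btn.click(fn=stream_audio_generator, inputs=podcast_output,
                              outputs=[audio_output, status_text])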