m-ric HF Staff committed on
Commit
4d88a72
·
1 Parent(s): 7746966

Working interface kokoro

Browse files
Files changed (1) hide show
  1. app.py +64 -80
app.py CHANGED
@@ -6,11 +6,11 @@ import io
6
  import soundfile as sf
7
  import gradio as gr
8
  import numpy as np
 
 
9
  import torch
10
- from transformers import set_seed
11
  from huggingface_hub import InferenceClient
12
  from kokoro import KModel, KPipeline
13
-
14
  # -----------------------------------------------------------------------------
15
  # Get podcast subject
16
  # -----------------------------------------------------------------------------
@@ -33,17 +33,25 @@ client = InferenceClient(
33
  )
34
 
35
 
36
- def generate_podcast_text(subject: str) -> str:
37
  """Ask the LLM for a script of a podcast given by two hosts."""
38
- response = client.chat_completion(
39
- [
40
- {"role": "system", "content": SYSTEM_PROMPT},
41
- {"role": "user", "content": f"""Here is the topic: it's the top trending paper on Hugging Face daily papers today. You will need to analyze it by bringing profound insights.
42
  {subject[:1000]}"""},
43
- ],
 
 
 
 
 
44
  max_tokens=8156,
45
  )
46
- return response.choices[0].message.content
 
 
 
 
47
 
48
  # -----------------------------------------------------------------------------
49
  # Kokoro TTS
@@ -64,22 +72,19 @@ for v in (MALE_VOICE, FEMALE_VOICE):
64
  # Audio generation system with queue
65
  # -----------------------------------------------------------------------------
66
 
67
- audio_queue: queue.Queue[tuple[int, np.ndarray] | None] = queue.Queue()
68
- stop_signal = threading.Event()
69
-
70
  @spaces.GPU
71
- def process_audio_chunks(podcast_text: str, speed: float = 1.0) -> None:
72
- """Read each line, pick voice via tag, send chunks to the queue."""
73
  lines = [l for l in podcast_text.strip().splitlines() if l.strip()]
74
 
75
  pipeline = kpipeline
76
  pipeline_voice_female = pipeline.load_voice(FEMALE_VOICE)
77
  pipeline_voice_male = pipeline.load_voice(MALE_VOICE)
78
 
79
- for line in lines:
80
- if stop_signal.is_set():
81
- break
82
 
 
83
  # Expect "[MIKE] ..." or "[JANE] ..."
84
  if line.startswith("[MIKE]"):
85
  pipeline_voice = pipeline_voice_male
@@ -94,70 +99,49 @@ def process_audio_chunks(podcast_text: str, speed: float = 1.0) -> None:
94
  voice = FEMALE_VOICE
95
  utterance = line
96
 
97
- first = True
98
  for _, ps, _ in pipeline(utterance, voice, speed):
 
99
  ref_s = pipeline_voice[len(ps) - 1]
100
- audio = kmodel(ps, ref_s, speed)
101
- audio_queue.put((24000, audio.numpy()))
102
- audio_numpy = audio.numpy()
103
- if first:
104
- first = False
105
- audio_queue.put((24000, torch.zeros(1).numpy()))
106
- audio_queue.put(None) # Signal end of stream
107
-
108
-
109
- def stream_audio_generator(podcast_text: str):
110
- stop_signal.clear()
111
- threading.Thread(target=process_audio_chunks, args=(podcast_text,)).start()
112
-
113
- while True:
114
- chunk = audio_queue.get()
115
- if chunk is None:
116
- break
117
- sr, data = chunk
118
-
119
- buf = io.BytesIO()
120
- sf.write(buf, data, sr, format="wav")
121
- buf.seek(0)
122
- yield buf.getvalue(), "Generating podcast..."
123
-
124
-
125
- def stop_generation():
126
- stop_signal.set()
127
- return "Generation stopped"
128
-
129
-
130
- def generate_podcast():
131
- return generate_podcast_text(PODCAST_SUBJECT)
132
-
133
-
134
- with gr.Blocks(theme=gr.themes.Soft()) as demo:
135
- gr.Markdown("# NotebookLM Podcast Generator")
136
-
137
- with gr.Row():
138
- with gr.Column(scale=2):
139
- # gr.Markdown(f"## Current Topic: {PODCAST_SUBJECT}")
140
- gr.Markdown(
141
- "This app generates a podcast discussion between two hosts about the specified topic."
142
- )
143
-
144
- generate_btn = gr.Button("Generate Podcast Script", variant="primary")
145
- podcast_output = gr.Textbox(label="Generated Podcast Script", lines=15)
146
-
147
- gr.Markdown("## Audio Preview")
148
- gr.Markdown("Click below to hear the podcast with realistic voices:")
149
-
150
- with gr.Row():
151
- start_audio_btn = gr.Button("▶️ Generate Podcast", variant="secondary")
152
- stop_btn = gr.Button("⏹️ Stop", variant="stop")
153
-
154
- audio_output = gr.Audio(label="Podcast Audio", streaming=True)
155
- status_text = gr.Textbox(label="Status", visible=True)
156
-
157
- generate_btn.click(fn=generate_podcast, outputs=podcast_output)
158
-
159
- start_audio_btn.click(fn=stream_audio_generator, inputs=podcast_output, outputs=[audio_output, status_text])
160
- stop_btn.click(fn=stop_generation, outputs=status_text)
161
 
162
  if __name__ == "__main__":
163
- demo.queue().launch()
 
6
  import soundfile as sf
7
  import gradio as gr
8
  import numpy as np
9
+ import time
10
+
11
  import torch
 
12
  from huggingface_hub import InferenceClient
13
  from kokoro import KModel, KPipeline
 
14
  # -----------------------------------------------------------------------------
15
  # Get podcast subject
16
  # -----------------------------------------------------------------------------
 
33
  )
34
 
35
 
36
+ def generate_podcast_text(subject: str, steering_question: str | None = None) -> str:
37
  """Ask the LLM for a script of a podcast given by two hosts."""
38
+ messages = [
39
+ {"role": "system", "content": SYSTEM_PROMPT},
40
+ {"role": "user", "content": f"""Here is the topic: it's the top trending paper on Hugging Face daily papers today. You will need to analyze it by bringing profound insights.
 
41
  {subject[:1000]}"""},
42
+ ]
43
+ if steering_question and len(steering_question) > 0:
44
+ messages.append({"role": "user", "content": f"You could focus on this question: {steering_question}"})
45
+
46
+ response = client.chat_completion(
47
+ messages,
48
  max_tokens=8156,
49
  )
50
+ full_text = response.choices[0].message.content
51
+ assert "[JANE]" in full_text
52
+ dialogue_start_index = full_text.find("[JANE]")
53
+ podcast_text = full_text[dialogue_start_index:]
54
+ return podcast_text
55
 
56
  # -----------------------------------------------------------------------------
57
  # Kokoro TTS
 
72
  # Audio generation system with queue
73
  # -----------------------------------------------------------------------------
74
 
 
 
 
75
  @spaces.GPU
76
+ def generate_podcast(pdf, url, topic):
77
+ podcast_text = generate_podcast_text(PODCAST_SUBJECT, topic)
78
  lines = [l for l in podcast_text.strip().splitlines() if l.strip()]
79
 
80
  pipeline = kpipeline
81
  pipeline_voice_female = pipeline.load_voice(FEMALE_VOICE)
82
  pipeline_voice_male = pipeline.load_voice(MALE_VOICE)
83
 
84
+ speed = 1.
85
+ sr = 24000
 
86
 
87
+ for line in lines:
88
  # Expect "[MIKE] ..." or "[JANE] ..."
89
  if line.startswith("[MIKE]"):
90
  pipeline_voice = pipeline_voice_male
 
99
  voice = FEMALE_VOICE
100
  utterance = line
101
 
 
102
  for _, ps, _ in pipeline(utterance, voice, speed):
103
+ t0 = time.time()
104
  ref_s = pipeline_voice[len(ps) - 1]
105
+ audio_numpy = kmodel(ps, ref_s, speed).numpy()
106
+ yield (sr, audio_numpy)
107
+ t1 = time.time()
108
+ print(f"PROCESSED '{utterance}' in {int(t1-t0)} seconds. {audio_numpy.shape}")
109
+
110
+ demo = gr.Interface(
111
+ title="Open NotebookLM",
112
+ description=f"""Generates a podcast discussion between two hosts about the materials of your choice. Based on [Kokoro](https://huggingface.co/hexgrad/Kokoro-82M), and uses elements from a NotebookLM app by [Gabriel Chua](https://huggingface.co/spaces/gabrielchua/open-notebooklm).
113
+
114
+ If you do not specify any source materials below, the podcast will be about the top trending [Daily paper](https://huggingface.co/papers/), '**{list(top_papers.keys())[0]}**'""",
115
+ fn=generate_podcast,
116
+ inputs=[
117
+ gr.File(
118
+ label="Optional - Upload a pdf",
119
+ file_types=[".pdf"],
120
+ file_count="single",
121
+ ),
122
+ gr.Textbox(
123
+ label="Optional - Type a URL to read its page",
124
+ ),
125
+ gr.Textbox(label="Do you have a more specific topic or question on the materials?"),
126
+ # gr.Dropdown(
127
+ # label=UI_INPUTS["length"]["label"],
128
+ # choices=UI_INPUTS["length"]["choices"],
129
+ # value=UI_INPUTS["length"]["value"],
130
+ # ),
131
+ ],
132
+ outputs=[
133
+ gr.Audio(
134
+ label="Listen to your podcast",
135
+ format="wav",
136
+ streaming=True,
137
+ ),
138
+ # gr.Markdown(label=UI_OUTPUTS["transcript"]["label"]),
139
+ ],
140
+ theme=gr.themes.Soft(),
141
+ submit_btn="Generate podcast 🎙️",
142
+ # examples=UI_EXAMPLES,
143
+ # cache_examples=UI_CACHE_EXAMPLES,
144
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
 
146
  if __name__ == "__main__":
147
+ demo.launch()