Spaces:

m-ric
/

open-notebooklm

Running on Zero

App Files Files Community

m-ric HF Staff committed on May 9

Commit

4d88a72

1 Parent(s): 7746966

Working interface kokoro

Browse files

Files changed (1) hide show

app.py +64 -80

app.py CHANGED Viewed

@@ -6,11 +6,11 @@ import io
 import soundfile as sf
 import gradio as gr
 import numpy as np
 import torch
-from transformers import set_seed
 from huggingface_hub import InferenceClient
 from kokoro import KModel, KPipeline
 # -----------------------------------------------------------------------------
 # Get podcast subject
 # -----------------------------------------------------------------------------
@@ -33,17 +33,25 @@ client = InferenceClient(
 )
-def generate_podcast_text(subject: str) -> str:
     """Ask the LLM for a script of a podcast given by two hosts."""
-    response = client.chat_completion(
-        [
-            {"role": "system", "content": SYSTEM_PROMPT},
-            {"role": "user", "content": f"""Here is the topic: it's the top trending paper on Hugging Face daily papers today. You will need to analyze it by bringing profound insights.
 {subject[:1000]}"""},
-        ],
         max_tokens=8156,
     )
-    return response.choices[0].message.content
 # -----------------------------------------------------------------------------
 # Kokoro TTS
@@ -64,22 +72,19 @@ for v in (MALE_VOICE, FEMALE_VOICE):
 # Audio generation system with queue
 # -----------------------------------------------------------------------------
-audio_queue: queue.Queue[tuple[int, np.ndarray] | None] = queue.Queue()
-stop_signal = threading.Event()
 @spaces.GPU
-def process_audio_chunks(podcast_text: str, speed: float = 1.0) -> None:
-    """Read each line, pick voice via tag, send chunks to the queue."""
     lines = [l for l in podcast_text.strip().splitlines() if l.strip()]
     pipeline = kpipeline
     pipeline_voice_female = pipeline.load_voice(FEMALE_VOICE)
     pipeline_voice_male = pipeline.load_voice(MALE_VOICE)
-    for line in lines:
-        if stop_signal.is_set():
-            break
         # Expect "[S1] ..." or "[S2] ..."
         if line.startswith("[MIKE]"):
             pipeline_voice = pipeline_voice_male
@@ -94,70 +99,49 @@ def process_audio_chunks(podcast_text: str, speed: float = 1.0) -> None:
             voice = FEMALE_VOICE
             utterance = line
-        first = True
         for _, ps, _ in pipeline(utterance, voice, speed):
             ref_s = pipeline_voice[len(ps) - 1]
-            audio = kmodel(ps, ref_s, speed)
-            audio_queue.put((24000, audio.numpy()))
-            audio_numpy = audio.numpy()
-            if first:
-                first = False
-                audio_queue.put((24000, torch.zeros(1).numpy()))
-    audio_queue.put(None)  # Signal end of stream
-def stream_audio_generator(podcast_text: str):
-    stop_signal.clear()
-    threading.Thread(target=process_audio_chunks, args=(podcast_text,)).start()
-    while True:
-        chunk = audio_queue.get()
-        if chunk is None:
-            break
-        sr, data = chunk
-        buf = io.BytesIO()
-        sf.write(buf, data, sr, format="wav")
-        buf.seek(0)
-        yield buf.getvalue(), "Generating podcast..."
-def stop_generation():
-    stop_signal.set()
-    return "Generation stopped"
-def generate_podcast():
-    return generate_podcast_text(PODCAST_SUBJECT)
-with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.Markdown("# NotebookLM Podcast Generator")
-    with gr.Row():
-        with gr.Column(scale=2):
-            # gr.Markdown(f"## Current Topic: {PODCAST_SUBJECT}")
-            gr.Markdown(
-                "This app generates a podcast discussion between two hosts about the specified topic."
-            )
-            generate_btn = gr.Button("Generate Podcast Script", variant="primary")
-            podcast_output = gr.Textbox(label="Generated Podcast Script", lines=15)
-            gr.Markdown("## Audio Preview")
-            gr.Markdown("Click below to hear the podcast with realistic voices:")
-            with gr.Row():
-                start_audio_btn = gr.Button("▶️ Generate Podcast", variant="secondary")
-                stop_btn = gr.Button("⏹️ Stop", variant="stop")
-            audio_output = gr.Audio(label="Podcast Audio", streaming=True)
-            status_text = gr.Textbox(label="Status", visible=True)
-    generate_btn.click(fn=generate_podcast, outputs=podcast_output)
-    start_audio_btn.click(fn=stream_audio_generator, inputs=podcast_output, outputs=[audio_output, status_text])
-    stop_btn.click(fn=stop_generation, outputs=status_text)
 if __name__ == "__main__":
-    demo.queue().launch()

 import soundfile as sf
 import gradio as gr
 import numpy as np
+import time
 import torch
 from huggingface_hub import InferenceClient
 from kokoro import KModel, KPipeline
 # -----------------------------------------------------------------------------
 # Get podcast subject
 # -----------------------------------------------------------------------------
 )
+def generate_podcast_text(subject: str, steering_question: str | None = None) -> str:
     """Ask the LLM for a script of a podcast given by two hosts."""
+    messages = [
+        {"role": "system", "content": SYSTEM_PROMPT},
+        {"role": "user", "content": f"""Here is the topic: it's the top trending paper on Hugging Face daily papers today. You will need to analyze it by bringing profound insights.
 {subject[:1000]}"""},
+    ]
+    if steering_question and len(steering_question) > 0:
+        messages.append({"role": "user", "content": f"You could focus on this question: {steering_question}"})
+    response = client.chat_completion(
+        messages,
         max_tokens=8156,
     )
+    full_text = response.choices[0].message.content
+    assert "[JANE]" in full_text
+    dialogue_start_index = full_text.find("[JANE]")
+    podcast_text = full_text[dialogue_start_index:]
+    return podcast_text
 # -----------------------------------------------------------------------------
 # Kokoro TTS
 # Audio generation system with queue
 # -----------------------------------------------------------------------------
 @spaces.GPU
+def generate_podcast(pdf, url, topic):
+    podcast_text = generate_podcast_text(PODCAST_SUBJECT, topic)
     lines = [l for l in podcast_text.strip().splitlines() if l.strip()]
     pipeline = kpipeline
     pipeline_voice_female = pipeline.load_voice(FEMALE_VOICE)
     pipeline_voice_male = pipeline.load_voice(MALE_VOICE)
+    speed = 1.
+    sr = 24000
+    for line in lines:
         # Expect "[S1] ..." or "[S2] ..."
         if line.startswith("[MIKE]"):
             pipeline_voice = pipeline_voice_male
             voice = FEMALE_VOICE
             utterance = line
         for _, ps, _ in pipeline(utterance, voice, speed):
+            t0 = time.time()
             ref_s = pipeline_voice[len(ps) - 1]
+            audio_numpy = kmodel(ps, ref_s, speed).numpy()
+            yield (sr, audio_numpy)
+            t1 = time.time()
+            print(f"PROCESSED '{utterance}' in {int(t1-t0)} seconds. {audio_numpy.shape}")
+demo = gr.Interface(
+    title="Open NotebookLM",
+    description=f"""Generates a podcast discussion between two hosts about the materials of your choice. Based on [Kokoro](https://huggingface.co/hexgrad/Kokoro-82M), and uses elements from a NotebookLM app by [Gabriel Chua](https://huggingface.co/spaces/gabrielchua/open-notebooklm).
+If you do not specify any source materials below, the podcast will be about the top trending [Daily paper](https://huggingface.co/papers/), '**{list(top_papers.keys())[0]}**'""",
+    fn=generate_podcast,
+    inputs=[
+        gr.File(
+            label="Optional - Upload a pdf",
+            file_types=[".pdf"],
+            file_count="single",
+        ),
+        gr.Textbox(
+            label="Optional - Type a URL to read its page",
+        ),
+        gr.Textbox(label="Do you have a more specific topic or question on the materials?"),
+        # gr.Dropdown(
+        #     label=UI_INPUTS["length"]["label"],
+        #     choices=UI_INPUTS["length"]["choices"],
+        #     value=UI_INPUTS["length"]["value"],
+        # ),
+    ],
+    outputs=[
+        gr.Audio(
+            label="Listen to your podcast",
+            format="wav",
+            streaming=True,
+        ),
+        # gr.Markdown(label=UI_OUTPUTS["transcript"]["label"]),
+    ],
+    theme=gr.themes.Soft(),
+    submit_btn="Generate podcast 🎙️",
+    # examples=UI_EXAMPLES,
+    # cache_examples=UI_CACHE_EXAMPLES,
+)
 if __name__ == "__main__":
+    demo.launch()