Spaces: Running on Zero
Commit: This works well

app.py CHANGED
@@ -25,8 +25,26 @@ client = InferenceClient(
     token=os.getenv("HF_TOKEN"),
 )

+
+def generate_podcast_text(subject: str) -> str:
+    """Ask the LLM for a script of a podcast given by two hosts."""
+    prompt = f"""Generate the script of "Open Paper review", a podcast told by 2 hosts about {subject}.
+The podcast should be an insightful discussion, with some amount of playful banter.
+Separate dialog as follows, using [S1] for the male host and [S2] for the female host, for instance:
+[S1] Hello, how are you?
+[S2] I'm good, thank you. How are you?
+[S1] I'm good, thank you.
+[S2] Great.
+Now go on, make 5 minutes of podcast.
+"""
+    response = client.chat_completion(
+        [{"role": "user", "content": prompt}],
+        max_tokens=8156,
+    )
+    return response.choices[0].message.content
+
 # -----------------------------------------------------------------------------
-# Kokoro TTS
+# Kokoro TTS
 # -----------------------------------------------------------------------------
 CUDA_AVAILABLE = torch.cuda.is_available()

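The prompt above fixes a tag convention ([S1] for the male host, [S2] for the female host) that the TTS stage later relies on when picking a voice per line. A minimal sketch of splitting such a script into (voice, text) pairs; the helper name split_script and its signature are illustrative and not part of app.py:

import re

def split_script(script: str, male_voice: str, female_voice: str) -> list[tuple[str, str]]:
    """Turn an [S1]/[S2]-tagged script into (voice, line) pairs."""
    pairs = []
    for raw in script.splitlines():
        m = re.match(r"\[(S[12])\]\s*(.+)", raw.strip())
        if not m:
            continue  # skip blank lines and untagged text
        tag, text = m.groups()
        pairs.append((male_voice if tag == "S1" else female_voice, text))
    return pairs

# e.g. split_script(generate_podcast_text("sparse attention"), MALE_VOICE, FEMALE_VOICE)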
@@ -40,29 +58,13 @@ FEMALE_VOICE = "af_heart" # [S2]
 for v in (MALE_VOICE, FEMALE_VOICE):
     kpipeline.load_voice(v)

+# -----------------------------------------------------------------------------
+# Audio generation system with queue
+# -----------------------------------------------------------------------------

 audio_queue: queue.Queue[tuple[int, np.ndarray] | None] = queue.Queue()
 stop_signal = threading.Event()

-
-
-def generate_podcast_text(subject: str) -> str:
-    """Ask the LLM for a ~5-minute two-host script."""
-    prompt = f"""Generate a podcast told by 2 hosts about {subject}.
-The podcast should be an insightful discussion, with some amount of playful banter.
-Separate dialog as follows using [S1] for the male host and [S2] for the female host, for instance:
-[S1] Hello, how are you?
-[S2] I'm good, thank you. How are you?
-[S1] I'm good, thank you. (laughs)
-[S2] Great.
-Now go on, make 5 minutes of podcast.
-"""
-    response = client.chat_completion(
-        [{"role": "user", "content": prompt}],
-        max_tokens=1000,
-    )
-    return response.choices[0].message.content
-
 @spaces.GPU
 def process_audio_chunks(podcast_text: str, speed: float = 1.0) -> None:
     """Read each line, pick voice via tag, send chunks to the queue."""
@@ -96,7 +98,6 @@ def process_audio_chunks(podcast_text: str, speed: float = 1.0) -> None:
         audio = kmodel(ps, ref_s, speed)
         audio_queue.put((24000, audio.numpy()))
         audio_numpy = audio.numpy()
-        print("GENERATED AUDIO", audio_numpy[-100:], audio_numpy.max())
         if first:
             first = False
             audio_queue.put((24000, torch.zeros(1).numpy()))
@@ -111,14 +112,12 @@ def stream_audio_generator(podcast_text: str):
         chunk = audio_queue.get()
         if chunk is None:
             break
-        print("CHUNK", chunk, type(chunk))
         sr, data = chunk

         buf = io.BytesIO()
         sf.write(buf, data, sr, format="wav")
         buf.seek(0)
-        yield buf.getvalue()
-
+        yield buf.getvalue(), "Generating podcast..."


 def stop_generation():
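stream_audio_generator wraps each raw chunk in a WAV container before yielding it; with the change above it also yields a status string for the UI. A small stand-alone illustration of the soundfile + io.BytesIO encoding step (the silent chunk is only a stand-in for real TTS output):

import io

import numpy as np
import soundfile as sf

sr = 24000
data = np.zeros(2400, dtype=np.float32)  # 0.1 s of silence as a stand-in chunk

buf = io.BytesIO()
sf.write(buf, data, sr, format="wav")  # encode the chunk as a complete WAV file
buf.seek(0)
wav_bytes = buf.getvalue()
print(len(wav_bytes))  # WAV header plus PCM payload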
@@ -155,7 +154,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:

     generate_btn.click(fn=generate_podcast, outputs=podcast_output)

-    start_audio_btn.click(fn=stream_audio_generator, inputs=podcast_output, outputs=audio_output)
+    start_audio_btn.click(fn=stream_audio_generator, inputs=podcast_output, outputs=[audio_output, status_text])
     stop_btn.click(fn=stop_generation, outputs=status_text)

 if __name__ == "__main__":
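Because the generator now yields (wav_bytes, status) pairs, the click event must list two output components, hence outputs=[audio_output, status_text]. A minimal Gradio sketch of that generator-to-multiple-outputs pattern, reduced to two textboxes so it runs without any audio backend (all names here are illustrative, not the app's real components):

import time

import gradio as gr

def stream_parts(subject: str):
    # Each yield updates both outputs: (running text, status message).
    text = ""
    for i in range(1, 4):
        time.sleep(0.5)
        text += f"[S1] Part {i} about {subject}.\n"
        yield text, f"Generating part {i}/3..."
    yield text, "Done."

with gr.Blocks() as demo:
    topic = gr.Textbox(label="Subject", value="open paper review")
    script_box = gr.Textbox(label="Script", lines=6)
    status_box = gr.Textbox(label="Status")
    go = gr.Button("Generate")
    go.click(fn=stream_parts, inputs=topic, outputs=[script_box, status_box])

if __name__ == "__main__":
    demo.launch()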