# Hugging Face Space - status: Running on Zero
import queue | |
import threading | |
import spaces | |
import os | |
import io | |
import soundfile as sf | |
import gradio as gr | |
import numpy as np | |
import time | |
import torch | |
from huggingface_hub import InferenceClient | |
from kokoro import KModel, KPipeline | |
# -----------------------------------------------------------------------------
# Podcast subject
# -----------------------------------------------------------------------------
from papers import PaperManager

# Fetched once at import time, so every request reuses the same subject.
paper_manager = PaperManager()
top_papers = paper_manager.get_top_content()

# The first value of the mapping is the default podcast subject
# (presumably keys are paper titles and values their content - confirm
# against PaperManager).
PODCAST_SUBJECT = list(top_papers.values())[0]
# -----------------------------------------------------------------------------
# LLM that writes the podcast script
# -----------------------------------------------------------------------------
from prompts import SYSTEM_PROMPT

# Chat client for the script-writing model. The access token is read from the
# HF_TOKEN environment variable (None when it is not set).
client = InferenceClient(
    model="meta-llama/Llama-3.3-70B-Instruct",
    provider="cerebras",
    token=os.environ.get("HF_TOKEN"),
)
def generate_podcast_text(subject: str, steering_question: str | None = None) -> str:
    """Ask the LLM for a script of a podcast given by two hosts.

    Args:
        subject: Source material for the episode. Only the first 1000
            characters are sent to the model.
        steering_question: Optional question the hosts should focus on.
            Ignored when ``None``, empty, or whitespace-only.

    Returns:
        The model's reply starting at the first ``[JANE]`` tag; anything the
        model wrote before that tag is dropped.

    Raises:
        ValueError: If the reply contains no ``[JANE]`` tag.
    """
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": f"""Here is the topic: it's the top trending paper on Hugging Face daily papers today. You will need to analyze it by bringing profound insights.
{subject[:1000]}"""},
    ]
    if steering_question and steering_question.strip():
        messages.append({"role": "user", "content": f"You could focus on this question: {steering_question}"})

    response = client.chat_completion(
        messages,
        max_tokens=8156,
    )
    # Treat a missing message body like an empty reply so the check below
    # reports it instead of failing on None.
    full_text = response.choices[0].message.content or ""

    # Explicit check rather than `assert`: asserts are stripped under -O, and
    # a -1 index would then silently return only the last character.
    dialogue_start_index = full_text.find("[JANE]")
    if dialogue_start_index == -1:
        raise ValueError("LLM response does not contain the expected '[JANE]' speaker tag")
    return full_text[dialogue_start_index:]
# -----------------------------------------------------------------------------
# Kokoro text-to-speech
# -----------------------------------------------------------------------------
CUDA_AVAILABLE = torch.cuda.is_available()

# Put the model on the GPU when one is visible, otherwise on the CPU, and
# switch it to evaluation mode.
kmodel = KModel().to("cuda" if CUDA_AVAILABLE else "cpu")
kmodel = kmodel.eval()

kpipeline = KPipeline(lang_code="a")  # English voices

FEMALE_VOICE = "af_heart"  # used for [JANE]
MALE_VOICE = "am_michael"  # used for [MIKE]

# Load both voices up front so the first request does not pay for it.
for _voice_name in (MALE_VOICE, FEMALE_VOICE):
    kpipeline.load_voice(_voice_name)
# -----------------------------------------------------------------------------
# Audio generation (streamed chunk by chunk)
# -----------------------------------------------------------------------------
def generate_podcast(pdf, url, topic):
    """Stream a two-host podcast about ``PODCAST_SUBJECT`` as audio chunks.

    The script is written by ``generate_podcast_text`` and then synthesised
    line by line: lines tagged ``[MIKE]`` use the male voice, lines tagged
    ``[JANE]`` (and untagged lines) use the female voice.

    Args:
        pdf: Uploaded PDF from the UI. Currently unused.
        url: URL typed in the UI. Currently unused.
        topic: Optional steering question forwarded to the script-writing LLM.

    Yields:
        ``(sample_rate, audio)`` tuples, one per chunk produced by the TTS
        pipeline, where ``sample_rate`` is fixed at 24000 and ``audio`` is the
        model output converted with ``.numpy()``.
    """
    podcast_text = generate_podcast_text(PODCAST_SUBJECT, topic)
    # Strip every line so an indented speaker tag is still recognised.
    lines = [stripped for raw in podcast_text.splitlines() if (stripped := raw.strip())]

    pipeline = kpipeline
    # Speaker tag -> (voice name, loaded voice pack).
    speakers = {
        "[JANE]": (FEMALE_VOICE, pipeline.load_voice(FEMALE_VOICE)),
        "[MIKE]": (MALE_VOICE, pipeline.load_voice(MALE_VOICE)),
    }
    fallback_speaker = speakers["[JANE]"]
    speed = 1.0
    sr = 24000

    for line in lines:
        # Expect "[MIKE] ..." or "[JANE] ..."; untagged lines go to the
        # fallback voice and are read in full.
        voice, pipeline_voice = fallback_speaker
        utterance = line
        for tag, speaker in speakers.items():
            if line.startswith(tag):
                voice, pipeline_voice = speaker
                utterance = line[len(tag):].strip()
                break

        if not utterance:
            # A bare speaker tag carries nothing to synthesise.
            continue

        for _, ps, _ in pipeline(utterance, voice, speed):
            if not ps:
                # No phonemes for this chunk; indexing the voice pack with
                # len(ps) - 1 would otherwise wrap around to -1.
                continue
            t0 = time.perf_counter()
            # NOTE(review): the voice pack appears to be indexed by phoneme
            # count - confirm against the Kokoro pipeline.
            ref_s = pipeline_voice[len(ps) - 1]
            audio_numpy = kmodel(ps, ref_s, speed).numpy()
            # Stop the clock before yielding so the log reflects synthesis
            # time only, not how long the consumer keeps us suspended.
            t1 = time.perf_counter()
            print(f"PROCESSED '{utterance}' in {int(t1-t0)} seconds. {audio_numpy.shape}")
            yield (sr, audio_numpy)
# Gradio UI: three optional inputs, one streamed audio output.
demo = gr.Interface(
    fn=generate_podcast,
    inputs=[
        gr.File(
            file_count="single",
            file_types=[".pdf"],
            label="Optional - Upload a pdf",
        ),
        gr.Textbox(label="Optional - Type a URL to read its page"),
        gr.Textbox(
            label="Do you have a more specific topic or question on the materials?",
        ),
        # gr.Dropdown(
        #     label=UI_INPUTS["length"]["label"],
        #     choices=UI_INPUTS["length"]["choices"],
        #     value=UI_INPUTS["length"]["value"],
        # ),
    ],
    outputs=[
        gr.Audio(
            streaming=True,
            format="wav",
            label="Listen to your podcast",
        ),
        # gr.Markdown(label=UI_OUTPUTS["transcript"]["label"]),
    ],
    title="Open NotebookLM",
    # The first key of top_papers is shown as the default paper.
    description=f"""Generates a podcast discussion between two hosts about the materials of your choice. Based on [Kokoro](https://huggingface.co/hexgrad/Kokoro-82M), and uses elements from a NotebookLM app by [Gabriel Chua](https://huggingface.co/spaces/gabrielchua/open-notebooklm).
If you do not specify any source materials below, the podcast will be about the top trending [Daily paper](https://huggingface.co/papers/), '**{list(top_papers.keys())[0]}**'""",
    theme=gr.themes.Soft(),
    submit_btn="Generate podcast 🎙️",
    # examples=UI_EXAMPLES,
    # cache_examples=UI_CACHE_EXAMPLES,
)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()