# open-notebooklm / app.py
import os
import time

import gradio as gr
import spaces  # ZeroGPU decorator for the GPU-bound generation function
import torch
from huggingface_hub import InferenceClient
from kokoro import KModel, KPipeline
from kokoro import KModel, KPipeline
# -----------------------------------------------------------------------------
# Get podcast subject
# -----------------------------------------------------------------------------
from papers import PaperManager
paper_manager = PaperManager()
top_papers = paper_manager.get_top_content()
PODCAST_SUBJECT = list(top_papers.values())[0]
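# get_top_content() is assumed to return an ordered mapping of
# {paper title: paper content}; the first entry is today's top trending paper.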
# -----------------------------------------------------------------------------
# LLM that writes the script (unchanged)
# -----------------------------------------------------------------------------
from prompts import SYSTEM_PROMPT
client = InferenceClient(
    "meta-llama/Llama-3.3-70B-Instruct",
    provider="cerebras",
    token=os.getenv("HF_TOKEN"),
)
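# Requires an HF_TOKEN environment variable; provider="cerebras" routes the
# chat completion through Hugging Face Inference Providers to Cerebras.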
def generate_podcast_text(subject: str, steering_question: str | None = None) -> str:
    """Ask the LLM for a podcast script delivered by two hosts."""
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {
            "role": "user",
            "content": f"""Here is the topic: it's the top trending paper on Hugging Face daily papers today. You will need to analyze it by bringing profound insights.
{subject[:1000]}""",
        },
    ]
    if steering_question and len(steering_question) > 0:
        messages.append({"role": "user", "content": f"You could focus on this question: {steering_question}"})
    response = client.chat_completion(
        messages,
        max_tokens=8156,
    )
    full_text = response.choices[0].message.content
    # The system prompt asks the model to open the dialogue with "[JANE]";
    # drop any preamble before the first speaker tag.
    assert "[JANE]" in full_text
    dialogue_start_index = full_text.find("[JANE]")
    podcast_text = full_text[dialogue_start_index:]
    return podcast_text
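# The TTS loop below keys on these speaker tags, so a valid script looks like
# (illustrative example, not actual model output):
#   [JANE] Welcome back! Today we're digging into a new paper...
#   [MIKE] Thanks, Jane. The headline result is...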
# -----------------------------------------------------------------------------
# Kokoro TTS
# -----------------------------------------------------------------------------
CUDA_AVAILABLE = torch.cuda.is_available()

kmodel = KModel().to("cuda" if CUDA_AVAILABLE else "cpu").eval()
kpipeline = KPipeline(lang_code="a")  # "a" = American English voices

MALE_VOICE = "am_michael"  # [MIKE]
FEMALE_VOICE = "af_heart"  # [JANE]

# Pre-warm voices to avoid first-call latency
for v in (MALE_VOICE, FEMALE_VOICE):
    kpipeline.load_voice(v)
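# Minimal synthesis sketch (assuming the same Kokoro API as used below): for
# each chunk of phonemes `ps` that KPipeline yields, KModel produces 24 kHz audio:
#     for _, ps, _ in kpipeline("Hello there!", FEMALE_VOICE, 1.0):
#         ref_s = kpipeline.load_voice(FEMALE_VOICE)[len(ps) - 1]
#         audio = kmodel(ps, ref_s, 1.0)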
# -----------------------------------------------------------------------------
# Streaming audio generation
# -----------------------------------------------------------------------------
@spaces.GPU
def generate_podcast(pdf, url, topic):
    # NOTE: pdf and url are accepted by the UI but not used yet; the script is
    # always generated from PODCAST_SUBJECT (today's top trending paper).
    podcast_text = generate_podcast_text(PODCAST_SUBJECT, topic)
    lines = [l for l in podcast_text.strip().splitlines() if l.strip()]

    pipeline = kpipeline
    pipeline_voice_female = pipeline.load_voice(FEMALE_VOICE)
    pipeline_voice_male = pipeline.load_voice(MALE_VOICE)

    speed = 1.0
    sr = 24000

    for line in lines:
        # Expect "[MIKE] ..." or "[JANE] ..."; untagged lines fall back to Jane.
        if line.startswith("[MIKE]"):
            pipeline_voice = pipeline_voice_male
            voice = MALE_VOICE
            utterance = line[len("[MIKE]"):].strip()
        elif line.startswith("[JANE]"):
            pipeline_voice = pipeline_voice_female
            voice = FEMALE_VOICE
            utterance = line[len("[JANE]"):].strip()
        else:  # fallback
            pipeline_voice = pipeline_voice_female
            voice = FEMALE_VOICE
            utterance = line

        for _, ps, _ in pipeline(utterance, voice, speed):
            t0 = time.time()
            ref_s = pipeline_voice[len(ps) - 1]
            audio_numpy = kmodel(ps, ref_s, speed).numpy()
            # Measure before yielding so the log reflects synthesis time only,
            # not how long the consumer takes to pull the chunk.
            t1 = time.time()
            print(f"PROCESSED '{utterance}' in {t1 - t0:.1f} seconds. {audio_numpy.shape}")
            yield (sr, audio_numpy)
demo = gr.Interface(
    title="Open NotebookLM",
    description=f"""Generates a podcast discussion between two hosts about the materials of your choice. Based on [Kokoro](https://huggingface.co/hexgrad/Kokoro-82M), and uses elements from a NotebookLM app by [Gabriel Chua](https://huggingface.co/spaces/gabrielchua/open-notebooklm).
If you do not specify any source materials below, the podcast will be about the top trending [Daily paper](https://huggingface.co/papers/), '**{list(top_papers.keys())[0]}**'""",
    fn=generate_podcast,
    inputs=[
        gr.File(
            label="Optional - Upload a pdf",
            file_types=[".pdf"],
            file_count="single",
        ),
        gr.Textbox(
            label="Optional - Type a URL to read its page",
        ),
        gr.Textbox(label="Do you have a more specific topic or question on the materials?"),
        # gr.Dropdown(
        #     label=UI_INPUTS["length"]["label"],
        #     choices=UI_INPUTS["length"]["choices"],
        #     value=UI_INPUTS["length"]["value"],
        # ),
    ],
    outputs=[
        gr.Audio(
            label="Listen to your podcast",
            format="wav",
            streaming=True,
        ),
        # gr.Markdown(label=UI_OUTPUTS["transcript"]["label"]),
    ],
    theme=gr.themes.Soft(),
    submit_btn="Generate podcast 🎙️",
    # examples=UI_EXAMPLES,
    # cache_examples=UI_CACHE_EXAMPLES,
)
if __name__ == "__main__":
    demo.launch()