Spaces:
Running
on
Zero
Running
on
Zero
File size: 6,227 Bytes
1a6d10d 886bd4b 992fb70 4af8987 1a6d10d d065cac 4d88a72 00809b2 9bf9798 4d88a72 4af8987 e3f6ad9 4af8987 6cc0694 8511ecd 6cc0694 6b39085 1a6d10d e3f6ad9 4af8987 7746966 4af8987 8569025 00809b2 8569025 4d88a72 00809b2 4d88a72 8569025 4d88a72 8569025 4af8987 8569025 4af8987 00809b2 018d4a8 1a6d10d 018d4a8 5a4b231 4af8987 00809b2 886bd4b 4af8987 886bd4b 4d88a72 4af8987 4d88a72 4af8987 7746966 4af8987 7746966 4af8987 7746966 4af8987 4d88a72 4af8987 4d88a72 9bf9798 55f9ab5 9bf9798 4d88a72 119a653 4d88a72 119a653 4d88a72 00809b2 856cb19 00809b2 4d88a72 856cb19 4d88a72 00809b2 4d88a72 856cb19 4d88a72 856cb19 9bf9798 4d88a72 1a6d10d 4d88a72 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 |
import queue
import threading
import spaces
import os
import io
import soundfile as sf
import gradio as gr
import numpy as np
import time
import pymupdf
import requests
from pathlib import Path
import torch
from huggingface_hub import InferenceClient
from kokoro import KModel, KPipeline
# -----------------------------------------------------------------------------
# Default podcast materials: the Daily Papers content plus one downloaded PDF
# -----------------------------------------------------------------------------
from papers import PaperManager

paper_manager = PaperManager()
top_papers = paper_manager.get_top_content()
# Content of the first entry returned by PaperManager; generate_podcast falls
# back to it when the user provides neither a URL nor a PDF.
PODCAST_SUBJECT = list(top_papers.values())[0]

# Download the essay used by the PDF example of the demo.
os.makedirs("examples", exist_ok=True)
response = requests.get(
    "https://www.palantir.com/assets/xrfr7uokpv1b/1wtb4LWF7XIuJisnMwH0XW/dc37fdda646a5df6c5b86f695ce990c0/NYT_-_Our_Oppenheimer_Moment-_The_Creation_of_A.I._Weapons.pdf",
    timeout=60,  # without a timeout a stalled server would block startup forever
)
# Fail loudly instead of saving an HTTP error page under a .pdf name.
response.raise_for_status()
with open("examples/Essay_Palantir.pdf", "wb") as f:
    f.write(response.content)
# -----------------------------------------------------------------------------
# LLM that writes the script
# -----------------------------------------------------------------------------
from prompts import SYSTEM_PROMPT

# Chat client for Llama-3.3-70B-Instruct through the "cerebras" provider; the
# access token is read from the HF_TOKEN environment variable.
client = InferenceClient(
    model="meta-llama/Llama-3.3-70B-Instruct",
    provider="cerebras",
    token=os.environ.get("HF_TOKEN"),
)
def generate_podcast_script(subject: str, steering_question: str | None = None) -> str:
    """Ask the LLM for a script of a podcast given by two hosts.

    Args:
        subject: Source material to discuss. Only the first 10,000 characters
            are sent to the model.
        steering_question: Optional question the hosts should focus on;
            ignored when ``None``, empty or whitespace-only.

    Returns:
        The model's reply starting at the first ``[JANE]`` tag, so any text
        preceding the dialogue is dropped.

    Raises:
        ValueError: If the reply contains no ``[JANE]`` tag.
    """
    user_prompt = (
        "Here is the topic: it's the top trending paper on Hugging Face daily papers today. "
        "You will need to analyze it by bringing profound insights.\n"
        f"{subject[:10000]}"
    )
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": user_prompt},
    ]
    if steering_question and steering_question.strip():
        messages.append(
            {"role": "user", "content": f"You could focus on this question: {steering_question}"}
        )

    response = client.chat_completion(
        messages,
        max_tokens=8156,
    )
    # Guard against a reply without text content so the check below still works.
    full_text = response.choices[0].message.content or ""

    # Explicit check instead of `assert`: asserts are stripped under `python -O`,
    # in which case find() == -1 would silently slice off all but the last character.
    dialogue_start_index = full_text.find("[JANE]")
    if dialogue_start_index == -1:
        raise ValueError("The generated script contains no '[JANE]' line.")
    return full_text[dialogue_start_index:]
# -----------------------------------------------------------------------------
# Kokoro TTS
# -----------------------------------------------------------------------------
CUDA_AVAILABLE = torch.cuda.is_available()
if CUDA_AVAILABLE:
    _device = "cuda"
else:
    _device = "cpu"

# Model in eval mode on the selected device, plus the pipeline object that
# generate_podcast uses to load voices and process utterances.
kmodel = KModel(repo_id="hexgrad/Kokoro-82M").to(_device).eval()
kpipeline = KPipeline(lang_code="a")  # English voices

MALE_VOICE = "am_adam"
FEMALE_VOICE = "af_heart"

# Load both voices once at import time so the first request does not pay for it.
kpipeline.load_voice(MALE_VOICE)
kpipeline.load_voice(FEMALE_VOICE)
@spaces.GPU
def generate_podcast(url: str, pdf_path: str, topic: str):
    """Generate a two-host podcast and stream it as audio chunks.

    The source material is chosen in priority order: the PDF at ``pdf_path``,
    then the web page at ``url`` (fetched through ``https://r.jina.ai/``),
    then the default ``PODCAST_SUBJECT``.

    Args:
        url: Web page to discuss; ignored when ``pdf_path`` is given.
        pdf_path: Path of a PDF file to discuss.
        topic: Optional steering question forwarded to the script writer.

    Yields:
        ``(sample_rate, audio)`` tuples, one per synthesized chunk, where
        ``audio`` is the NumPy array obtained from the Kokoro model output.

    Raises:
        gr.Error: If the web page cannot be fetched.
    """
    if pdf_path:
        with pymupdf.open(pdf_path) as pdf_doc:
            # join() avoids quadratic string concatenation on long documents.
            material_text = "".join(page.get_text() for page in pdf_doc)
    elif url:
        try:
            response = requests.get(f"https://r.jina.ai/{url}", timeout=60)
            # Without this check an HTTP error page would become the podcast material.
            response.raise_for_status()
        except requests.RequestException as err:
            raise gr.Error(f"Could not fetch '{url}': {err}") from err
        material_text = response.text
    else:
        material_text = PODCAST_SUBJECT

    # Generate podcast script!
    podcast_script = generate_podcast_script(material_text, topic)
    # Strip every line so an indented "[MIKE]"/"[JANE]" tag is still recognized.
    lines = [stripped for raw in podcast_script.splitlines() if (stripped := raw.strip())]

    # Speaker tag -> (voice name, loaded voice pack).
    speakers = {
        "[JANE]": (FEMALE_VOICE, kpipeline.load_voice(FEMALE_VOICE)),
        "[MIKE]": (MALE_VOICE, kpipeline.load_voice(MALE_VOICE)),
    }
    fallback_speaker = speakers["[JANE]"]
    speed = 1.
    sr = 24000

    for line in lines:
        # Expect "[MIKE] ..." or "[JANE] ..."; untagged lines use the female voice.
        voice, voice_pack = fallback_speaker
        utterance = line
        for tag, speaker in speakers.items():
            if line.startswith(tag):
                voice, voice_pack = speaker
                utterance = line[len(tag):].strip()
                break
        if not utterance:
            # A tag with no text after it: nothing to synthesize.
            continue

        for _, ps, _ in kpipeline(utterance, voice, speed):
            t0 = time.perf_counter()
            # The voice pack is indexed by the length of the phoneme string.
            ref_s = voice_pack[len(ps) - 1]
            audio_numpy = kmodel(ps, ref_s, speed).numpy()
            # Measure before yielding so the consumer's time is not counted.
            elapsed = time.perf_counter() - t0
            print(f"PROCESSED '{utterance}' in {elapsed:.2f} seconds. {audio_numpy.shape}")
            yield (sr, audio_numpy)
# Example inputs in the same order as the interface inputs: (url, pdf, topic).
EXAMPLES = [
    [
        "https://huggingface.co/blog/inference-providers-cohere",
        None,
        "How does using this compare with other inference solutions?",
    ],
    [
        None,
        str(Path("examples/Essay_Palantir.pdf")),
        "Make sure to keep some critic spirit in the analysis!",
    ],
]

# Gradio UI: three optional inputs and one streaming audio output.
# The Llama link in the description is an absolute URL; a relative Markdown
# target would resolve against the page hosting the demo.
demo = gr.Interface(
    title="Open NotebookLM 🎙️",
    description=f"""Generates a podcast discussion between two hosts about the materials of your choice.
If you do not specify any source materials below, the podcast will be about the top trending [Daily paper](https://huggingface.co/papers/), '**{list(top_papers.keys())[0]}**'
Based on [Kokoro TTS](https://huggingface.co/hexgrad/Kokoro-82M), lightning-fast inference for [Llama-3.3-70B](https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct) by Cerebras, and uses elements from a NotebookLM app by [Gabriel Chua](https://huggingface.co/spaces/gabrielchua/open-notebooklm).""",
    fn=generate_podcast,
    inputs=[
        gr.Textbox(
            label="🔗 Type a Webpage URL to discuss it (Optional)",
            placeholder="The URL you want to discuss the content for.",
        ),
        gr.File(
            label="Upload a PDF as discussion material (Optional)",
            file_types=[".pdf"],
            file_count="single",
        ),
        gr.Textbox(
            label="🤔 Do you have a more specific topic or question on the materials?",
            placeholder="You can leave this blank.",
        ),
    ],
    outputs=[
        gr.Audio(
            label="Listen to your podcast! 🔊",
            format="wav",
            streaming=True,
        ),
    ],
    theme=gr.themes.Soft(),
    submit_btn="Generate podcast 🎙️",
    examples=EXAMPLES,
    cache_examples=True,
)

if __name__ == "__main__":
    demo.launch()
|