import queue
import threading
import spaces
import os
import io
import soundfile as sf
import gradio as gr
import numpy as np
import time
import pymupdf
import requests
from pathlib import Path

import torch
from huggingface_hub import InferenceClient
from kokoro import KModel, KPipeline
# -----------------------------------------------------------------------------
# Default podcast materials: today's top Daily Papers entry plus one downloaded PDF
# -----------------------------------------------------------------------------
from papers import PaperManager

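# PaperManager (defined in papers.py) fetches today's top Daily Papers entries;
# the first one becomes the default podcast subject below.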
paper_manager = PaperManager()
top_papers = paper_manager.get_top_content()

PODCAST_SUBJECT = list(top_papers.values())[0]

os.makedirs("examples", exist_ok=True)
response = requests.get("https://www.palantir.com/assets/xrfr7uokpv1b/1wtb4LWF7XIuJisnMwH0XW/dc37fdda646a5df6c5b86f695ce990c0/NYT_-_Our_Oppenheimer_Moment-_The_Creation_of_A.I._Weapons.pdf")
response.raise_for_status()  # fail fast if the example PDF cannot be downloaded
with open("examples/Essay_Palantir.pdf", "wb") as f:
    f.write(response.content)

# -----------------------------------------------------------------------------
# LLM that writes the podcast script
# -----------------------------------------------------------------------------
from prompts import SYSTEM_PROMPT

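# Requires an HF_TOKEN environment variable with access to Hugging Face
# inference providers (here: Cerebras serving Llama-3.3-70B-Instruct).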
client = InferenceClient(
    "meta-llama/Llama-3.3-70B-Instruct",
    provider="cerebras",
    token=os.getenv("HF_TOKEN"),
)


def generate_podcast_script(subject: str, steering_question: str | None = None) -> str:
    """Ask the LLM for a script of a podcast given by two hosts."""
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": f"""Here is the topic: {subject[:10000]}"""},
    ]
    if steering_question and len(steering_question) > 0:
        messages.append({"role": "user", "content": f"You could focus on this question: {steering_question}"})

    response = client.chat_completion(
        messages,
        max_tokens=8156,
    )
    full_text = response.choices[0].message.content
    assert "[JANE]" in full_text
    dialogue_start_index = full_text.find("[JANE]")
    podcast_text = full_text[dialogue_start_index:]
    return podcast_text

# -----------------------------------------------------------------------------
# Kokoro TTS
# -----------------------------------------------------------------------------
CUDA_AVAILABLE = torch.cuda.is_available()

kmodel = KModel(repo_id='hexgrad/Kokoro-82M').to("cuda" if CUDA_AVAILABLE else "cpu").eval()
kpipeline = KPipeline(lang_code="a")  # lang_code "a" = American English voices

MALE_VOICE = "am_adam"      # Kokoro American-English male voice
FEMALE_VOICE = "af_heart"   # Kokoro American-English female voice

# Pre-warm voices to avoid first-call latency
for v in (MALE_VOICE, FEMALE_VOICE):
    kpipeline.load_voice(v)

@spaces.GPU
def generate_podcast(url: str, pdf_path: str, topic: str):
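    """Stream a podcast as (sample_rate, audio_chunk) tuples, sourced from a PDF, a URL, or the default paper."""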
    if pdf_path:
        with pymupdf.open(pdf_path) as pdf_doc:
            material_text = ""
            for page in pdf_doc:
                material_text += page.get_text()
    elif url:
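        # r.jina.ai (Jina Reader) returns a plain-text/markdown rendering of the page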
        response = requests.get(f'https://r.jina.ai/{url}')
        material_text = response.text
    else:
        material_text = "It's the top trending paper on Hugging Face daily papers today. You will need to analyze it by bringing profound insights." + PODCAST_SUBJECT
    
    # Generate podcast script!
    podcast_script = generate_podcast_script(material_text, topic)

    lines = [l for l in podcast_script.strip().splitlines() if l.strip()]

    pipeline = kpipeline
    pipeline_voice_female = pipeline.load_voice(FEMALE_VOICE)
    pipeline_voice_male = pipeline.load_voice(MALE_VOICE)

    speed = 1.0
    sr = 24000  # Kokoro outputs 24 kHz mono audio

    for line in lines:
        # Expect "[MIKE] ..." or "[JANE] ..."
        if line.startswith("[MIKE]"):
            pipeline_voice = pipeline_voice_male
            voice = MALE_VOICE
            utterance = line[len("[MIKE]"):].strip()
        elif line.startswith("[JANE]"):
            pipeline_voice = pipeline_voice_female
            voice = FEMALE_VOICE
            utterance = line[len("[JANE]"):].strip()
        else:  # fallback: untagged lines default to the female voice
            pipeline_voice = pipeline_voice_female
            voice = FEMALE_VOICE
            utterance = line

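        # Each pipeline chunk carries a phoneme sequence `ps`; indexing the voice
        # pack by len(ps) - 1 selects the reference style vector for that length.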
        for _, ps, _ in pipeline(utterance, voice, speed):
            t0 = time.time()
            ref_s = pipeline_voice[len(ps) - 1]
            audio_numpy = kmodel(ps, ref_s, speed).numpy()
            yield (sr, audio_numpy)
            t1 = time.time()
            print(f"PROCESSED '{utterance}' in {int(t1-t0)} seconds. {audio_numpy.shape}")

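# Pre-cached examples (see cache_examples=True below): one webpage URL and the PDF downloaded above.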
EXAMPLES = [
    ["https://huggingface.co/blog/inference-providers-cohere", None, "How does using this compare with other inference solutions?"],
    [None, str(Path("examples/Essay_Palantir.pdf")), "Make sure to keep some critical spirit in the analysis!"],
]
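# generate_podcast is a generator, so the streaming Audio output plays chunks as they are synthesized.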
demo = gr.Interface(
    title="Open NotebookLM 🎙️",
    description=f"""Generates a podcast discussion between two hosts about the materials of your choice. 

If you do not specify any source materials below, the podcast will be about the top trending [Daily paper](https://huggingface.co/papers/), '**{list(top_papers.keys())[0]}**'.

Based on [Kokoro TTS](https://huggingface.co/hexgrad/Kokoro-82M), lightning-fast inference of [Llama-3.3-70B](https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct) by Cerebras, and elements from the NotebookLM app by [Gabriel Chua](https://huggingface.co/spaces/gabrielchua/open-notebooklm).""",
    fn=generate_podcast,
    inputs=[
        gr.Textbox(
            label="🔗 Type a Webpage URL to discuss it (Optional)",
            placeholder="The URL you want to discuss the content for.",
        ),
        gr.File(
            label="Upload a PDF as discussion material (Optional)",
            file_types=[".pdf"],
            file_count="single",
        ),
        gr.Textbox(label="🤔 Do you have a more specific topic or question on the materials?", placeholder="You can leave this blank."),
    ],
    outputs=[
        gr.Audio(
            label="Listen to your podcast! 🔊",
            format="wav",
            streaming=True,
        ),
    ],
    theme=gr.themes.Soft(),
    submit_btn="Generate podcast 🎙️", 
    # clear_btn=gr.Button("🗑️"),
    examples=EXAMPLES,
    cache_examples=True,
)

if __name__ == "__main__":
    demo.launch()