File size: 6,853 Bytes
547fef1
 
 
 
 
 
 
 
 
 
 
 
c301481
 
86f9ba1
547fef1
 
 
 
c301481
 
 
 
 
 
 
547fef1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86f9ba1
 
 
 
 
547fef1
 
 
 
86f9ba1
547fef1
 
 
 
 
 
 
 
 
 
 
 
86f9ba1
547fef1
 
c301481
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
547fef1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4cb353b
547fef1
4cb353b
547fef1
 
 
 
 
 
 
 
 
01296b8
547fef1
 
 
 
 
 
 
4cb353b
 
 
 
 
 
 
547fef1
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
import queue
import threading
import spaces
import os
import io
import soundfile as sf
import gradio as gr
import numpy as np
import time
import pymupdf
import requests
from pathlib import Path
from pydub import AudioSegment  # Add this import
import tempfile
import re

import torch
from huggingface_hub import InferenceClient
from kokoro import KModel, KPipeline

# -----------------------------------------------------------------------------
# TODO:
# - Add fields for the podcast title and description
# - Add a field for the script
# -----------------------------------------------------------------------------

# -----------------------------------------------------------------------------
# Get default podcast materials, from Daily papers and one download
# -----------------------------------------------------------------------------
from papers import PaperManager

# Everything below runs once at import time, so the podcast subject is fixed
# for the lifetime of the process.
paper_manager = PaperManager()
# Mapping-like object (the code relies on .keys() and .values()). Presumably
# maps paper title -> paper text, since the first key is shown as the paper
# name in the UI description further down — TODO confirm against
# PaperManager.
top_papers = paper_manager.get_top_content()

# First value of the mapping: the source material handed to the
# script-writing LLM by generate_podcast().
# NOTE(review): list(...)[0] raises IndexError at import time if
# get_top_content() returns nothing.
PODCAST_SUBJECT = list(top_papers.values())[0]

# -----------------------------------------------------------------------------
# LLM that writes the script (unchanged)
# -----------------------------------------------------------------------------
from prompts import SYSTEM_PROMPT

# Alternative configuration, currently disabled (Llama 3.3 70B via the
# "cerebras" provider):
# client = InferenceClient(
#     "meta-llama/Llama-3.3-70B-Instruct",
#     provider="cerebras",
#     token=os.getenv("HF_TOKEN"),
# )
# Module-level chat client shared by generate_podcast_script() and
# generate_headline_and_description(). The token is read from the HF_TOKEN
# environment variable; os.getenv() yields None when it is unset.
client = InferenceClient(
    "Qwen/Qwen3-32B",
    provider="hf-inference",
    token=os.getenv("HF_TOKEN"),
)

def sanitize_script(script: str) -> str:
    """Strip Markdown-style formatting characters from a podcast script.

    Every asterisk, underscore, tilde and backtick is deleted; all other
    characters (including the ``[JANE]``/``[MIKE]`` speaker tags) are kept.

    Args:
        script: Raw script text as produced by the LLM.

    Returns:
        The script with the formatting characters removed.
    """
    # Extend the character class if further markup needs to be dropped.
    markup_chars = re.compile(r'[\*\_\~\`]')
    cleaned = markup_chars.sub('', script)
    return cleaned

def generate_podcast_script(subject: str, steering_question: str | None = None) -> str:
    """Ask the LLM for a script of a podcast given by two hosts.

    Args:
        subject: Source material for the episode. Only the first 10,000
            characters are sent to the model.
        steering_question: Optional angle the hosts should focus on. It is
            ignored when ``None``, empty, or made only of whitespace.

    Returns:
        The script, starting at the first ``[JANE]`` tag of the model's reply
        (anything the model wrote before it is discarded), with formatting
        characters removed by ``sanitize_script``.

    Raises:
        AssertionError: If the model's reply contains no ``[JANE]`` tag.
    """
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": f"""Here is the topic: it's the top trending paper on Hugging Face daily papers today. You will need to analyze it by bringing profound insights.\n{subject[:10000]}"""},
    ]
    if steering_question and steering_question.strip():
        messages.append({"role": "user", "content": f"You could focus on this question: {steering_question}"})

    response = client.chat_completion(
        messages,
        max_tokens=8156,
    )
    full_text = response.choices[0].message.content

    dialogue_start_index = full_text.find("[JANE]")
    if dialogue_start_index == -1:
        # Raised explicitly instead of via `assert`: assert statements are
        # removed under `python -O`, in which case the -1 index would be used
        # as a slice start and only the last character of the reply would be
        # returned. The exception type is kept for existing callers.
        raise AssertionError("LLM response does not contain a '[JANE]' speaker tag")

    return sanitize_script(full_text[dialogue_start_index:])

def generate_headline_and_description(subject: str, steering_question: str | None = None) -> tuple[str, str]:
    """Ask the LLM for an episode headline and a short description.

    Args:
        subject: Paper or topic text; only the first 10,000 characters are
            included in the prompt.
        steering_question: Accepted for signature symmetry with
            ``generate_podcast_script`` but not used by this function.

    Returns:
        A ``(headline, description)`` pair. When the reply has at least two
        non-blank lines, the first is the headline and the rest are joined
        with spaces to form the description. Otherwise the headline is the
        first 80 characters of the reply and the description is the whole
        reply.
    """
    prompt = f"""You are a world-class podcast producer. Given the following paper or topic, generate:
1. A catchy, informative headline for a podcast episode about it (max 15 words).
2. A short, engaging description (2-3 sentences, max 60 words) that summarizes what listeners will learn or why the topic is exciting.

Here is the topic:
{subject[:10000]}
"""
    response = client.chat_completion(
        [
            {"role": "system", "content": "You are a world-class podcast producer."},
            {"role": "user", "content": prompt},
        ],
        max_tokens=512,
    )
    full_text = response.choices[0].message.content.strip()

    # Keep only the non-blank lines, each trimmed of surrounding whitespace.
    non_blank = [trimmed for raw in full_text.splitlines() if (trimmed := raw.strip())]

    # Single-line (or empty) reply: no reliable headline/description split.
    if len(non_blank) < 2:
        return full_text[:80], full_text

    headline, *remainder = non_blank
    return headline, " ".join(remainder)

# -----------------------------------------------------------------------------
# Kokoro TTS
# -----------------------------------------------------------------------------
CUDA_AVAILABLE = torch.cuda.is_available()

# Load the Kokoro model once at import time, placed on the GPU when one is
# visible and on the CPU otherwise, and switched to eval mode.
kmodel = KModel(repo_id='hexgrad/Kokoro-82M').to("cuda" if CUDA_AVAILABLE else "cpu").eval()
# Called in generate_podcast() to turn each utterance into the phoneme
# sequences fed to kmodel.
kpipeline = KPipeline(lang_code="a")  # English voices

# Voice identifiers for the two hosts: [MIKE] lines use the male voice,
# [JANE] lines (and untagged lines) use the female one.
MALE_VOICE = "am_adam"
FEMALE_VOICE = "af_heart"

# Pre-warm voices to avoid first-call latency
for v in (MALE_VOICE, FEMALE_VOICE):
    kpipeline.load_voice(v)

def _split_speaker(line: str) -> tuple[str, str]:
    """Return ``(voice_id, utterance)`` for one already-stripped script line.

    Lines tagged ``[MIKE]`` use the male voice, lines tagged ``[JANE]`` the
    female voice; untagged lines fall back to the female voice and are spoken
    in full.
    """
    for tag, voice in (("[MIKE]", MALE_VOICE), ("[JANE]", FEMALE_VOICE)):
        if line.startswith(tag):
            return voice, line[len(tag):].strip()
    return FEMALE_VOICE, line


@spaces.GPU
def generate_podcast(topic: str):
    """Generate the podcast script and stream it as synthesized audio.

    The script is written by ``generate_podcast_script`` from
    ``PODCAST_SUBJECT`` and then voiced line by line with Kokoro.

    Args:
        topic: Optional steering question typed by the user; forwarded to the
            script-writing LLM.

    Yields:
        ``(sample_rate, audio)`` tuples, one per chunk produced by the
        pipeline, where ``audio`` is the NumPy array returned by the model.
    """
    # Generate podcast script!
    podcast_script = generate_podcast_script(PODCAST_SUBJECT, topic)

    voice_packs = {
        FEMALE_VOICE: kpipeline.load_voice(FEMALE_VOICE),
        MALE_VOICE: kpipeline.load_voice(MALE_VOICE),
    }

    speed = 1.
    # Sample rate reported to Gradio with every chunk (Kokoro is assumed to
    # emit 24 kHz audio — TODO confirm).
    sr = 24000

    for raw_line in podcast_script.splitlines():
        # Strip before matching tags so an indented "[MIKE] ..." line is not
        # mistaken for an untagged line and read aloud, tag included, by the
        # wrong voice.
        line = raw_line.strip()
        if not line:
            continue

        voice, utterance = _split_speaker(line)
        if not utterance:
            # A bare speaker tag carries nothing to synthesize.
            continue
        voice_pack = voice_packs[voice]

        for _, ps, _ in kpipeline(utterance, voice, speed):
            t0 = time.perf_counter()
            # The voice pack is indexed by phoneme-sequence length to pick
            # the reference style passed to the model.
            ref_s = voice_pack[len(ps) - 1]
            audio_numpy = kmodel(ps, ref_s, speed).numpy()
            # Stop the clock and log before yielding: a generator is suspended
            # at `yield`, so timing afterwards would also count the time the
            # consumer takes to request the next chunk.
            elapsed = time.perf_counter() - t0
            print(f"PROCESSED '{utterance}' in {int(elapsed)} seconds. {audio_numpy.shape}")
            yield (sr, audio_numpy)

# Example rows of the form [url, pdf_path, steering_question].
# NOTE(review): nothing in the visible code references EXAMPLES — the
# gr.Interface below is built without an `examples=` argument and takes a
# single textbox input, whereas each row here has three fields. Looks like a
# leftover from an earlier version; confirm before removing.
EXAMPLES = [
    ["https://huggingface.co/blog/inference-providers-cohere", None, "How does using this compare with other inference solutions?"],
    [None, str(Path("examples/Essay_Palantir.pdf")), "Make sure to keep some critic spirit in the analysis!"],
]
# Gradio UI: one optional text input (the steering question) wired to
# generate_podcast(), whose yielded audio chunks are played by a streaming
# audio component.
demo = gr.Interface(
    title="Daily Paper Podcast 🎙️",
    # The first key of top_papers is shown as the paper's name.
    # NOTE(review): the Markdown link targets below are relative paths
    # (e.g. "spaces/m-ric/open-notebooklm"), not absolute URLs — verify they
    # resolve where the app is hosted.
    description=f"""Generates a podcast discussion between two hosts about today's top trending paper on Hugging Face: '**{list(top_papers.keys())[0]}**'

Based on [Open NotebookLM](spaces/m-ric/open-notebooklm), powered by [Kokoro TTS](hexgrad/Kokoro-82M) and [Qwen3-32B](Qwen/Qwen3-32B) running on HF Inference.""",
    fn=generate_podcast,
    inputs=[
        gr.Textbox(
            label="🤔 Do you have a specific aspect of the paper you'd like the hosts to focus on?",
            placeholder="You can leave this blank for a general discussion.",
        ),
    ],
    outputs=[
        # streaming=True because generate_podcast is a generator that yields
        # (sample_rate, audio) chunks.
        gr.Audio(
            label="Listen to your podcast! 🔊",
            format="wav",
            streaming=True,
        ),
    ],
    theme=gr.themes.Soft(),
    submit_btn="Generate podcast 🎙️",
)

# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()