Spaces:

fdaudens
/

podcast-jobs-rss-test

Running on Zero

File size: 3,236 Bytes

import os
import xml.etree.ElementTree as ET
from datetime import datetime, timezone
from email.utils import format_datetime

from huggingface_hub import InferenceClient

# Module-wide inference client used for every chat-completion request below.
# The access token is read from the HF_TOKEN environment variable (None when
# the variable is unset).
client = InferenceClient(
    model="Qwen/Qwen3-32B",
    provider="hf-inference",
    token=os.environ.get("HF_TOKEN"),
)

def generate_headline_and_description(subject: str, steering_question: str | None = None) -> tuple[str, str]:
    """Ask the LLM for a headline and a short description for the podcast episode.

    Args:
        subject: Paper text or topic to summarise. Only the first 10,000
            characters are sent to the model.
        steering_question: Optional question or angle the episode focuses on.
            When given, it is appended to the prompt so the headline and
            description reflect it.

    Returns:
        A ``(headline, description)`` tuple. The first non-empty line of the
        model's answer is the headline and the remaining lines, joined with
        spaces, are the description. If the answer has fewer than two
        non-empty lines, the headline is the first 80 characters of the answer
        and the description is the whole answer.
    """
    prompt = f"""You are a world-class podcast producer. Given the following paper or topic, generate:
1. A catchy, informative headline for a podcast episode about it (max 15 words).
2. A short, engaging description (2-3 sentences, max 60 words) that summarizes what listeners will learn or why the topic is exciting.

Here is the topic:
{subject[:10000]}
"""
    if steering_question:
        # Previously this argument was accepted but silently ignored.
        prompt += (
            "\nFocus the headline and description on this question:\n"
            f"{steering_question}\n"
        )

    messages = [
        {"role": "system", "content": "You are a world-class podcast producer."},
        {"role": "user", "content": prompt},
    ]
    response = client.chat_completion(
        messages,
        max_tokens=512,
    )

    # The message content can be None (e.g. an empty completion); treat that
    # as an empty answer instead of crashing on ``None.strip()``.
    raw_text = response.choices[0].message.content or ""

    # Qwen3 models may prepend their reasoning wrapped in <think>...</think>.
    # Keep only what follows the last closing tag so the reasoning never
    # leaks into the headline or description.
    if "</think>" in raw_text:
        raw_text = raw_text.rpartition("</think>")[2]
    full_text = raw_text.strip()

    # Try to split headline and description
    lines = [line.strip() for line in full_text.splitlines() if line.strip()]
    if len(lines) >= 2:
        headline = lines[0]
        description = " ".join(lines[1:])
    else:
        headline = full_text[:80]
        description = full_text
    return headline, description

# -----------------------------------------------------------------------------
# UPDATE RSS
# -----------------------------------------------------------------------------
def get_next_episode_number(podcast_dir="podcasts"):
    """Return the number to assign to the next podcast episode.

    The number is one more than the count of entries in ``podcast_dir`` whose
    name ends with ``.wav``.

    Args:
        podcast_dir: Directory holding the generated episode audio files.

    Returns:
        The next episode number as an int. A directory that does not exist yet
        is treated as containing no episodes, so the result is 1.
    """
    try:
        entries = os.listdir(podcast_dir)
    except FileNotFoundError:
        # First run: nothing has been generated yet.
        return 1
    return sum(1 for name in entries if name.endswith(".wav")) + 1

def update_rss(subject, audio_url, audio_length, paper_id=None, rss_path="rss.xml"):
    """Prepend a new episode ``<item>`` to the RSS feed stored at ``rss_path``.

    The episode title and description are produced by
    ``generate_headline_and_description(subject)``. The feed file is parsed,
    its ``<lastBuildDate>`` (when present) is set to the current UTC time, the
    new item is inserted before the first existing ``<item>`` (or appended to
    the channel when there is none), and the file is rewritten in place.

    Args:
        subject: Paper text or topic handed to the headline generator.
        audio_url: Public URL of the episode audio; used for the enclosure
            ``url`` and as the item ``guid``.
        audio_length: Size of the audio file, written as the enclosure
            ``length`` attribute.
        paper_id: Optional Hugging Face paper id. When given, a link to the
            paper page is appended to the description.
        rss_path: Path of the RSS XML file to update.

    Raises:
        ValueError: If the feed has no ``<channel>`` element.

    Note:
        Calls ``ET.register_namespace``, which changes ElementTree's
        process-wide prefix registry.
    """
    default_itunes_ns = "http://www.itunes.com/dtds/podcast-1.0.dtd"

    # Generate headline and description automatically
    title, description = generate_headline_and_description(subject)
    if paper_id:
        paper_url = f"https://huggingface.co/papers/{paper_id}"
        description += f"\n\n[Read the paper on Hugging Face]({paper_url})"

    # ElementTree forgets the prefixes used in the source file and would write
    # them back as ns0, ns1, ... Re-register the prefixes the feed declares so
    # that "itunes:", "atom:" etc. survive the round trip.
    declared_prefixes = {}
    for _event, (prefix, uri) in ET.iterparse(rss_path, events=("start-ns",)):
        if not prefix:
            # Leave a default namespace alone; un-namespaced elements created
            # below must not be pulled into it.
            continue
        declared_prefixes[prefix] = uri
        try:
            ET.register_namespace(prefix, uri)
        except ValueError:
            # Prefixes of the form "ns<digits>" are reserved by ElementTree.
            continue

    # Use whatever URI the feed binds to "itunes", falling back to the
    # standard one, and make sure it is serialised with the "itunes" prefix.
    itunes_ns = declared_prefixes.get("itunes", default_itunes_ns)
    ET.register_namespace("itunes", itunes_ns)

    tree = ET.parse(rss_path)
    root = tree.getroot()
    channel = root.find("channel")
    if channel is None:
        raise ValueError(f"No <channel> element found in RSS file {rss_path!r}")

    # Update lastBuildDate. format_datetime is locale-independent (unlike
    # strftime's %a/%b) and renders an aware UTC datetime with "+0000".
    now_rfc2822 = format_datetime(datetime.now(timezone.utc))
    last_build_date = channel.find("lastBuildDate")
    if last_build_date is not None:
        last_build_date.text = now_rfc2822

    # Create new item
    item = ET.Element("item")
    ET.SubElement(item, "title").text = title
    ET.SubElement(item, "description").text = description
    ET.SubElement(item, "pubDate").text = now_rfc2822
    # NOTE(review): the MIME type is hard-coded to audio/mpeg while
    # get_next_episode_number counts .wav files - confirm the published audio
    # really is MP3.
    ET.SubElement(item, "enclosure", url=audio_url, length=str(audio_length), type="audio/mpeg")
    ET.SubElement(item, "guid").text = audio_url
    # A literal "itunes:explicit" tag would be written with an undeclared
    # prefix (malformed XML); a namespace-qualified tag makes ElementTree emit
    # the matching xmlns:itunes declaration.
    ET.SubElement(item, f"{{{itunes_ns}}}explicit").text = "false"

    # Insert the new item before the first existing <item> so the newest
    # episode comes first; append if the channel has no items yet.
    first_item = channel.find("item")
    if first_item is not None:
        channel.insert(list(channel).index(first_item), item)
    else:
        channel.append(item)

    # Write back to file
    tree.write(rss_path, encoding="utf-8", xml_declaration=True)