Spaces:
Running
on
Zero
Running
on
Zero
update rss structure
Browse files- update_rss.py +25 -3
update_rss.py
CHANGED
@@ -2,19 +2,26 @@ import xml.etree.ElementTree as ET
|
|
2 |
from datetime import datetime
|
3 |
import os
|
4 |
from huggingface_hub import InferenceClient
|
|
|
5 |
|
6 |
client = InferenceClient(
|
7 |
-
"
|
8 |
provider="hf-inference",
|
9 |
token=os.getenv("HF_TOKEN"),
|
10 |
)
|
11 |
|
|
|
|
|
|
|
|
|
12 |
def generate_headline_and_description(subject: str, steering_question: str | None = None) -> tuple[str, str]:
|
13 |
"""Ask the LLM for a headline and a short description for the podcast episode."""
|
14 |
prompt = f"""You are a world-class podcast producer. Given the following paper or topic, generate:
|
15 |
1. A catchy, informative headline for a podcast episode about it (max 15 words).
|
16 |
2. A short, engaging description (2-3 sentences, max 60 words) that summarizes what listeners will learn or why the topic is exciting.
|
17 |
|
|
|
|
|
18 |
Here is the topic:
|
19 |
{subject[:10000]}
|
20 |
"""
|
@@ -28,7 +35,7 @@ Here is the topic:
|
|
28 |
)
|
29 |
full_text = response.choices[0].message.content.strip()
|
30 |
# Try to split headline and description
|
31 |
-
lines = [l.strip() for l in full_text.splitlines() if l.strip()]
|
32 |
if len(lines) >= 2:
|
33 |
headline = lines[0]
|
34 |
description = " ".join(lines[1:])
|
@@ -37,6 +44,19 @@ Here is the topic:
|
|
37 |
description = full_text
|
38 |
return headline, description
|
39 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
# -----------------------------------------------------------------------------
|
41 |
# UPDATE RSS
|
42 |
# -----------------------------------------------------------------------------
|
@@ -75,5 +95,7 @@ def update_rss(subject, audio_url, audio_length, rss_path="rss.xml"):
|
|
75 |
else:
|
76 |
channel.append(item)
|
77 |
|
78 |
-
# Write back to file
|
|
|
|
|
79 |
tree.write(rss_path, encoding="utf-8", xml_declaration=True)
|
|
|
2 |
from datetime import datetime
|
3 |
import os
|
4 |
from huggingface_hub import InferenceClient
|
5 |
+
import re
|
6 |
|
7 |
# Shared module-level inference client. It targets the Llama 3.1 8B Instruct
# model through the "hf-inference" provider and reads its access token from
# the HF_TOKEN environment variable (os.getenv yields None when it is unset).
client = InferenceClient(
    model="meta-llama/Llama-3.1-8B-Instruct",
    token=os.getenv("HF_TOKEN"),
    provider="hf-inference",
)
|
12 |
|
13 |
+
# Leading "Headline"/"Description" label, case-insensitive, in plain or
# markdown-emphasised form.  The optional colon may sit on either side of the
# closing asterisks ("**Headline:**" and "**Headline**:" are both accepted),
# and the word boundary keeps ordinary words such as "Descriptions" intact.
_LABEL_PREFIX = re.compile(
    r"^\s*\*{0,2}(?:Headline|Description)\b\*{0,2}:?\*{0,2}\s*",
    re.IGNORECASE,
)


def clean_label(line):
    """Strip a leading ``Headline``/``Description`` label from *line*.

    Handles the label patterns a chat model tends to prepend despite being
    told not to: ``Headline: ...``, ``**Headline:** ...``,
    ``**Headline**: ...`` and ``*Description:* ...``.  Only a label at the
    very start of the line (after optional whitespace) is removed; the rest
    of the line is returned untouched.

    Args:
        line: One line of model output.

    Returns:
        The line without its label prefix, or the line unchanged when it
        does not start with a label.
    """
    return _LABEL_PREFIX.sub("", line, count=1)
|
16 |
+
|
17 |
def generate_headline_and_description(subject: str, steering_question: str | None = None) -> tuple[str, str]:
|
18 |
"""Ask the LLM for a headline and a short description for the podcast episode."""
|
19 |
prompt = f"""You are a world-class podcast producer. Given the following paper or topic, generate:
|
20 |
1. A catchy, informative headline for a podcast episode about it (max 15 words).
|
21 |
2. A short, engaging description (2-3 sentences, max 60 words) that summarizes what listeners will learn or why the topic is exciting.
|
22 |
|
23 |
+
Output ONLY the headline on the first line, and the description on the second line. Do NOT include any labels, markdown, or extra formatting.
|
24 |
+
|
25 |
Here is the topic:
|
26 |
{subject[:10000]}
|
27 |
"""
|
|
|
35 |
)
|
36 |
full_text = response.choices[0].message.content.strip()
|
37 |
# Try to split headline and description
|
38 |
+
lines = [clean_label(l.strip()) for l in full_text.splitlines() if l.strip()]
|
39 |
if len(lines) >= 2:
|
40 |
headline = lines[0]
|
41 |
description = " ".join(lines[1:])
|
|
|
44 |
description = full_text
|
45 |
return headline, description
|
46 |
|
47 |
+
def indent(elem, level=0):
    """Pretty-print an ElementTree subtree in place by rewriting whitespace.

    Whitespace-only (or missing) ``text`` and ``tail`` values are replaced
    with a newline plus indentation so the serialised XML puts each element
    on its own line.  Text or tails that carry real content are never
    touched.

    Args:
        elem: The element whose subtree should be indented.
        level: Nesting depth of *elem*; 0 for the root.

    Returns:
        None.  The tree is modified in place.
    """
    # NOTE(review): the indent unit is a single space, as written in the
    # original; confirm this is the intended width.
    pad = "\n" + level * " "

    if not len(elem):
        # Leaf: only the tail needs adjusting, and never for the root itself.
        if level and (not elem.tail or not elem.tail.strip()):
            elem.tail = pad
        return

    # Whitespace before the first child sits one level deeper than elem.
    if not elem.text or not elem.text.strip():
        elem.text = pad + " "

    for child in elem:
        indent(child, level + 1)

    # The last child's tail is what precedes elem's closing tag, so it must
    # drop back to elem's own level; otherwise the closing tag ends up
    # indented as if it were another child.
    last_child = elem[-1]
    if not last_child.tail or not last_child.tail.strip():
        last_child.tail = pad

    if not elem.tail or not elem.tail.strip():
        elem.tail = pad
|
59 |
+
|
60 |
# -----------------------------------------------------------------------------
|
61 |
# UPDATE RSS
|
62 |
# -----------------------------------------------------------------------------
|
|
|
95 |
else:
|
96 |
channel.append(item)
|
97 |
|
98 |
+
# Write back to file with pretty formatting
|
99 |
+
indent(root)
|
100 |
+
ET.register_namespace('itunes', "http://www.itunes.com/dtds/podcast-1.0.dtd")
|
101 |
tree.write(rss_path, encoding="utf-8", xml_declaration=True)
|