fdaudens HF Staff committed on
Commit
8279ac9
·
1 Parent(s): 9f26586

update rss structure

Browse files
Files changed (1) hide show
  1. update_rss.py +25 -3
update_rss.py CHANGED
@@ -2,19 +2,26 @@ import xml.etree.ElementTree as ET
2
  from datetime import datetime
3
  import os
4
  from huggingface_hub import InferenceClient
 
5
 
6
  client = InferenceClient(
7
- "Qwen/Qwen3-32B",
8
  provider="hf-inference",
9
  token=os.getenv("HF_TOKEN"),
10
  )
11
 
 
 
 
 
12
  def generate_headline_and_description(subject: str, steering_question: str | None = None) -> tuple[str, str]:
13
  """Ask the LLM for a headline and a short description for the podcast episode."""
14
  prompt = f"""You are a world-class podcast producer. Given the following paper or topic, generate:
15
  1. A catchy, informative headline for a podcast episode about it (max 15 words).
16
  2. A short, engaging description (2-3 sentences, max 60 words) that summarizes what listeners will learn or why the topic is exciting.
17
 
 
 
18
  Here is the topic:
19
  {subject[:10000]}
20
  """
@@ -28,7 +35,7 @@ Here is the topic:
28
  )
29
  full_text = response.choices[0].message.content.strip()
30
  # Try to split headline and description
31
- lines = [l.strip() for l in full_text.splitlines() if l.strip()]
32
  if len(lines) >= 2:
33
  headline = lines[0]
34
  description = " ".join(lines[1:])
@@ -37,6 +44,19 @@ Here is the topic:
37
  description = full_text
38
  return headline, description
39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  # -----------------------------------------------------------------------------
41
  # UPDATE RSS
42
  # -----------------------------------------------------------------------------
@@ -75,5 +95,7 @@ def update_rss(subject, audio_url, audio_length, rss_path="rss.xml"):
75
  else:
76
  channel.append(item)
77
 
78
- # Write back to file
 
 
79
  tree.write(rss_path, encoding="utf-8", xml_declaration=True)
 
2
  from datetime import datetime
3
  import os
4
  from huggingface_hub import InferenceClient
5
+ import re
6
 
7
  client = InferenceClient(
8
+ "meta-llama/Llama-3.1-8B-Instruct",
9
  provider="hf-inference",
10
  token=os.getenv("HF_TOKEN"),
11
  )
12
 
13
# Matches a leading "Headline"/"Description" label that the LLM may prepend
# despite being told not to. A bare label word is NOT enough: it must either be
# wrapped in markdown emphasis or be followed by a colon, so that genuine text
# such as "Headlines from the AI world" is left intact.
_LABEL_PREFIX_RE = re.compile(
    r"""
    ^\s*
    (?:
        # Emphasised label, colon inside or outside the closing asterisks.
        \*{1,2}\s*(?:headline|description)\s*
        (?: :\s*\*{0,2} | \*{1,2}\s*:? )
      |
        # Plain label: only treated as a label when a colon follows it.
        (?:headline|description)\s*:
    )
    \s*
    """,
    re.IGNORECASE | re.VERBOSE,
)


def clean_label(line):
    """Strip a leading ``Headline``/``Description`` label from *line*.

    Handles plain labels (``Headline: ...``), markdown-emphasised labels
    (``**Headline:** ...``, ``**Description**: ...``, ``*Headline* ...``) and
    any surrounding whitespace, case-insensitively.

    Lines that merely start with one of the words (``Headlines of the week``,
    ``Description logic explained``) are returned unchanged, because without a
    colon or emphasis markers the word is part of the content, not a label.

    Args:
        line: A single line of model output.

    Returns:
        The line with the label prefix removed, or the line unchanged when no
        label prefix is present.
    """
    return _LABEL_PREFIX_RE.sub("", line, count=1)
16
+
17
  def generate_headline_and_description(subject: str, steering_question: str | None = None) -> tuple[str, str]:
18
  """Ask the LLM for a headline and a short description for the podcast episode."""
19
  prompt = f"""You are a world-class podcast producer. Given the following paper or topic, generate:
20
  1. A catchy, informative headline for a podcast episode about it (max 15 words).
21
  2. A short, engaging description (2-3 sentences, max 60 words) that summarizes what listeners will learn or why the topic is exciting.
22
 
23
+ Output ONLY the headline on the first line, and the description on the second line. Do NOT include any labels, markdown, or extra formatting.
24
+
25
  Here is the topic:
26
  {subject[:10000]}
27
  """
 
35
  )
36
  full_text = response.choices[0].message.content.strip()
37
  # Try to split headline and description
38
+ lines = [clean_label(l.strip()) for l in full_text.splitlines() if l.strip()]
39
  if len(lines) >= 2:
40
  headline = lines[0]
41
  description = " ".join(lines[1:])
 
44
  description = full_text
45
  return headline, description
46
 
47
def indent(elem, level=0, space=" "):
    """Pretty-print an ElementTree element in place by rewriting whitespace.

    Only ``text``/``tail`` values that are empty or whitespace-only are
    replaced, so real character data is never altered.

    Args:
        elem: The element whose subtree should be indented.
        level: Nesting depth of *elem*; 0 for the document root.
        space: Whitespace used for one indentation step.

    Returns:
        None. The tree is modified in place.
    """
    own_indent = "\n" + level * space
    child_indent = own_indent + space

    def is_blank(value):
        return not value or not value.strip()

    if len(elem):
        # Whitespace before the first child puts it on its own line.
        if is_blank(elem.text):
            elem.text = child_indent

        last_child = None
        for child in elem:
            indent(child, level + 1, space)
            last_child = child

        # The tail of the last child precedes this element's closing tag, so it
        # must drop back to this element's depth; otherwise the closing tag is
        # written one level too deep.
        if is_blank(last_child.tail):
            last_child.tail = own_indent

        if is_blank(elem.tail):
            elem.tail = own_indent
    elif level and is_blank(elem.tail):
        # Leaf element: only the whitespace after it needs setting. A lone
        # root leaf (level 0) is left untouched.
        elem.tail = own_indent
59
+
60
  # -----------------------------------------------------------------------------
61
  # UPDATE RSS
62
  # -----------------------------------------------------------------------------
 
95
  else:
96
  channel.append(item)
97
 
98
+ # Write back to file with pretty formatting
99
+ indent(root)
100
+ ET.register_namespace('itunes', "http://www.itunes.com/dtds/podcast-1.0.dtd")
101
  tree.write(rss_path, encoding="utf-8", xml_declaration=True)