Spaces:

fdaudens
/

podcast-jobs

Running on Zero

App Files Files Community

fdaudens HF Staff committed on May 15

Commit

8279ac9

1 Parent(s): 9f26586

update rss structure

Browse files

Files changed (1) hide show

update_rss.py +25 -3

update_rss.py CHANGED Viewed

@@ -2,19 +2,26 @@ import xml.etree.ElementTree as ET
 from datetime import datetime
 import os
 from huggingface_hub import InferenceClient
 client = InferenceClient(
-    "Qwen/Qwen3-32B",
     provider="hf-inference",
     token=os.getenv("HF_TOKEN"),
 )
 def generate_headline_and_description(subject: str, steering_question: str | None = None) -> tuple[str, str]:
     """Ask the LLM for a headline and a short description for the podcast episode."""
     prompt = f"""You are a world-class podcast producer. Given the following paper or topic, generate:
 1. A catchy, informative headline for a podcast episode about it (max 15 words).
 2. A short, engaging description (2-3 sentences, max 60 words) that summarizes what listeners will learn or why the topic is exciting.
 Here is the topic:
 {subject[:10000]}
 """
@@ -28,7 +35,7 @@ Here is the topic:
     )
     full_text = response.choices[0].message.content.strip()
     # Try to split headline and description
-    lines = [l.strip() for l in full_text.splitlines() if l.strip()]
     if len(lines) >= 2:
         headline = lines[0]
         description = " ".join(lines[1:])
@@ -37,6 +44,19 @@ Here is the topic:
         description = full_text
     return headline, description
 # -----------------------------------------------------------------------------
 # UPDATE RSS
 # -----------------------------------------------------------------------------
@@ -75,5 +95,7 @@ def update_rss(subject, audio_url, audio_length, rss_path="rss.xml"):
     else:
         channel.append(item)
-    # Write back to file
     tree.write(rss_path, encoding="utf-8", xml_declaration=True)

 from datetime import datetime
 import os
 from huggingface_hub import InferenceClient
+import re
 client = InferenceClient(
+    "meta-llama/Llama-3.1-8B-Instruct",
     provider="hf-inference",
     token=os.getenv("HF_TOKEN"),
 )
# Leading "Headline"/"Description" label as LLMs tend to emit it: optional
# opening emphasis, the label word, then a REQUIRED delimiter -- either a colon
# (optionally followed by closing emphasis) or closing emphasis (optionally
# followed by a colon). Requiring the delimiter keeps ordinary text that merely
# starts with one of these words ("Headlines That Shook AI") intact.
_LABEL_PREFIX = re.compile(
    r"^\s*(?:\*{1,2})?(?:Headline|Description)(?:\s*:\*{0,2}|\*{1,2}\s*:?)\s*",
    re.IGNORECASE,
)


def clean_label(line: str) -> str:
    """Strip a leading ``Headline``/``Description`` label from *line*.

    Handles plain (``Headline: ...``) and markdown-emphasised
    (``**Headline:** ...``, ``**Description**: ...``) labels, matched
    case-insensitively. Leading whitespace before the label and whitespace
    after the delimiter are removed together with the label.

    Args:
        line: One line of model output.

    Returns:
        The line without the label prefix, or the line unchanged when it does
        not start with a delimited label.
    """
    # count=1 is implied by the ^ anchor but makes the intent explicit.
    return _LABEL_PREFIX.sub("", line, count=1)
 def generate_headline_and_description(subject: str, steering_question: str | None = None) -> tuple[str, str]:
     """Ask the LLM for a headline and a short description for the podcast episode."""
     prompt = f"""You are a world-class podcast producer. Given the following paper or topic, generate:
 1. A catchy, informative headline for a podcast episode about it (max 15 words).
 2. A short, engaging description (2-3 sentences, max 60 words) that summarizes what listeners will learn or why the topic is exciting.
+Output ONLY the headline on the first line, and the description on the second line. Do NOT include any labels, markdown, or extra formatting.
 Here is the topic:
 {subject[:10000]}
 """
     )
     full_text = response.choices[0].message.content.strip()
     # Try to split headline and description
+    lines = [clean_label(l.strip()) for l in full_text.splitlines() if l.strip()]
     if len(lines) >= 2:
         headline = lines[0]
         description = " ".join(lines[1:])
         description = full_text
     return headline, description
def indent(elem, level=0):
    """Pretty-print an ElementTree subtree in place using two-space indents.

    Whitespace-only (or missing) ``text``/``tail`` values are rewritten so the
    serialized XML puts each element on its own line; text or tail that
    contains non-whitespace content is left untouched.

    Args:
        elem: The element whose subtree is re-indented (mutated in place).
        level: Nesting depth of *elem*; 0 for the root.
    """
    pad = "\n" + level * "  "
    if len(elem):
        # Whitespace before the first child: one level deeper than elem.
        if not elem.text or not elem.text.strip():
            elem.text = pad + "  "
        last_child = None
        for child in elem:
            indent(child, level + 1)
            last_child = child
        # The recursive call gave the last child a tail at child depth, which
        # would push elem's closing tag one level too deep; pull it back so
        # the closing tag lines up with the opening tag.
        if not last_child.tail or not last_child.tail.strip():
            last_child.tail = pad
        if not elem.tail or not elem.tail.strip():
            elem.tail = pad
    else:
        # A childless element at level 0 gets no trailing newline.
        if level and (not elem.tail or not elem.tail.strip()):
            elem.tail = pad
 # -----------------------------------------------------------------------------
 # UPDATE RSS
 # -----------------------------------------------------------------------------
     else:
         channel.append(item)
+    # Write back to file with pretty formatting
+    indent(root)
+    ET.register_namespace('itunes', "http://www.itunes.com/dtds/podcast-1.0.dtd")
     tree.write(rss_path, encoding="utf-8", xml_declaration=True)