Spaces:

fdaudens
/

podcast-jobs

Running on Zero

fdaudens HF Staff committed about 8 hours ago

Commit

86f9ba1

1 Parent(s): d04a952

sanitize scripts

Files changed (1) hide show

app.py CHANGED Viewed

@@ -12,6 +12,7 @@ import requests
 from pathlib import Path
 from pydub import AudioSegment  # Add this import
 import tempfile
 import torch
 from huggingface_hub import InferenceClient
@@ -49,12 +50,16 @@ client = InferenceClient(
     token=os.getenv("HF_TOKEN"),
 )
 def generate_podcast_script(subject: str, steering_question: str | None = None) -> str:
     """Ask the LLM for a script of a podcast given by two hosts."""
     messages = [
         {"role": "system", "content": SYSTEM_PROMPT},
-        {"role": "user", "content": f"""Here is the topic: it's the top trending paper on Hugging Face daily papers today. You will need to analyze it by bringing profound insights.
-{subject[:10000]}"""},
     ]
     if steering_question and len(steering_question) > 0:
         messages.append({"role": "user", "content": f"You could focus on this question: {steering_question}"})
@@ -67,6 +72,7 @@ def generate_podcast_script(subject: str, steering_question: str | None = None)
     assert "[JANE]" in full_text
     dialogue_start_index = full_text.find("[JANE]")
     podcast_text = full_text[dialogue_start_index:]
     return podcast_text
 def generate_headline_and_description(subject: str, steering_question: str | None = None) -> tuple[str, str]:

 from pathlib import Path
 from pydub import AudioSegment  # Add this import
 import tempfile
+import re
 import torch
 from huggingface_hub import InferenceClient
     token=os.getenv("HF_TOKEN"),
 )
+def sanitize_script(script: str) -> str:
+    """Remove special characters like '*' from the script."""
+    # Remove asterisk and other special formatting characters (add more as needed)
+    return re.sub(r'[\*\_\~\`]', '', script)
 def generate_podcast_script(subject: str, steering_question: str | None = None) -> str:
     """Ask the LLM for a script of a podcast given by two hosts."""
     messages = [
         {"role": "system", "content": SYSTEM_PROMPT},
+        {"role": "user", "content": f"""Here is the topic: it's the top trending paper on Hugging Face daily papers today. You will need to analyze it by bringing profound insights.\n{subject[:10000]}"""},
     ]
     if steering_question and len(steering_question) > 0:
         messages.append({"role": "user", "content": f"You could focus on this question: {steering_question}"})
     assert "[JANE]" in full_text
     dialogue_start_index = full_text.find("[JANE]")
     podcast_text = full_text[dialogue_start_index:]
+    podcast_text = sanitize_script(podcast_text)
     return podcast_text
 def generate_headline_and_description(subject: str, steering_question: str | None = None) -> tuple[str, str]: