fdaudens HF Staff committed on
Commit
86f9ba1
·
1 Parent(s): d04a952

sanitize scripts

Browse files
Files changed (1) hide show
  1. app.py +8 -2
app.py CHANGED
@@ -12,6 +12,7 @@ import requests
12
  from pathlib import Path
13
  from pydub import AudioSegment # Add this import
14
  import tempfile
 
15
 
16
  import torch
17
  from huggingface_hub import InferenceClient
@@ -49,12 +50,16 @@ client = InferenceClient(
49
  token=os.getenv("HF_TOKEN"),
50
  )
51
 
 
 
 
 
 
52
  def generate_podcast_script(subject: str, steering_question: str | None = None) -> str:
53
  """Ask the LLM for a script of a podcast given by two hosts."""
54
  messages = [
55
  {"role": "system", "content": SYSTEM_PROMPT},
56
- {"role": "user", "content": f"""Here is the topic: it's the top trending paper on Hugging Face daily papers today. You will need to analyze it by bringing profound insights.
57
- {subject[:10000]}"""},
58
  ]
59
  if steering_question and len(steering_question) > 0:
60
  messages.append({"role": "user", "content": f"You could focus on this question: {steering_question}"})
@@ -67,6 +72,7 @@ def generate_podcast_script(subject: str, steering_question: str | None = None)
67
  assert "[JANE]" in full_text
68
  dialogue_start_index = full_text.find("[JANE]")
69
  podcast_text = full_text[dialogue_start_index:]
 
70
  return podcast_text
71
 
72
  def generate_headline_and_description(subject: str, steering_question: str | None = None) -> tuple[str, str]:
 
12
  from pathlib import Path
13
  from pydub import AudioSegment # Add this import
14
  import tempfile
15
+ import re
16
 
17
  import torch
18
  from huggingface_hub import InferenceClient
 
50
  token=os.getenv("HF_TOKEN"),
51
  )
52
 
53
+ def sanitize_script(script: str) -> str:
54
+ """Remove special characters like '*' from the script."""
55
+ # Remove Markdown formatting characters (asterisk, underscore, tilde, backtick); add more as needed
56
+ return re.sub(r'[\*\_\~\`]', '', script)
57
+
58
  def generate_podcast_script(subject: str, steering_question: str | None = None) -> str:
59
  """Ask the LLM for a script of a podcast given by two hosts."""
60
  messages = [
61
  {"role": "system", "content": SYSTEM_PROMPT},
62
+ {"role": "user", "content": f"""Here is the topic: it's the top trending paper on Hugging Face daily papers today. You will need to analyze it by bringing profound insights.\n{subject[:10000]}"""},
 
63
  ]
64
  if steering_question and len(steering_question) > 0:
65
  messages.append({"role": "user", "content": f"You could focus on this question: {steering_question}"})
 
72
  assert "[JANE]" in full_text
73
  dialogue_start_index = full_text.find("[JANE]")
74
  podcast_text = full_text[dialogue_start_index:]
75
+ podcast_text = sanitize_script(podcast_text)
76
  return podcast_text
77
 
78
  def generate_headline_and_description(subject: str, steering_question: str | None = None) -> tuple[str, str]: