Spaces:
Running
on
Zero
Running
on
Zero
sanitize scripts
Browse files
app.py
CHANGED
@@ -12,6 +12,7 @@ import requests
|
|
12 |
from pathlib import Path
|
13 |
from pydub import AudioSegment # Add this import
|
14 |
import tempfile
|
|
|
15 |
|
16 |
import torch
|
17 |
from huggingface_hub import InferenceClient
|
@@ -49,12 +50,16 @@ client = InferenceClient(
|
|
49 |
token=os.getenv("HF_TOKEN"),
|
50 |
)
|
51 |
|
|
|
|
|
|
|
|
|
|
|
52 |
def generate_podcast_script(subject: str, steering_question: str | None = None) -> str:
|
53 |
"""Ask the LLM for a script of a podcast given by two hosts."""
|
54 |
messages = [
|
55 |
{"role": "system", "content": SYSTEM_PROMPT},
|
56 |
-
{"role": "user", "content": f"""Here is the topic: it's the top trending paper on Hugging Face daily papers today. You will need to analyze it by bringing profound insights
|
57 |
-
{subject[:10000]}"""},
|
58 |
]
|
59 |
if steering_question and len(steering_question) > 0:
|
60 |
messages.append({"role": "user", "content": f"You could focus on this question: {steering_question}"})
|
@@ -67,6 +72,7 @@ def generate_podcast_script(subject: str, steering_question: str | None = None)
|
|
67 |
assert "[JANE]" in full_text
|
68 |
dialogue_start_index = full_text.find("[JANE]")
|
69 |
podcast_text = full_text[dialogue_start_index:]
|
|
|
70 |
return podcast_text
|
71 |
|
72 |
def generate_headline_and_description(subject: str, steering_question: str | None = None) -> tuple[str, str]:
|
|
|
12 |
from pathlib import Path
|
13 |
from pydub import AudioSegment # Add this import
|
14 |
import tempfile
|
15 |
+
import re
|
16 |
|
17 |
import torch
|
18 |
from huggingface_hub import InferenceClient
|
|
|
50 |
token=os.getenv("HF_TOKEN"),
|
51 |
)
|
52 |
|
53 |
+
def sanitize_script(script: str) -> str:
    """Return *script* with formatting characters removed.

    Every occurrence of ``*``, ``_``, ``~`` and the backtick is deleted;
    all other characters, including whitespace and speaker tags such as
    ``[JANE]``, are left untouched.

    Args:
        script: The raw podcast script text.

    Returns:
        The script with the formatting characters stripped out.
    """
    # Characters to drop (add more as needed). A translation table deletes
    # them in a single pass and avoids regex escaping for plain characters.
    formatting_chars = "*_~`"
    return script.translate(str.maketrans("", "", formatting_chars))
|
57 |
+
|
58 |
def generate_podcast_script(subject: str, steering_question: str | None = None) -> str:
|
59 |
"""Ask the LLM for a script of a podcast given by two hosts."""
|
60 |
messages = [
|
61 |
{"role": "system", "content": SYSTEM_PROMPT},
|
62 |
+
{"role": "user", "content": f"""Here is the topic: it's the top trending paper on Hugging Face daily papers today. You will need to analyze it by bringing profound insights.\n{subject[:10000]}"""},
|
|
|
63 |
]
|
64 |
if steering_question and len(steering_question) > 0:
|
65 |
messages.append({"role": "user", "content": f"You could focus on this question: {steering_question}"})
|
|
|
72 |
assert "[JANE]" in full_text
|
73 |
dialogue_start_index = full_text.find("[JANE]")
|
74 |
podcast_text = full_text[dialogue_start_index:]
|
75 |
+
podcast_text = sanitize_script(podcast_text)
|
76 |
return podcast_text
|
77 |
|
78 |
def generate_headline_and_description(subject: str, steering_question: str | None = None) -> tuple[str, str]:
|