# podcast-jobs / run_job.py
# Last commit: "add functions to app.py" (72ae2e5) by fdaudens (HF Staff)
from papers import PaperManager
from app import generate_podcast_script, kmodel, kpipeline, MALE_VOICE, FEMALE_VOICE
import soundfile as sf
import numpy as np
import argparse
from huggingface_hub import HfApi
import requests
import json
from datetime import datetime
import os
import tempfile
from update_rss import generate_headline_and_description, get_next_episode_number, update_rss
def submit_job(
    inference_provider: str,
    hf_token: str,
    space_id: str = "fdaudens/podcast-jobs",
    flavor: str = "cpu-basic",
    timeout: float = 30.0,
) -> str:
    """Launch ``run_job.py`` as a Hugging Face job and return the raw reply.

    The account name is resolved from *hf_token* via ``HfApi.whoami()`` and a
    JSON payload is POSTed to ``https://huggingface.co/api/jobs/<username>``.

    Args:
        inference_provider: Value forwarded to the job as ``--provider``.
        hf_token: Token used both to authorize the request and, through the
            ``HF_API_KEY`` environment variable, inside the launched job.
        space_id: Space whose code the job runs.
        flavor: Machine type requested for the job.
        timeout: Seconds to wait for the HTTP request before ``requests``
            raises a timeout error, so a stalled endpoint cannot block the
            caller indefinitely.

    Returns:
        The response body as text. The HTTP status is not inspected, so an
        error reply from the API is returned as-is rather than raised.
    """
    username = HfApi(token=hf_token).whoami()["name"]

    url = f"https://huggingface.co/api/jobs/{username}"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {hf_token}",
    }
    payload = {
        "spaceId": space_id,
        "command": ["python", "run_job.py"],
        "arguments": ["--provider", inference_provider],
        # main() reads HF_TOKEN or HF_API_KEY from the job's environment.
        "environment": {"HF_API_KEY": hf_token},
        "flavor": flavor,
    }

    response = requests.post(
        url,
        headers=headers,
        data=json.dumps(payload),
        timeout=timeout,
    )
    return response.text
def _synthesize_script(podcast_script, speed=1.0):
    """Turn a tagged podcast script into a list of audio arrays.

    Each non-blank line is spoken with the male voice when it starts with
    ``[MIKE]``, the female voice when it starts with ``[JANE]``, and the
    female voice (with the line unchanged) otherwise.

    Args:
        podcast_script: Script text, one utterance per line.
        speed: Speed value forwarded to both the pipeline and the model.

    Returns:
        A list with one NumPy array per chunk yielded by the pipeline, in
        script order. Empty when the script has no speakable lines.
    """
    female_pack = kpipeline.load_voice(FEMALE_VOICE)
    male_pack = kpipeline.load_voice(MALE_VOICE)
    # Speaker tag -> (loaded voice pack, voice name passed to the pipeline).
    speakers = {
        "[MIKE]": (male_pack, MALE_VOICE),
        "[JANE]": (female_pack, FEMALE_VOICE),
    }

    segments = []
    for raw_line in podcast_script.strip().splitlines():
        # Strip first so an indented "[MIKE] ..." line is still recognized
        # instead of being read aloud, tag included, in the default voice.
        line = raw_line.strip()
        if not line:
            continue

        voice_pack, voice, utterance = female_pack, FEMALE_VOICE, line
        for tag, (pack, name) in speakers.items():
            if line.startswith(tag):
                voice_pack, voice = pack, name
                utterance = line[len(tag):].strip()
                break

        for _, ps, _ in kpipeline(utterance, voice, speed):
            # The voice pack is indexed by the chunk's phoneme-sequence length.
            ref_s = voice_pack[len(ps) - 1]
            segments.append(kmodel(ps, ref_s, speed).numpy())
    return segments


def _upload_podcast(full_audio, sample_rate, base_name, hf_token):
    """Write the audio to a temporary WAV, upload it to the Space, clean up.

    Args:
        full_audio: Concatenated audio samples to write.
        sample_rate: Sample rate handed to ``soundfile.write``.
        base_name: Prefix of the uploaded file name; today's date is appended.
        hf_token: Hugging Face token used for the upload.

    Returns:
        ``(audio_url, audio_length)`` where *audio_length* is the WAV file
        size in bytes.
    """
    space_id = "fdaudens/podcast-jobs"  # Your Space ID
    today = datetime.now().strftime("%Y-%m-%d")
    space_path = f"podcasts/{base_name}-{today}.wav"

    # Only reserve a unique path here; the handle is closed before soundfile
    # reopens the file by name.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
        temp_path = temp_file.name

    try:
        sf.write(temp_path, full_audio, sample_rate)

        # Upload directly to the Space (crucial: repo_type="space")
        print(f"Uploading podcast to Space {space_id} at path {space_path}...")
        HfApi(token=hf_token).upload_file(
            path_or_fileobj=temp_path,
            path_in_repo=space_path,
            repo_id=space_id,
            repo_type="space",
            token=hf_token,
        )
        audio_length = os.path.getsize(temp_path)
    finally:
        # delete=False means nothing else removes the file; do it even when
        # writing or uploading fails so failed runs do not leak temp files.
        os.unlink(temp_path)

    # NOTE(review): this is the "blob" URL form; if update_rss needs a direct
    # download link, the "resolve" form may be intended -- confirm.
    audio_url = f"https://huggingface.co/spaces/{space_id}/blob/main/{space_path}"
    print(f"Podcast audio uploaded to Space at {space_path}")
    print(f"Access URL: {audio_url}")
    return audio_url, audio_length


def main():
    """Generate a podcast episode from the top paper and publish it.

    Steps: parse CLI arguments, fetch the top paper content, generate a
    script, synthesize audio, upload the WAV to the Space, then update the
    RSS feed. Each early-exit condition prints a message and returns.
    """
    parser = argparse.ArgumentParser(description="Podcast job runner")
    parser.add_argument("--provider", type=str, default="hf-inference")
    parser.add_argument("--name", type=str, default="podcast")
    parser.add_argument("--flavor", type=str, default="t4-medium")
    args = parser.parse_args()
    print(f"Arguments: provider={args.provider}, name={args.name}, flavor={args.flavor}")

    # 1. Get the most popular paper's content
    top_papers = PaperManager().get_top_content()
    if not top_papers:
        # Previously this surfaced as an IndexError on the [0] lookup.
        print("No papers available. Nothing to generate.")
        return
    # The first value is taken to be the most popular paper's text.
    subject = next(iter(top_papers.values()))

    # 2. Generate the podcast script
    podcast_script = generate_podcast_script(subject)

    # 3. Synthesize the podcast audio
    sr = 24000  # sample rate written to the WAV file
    audio_segments = _synthesize_script(podcast_script, speed=1.0)
    if not audio_segments:
        print("No audio generated.")
        return

    # Check the token before touching the filesystem so a missing token
    # leaves no temporary file behind. An empty string counts as missing.
    hf_token = os.environ.get("HF_TOKEN") or os.environ.get("HF_API_KEY")
    if not hf_token:
        print("No Hugging Face token found in environment. Cannot upload to Space.")
        return

    # 4. Upload the audio and publish it in the RSS feed
    full_audio = np.concatenate(audio_segments)
    audio_url, audio_length = _upload_podcast(full_audio, sr, args.name, hf_token)
    update_rss(subject, audio_url, audio_length)
# Run the job only when executed as a script, not when imported.
if __name__ == "__main__":
    main()