# Spaces: Runtime error
from papers import PaperManager | |
from app import generate_podcast_script, kmodel, kpipeline, MALE_VOICE, FEMALE_VOICE | |
import soundfile as sf | |
import numpy as np | |
import argparse | |
from huggingface_hub import HfApi | |
import requests | |
import json | |
from datetime import datetime | |
import os | |
import tempfile | |
from update_rss import generate_headline_and_description, get_next_episode_number, update_rss | |
def submit_job(
    inference_provider: str,
    hf_token: str,
    *,
    space_id: str = "fdaudens/podcast-jobs",
    flavor: str = "cpu-basic",
    timeout: float = 30.0,
) -> str:
    """Submit a job that runs ``run_job.py`` and return the raw response body.

    The job is posted to ``https://huggingface.co/api/jobs/<username>``, where
    ``<username>`` is the account name that ``hf_token`` resolves to.

    Args:
        inference_provider: Value forwarded to the job as ``--provider``.
        hf_token: Hugging Face token; used to authenticate this request and
            also passed to the job as the ``HF_API_KEY`` environment variable.
        space_id: Space the job is launched from.
        flavor: Machine type requested for the job.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The response body as text. The HTTP status is not checked here, so an
        error response is returned as text rather than raised.

    Raises:
        requests.exceptions.Timeout: If the request exceeds ``timeout``.
    """
    # Resolve the account name the token belongs to; it is part of the URL.
    username = HfApi(token=hf_token).whoami()["name"]

    url = f"https://huggingface.co/api/jobs/{username}"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {hf_token}",
    }
    payload = {
        "spaceId": space_id,
        "command": ["python", "run_job.py"],
        "arguments": [
            "--provider", inference_provider,
        ],
        "environment": {
            "HF_API_KEY": hf_token,
        },
        "flavor": flavor,
    }

    # Without an explicit timeout, requests waits indefinitely on a stalled
    # connection, which would hang the caller.
    response = requests.post(
        url,
        headers=headers,
        data=json.dumps(payload),
        timeout=timeout,
    )
    return response.text
def _split_speaker(line):
    """Return ``(voice, utterance)`` for one script line.

    A leading ``[MIKE]`` selects the male voice and a leading ``[JANE]`` the
    female voice; the tag is removed and the remainder stripped. Untagged
    lines use the female voice and are returned unchanged.
    """
    for tag, voice in (("[MIKE]", MALE_VOICE), ("[JANE]", FEMALE_VOICE)):
        if line.startswith(tag):
            return voice, line[len(tag):].strip()
    return FEMALE_VOICE, line


def _synthesize_segments(script, speed=1.0):
    """Synthesize every non-blank line of ``script`` into audio arrays."""
    # Load each voice once up front instead of once per line.
    voice_packs = {
        FEMALE_VOICE: kpipeline.load_voice(FEMALE_VOICE),
        MALE_VOICE: kpipeline.load_voice(MALE_VOICE),
    }
    segments = []
    for line in script.strip().splitlines():
        if not line.strip():
            continue
        voice, utterance = _split_speaker(line)
        pack = voice_packs[voice]
        for _, ps, _ in kpipeline(utterance, voice, speed):
            # The voice pack is indexed by the length of the pipeline output.
            ref_s = pack[len(ps) - 1]
            segments.append(kmodel(ps, ref_s, speed).numpy())
    return segments


def main():
    """Generate a podcast from the top paper, upload it, and update the RSS feed.

    Steps: parse CLI arguments, fetch the top content via ``PaperManager``,
    generate a script, synthesize it line by line, upload the resulting WAV to
    the Space repository, then call ``update_rss``.

    Prints a message and returns early when no papers are available, when no
    audio was produced, or when no Hugging Face token is set in ``HF_TOKEN`` /
    ``HF_API_KEY``.
    """
    parser = argparse.ArgumentParser(description="Podcast job runner")
    parser.add_argument("--provider", type=str, default="hf-inference")
    parser.add_argument("--name", type=str, default="podcast")
    parser.add_argument("--flavor", type=str, default="t4-medium")
    args = parser.parse_args()
    print(f"Arguments: provider={args.provider}, name={args.name}, flavor={args.flavor}")

    # 1. Get the most popular paper's content
    paper_manager = PaperManager()
    top_papers = paper_manager.get_top_content()
    if not top_papers:
        # Previously this surfaced as a bare IndexError on the [0] lookup.
        print("No papers found. Cannot generate podcast.")
        return
    # The first value is treated as the most popular paper's text.
    subject = next(iter(top_papers.values()))

    # 2. Generate the podcast script
    podcast_script = generate_podcast_script(subject)

    # 3. Synthesize the podcast audio
    audio_segments = _synthesize_segments(podcast_script, speed=1.0)
    if not audio_segments:
        print("No audio generated.")
        return
    full_audio = np.concatenate(audio_segments)
    sr = 24000  # sample rate written to the WAV; presumably the model's output rate — TODO confirm

    # Check for the token BEFORE creating the temp file so that the early
    # return cannot leave an orphaned file behind.
    hf_token = os.environ.get("HF_TOKEN") or os.environ.get("HF_API_KEY")
    if hf_token is None:
        print("No Hugging Face token found in environment. Cannot upload to Space.")
        return

    api = HfApi(token=hf_token)

    # Set up Space path info
    space_id = "fdaudens/podcast-jobs"
    today = datetime.now().strftime("%Y-%m-%d")
    podcast_filename = f"{args.name}-{today}.wav"
    space_path = f"podcasts/{podcast_filename}"

    # delete=False: the file is reopened by path for writing and uploading,
    # so it is removed manually in the finally block below.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
        temp_path = temp_file.name
    try:
        sf.write(temp_path, full_audio, sr)

        # Upload directly to the Space (crucial: repo_type="space")
        print(f"Uploading podcast to Space {space_id} at path {space_path}...")
        api.upload_file(
            path_or_fileobj=temp_path,
            path_in_repo=space_path,
            repo_id=space_id,
            repo_type="space",
            token=hf_token,
        )
        audio_length = os.path.getsize(temp_path)
    finally:
        # Always remove the temp file, including when the upload raises.
        os.unlink(temp_path)

    # NOTE(review): this is the "blob" (web page) URL. If update_rss uses it
    # as a feed enclosure, a direct-download "/resolve/main/" URL may be
    # required instead — confirm against update_rss.
    audio_url = f"https://huggingface.co/spaces/{space_id}/blob/main/{space_path}"
    print(f"Podcast audio uploaded to Space at {space_path}")
    print(f"Access URL: {audio_url}")

    update_rss(subject, audio_url, audio_length)
# Run the job only when executed as a script, not when imported.
if __name__ == "__main__":
    main()