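"""Generate and publish the daily paper podcast.

Fetches the most popular paper, generates a two-speaker podcast script,
synthesizes the audio with the TTS model and pipeline imported from app,
uploads the resulting WAV to the Hugging Face Space, and updates the RSS
feed. submit_job() can launch this script as a Hugging Face Job.
"""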
from papers import PaperManager
from app import generate_podcast_script, kmodel, kpipeline, MALE_VOICE, FEMALE_VOICE
import soundfile as sf
import numpy as np
import argparse
from huggingface_hub import HfApi
import requests
import json
from datetime import datetime
import os
import tempfile
from update_rss import generate_headline_and_description, get_next_episode_number, update_rss

def submit_job(
    inference_provider: str,
    hf_token: str
):
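    """Launch this script as a Hugging Face Job via the Jobs API.

    Runs run_job.py inside the configured Space on a cpu-basic machine and
    returns the raw API response body as text.
    """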
    # Configuration variables
    username = HfApi(token=hf_token).whoami()["name"]
    space_id = "fdaudens/podcast-jobs-rss-test"  # Your Space ID
    flavor = "cpu-basic"  # Machine type

    # Create the API request
    url = f"https://huggingface.co/api/jobs/{username}"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {hf_token}"
    }

    payload = {
        "spaceId": space_id,
        "command": ["python", "run_job.py"],
        "arguments": [
            "--provider", inference_provider
        ],
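        # Forward the token into the job's environment so the job itself
        # can upload the finished audio back to the Space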
        "environment": {
            "HF_API_KEY": hf_token
        },
        "flavor": flavor
    }

    # Launch the job
    response = requests.post(url, headers=headers, data=json.dumps(payload))
    return response.text

def main():
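    """Run the full pipeline: fetch paper, write script, synthesize audio, upload, update RSS."""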
    parser = argparse.ArgumentParser(description="Podcast job runner")
    parser.add_argument("--provider", type=str, default="hf-inference")
    parser.add_argument("--name", type=str, default="podcast")
    parser.add_argument("--flavor", type=str, default="t4-medium")
    args = parser.parse_args()

    print(f"Arguments: provider={args.provider}, name={args.name}, flavor={args.flavor}")

    # 1. Get the most popular paper's content
    paper_manager = PaperManager()
    top_papers = paper_manager.get_top_content()
    # Get the first (most popular) paper's id and text
    first_paper = list(top_papers.values())[0]
    subject = first_paper['content']
    paper_id = first_paper['id']

    # 2. Generate the podcast script
    podcast_script = generate_podcast_script(subject)

    # 3. Synthesize the podcast audio
    lines = [l for l in podcast_script.strip().splitlines() if l.strip()]
    sr = 24000  # sample rate used when writing the output WAV
    speed = 1.0  # speech-speed multiplier passed to the TTS pipeline
    audio_segments = []

    pipeline = kpipeline
    pipeline_voice_female = pipeline.load_voice(FEMALE_VOICE)
    pipeline_voice_male = pipeline.load_voice(MALE_VOICE)

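    # The script tags each line with its speaker: [MIKE] lines use the male
    # voice, [JANE] lines the female voice; untagged lines default to female.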
    for line in lines:
        if line.startswith("[MIKE]"):
            pipeline_voice = pipeline_voice_male
            voice = MALE_VOICE
            utterance = line[len("[MIKE]"):].strip()
        elif line.startswith("[JANE]"):
            pipeline_voice = pipeline_voice_female
            voice = FEMALE_VOICE
            utterance = line[len("[JANE]"):].strip()
        else:
            pipeline_voice = pipeline_voice_female
            voice = FEMALE_VOICE
            utterance = line

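        # The pipeline yields chunks with a token sequence ps; the reference
        # voice embedding is indexed by the sequence length before synthesis.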
        for _, ps, _ in pipeline(utterance, voice, speed):
            ref_s = pipeline_voice[len(ps) - 1]
            audio_numpy = kmodel(ps, ref_s, speed).numpy()
            audio_segments.append(audio_numpy)

    # Concatenate all audio segments
    if audio_segments:
        full_audio = np.concatenate(audio_segments)
        
        # Create a temporary file
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
            temp_path = temp_file.name
            sf.write(temp_path, full_audio, sr)
        
        # Get API token from environment
        hf_token = os.environ.get("HF_TOKEN") or os.environ.get("HF_API_KEY")
        if hf_token is None:
            print("No Hugging Face token found in environment. Cannot upload to Space.")
            return
            
        # Initialize the Hugging Face API
        api = HfApi(token=hf_token)
        
        # Set up Space path info
        space_id = "fdaudens/podcast-jobs-rss-test"  # Your Space ID
        today = datetime.now().strftime("%Y-%m-%d")
        base_name = args.name
        podcast_filename = f"{base_name}-{today}.wav"
        
        # Path in the Space repository
        space_path = f"podcasts/{podcast_filename}"
        
        # Upload directly to the Space (crucial: repo_type="space")
        print(f"Uploading podcast to Space {space_id} at path {space_path}...")
        api.upload_file(
            path_or_fileobj=temp_path,
            path_in_repo=space_path,
            repo_id=space_id,
            repo_type="space", 
            token=hf_token
        )
        
        audio_url = f"https://huggingface.co/spaces/{space_id}/blob/main/{space_path}"
        audio_length = os.path.getsize(temp_path)
        
        # Clean up temporary file
        os.unlink(temp_path)
        
        print(f"Podcast audio uploaded to Space at {space_path}")
        print(f"Access URL: {audio_url}")

        # Update the RSS feed with the new episode
        # (headline/description generation is currently disabled)
        # headline, description = generate_headline_and_description(subject)
        # episode_number = get_next_episode_number()
        update_rss(subject, audio_url, audio_length, paper_id=paper_id)
    else:
        print("No audio generated.")

if __name__ == "__main__":
    main()