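"""Generate and publish the daily paper podcast.

Fetches the most popular paper, generates a two-speaker podcast script,
synthesizes the audio with the TTS model and pipeline imported from app,
uploads the resulting WAV to the Hugging Face Space, and updates the RSS
feed. submit_job() can launch this script as a Hugging Face Job.
"""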
from papers import PaperManager
from app import generate_podcast_script, kmodel, kpipeline, MALE_VOICE, FEMALE_VOICE
import soundfile as sf
import numpy as np
import argparse
from huggingface_hub import HfApi
import requests
import json
from datetime import datetime
import os
import tempfile
from update_rss import generate_headline_and_description, get_next_episode_number, update_rss

def submit_job(
    inference_provider: str,
    hf_token: str
):
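    """Launch this script as a Hugging Face Job via the Jobs API.

    Runs run_job.py inside the configured Space on a cpu-basic machine and
    returns the raw API response body as text.
    """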
    # Configuration variables
    username = HfApi(token=hf_token).whoami()["name"]
    space_id = "fdaudens/podcast-jobs-rss-test"  # Your Space ID
    flavor = "cpu-basic"  # Machine type

    # Create the API request
    url = f"https://huggingface.co/api/jobs/{username}"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {hf_token}"
    }

    payload = {
        "spaceId": space_id,
        "command": ["python", "run_job.py"],
        "arguments": [
            "--provider", inference_provider
        ],
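        # Forward the token into the job's environment so the job itself
        # can upload the finished audio back to the Space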
        "environment": {
            "HF_API_KEY": hf_token
        },
        "flavor": flavor
    }

    # Launch the job
    response = requests.post(url, headers=headers, data=json.dumps(payload))
    return response.text

def main():
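    """Run the full pipeline: fetch paper, write script, synthesize audio, upload, update RSS."""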
    parser = argparse.ArgumentParser(description="Podcast job runner")
    parser.add_argument("--provider", type=str, default="hf-inference")
    parser.add_argument("--name", type=str, default="podcast")
    parser.add_argument("--flavor", type=str, default="t4-medium")
    args = parser.parse_args()

    print(f"Arguments: provider={args.provider}, name={args.name}, flavor={args.flavor}")

    # 1. Get the most popular paper's content
    paper_manager = PaperManager()
    top_papers = paper_manager.get_top_content()
    # Get the first (most popular) paper's id and text
    first_paper = list(top_papers.values())[0]
    subject = first_paper['content']
    paper_id = first_paper['id']

    # 2. Generate the podcast script
    podcast_script = generate_podcast_script(subject)

    # 3. Synthesize the podcast audio
    lines = [l for l in podcast_script.strip().splitlines() if l.strip()]
    sr = 24000  # sample rate used when writing the output WAV
    speed = 1.0  # speech-speed multiplier passed to the TTS pipeline
    audio_segments = []

    pipeline = kpipeline
    pipeline_voice_female = pipeline.load_voice(FEMALE_VOICE)
    pipeline_voice_male = pipeline.load_voice(MALE_VOICE)

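    # The script tags each line with its speaker: [MIKE] lines use the male
    # voice, [JANE] lines the female voice; untagged lines default to female.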
    for line in lines:
        if line.startswith("[MIKE]"):
            pipeline_voice = pipeline_voice_male
            voice = MALE_VOICE
            utterance = line[len("[MIKE]"):].strip()
        elif line.startswith("[JANE]"):
            pipeline_voice = pipeline_voice_female
            voice = FEMALE_VOICE
            utterance = line[len("[JANE]"):].strip()
        else:
            pipeline_voice = pipeline_voice_female
            voice = FEMALE_VOICE
            utterance = line

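        # The pipeline yields chunks with a token sequence ps; the reference
        # voice embedding is indexed by the sequence length before synthesis.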
        for _, ps, _ in pipeline(utterance, voice, speed):
            ref_s = pipeline_voice[len(ps) - 1]
            audio_numpy = kmodel(ps, ref_s, speed).numpy()
            audio_segments.append(audio_numpy)

    # Concatenate all audio segments
    if audio_segments:
        full_audio = np.concatenate(audio_segments)
        
        # Create a temporary file
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
            temp_path = temp_file.name
            sf.write(temp_path, full_audio, sr)
        
        # Get API token from environment
        hf_token = os.environ.get("HF_TOKEN") or os.environ.get("HF_API_KEY")
        if hf_token is None:
            print("No Hugging Face token found in environment. Cannot upload to Space.")
            return
            
        # Initialize the Hugging Face API
        api = HfApi(token=hf_token)
        
        # Set up Space path info
        space_id = "fdaudens/podcast-jobs-rss-test"  # Your Space ID
        today = datetime.now().strftime("%Y-%m-%d")
        base_name = args.name
        podcast_filename = f"{base_name}-{today}.wav"
        
        # Path in the Space repository
        space_path = f"podcasts/{podcast_filename}"
        
        # Upload directly to the Space (crucial: repo_type="space")
        print(f"Uploading podcast to Space {space_id} at path {space_path}...")
        api.upload_file(
            path_or_fileobj=temp_path,
            path_in_repo=space_path,
            repo_id=space_id,
            repo_type="space", 
            token=hf_token
        )
        
        audio_url = f"https://huggingface.co/spaces/{space_id}/blob/main/{space_path}"
        audio_length = os.path.getsize(temp_path)
        
        # Clean up temporary file
        os.unlink(temp_path)
        
        print(f"Podcast audio uploaded to Space at {space_path}")
        print(f"Access URL: {audio_url}")

        # Update the RSS feed with the new episode
        # (headline/description generation is currently disabled)
        # headline, description = generate_headline_and_description(subject)
        # episode_number = get_next_episode_number()
        update_rss(subject, audio_url, audio_length, paper_id=paper_id)
    else:
        print("No audio generated.")

if __name__ == "__main__":
    main()