# NOTE: The following was page chrome captured from the Hugging Face Spaces
# file viewer, not part of the program; it is kept as comments for provenance.
#   Space status: Running on Zero
#   File size: 5,122 bytes
#   Commits in the blame gutter: 547fef1, 7961a34, a668eca, e211da0,
#     72ae2e5, b594f58, e325224, 55e52b0
#   Line-number gutter: 1-149
from papers import PaperManager
from app import generate_podcast_script, kmodel, kpipeline, MALE_VOICE, FEMALE_VOICE
import soundfile as sf
import numpy as np
import argparse
from huggingface_hub import HfApi
import requests
import json
from datetime import datetime
import os
import tempfile
from update_rss import generate_headline_and_description, get_next_episode_number, update_rss
def submit_job(
    inference_provider: str,
    hf_token: str,
    space_id: str = "fdaudens/podcast-jobs-rss-test",
    flavor: str = "cpu-basic",
    timeout: float = 30.0,
) -> str:
    """Launch ``run_job.py`` as a Hugging Face job and return the raw response.

    The job is posted to ``https://huggingface.co/api/jobs/<username>``, where
    ``<username>`` is the account that owns ``hf_token``.

    Args:
        inference_provider: Value forwarded to the job as ``--provider``.
        hf_token: Hugging Face token. It authenticates this request and is
            also injected into the job's environment as ``HF_API_KEY``.
        space_id: Space whose code the job runs.
        flavor: Machine type requested for the job.
        timeout: Seconds to wait for the jobs API before giving up.

    Returns:
        The response body as text. The HTTP status is not checked here, so an
        error payload from the API is returned as-is rather than raised.

    Raises:
        requests.exceptions.RequestException: On connection failure or when
            ``timeout`` elapses.
    """
    # The jobs endpoint is namespaced by the token owner's username.
    username = HfApi(token=hf_token).whoami()["name"]
    url = f"https://huggingface.co/api/jobs/{username}"

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {hf_token}",
    }
    payload = {
        "spaceId": space_id,
        "command": ["python", "run_job.py"],
        "arguments": ["--provider", inference_provider],
        # NOTE: the token is handed to the job so it can upload its results.
        "environment": {"HF_API_KEY": hf_token},
        "flavor": flavor,
    }

    # Without an explicit timeout, requests waits indefinitely on a stalled
    # connection, which would hang the caller.
    response = requests.post(
        url,
        headers=headers,
        data=json.dumps(payload),
        timeout=timeout,
    )
    return response.text
def _synthesize_audio(podcast_script, speed=1.0):
    """Render a speaker-tagged script into a list of audio arrays.

    Lines starting with ``[MIKE]`` use the male voice and lines starting with
    ``[JANE]`` use the female voice (the tag is stripped before synthesis).
    Untagged lines are spoken in the female voice unchanged. Blank lines are
    skipped.

    Returns:
        One NumPy array per chunk yielded by the pipeline, in script order.
        The list is empty when the script contains nothing to speak.
    """
    pipeline = kpipeline
    female_pack = pipeline.load_voice(FEMALE_VOICE)
    male_pack = pipeline.load_voice(MALE_VOICE)

    # Speaker tag -> (voice pack used for the reference style, voice name).
    speakers = {
        "[MIKE]": (male_pack, MALE_VOICE),
        "[JANE]": (female_pack, FEMALE_VOICE),
    }

    segments = []
    for line in podcast_script.strip().splitlines():
        if not line.strip():
            continue

        # Default: untagged text is read by the female voice as written.
        voice_pack, voice, utterance = female_pack, FEMALE_VOICE, line
        for tag, (pack, name) in speakers.items():
            if line.startswith(tag):
                voice_pack, voice = pack, name
                utterance = line[len(tag):].strip()
                break

        # A bare tag with no text has nothing to synthesize.
        if not utterance:
            continue

        for _, ps, _ in pipeline(utterance, voice, speed):
            # The voice pack is indexed by the chunk's phoneme length.
            ref_s = voice_pack[len(ps) - 1]
            segments.append(kmodel(ps, ref_s, speed).numpy())
    return segments


def main():
    """Generate a podcast from the top paper, upload it, and update the RSS feed.

    Steps: fetch the most popular paper, generate a script from its content,
    synthesize the script to audio, upload the WAV to the Space repository
    under ``podcasts/``, then call ``update_rss`` with the resulting URL.

    The Hugging Face token is read from ``HF_TOKEN`` or, failing that,
    ``HF_API_KEY``. The function prints a message and returns early when there
    is no paper, no audio, or no token.
    """
    parser = argparse.ArgumentParser(description="Podcast job runner")
    parser.add_argument("--provider", type=str, default="hf-inference")
    parser.add_argument("--name", type=str, default="podcast")
    parser.add_argument("--flavor", type=str, default="t4-medium")
    args = parser.parse_args()
    print(f"Arguments: provider={args.provider}, name={args.name}, flavor={args.flavor}")

    # 1. Get the most popular paper's content.
    paper_manager = PaperManager()
    top_papers = paper_manager.get_top_content()
    if not top_papers:
        # Guard against an empty result instead of failing with IndexError.
        print("No papers found. Nothing to generate.")
        return
    # The first entry is treated as the most popular paper.
    first_paper = next(iter(top_papers.values()))
    subject = first_paper['content']
    paper_id = first_paper['id']

    # 2. Generate the podcast script.
    podcast_script = generate_podcast_script(subject)

    # 3. Synthesize the podcast audio.
    sample_rate = 24000  # rate the WAV is written at
    audio_segments = _synthesize_audio(podcast_script, speed=1.0)
    if not audio_segments:
        print("No audio generated.")
        return

    # Resolve the token before touching the filesystem so a missing token
    # cannot leave an orphaned temporary file behind.
    hf_token = os.environ.get("HF_TOKEN") or os.environ.get("HF_API_KEY")
    if not hf_token:
        print("No Hugging Face token found in environment. Cannot upload to Space.")
        return

    full_audio = np.concatenate(audio_segments)

    space_id = "fdaudens/podcast-jobs-rss-test"  # Your Space ID
    today = datetime.now().strftime("%Y-%m-%d")
    podcast_filename = f"{args.name}-{today}.wav"
    space_path = f"podcasts/{podcast_filename}"

    # Reserve a path, then close the handle before writing: reopening a file
    # that is still open is not portable across platforms.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
        temp_path = temp_file.name
    try:
        sf.write(temp_path, full_audio, sample_rate)

        # Upload directly to the Space (crucial: repo_type="space").
        api = HfApi(token=hf_token)
        print(f"Uploading podcast to Space {space_id} at path {space_path}...")
        api.upload_file(
            path_or_fileobj=temp_path,
            path_in_repo=space_path,
            repo_id=space_id,
            repo_type="space",
            token=hf_token,
        )
        audio_length = os.path.getsize(temp_path)
    finally:
        # Always remove the temporary file, even if writing or upload fails.
        os.unlink(temp_path)

    # NOTE(review): "/blob/main/" addresses the file's web page on the Hub;
    # direct downloads use "/resolve/main/". Confirm what update_rss expects
    # before using this URL as a feed enclosure.
    audio_url = f"https://huggingface.co/spaces/{space_id}/blob/main/{space_path}"
    print(f"Podcast audio uploaded to Space at {space_path}")
    print(f"Access URL: {audio_url}")

    # 4. Publish the episode; update_rss receives the raw paper content.
    update_rss(subject, audio_url, audio_length, paper_id=paper_id)
# Run the job only when executed as a script, not when imported.
if __name__ == "__main__":
    main()