Spaces:

fdaudens
/

podcast-jobs

Runtime error

App Files Files Community

podcast-jobs / run_job.py

fdaudens HF Staff

add functions to app.py

72ae2e5 9 minutes ago

raw

history blame contribute delete

5 kB

	from papers import PaperManager
	from app import generate_podcast_script, kmodel, kpipeline, MALE_VOICE, FEMALE_VOICE
	import soundfile as sf
	import numpy as np
	import argparse
	from huggingface_hub import HfApi
	import requests
	import json
	from datetime import datetime
	import os
	import tempfile
	from update_rss import generate_headline_and_description, get_next_episode_number, update_rss

	def submit_job(
	    inference_provider: str,
	    hf_token: str,
	    space_id: str = "fdaudens/podcast-jobs",
	    flavor: str = "cpu-basic",
	    timeout: float = 30.0,
	) -> str:
	    """Launch ``run_job.py`` as a Hugging Face job and return the raw reply.

	    The job is created under the account that owns ``hf_token`` (resolved
	    with ``HfApi.whoami``) and runs ``python run_job.py --provider <provider>``
	    inside the given Space.

	    Args:
	        inference_provider: Value forwarded to the job as ``--provider``.
	        hf_token: Hugging Face token. Used to authenticate this request and
	            also handed to the job as the ``HF_API_KEY`` environment variable.
	        space_id: Space whose code the job runs.
	        flavor: Machine type requested for the job.
	        timeout: Seconds to wait for the jobs endpoint before giving up, so
	            a stalled connection cannot block the caller indefinitely.

	    Returns:
	        The response body as text. HTTP error statuses are not raised here;
	        the body is returned unchanged so the caller can inspect it.

	    Raises:
	        requests.exceptions.RequestException: On connection failure or when
	            ``timeout`` elapses.
	    """
	    username = HfApi(token=hf_token).whoami()["name"]
	    url = f"https://huggingface.co/api/jobs/{username}"

	    # NOTE(review): the token travels to the job as a plain environment
	    # variable; confirm whether the jobs endpoint offers a dedicated
	    # secrets field that should be used instead.
	    payload = {
	        "spaceId": space_id,
	        "command": ["python", "run_job.py"],
	        "arguments": ["--provider", inference_provider],
	        "environment": {"HF_API_KEY": hf_token},
	        "flavor": flavor,
	    }

	    # ``json=`` serialises the payload and sets the JSON Content-Type header.
	    response = requests.post(
	        url,
	        headers={"Authorization": f"Bearer {hf_token}"},
	        json=payload,
	        timeout=timeout,
	    )
	    return response.text

	def _synthesize_script(podcast_script, speed=1.0):
	    """Turn a speaker-tagged podcast script into a list of audio arrays.

	    Lines starting with ``[MIKE]`` are voiced with ``MALE_VOICE`` and lines
	    starting with ``[JANE]`` with ``FEMALE_VOICE``; untagged lines fall back
	    to ``FEMALE_VOICE``. Blank lines are skipped.

	    Args:
	        podcast_script: Script text, one utterance per line.
	        speed: Speed value forwarded to both ``kpipeline`` and ``kmodel``.

	    Returns:
	        A list of arrays in script order, one per chunk yielded by
	        ``kpipeline`` (each is ``kmodel(...).numpy()``).
	    """
	    voice_packs = {
	        FEMALE_VOICE: kpipeline.load_voice(FEMALE_VOICE),
	        MALE_VOICE: kpipeline.load_voice(MALE_VOICE),
	    }
	    speaker_tags = (("[MIKE]", MALE_VOICE), ("[JANE]", FEMALE_VOICE))

	    segments = []
	    for raw_line in podcast_script.strip().splitlines():
	        # Strip first so an indented "[MIKE] ..." line is still recognised.
	        text = raw_line.strip()
	        if not text:
	            continue

	        voice, utterance = FEMALE_VOICE, text
	        for tag, tagged_voice in speaker_tags:
	            if text.startswith(tag):
	                voice, utterance = tagged_voice, text[len(tag):].strip()
	                break

	        voice_pack = voice_packs[voice]
	        for _, phonemes, _ in kpipeline(utterance, voice, speed):
	            # The voice pack is indexed by phoneme-sequence length.
	            ref_s = voice_pack[len(phonemes) - 1]
	            segments.append(kmodel(phonemes, ref_s, speed).numpy())
	    return segments


	def _upload_audio(full_audio, sample_rate, hf_token, space_id, space_path):
	    """Write audio to a temporary WAV, upload it to a Space, return its size.

	    The temporary file is removed whether or not writing or uploading
	    succeeds.

	    Args:
	        full_audio: Audio samples to write.
	        sample_rate: Sample rate passed to ``sf.write``.
	        hf_token: Token used for the upload.
	        space_id: Target Space repository.
	        space_path: Destination path inside the Space repository.

	    Returns:
	        Size of the written WAV file in bytes.
	    """
	    # Only reserve a name here; the handle is closed before soundfile
	    # writes to the path.
	    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
	        temp_path = temp_file.name
	    try:
	        sf.write(temp_path, full_audio, sample_rate)
	        # repo_type="space" is required so the file lands in the Space repo.
	        HfApi(token=hf_token).upload_file(
	            path_or_fileobj=temp_path,
	            path_in_repo=space_path,
	            repo_id=space_id,
	            repo_type="space",
	            token=hf_token,
	        )
	        return os.path.getsize(temp_path)
	    finally:
	        os.unlink(temp_path)


	def main():
	    """Generate a podcast from the most popular paper and publish it.

	    Steps: fetch the top paper content, generate a two-speaker script,
	    synthesize the audio, upload the WAV to the Space under ``podcasts/``
	    and update the RSS feed.

	    Command-line options ``--provider``, ``--name`` and ``--flavor`` are
	    parsed and echoed; within this function only ``--name`` affects the
	    result (it is the prefix of the uploaded file name).

	    The function prints a message and returns early when there are no
	    papers, when no audio was produced, or when neither ``HF_TOKEN`` nor
	    ``HF_API_KEY`` is set in the environment.
	    """
	    parser = argparse.ArgumentParser(description="Podcast job runner")
	    parser.add_argument("--provider", type=str, default="hf-inference")
	    parser.add_argument("--name", type=str, default="podcast")
	    parser.add_argument("--flavor", type=str, default="t4-medium")
	    args = parser.parse_args()

	    print(f"Arguments: provider={args.provider}, name={args.name}, flavor={args.flavor}")

	    # 1. Get the most popular paper's content (first entry of the mapping).
	    top_papers = PaperManager().get_top_content()
	    if not top_papers:
	        print("No papers available; nothing to generate.")
	        return
	    subject = next(iter(top_papers.values()))

	    # 2. Generate the podcast script.
	    podcast_script = generate_podcast_script(subject)

	    # 3. Synthesize the podcast audio.
	    sample_rate = 24000
	    audio_segments = _synthesize_script(podcast_script, speed=1.0)
	    if not audio_segments:
	        print("No audio generated.")
	        return

	    # Check the token before any file is written so nothing is left behind.
	    hf_token = os.environ.get("HF_TOKEN") or os.environ.get("HF_API_KEY")
	    if not hf_token:
	        print("No Hugging Face token found in environment. Cannot upload to Space.")
	        return

	    full_audio = np.concatenate(audio_segments)

	    # 4. Upload to the Space repository.
	    space_id = "fdaudens/podcast-jobs"
	    today = datetime.now().strftime("%Y-%m-%d")
	    space_path = f"podcasts/{args.name}-{today}.wav"

	    print(f"Uploading podcast to Space {space_id} at path {space_path}...")
	    audio_length = _upload_audio(
	        full_audio, sample_rate, hf_token, space_id, space_path
	    )

	    # NOTE(review): this is the "blob" page URL; confirm update_rss does not
	    # need a direct-download ("resolve") URL for the feed enclosure.
	    audio_url = f"https://huggingface.co/spaces/{space_id}/blob/main/{space_path}"

	    print(f"Podcast audio uploaded to Space at {space_path}")
	    print(f"Access URL: {audio_url}")

	    # 5. Publish the new episode in the RSS feed.
	    update_rss(subject, audio_url, audio_length)

	# Script entry point: run the job only when this file is executed
	# directly, not when it is imported.
	if __name__ == "__main__":
	    main()