Spaces:

fdaudens
/

podcast-jobs

Running on Zero

App Files Files Community

fdaudens HF Staff committed on May 13

Commit

7961a34

1 Parent(s): ffc5299

repo id...

Browse files

Files changed (2) hide show

.DS_Store +0 -0
run_job.py +42 -47

.DS_Store CHANGED Viewed

Binary files a/.DS_Store and b/.DS_Store differ

run_job.py CHANGED Viewed

@@ -3,29 +3,34 @@ from app import generate_podcast_script, kmodel, kpipeline, MALE_VOICE, FEMALE_V
 import soundfile as sf
 import numpy as np
 import argparse
-from huggingface_hub import HfApi, HfFolder
 import requests
 import json
 from datetime import datetime
 import os
-topics = [folder for folder in os.listdir("podcasts") if os.path.isdir(os.path.join("podcasts", folder))]
-podcasts = {}
-for topic in topics:
-    topic_path = os.path.join("podcasts", topic)
-    podcasts[topic] = sorted([f.replace(".wav", "") for f in os.listdir(topic_path) if f.endswith(".wav")], reverse=True)
 def submit_job(
-    repo_id: str,
     inference_provider: str,
     hf_token: str
 ):
     # Configuration variables
-    username = HfApi(token=hf_token).whoami()["name"]  # Your HuggingFace username
-    space_id = "fdaudens/podcast-jobs"  # Your space ID
-    # If you want to always use the username-based repo_id, remove repo_id from parameters
-    repo_id = f"{username}/podcast-jobs"
     flavor = "cpu-basic"  # Machine type
     # Create the API request
@@ -39,8 +44,7 @@ def submit_job(
         "spaceId": space_id,
         "command": ["python", "run_job.py"],
         "arguments": [
-            "--provider", inference_provider,
-            "--repo-id", repo_id
         ],
         "environment": {
             "HF_API_KEY": hf_token
@@ -55,11 +59,11 @@ def submit_job(
 def main():
     parser = argparse.ArgumentParser(description="Podcast job runner")
     parser.add_argument("--provider", type=str, default="hf-inference")
-    parser.add_argument("--repo-id", type=str, default="fdaudens/podcast-jobs")
     parser.add_argument("--flavor", type=str, default="t4-medium")
     args = parser.parse_args()
-    print(f"Arguments: provider={args.provider}, repo_id={args.repo_id}, flavor={args.flavor}")
     # 1. Get the most popular paper's content
     paper_manager = PaperManager()
@@ -102,39 +106,30 @@ def main():
     # Concatenate all audio segments
     if audio_segments:
         full_audio = np.concatenate(audio_segments)
-        # 4. Save as WAV file
         sf.write("podcast.wav", full_audio, sr)
-        print("Podcast audio saved as podcast.wav")
-        # --- Upload to Hugging Face repo ---
-        hf_token = os.environ.get("HF_TOKEN")
-        if hf_token is None:
-            print("No Hugging Face token found in environment. Skipping upload.")
-        else:
-            api = HfApi(token=hf_token)
-            username = api.whoami()["name"]
-            repo_id = f"{username}/podcast-jobs"  # Correct repo id
-            # Create the repo if it doesn't exist
-            try:
-                api.create_repo(repo_id, repo_type="model", private=False, exist_ok=True)
-            except Exception as e:
-                print(f"Warning: Could not create repo (it may already exist): {e}")
-            # Create a folder by date inside the podcasts subfolder
-            today = datetime.now().strftime("%Y-%m-%d")
-            remote_path = f"podcasts/podcast-{today}.wav"  # subfolder in repo
-            print(f"Uploading podcast.wav to {repo_id} at {remote_path} ...")
-            api.upload_file(
-                path_or_fileobj="podcast.wav",
-                path_in_repo=remote_path,
-                repo_id=repo_id,
-                token=hf_token
-            )
-            print(f"Uploaded podcast.wav to {repo_id}/{remote_path}")
     else:
         print("No audio generated.")
 if __name__ == "__main__":
-    main()

 import soundfile as sf
 import numpy as np
 import argparse
+from huggingface_hub import HfApi
 import requests
 import json
 from datetime import datetime
 import os
+# Scan for existing podcasts in the space
+def scan_podcasts():
+    podcast_dir = "podcasts"
+    if not os.path.exists(podcast_dir):
+        os.makedirs(podcast_dir, exist_ok=True)
+        return []
+    # Get all WAV files in the podcasts directory
+    podcasts = sorted([f.replace(".wav", "")
+                      for f in os.listdir(podcast_dir)
+                      if f.endswith(".wav")], reverse=True)
+    return podcasts
 def submit_job(
     inference_provider: str,
     hf_token: str
 ):
     # Configuration variables
+    username = HfApi(token=hf_token).whoami()["name"]
+    space_id = "fdaudens/podcast-jobs"  # Your Space ID
     flavor = "cpu-basic"  # Machine type
     # Create the API request
         "spaceId": space_id,
         "command": ["python", "run_job.py"],
         "arguments": [
+            "--provider", inference_provider
         ],
         "environment": {
             "HF_API_KEY": hf_token
 def main():
     parser = argparse.ArgumentParser(description="Podcast job runner")
     parser.add_argument("--provider", type=str, default="hf-inference")
+    parser.add_argument("--name", type=str, default="podcast")
     parser.add_argument("--flavor", type=str, default="t4-medium")
     args = parser.parse_args()
+    print(f"Arguments: provider={args.provider}, name={args.name}, flavor={args.flavor}")
     # 1. Get the most popular paper's content
     paper_manager = PaperManager()
     # Concatenate all audio segments
     if audio_segments:
         full_audio = np.concatenate(audio_segments)
+        # 4. Save as WAV file in the Space's file system
+        # Create podcasts directory if it doesn't exist
+        podcast_dir = "podcasts"
+        os.makedirs(podcast_dir, exist_ok=True)
+        # Generate filename with base name and date
+        today = datetime.now().strftime("%Y-%m-%d")
+        base_name = args.name
+        podcast_filename = f"{base_name}-{today}.wav"
+        podcast_path = os.path.join(podcast_dir, podcast_filename)
+        # Save the file
+        sf.write(podcast_path, full_audio, sr)
+        print(f"Podcast audio saved to {podcast_path}")
+        # Provide the access URL
+        print(f"Access URL: https://huggingface.co/spaces/fdaudens/podcast-jobs/blob/main/{podcast_path}")
+        # Also save a temporary local copy for debugging
         sf.write("podcast.wav", full_audio, sr)
+        print("Temporary copy saved as podcast.wav")
     else:
         print("No audio generated.")
 if __name__ == "__main__":
+    main()