fdaudens HF Staff committed on
Commit
7961a34
·
1 Parent(s): ffc5299

repo id...

Browse files
Files changed (2) hide show
  1. .DS_Store +0 -0
  2. run_job.py +42 -47
.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
 
run_job.py CHANGED
@@ -3,29 +3,34 @@ from app import generate_podcast_script, kmodel, kpipeline, MALE_VOICE, FEMALE_V
3
  import soundfile as sf
4
  import numpy as np
5
  import argparse
6
- from huggingface_hub import HfApi, HfFolder
7
  import requests
8
  import json
9
  from datetime import datetime
10
  import os
11
 
12
- topics = [folder for folder in os.listdir("podcasts") if os.path.isdir(os.path.join("podcasts", folder))]
13
- podcasts = {}
14
-
15
- for topic in topics:
16
- topic_path = os.path.join("podcasts", topic)
17
- podcasts[topic] = sorted([f.replace(".wav", "") for f in os.listdir(topic_path) if f.endswith(".wav")], reverse=True)
 
 
 
 
 
 
 
 
18
 
19
  def submit_job(
20
- repo_id: str,
21
  inference_provider: str,
22
  hf_token: str
23
  ):
24
  # Configuration variables
25
- username = HfApi(token=hf_token).whoami()["name"] # Your HuggingFace username
26
- space_id = "fdaudens/podcast-jobs" # Your space ID
27
- # If you want to always use the username-based repo_id, remove repo_id from parameters
28
- repo_id = f"{username}/podcast-jobs"
29
  flavor = "cpu-basic" # Machine type
30
 
31
  # Create the API request
@@ -39,8 +44,7 @@ def submit_job(
39
  "spaceId": space_id,
40
  "command": ["python", "run_job.py"],
41
  "arguments": [
42
- "--provider", inference_provider,
43
- "--repo-id", repo_id
44
  ],
45
  "environment": {
46
  "HF_API_KEY": hf_token
@@ -55,11 +59,11 @@ def submit_job(
55
  def main():
56
  parser = argparse.ArgumentParser(description="Podcast job runner")
57
  parser.add_argument("--provider", type=str, default="hf-inference")
58
- parser.add_argument("--repo-id", type=str, default="fdaudens/podcast-jobs")
59
  parser.add_argument("--flavor", type=str, default="t4-medium")
60
  args = parser.parse_args()
61
 
62
- print(f"Arguments: provider={args.provider}, repo_id={args.repo_id}, flavor={args.flavor}")
63
 
64
  # 1. Get the most popular paper's content
65
  paper_manager = PaperManager()
@@ -102,39 +106,30 @@ def main():
102
  # Concatenate all audio segments
103
  if audio_segments:
104
  full_audio = np.concatenate(audio_segments)
105
- # 4. Save as WAV file
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  sf.write("podcast.wav", full_audio, sr)
107
- print("Podcast audio saved as podcast.wav")
108
-
109
- # --- Upload to Hugging Face repo ---
110
- hf_token = os.environ.get("HF_TOKEN")
111
- if hf_token is None:
112
- print("No Hugging Face token found in environment. Skipping upload.")
113
- else:
114
- api = HfApi(token=hf_token)
115
- username = api.whoami()["name"]
116
- repo_id = f"{username}/podcast-jobs" # Correct repo id
117
-
118
- # Create the repo if it doesn't exist
119
- try:
120
- api.create_repo(repo_id, repo_type="model", private=False, exist_ok=True)
121
- except Exception as e:
122
- print(f"Warning: Could not create repo (it may already exist): {e}")
123
-
124
- # Create a folder by date inside the podcasts subfolder
125
- today = datetime.now().strftime("%Y-%m-%d")
126
- remote_path = f"podcasts/podcast-{today}.wav" # subfolder in repo
127
-
128
- print(f"Uploading podcast.wav to {repo_id} at {remote_path} ...")
129
- api.upload_file(
130
- path_or_fileobj="podcast.wav",
131
- path_in_repo=remote_path,
132
- repo_id=repo_id,
133
- token=hf_token
134
- )
135
- print(f"Uploaded podcast.wav to {repo_id}/{remote_path}")
136
  else:
137
  print("No audio generated.")
138
 
139
  if __name__ == "__main__":
140
- main()
 
3
  import soundfile as sf
4
  import numpy as np
5
  import argparse
6
+ from huggingface_hub import HfApi
7
  import requests
8
  import json
9
  from datetime import datetime
10
  import os
11
 
# Scan for existing podcasts in the space
def scan_podcasts():
    """Return the names of the saved podcast WAV files, in reverse sorted order.

    Looks at the entries directly inside the ``podcasts`` directory (relative
    to the current working directory). If that directory does not exist it is
    created and an empty list is returned.

    Returns:
        list[str]: Entry names ending in ``.wav`` with that trailing extension
        removed, sorted in descending lexicographic order. Empty when the
        directory was missing or contains no ``.wav`` entries.
    """
    podcast_dir = "podcasts"
    suffix = ".wav"

    if not os.path.exists(podcast_dir):
        os.makedirs(podcast_dir, exist_ok=True)
        return []

    # Strip only the trailing extension: str.replace(".wav", "") would also
    # remove a ".wav" that occurs earlier in the name (e.g. "a.wav.b.wav").
    names = [
        entry[: -len(suffix)]
        for entry in os.listdir(podcast_dir)
        if entry.endswith(suffix)
    ]
    return sorted(names, reverse=True)
26
 
27
  def submit_job(
 
28
  inference_provider: str,
29
  hf_token: str
30
  ):
31
  # Configuration variables
32
+ username = HfApi(token=hf_token).whoami()["name"]
33
+ space_id = "fdaudens/podcast-jobs" # Your Space ID
 
 
34
  flavor = "cpu-basic" # Machine type
35
 
36
  # Create the API request
 
44
  "spaceId": space_id,
45
  "command": ["python", "run_job.py"],
46
  "arguments": [
47
+ "--provider", inference_provider
 
48
  ],
49
  "environment": {
50
  "HF_API_KEY": hf_token
 
59
  def main():
60
  parser = argparse.ArgumentParser(description="Podcast job runner")
61
  parser.add_argument("--provider", type=str, default="hf-inference")
62
+ parser.add_argument("--name", type=str, default="podcast")
63
  parser.add_argument("--flavor", type=str, default="t4-medium")
64
  args = parser.parse_args()
65
 
66
+ print(f"Arguments: provider={args.provider}, name={args.name}, flavor={args.flavor}")
67
 
68
  # 1. Get the most popular paper's content
69
  paper_manager = PaperManager()
 
106
  # Concatenate all audio segments
107
  if audio_segments:
108
  full_audio = np.concatenate(audio_segments)
109
+
110
+ # 4. Save as WAV file in the Space's file system
111
+ # Create podcasts directory if it doesn't exist
112
+ podcast_dir = "podcasts"
113
+ os.makedirs(podcast_dir, exist_ok=True)
114
+
115
+ # Generate filename with base name and date
116
+ today = datetime.now().strftime("%Y-%m-%d")
117
+ base_name = args.name
118
+ podcast_filename = f"{base_name}-{today}.wav"
119
+ podcast_path = os.path.join(podcast_dir, podcast_filename)
120
+
121
+ # Save the file
122
+ sf.write(podcast_path, full_audio, sr)
123
+ print(f"Podcast audio saved to {podcast_path}")
124
+
125
+ # Provide the access URL
126
+ print(f"Access URL: https://huggingface.co/spaces/fdaudens/podcast-jobs/blob/main/{podcast_path}")
127
+
128
+ # Also save a temporary local copy for debugging
129
  sf.write("podcast.wav", full_audio, sr)
130
+ print("Temporary copy saved as podcast.wav")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
  else:
132
  print("No audio generated.")
133
 
134
  if __name__ == "__main__":
135
+ main()