udayl committed on
Commit eab7fca · 1 Parent(s): df1a68a

added multiprocessing capabilities

Files changed (1):
  1. gradio_app.py +73 -40
gradio_app.py CHANGED
@@ -10,10 +10,31 @@ import shutil
 import warnings
 import os
 import gradio as gr
+import concurrent.futures
+import multiprocessing
 from notebook_lm_kokoro import generate_podcast_script, generate_audio_from_script
 warnings.filterwarnings("ignore")
 
-# A modified version of generate_audio_from_script to accept voice mapping
+# Define number of workers based on CPU cores
+NUM_WORKERS = multiprocessing.cpu_count()  # Gets total CPU cores
+
+def process_segment(entry_and_voice_map):
+    entry, voice_map = entry_and_voice_map  # Unpack the tuple
+    speaker, dialogue = entry
+    chosen_voice = voice_map.get(speaker, "af_heart")
+    print(f"Generating audio for {speaker} with voice '{chosen_voice}'...")
+
+    pipeline = KPipeline(lang_code="a", repo_id="hexgrad/Kokoro-82M")
+    generator = pipeline(dialogue, voice=chosen_voice)
+
+    segment_audio = []
+    for _, _, audio in generator:
+        segment_audio.append(audio)
+
+    if segment_audio:
+        return np.concatenate(segment_audio, axis=0)
+    return None
+
 def generate_audio_from_script_with_voices(script, speaker1_voice, speaker2_voice, output_file):
     voice_map = {"Speaker 1": speaker1_voice, "Speaker 2": speaker2_voice}
 
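
A note on the hunk above: `process_segment` constructs a fresh `KPipeline` for every dialogue entry, and `NUM_WORKERS = multiprocessing.cpu_count()` gives every logical core a worker that loads its own copy of the model, so memory rather than CPU may be the practical limit. `ProcessPoolExecutor` accepts an `initializer` that runs once per worker, which would let each process build one pipeline and reuse it across entries. A hedged sketch of that pattern (the `_PIPELINE` global, `_init_worker` name, and the `from kokoro import KPipeline` path are illustrative assumptions, not code from this commit):

    import concurrent.futures
    from kokoro import KPipeline  # assumed import path for KPipeline

    _PIPELINE = None  # one pipeline per worker process, built once

    def _init_worker():
        global _PIPELINE
        _PIPELINE = KPipeline(lang_code="a", repo_id="hexgrad/Kokoro-82M")

    # Each worker would then reuse _PIPELINE inside process_segment:
    # executor = concurrent.futures.ProcessPoolExecutor(
    #     max_workers=NUM_WORKERS, initializer=_init_worker)
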
@@ -29,31 +50,24 @@ def generate_audio_from_script_with_voices(script, speaker1_voice, speaker2_voice, output_file):
         raise ValueError("Transcript is not a list")
 
     all_audio_segments = []
-    # Process each dialogue entry
-    for i, entry in enumerate(transcript_list):
-        if not isinstance(entry, tuple) or len(entry) != 2:
-            print(f"Skipping invalid entry {i}: {entry}")
-            continue
-
-        speaker, dialogue = entry
-        chosen_voice = voice_map.get(speaker, "af_heart")
-        print(f"Generating audio for {speaker} with voice '{chosen_voice}'...")
-
-        # Updated KPipeline initialization with explicit repo_id
-        pipeline = KPipeline(lang_code="a", repo_id="hexgrad/Kokoro-82M")
-        generator = pipeline(dialogue, voice=chosen_voice)
-
-        segment_audio = []
-        for j, (gs, ps, audio) in enumerate(generator):
-            # print(f"{speaker} - Segment {j}: Global Step = {gs}, Partial Step = {ps}")
-            segment_audio.append(audio)
-
-        if segment_audio:
-            segment_full = np.concatenate(segment_audio, axis=0)
-            all_audio_segments.append(segment_full)
+    # Prepare input data with voice_map for each entry
+    entries_with_voice_map = [(entry, voice_map) for entry in transcript_list]
 
+    try:
+        # Process segments in parallel
+        with concurrent.futures.ProcessPoolExecutor(max_workers=NUM_WORKERS) as executor:
+            # Map the processing function across all dialogue entries
+            results = list(executor.map(process_segment, entries_with_voice_map))
+
+            # Filter out None results and combine audio segments
+            all_audio_segments = [r for r in results if r is not None]
+
+    except Exception as e:
+        print(f"Error during audio generation: {e}")
+        return None
+
     if not all_audio_segments:
-        print("No audio segments were generated.")
+        print("No audio segments were generated")
         return None
 
     # Add a pause between segments
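
Two behavioral notes on the hunk above. First, `executor.map` yields results in input order, not completion order, so the concatenation preserves the dialogue sequence even when short segments finish before long ones. Second, the old loop skipped malformed entries with a warning, while `process_segment` unpacks `speaker, dialogue = entry` unconditionally, so a single bad tuple now raises inside a worker and the whole run returns None via the new except branch. A self-contained sketch of the ordering guarantee (standalone demo, not code from this repo):

    import concurrent.futures
    import time

    def slow_square(x):
        time.sleep(0.1 * (5 - x))  # later inputs finish first
        return x * x

    if __name__ == "__main__":
        with concurrent.futures.ProcessPoolExecutor(max_workers=4) as executor:
            # Prints [0, 1, 4, 9, 16]: results arrive in input order,
            # not in completion order.
            print(list(executor.map(slow_square, range(5))))
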
@@ -106,12 +120,31 @@ def process_pdf(pdf_file, speaker1_voice, speaker2_voice, provider, api_key, openrouter_base):
             f"audio_{os.path.basename(tmp_path).replace('.pdf', '.wav')}"
         )
 
-        result = generate_audio_from_script_with_voices(
-            transcript,
-            speaker1_voice,
-            speaker2_voice,
-            output_file=audio_output_path
-        )
+        # result = generate_audio_from_script_with_voices(
+        #     transcript,
+        #     speaker1_voice,
+        #     speaker2_voice,
+        #     output_file=audio_output_path
+        # )
+
+        # Use ProcessPoolExecutor with explicit number of workers
+        with concurrent.futures.ProcessPoolExecutor(max_workers=NUM_WORKERS) as executor:
+            print(f"Processing with {NUM_WORKERS} CPU cores")
+            # Submit audio generation task to the executor
+            future = executor.submit(
+                generate_audio_from_script_with_voices,
+                transcript, speaker1_voice, speaker2_voice, audio_output_path
+            )
+            result = future.result()
+
+        if result is None:
+            return "Error generating audio", None
+
+        return "Process complete!", result
+
+    except Exception as e:
+        print(f"Error in process_pdf: {str(e)}")
+        return f"Error processing file: {str(e)}", None
 
     if result is None:
         return "Error generating audio", None
@@ -201,16 +234,16 @@ def create_gradio_app():
             type="filepath"
         )
 
-        # Examples section
-        gr.Examples(
-            examples=[
-                ["sample.pdf", "af_heart", "af_nicole", "openrouter", "your-api-key-here", "https://openrouter.ai/api/v1"],
-            ],
-            inputs=[pdf_input, speaker1_voice, speaker2_voice, provider, api_key, openrouter_base],
-            outputs=[status_output, audio_output],
-            fn=process_pdf,
-            cache_examples=True,
-        )
+        # # Examples section
+        # gr.Examples(
+        #     examples=[
+        #         ["sample.pdf", "af_heart", "af_nicole", "openrouter", "your-api-key-here", "https://openrouter.ai/api/v1"],
+        #     ],
+        #     inputs=[pdf_input, speaker1_voice, speaker2_voice, provider, api_key, openrouter_base],
+        #     outputs=[status_output, audio_output],
+        #     fn=process_pdf,
+        #     cache_examples=True,
+        # )
 
         submit_btn.click(
             fn=process_pdf,
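
Commenting out `gr.Examples` (hunk above) is consistent with how example caching behaves: with `cache_examples=True`, Gradio runs `process_pdf` on every example row at launch, which here would spin up process pools and call the TTS pipeline with a placeholder API key before the UI even loads. A hedged sketch that keeps the examples visible without executing them at startup:

    gr.Examples(
        examples=[
            ["sample.pdf", "af_heart", "af_nicole", "openrouter",
             "your-api-key-here", "https://openrouter.ai/api/v1"],
        ],
        inputs=[pdf_input, speaker1_voice, speaker2_voice,
                provider, api_key, openrouter_base],
        # cache_examples=False only pre-fills the inputs on click;
        # process_pdf is not run until the user submits.
        cache_examples=False,
    )
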
 