added multiprocessing capabilities
Browse files- gradio_app.py +73 -40
gradio_app.py
CHANGED
@@ -10,10 +10,31 @@ import shutil
|
|
10 |
import warnings
|
11 |
import os
|
12 |
import gradio as gr
|
|
|
|
|
13 |
from notebook_lm_kokoro import generate_podcast_script, generate_audio_from_script
|
14 |
warnings.filterwarnings("ignore")
|
15 |
|
16 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
def generate_audio_from_script_with_voices(script, speaker1_voice, speaker2_voice, output_file):
|
18 |
voice_map = {"Speaker 1": speaker1_voice, "Speaker 2": speaker2_voice}
|
19 |
|
@@ -29,31 +50,24 @@ def generate_audio_from_script_with_voices(script, speaker1_voice, speaker2_voic
|
|
29 |
raise ValueError("Transcript is not a list")
|
30 |
|
31 |
all_audio_segments = []
|
32 |
-
#
|
33 |
-
|
34 |
-
if not isinstance(entry, tuple) or len(entry) != 2:
|
35 |
-
print(f"Skipping invalid entry {i}: {entry}")
|
36 |
-
continue
|
37 |
-
|
38 |
-
speaker, dialogue = entry
|
39 |
-
chosen_voice = voice_map.get(speaker, "af_heart")
|
40 |
-
print(f"Generating audio for {speaker} with voice '{chosen_voice}'...")
|
41 |
-
|
42 |
-
# Updated KPipeline initialization with explicit repo_id
|
43 |
-
pipeline = KPipeline(lang_code="a", repo_id="hexgrad/Kokoro-82M")
|
44 |
-
generator = pipeline(dialogue, voice=chosen_voice)
|
45 |
-
|
46 |
-
segment_audio = []
|
47 |
-
for j, (gs, ps, audio) in enumerate(generator):
|
48 |
-
# print(f"{speaker} - Segment {j}: Global Step = {gs}, Partial Step = {ps}")
|
49 |
-
segment_audio.append(audio)
|
50 |
-
|
51 |
-
if segment_audio:
|
52 |
-
segment_full = np.concatenate(segment_audio, axis=0)
|
53 |
-
all_audio_segments.append(segment_full)
|
54 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
if not all_audio_segments:
|
56 |
-
print("No audio segments were generated
|
57 |
return None
|
58 |
|
59 |
# Add a pause between segments
|
@@ -106,12 +120,31 @@ def process_pdf(pdf_file, speaker1_voice, speaker2_voice, provider, api_key, ope
|
|
106 |
f"audio_{os.path.basename(tmp_path).replace('.pdf', '.wav')}"
|
107 |
)
|
108 |
|
109 |
-
result = generate_audio_from_script_with_voices(
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
115 |
|
116 |
if result is None:
|
117 |
return "Error generating audio", None
|
@@ -201,16 +234,16 @@ def create_gradio_app():
|
|
201 |
type="filepath"
|
202 |
)
|
203 |
|
204 |
-
# Examples section
|
205 |
-
gr.Examples(
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
)
|
214 |
|
215 |
submit_btn.click(
|
216 |
fn=process_pdf,
|
|
|
10 |
import warnings
|
11 |
import os
|
12 |
import gradio as gr
|
13 |
+
import concurrent.futures
|
14 |
+
import multiprocessing
|
15 |
from notebook_lm_kokoro import generate_podcast_script, generate_audio_from_script
|
16 |
warnings.filterwarnings("ignore")
|
17 |
|
18 |
+
# Define number of workers based on CPU cores
|
19 |
+
NUM_WORKERS = multiprocessing.cpu_count() # Gets total CPU cores
|
20 |
+
|
21 |
+
def process_segment(entry_and_voice_map):
    """Worker for the process pool: synthesize audio for one transcript entry.

    Args:
        entry_and_voice_map: Tuple of ``(entry, voice_map)``. ``entry`` should
            be a ``(speaker, dialogue)`` pair; ``voice_map`` maps speaker
            labels (e.g. "Speaker 1") to Kokoro voice names.

    Returns:
        A numpy array with the concatenated audio for the entry, or ``None``
        when the entry is malformed or the pipeline produced no audio chunks.
        The caller filters out ``None`` results.
    """
    entry, voice_map = entry_and_voice_map  # Unpack the tuple

    # The sequential implementation skipped malformed entries; keep that
    # behavior here so one bad entry returns None instead of raising inside
    # the worker and aborting the whole executor.map() batch.
    if not isinstance(entry, tuple) or len(entry) != 2:
        print(f"Skipping invalid entry: {entry}")
        return None

    speaker, dialogue = entry
    chosen_voice = voice_map.get(speaker, "af_heart")
    print(f"Generating audio for {speaker} with voice '{chosen_voice}'...")

    # Each worker process builds its own pipeline instance rather than
    # sharing one across processes.
    pipeline = KPipeline(lang_code="a", repo_id="hexgrad/Kokoro-82M")
    generator = pipeline(dialogue, voice=chosen_voice)

    # The pipeline yields (graphemes, phonemes, audio) triples; keep audio.
    segment_audio = [audio for _, _, audio in generator]

    if segment_audio:
        return np.concatenate(segment_audio, axis=0)
    return None
|
37 |
+
|
38 |
def generate_audio_from_script_with_voices(script, speaker1_voice, speaker2_voice, output_file):
|
39 |
voice_map = {"Speaker 1": speaker1_voice, "Speaker 2": speaker2_voice}
|
40 |
|
|
|
50 |
raise ValueError("Transcript is not a list")
|
51 |
|
52 |
all_audio_segments = []
|
53 |
+
# Prepare input data with voice_map for each entry
|
54 |
+
entries_with_voice_map = [(entry, voice_map) for entry in transcript_list]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
|
56 |
+
try:
|
57 |
+
# Process segments in parallel
|
58 |
+
with concurrent.futures.ProcessPoolExecutor(max_workers=NUM_WORKERS) as executor:
|
59 |
+
# Map the processing function across all dialogue entries
|
60 |
+
results = list(executor.map(process_segment, entries_with_voice_map))
|
61 |
+
|
62 |
+
# Filter out None results and combine audio segments
|
63 |
+
all_audio_segments = [r for r in results if r is not None]
|
64 |
+
|
65 |
+
except Exception as e:
|
66 |
+
print(f"Error during audio generation: {e}")
|
67 |
+
return None
|
68 |
+
|
69 |
if not all_audio_segments:
|
70 |
+
print("No audio segments were generated")
|
71 |
return None
|
72 |
|
73 |
# Add a pause between segments
|
|
|
120 |
f"audio_{os.path.basename(tmp_path).replace('.pdf', '.wav')}"
|
121 |
)
|
122 |
|
123 |
+
# result = generate_audio_from_script_with_voices(
|
124 |
+
# transcript,
|
125 |
+
# speaker1_voice,
|
126 |
+
# speaker2_voice,
|
127 |
+
# output_file=audio_output_path
|
128 |
+
# )
|
129 |
+
|
130 |
+
# Use ProcessPoolExecutor with explicit number of workers
|
131 |
+
with concurrent.futures.ProcessPoolExecutor(max_workers=NUM_WORKERS) as executor:
|
132 |
+
print(f"Processing with {NUM_WORKERS} CPU cores")
|
133 |
+
# Submit audio generation task to the executor
|
134 |
+
future = executor.submit(
|
135 |
+
generate_audio_from_script_with_voices,
|
136 |
+
transcript, speaker1_voice, speaker2_voice, audio_output_path
|
137 |
+
)
|
138 |
+
result = future.result()
|
139 |
+
|
140 |
+
if result is None:
|
141 |
+
return "Error generating audio", None
|
142 |
+
|
143 |
+
return "Process complete!", result
|
144 |
+
|
145 |
+
except Exception as e:
|
146 |
+
print(f"Error in process_pdf: {str(e)}")
|
147 |
+
return f"Error processing file: {str(e)}", None
|
148 |
|
149 |
if result is None:
|
150 |
return "Error generating audio", None
|
|
|
234 |
type="filepath"
|
235 |
)
|
236 |
|
237 |
+
# # Examples section
|
238 |
+
# gr.Examples(
|
239 |
+
# examples=[
|
240 |
+
# ["sample.pdf", "af_heart", "af_nicole", "openrouter", "your-api-key-here", "https://openrouter.ai/api/v1"],
|
241 |
+
# ],
|
242 |
+
# inputs=[pdf_input, speaker1_voice, speaker2_voice, provider, api_key, openrouter_base],
|
243 |
+
# outputs=[status_output, audio_output],
|
244 |
+
# fn=process_pdf,
|
245 |
+
# cache_examples=True,
|
246 |
+
# )
|
247 |
|
248 |
submit_btn.click(
|
249 |
fn=process_pdf,
|