Spaces:
Running
on
Zero
Running
on
Zero
backtrack to wav
Browse files
app.py
CHANGED
@@ -146,33 +146,9 @@ def generate_podcast(topic: str):
|
|
146 |
t0 = time.time()
|
147 |
ref_s = pipeline_voice[len(ps) - 1]
|
148 |
audio_numpy = kmodel(ps, ref_s, speed).numpy()
|
149 |
-
|
150 |
-
# Convert numpy array to MP3
|
151 |
-
with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_wav:
|
152 |
-
sf.write(temp_wav.name, audio_numpy, sr)
|
153 |
-
temp_wav_path = temp_wav.name
|
154 |
-
|
155 |
-
# Use pydub to convert WAV to MP3
|
156 |
-
audio_segment = AudioSegment.from_wav(temp_wav_path)
|
157 |
-
with tempfile.NamedTemporaryFile(suffix='.mp3', delete=False) as temp_mp3:
|
158 |
-
audio_segment.export(temp_mp3.name, format="mp3")
|
159 |
-
temp_mp3_path = temp_mp3.name
|
160 |
-
|
161 |
-
# Read the MP3 data
|
162 |
-
with open(temp_mp3_path, 'rb') as mp3_file:
|
163 |
-
mp3_data = mp3_file.read()
|
164 |
-
|
165 |
-
# Clean up temporary files
|
166 |
-
os.unlink(temp_wav_path)
|
167 |
-
os.unlink(temp_mp3_path)
|
168 |
-
|
169 |
-
# Yield MP3 data instead of numpy array
|
170 |
-
yield (sr, mp3_data)
|
171 |
-
|
172 |
t1 = time.time()
|
173 |
-
print(f"PROCESSED '{utterance}' in {int(t1-t0)} seconds.")
|
174 |
-
|
175 |
-
return temp_mp3_path # Return the path to the MP3 file
|
176 |
|
177 |
EXAMPLES = [
|
178 |
["https://huggingface.co/blog/inference-providers-cohere", None, "How does using this compare with other inference solutions?"],
|
@@ -190,7 +166,13 @@ Based on [Kokoro TTS](https://huggingface.co/hexgrad/Kokoro-82M) and [Llama-3.3-
|
|
190 |
placeholder="You can leave this blank for a general discussion.",
|
191 |
),
|
192 |
],
|
193 |
-
outputs=
|
|
|
|
|
|
|
|
|
|
|
|
|
194 |
theme=gr.themes.Soft(),
|
195 |
submit_btn="Generate podcast 🎙️",
|
196 |
)
|
|
|
146 |
t0 = time.time()
|
147 |
ref_s = pipeline_voice[len(ps) - 1]
|
148 |
audio_numpy = kmodel(ps, ref_s, speed).numpy()
|
149 |
+
yield (sr, audio_numpy)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
150 |
t1 = time.time()
|
151 |
+
print(f"PROCESSED '{utterance}' in {int(t1-t0)} seconds. {audio_numpy.shape}")
|
|
|
|
|
152 |
|
153 |
EXAMPLES = [
|
154 |
["https://huggingface.co/blog/inference-providers-cohere", None, "How does using this compare with other inference solutions?"],
|
|
|
166 |
placeholder="You can leave this blank for a general discussion.",
|
167 |
),
|
168 |
],
|
169 |
+
outputs=[
|
170 |
+
gr.Audio(
|
171 |
+
label="Listen to your podcast! 🔊",
|
172 |
+
format="wav",
|
173 |
+
streaming=True,
|
174 |
+
),
|
175 |
+
],
|
176 |
theme=gr.themes.Soft(),
|
177 |
submit_btn="Generate podcast 🎙️",
|
178 |
)
|