fdaudens HF Staff committed on
Commit
4cb353b
·
1 Parent(s): e24b277

backtrack to wav

Browse files
Files changed (1) hide show
  1. app.py +9 -27
app.py CHANGED
@@ -146,33 +146,9 @@ def generate_podcast(topic: str):
146
  t0 = time.time()
147
  ref_s = pipeline_voice[len(ps) - 1]
148
  audio_numpy = kmodel(ps, ref_s, speed).numpy()
149
-
150
- # Convert numpy array to MP3
151
- with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_wav:
152
- sf.write(temp_wav.name, audio_numpy, sr)
153
- temp_wav_path = temp_wav.name
154
-
155
- # Use pydub to convert WAV to MP3
156
- audio_segment = AudioSegment.from_wav(temp_wav_path)
157
- with tempfile.NamedTemporaryFile(suffix='.mp3', delete=False) as temp_mp3:
158
- audio_segment.export(temp_mp3.name, format="mp3")
159
- temp_mp3_path = temp_mp3.name
160
-
161
- # Read the MP3 data
162
- with open(temp_mp3_path, 'rb') as mp3_file:
163
- mp3_data = mp3_file.read()
164
-
165
- # Clean up temporary files
166
- os.unlink(temp_wav_path)
167
- os.unlink(temp_mp3_path)
168
-
169
- # Yield MP3 data instead of numpy array
170
- yield (sr, mp3_data)
171
-
172
  t1 = time.time()
173
- print(f"PROCESSED '{utterance}' in {int(t1-t0)} seconds. MP3 conversion completed.")
174
-
175
- return temp_mp3_path # Return the path to the MP3 file
176
 
177
  EXAMPLES = [
178
  ["https://huggingface.co/blog/inference-providers-cohere", None, "How does using this compare with other inference solutions?"],
@@ -190,7 +166,13 @@ Based on [Kokoro TTS](https://huggingface.co/hexgrad/Kokoro-82M) and [Llama-3.3-
190
  placeholder="You can leave this blank for a general discussion.",
191
  ),
192
  ],
193
- outputs=gr.Audio(type="filepath"),
 
 
 
 
 
 
194
  theme=gr.themes.Soft(),
195
  submit_btn="Generate podcast 🎙️",
196
  )
 
146
  t0 = time.time()
147
  ref_s = pipeline_voice[len(ps) - 1]
148
  audio_numpy = kmodel(ps, ref_s, speed).numpy()
149
+ yield (sr, audio_numpy)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
  t1 = time.time()
151
+ print(f"PROCESSED '{utterance}' in {int(t1-t0)} seconds. {audio_numpy.shape}")
 
 
152
 
153
  EXAMPLES = [
154
  ["https://huggingface.co/blog/inference-providers-cohere", None, "How does using this compare with other inference solutions?"],
 
166
  placeholder="You can leave this blank for a general discussion.",
167
  ),
168
  ],
169
+ outputs=[
170
+ gr.Audio(
171
+ label="Listen to your podcast! 🔊",
172
+ format="wav",
173
+ streaming=True,
174
+ ),
175
+ ],
176
  theme=gr.themes.Soft(),
177
  submit_btn="Generate podcast 🎙️",
178
  )