Spaces:

fdaudens
/

podcast-jobs-rss-test

Running on Zero

App Files Community

fdaudens HF Staff committed on May 13

Commit

4cb353b

1 Parent(s): e24b277

backtrack to wav

Browse files

Files changed (1) hide show

app.py +9 -27

app.py CHANGED Viewed

@@ -146,33 +146,9 @@ def generate_podcast(topic: str):
             t0 = time.time()
             ref_s = pipeline_voice[len(ps) - 1]
             audio_numpy = kmodel(ps, ref_s, speed).numpy()
-            # Convert numpy array to MP3
-            with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_wav:
-                sf.write(temp_wav.name, audio_numpy, sr)
-                temp_wav_path = temp_wav.name
-            # Use pydub to convert WAV to MP3
-            audio_segment = AudioSegment.from_wav(temp_wav_path)
-            with tempfile.NamedTemporaryFile(suffix='.mp3', delete=False) as temp_mp3:
-                audio_segment.export(temp_mp3.name, format="mp3")
-                temp_mp3_path = temp_mp3.name
-            # Read the MP3 data
-            with open(temp_mp3_path, 'rb') as mp3_file:
-                mp3_data = mp3_file.read()
-            # Clean up temporary files
-            os.unlink(temp_wav_path)
-            os.unlink(temp_mp3_path)
-            # Yield MP3 data instead of numpy array
-            yield (sr, mp3_data)
             t1 = time.time()
-            print(f"PROCESSED '{utterance}' in {int(t1-t0)} seconds. MP3 conversion completed.")
-    return temp_mp3_path  # Return the path to the MP3 file
 EXAMPLES = [
     ["https://huggingface.co/blog/inference-providers-cohere", None, "How does using this compare with other inference solutions?"],
@@ -190,7 +166,13 @@ Based on [Kokoro TTS](https://huggingface.co/hexgrad/Kokoro-82M) and [Llama-3.3-
             placeholder="You can leave this blank for a general discussion.",
         ),
     ],
-    outputs=gr.Audio(type="filepath"),
     theme=gr.themes.Soft(),
     submit_btn="Generate podcast 🎙️",
 )

             t0 = time.time()
             ref_s = pipeline_voice[len(ps) - 1]
             audio_numpy = kmodel(ps, ref_s, speed).numpy()
+            yield (sr, audio_numpy)
             t1 = time.time()
+            print(f"PROCESSED '{utterance}' in {int(t1-t0)} seconds. {audio_numpy.shape}")
 EXAMPLES = [
     ["https://huggingface.co/blog/inference-providers-cohere", None, "How does using this compare with other inference solutions?"],
             placeholder="You can leave this blank for a general discussion.",
         ),
     ],
+    outputs=[
+        gr.Audio(
+            label="Listen to your podcast! 🔊",
+            format="wav",
+            streaming=True,
+        ),
+    ],
     theme=gr.themes.Soft(),
     submit_btn="Generate podcast 🎙️",
 )