podcast-generator

Sleeping

bluenevus committed on Apr 16

Commit

47f4f14

verified ·

1 Parent(s): b7f405c

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -135,9 +135,6 @@ def redistribute_codes(code_list, snac_model):
     audio_hat = snac_model.decode(codes)
     return audio_hat.detach().squeeze().cpu().numpy()  # Always return CPU numpy array
-@spaces.GPU()
-@spaces.GPU()
-@spaces.GPU()
 @spaces.GPU()
 def generate_speech(text, voice1, voice2, temperature, top_p, repetition_penalty, max_new_tokens, num_hosts, progress=gr.Progress()):
     if not text.strip():
@@ -194,22 +191,24 @@ def generate_speech(text, voice1, voice2, temperature, top_p, repetition_penalty
             channels=1
         )
         # Combine intro, speech, and outro
-        combined_audio = music + speech_audio + music
         # Convert back to numpy array
         combined_numpy = np.array(combined_audio.get_array_of_samples())
-        # Add a check for 15-second limitation
-        max_samples = 24000 * 15  # 15 seconds at 24kHz sample rate
-        if len(combined_numpy) > max_samples:
-            combined_numpy = combined_numpy[:max_samples]
-        return (24000, combined_numpy)
     except Exception as e:
         print(f"Error generating speech: {e}")
         return None
 with gr.Blocks(title="Orpheus Text-to-Speech") as demo:
     with gr.Row():
         with gr.Column(scale=1):

     audio_hat = snac_model.decode(codes)
     return audio_hat.detach().squeeze().cpu().numpy()  # Always return CPU numpy array
 @spaces.GPU()
 def generate_speech(text, voice1, voice2, temperature, top_p, repetition_penalty, max_new_tokens, num_hosts, progress=gr.Progress()):
     if not text.strip():
             channels=1
         )
+        # Adjust the volume of the intro/outro music (reduce by 6dB)
+        music = music - 6
         # Combine intro, speech, and outro
+        combined_audio = music[:5000] + speech_audio + music[-5000:]
         # Convert back to numpy array
         combined_numpy = np.array(combined_audio.get_array_of_samples())
+        # Ensure the audio is in the correct data type
+        combined_numpy = combined_numpy.astype(np.int16)
+        return (combined_audio.frame_rate, combined_numpy)
     except Exception as e:
         print(f"Error generating speech: {e}")
         return None
 with gr.Blocks(title="Orpheus Text-to-Speech") as demo:
     with gr.Row():
         with gr.Column(scale=1):