Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -135,9 +135,6 @@ def redistribute_codes(code_list, snac_model):
|
|
135 |
audio_hat = snac_model.decode(codes)
|
136 |
return audio_hat.detach().squeeze().cpu().numpy() # Always return CPU numpy array
|
137 |
|
138 |
-
@spaces.GPU()
|
139 |
-
@spaces.GPU()
|
140 |
-
@spaces.GPU()
|
141 |
@spaces.GPU()
|
142 |
def generate_speech(text, voice1, voice2, temperature, top_p, repetition_penalty, max_new_tokens, num_hosts, progress=gr.Progress()):
|
143 |
if not text.strip():
|
@@ -194,22 +191,24 @@ def generate_speech(text, voice1, voice2, temperature, top_p, repetition_penalty
|
|
194 |
channels=1
|
195 |
)
|
196 |
|
|
|
|
|
|
|
197 |
# Combine intro, speech, and outro
|
198 |
-
combined_audio = music + speech_audio + music
|
199 |
|
200 |
# Convert back to numpy array
|
201 |
combined_numpy = np.array(combined_audio.get_array_of_samples())
|
202 |
|
203 |
-
#
|
204 |
-
|
205 |
-
if len(combined_numpy) > max_samples:
|
206 |
-
combined_numpy = combined_numpy[:max_samples]
|
207 |
|
208 |
-
return (
|
209 |
except Exception as e:
|
210 |
print(f"Error generating speech: {e}")
|
211 |
return None
|
212 |
|
|
|
213 |
with gr.Blocks(title="Orpheus Text-to-Speech") as demo:
|
214 |
with gr.Row():
|
215 |
with gr.Column(scale=1):
|
|
|
135 |
audio_hat = snac_model.decode(codes)
|
136 |
return audio_hat.detach().squeeze().cpu().numpy() # Always return CPU numpy array
|
137 |
|
|
|
|
|
|
|
138 |
@spaces.GPU()
|
139 |
def generate_speech(text, voice1, voice2, temperature, top_p, repetition_penalty, max_new_tokens, num_hosts, progress=gr.Progress()):
|
140 |
if not text.strip():
|
|
|
191 |
channels=1
|
192 |
)
|
193 |
|
194 |
+
# Adjust the volume of the intro/outro music (reduce by 6dB)
|
195 |
+
music = music - 6
|
196 |
+
|
197 |
# Combine intro, speech, and outro
|
198 |
+
combined_audio = music[:5000] + speech_audio + music[-5000:]
|
199 |
|
200 |
# Convert back to numpy array
|
201 |
combined_numpy = np.array(combined_audio.get_array_of_samples())
|
202 |
|
203 |
+
# Ensure the audio is in the correct data type
|
204 |
+
combined_numpy = combined_numpy.astype(np.int16)
|
|
|
|
|
205 |
|
206 |
+
return (combined_audio.frame_rate, combined_numpy)
|
207 |
except Exception as e:
|
208 |
print(f"Error generating speech: {e}")
|
209 |
return None
|
210 |
|
211 |
+
|
212 |
with gr.Blocks(title="Orpheus Text-to-Speech") as demo:
|
213 |
with gr.Row():
|
214 |
with gr.Column(scale=1):
|