bluenevus committed on
Commit
47f4f14
·
verified ·
1 Parent(s): b7f405c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -9
app.py CHANGED
@@ -135,9 +135,6 @@ def redistribute_codes(code_list, snac_model):
135
  audio_hat = snac_model.decode(codes)
136
  return audio_hat.detach().squeeze().cpu().numpy() # Always return CPU numpy array
137
 
138
- @spaces.GPU()
139
- @spaces.GPU()
140
- @spaces.GPU()
141
  @spaces.GPU()
142
  def generate_speech(text, voice1, voice2, temperature, top_p, repetition_penalty, max_new_tokens, num_hosts, progress=gr.Progress()):
143
  if not text.strip():
@@ -194,22 +191,24 @@ def generate_speech(text, voice1, voice2, temperature, top_p, repetition_penalty
194
  channels=1
195
  )
196
 
 
 
 
197
  # Combine intro, speech, and outro
198
- combined_audio = music + speech_audio + music
199
 
200
  # Convert back to numpy array
201
  combined_numpy = np.array(combined_audio.get_array_of_samples())
202
 
203
- # Add a check for 15-second limitation
204
- max_samples = 24000 * 15 # 15 seconds at 24kHz sample rate
205
- if len(combined_numpy) > max_samples:
206
- combined_numpy = combined_numpy[:max_samples]
207
 
208
- return (24000, combined_numpy)
209
  except Exception as e:
210
  print(f"Error generating speech: {e}")
211
  return None
212
 
 
213
  with gr.Blocks(title="Orpheus Text-to-Speech") as demo:
214
  with gr.Row():
215
  with gr.Column(scale=1):
 
135
  audio_hat = snac_model.decode(codes)
136
  return audio_hat.detach().squeeze().cpu().numpy() # Always return CPU numpy array
137
 
 
 
 
138
  @spaces.GPU()
139
  def generate_speech(text, voice1, voice2, temperature, top_p, repetition_penalty, max_new_tokens, num_hosts, progress=gr.Progress()):
140
  if not text.strip():
 
191
  channels=1
192
  )
193
 
194
+ # Adjust the volume of the intro/outro music (reduce by 6dB)
195
+ music = music - 6
196
+
197
  # Combine intro, speech, and outro
198
+ combined_audio = music[:5000] + speech_audio + music[-5000:]
199
 
200
  # Convert back to numpy array
201
  combined_numpy = np.array(combined_audio.get_array_of_samples())
202
 
203
+ # Ensure the audio is in the correct data type
204
+ combined_numpy = combined_numpy.astype(np.int16)
 
 
205
 
206
+ return (combined_audio.frame_rate, combined_numpy)
207
  except Exception as e:
208
  print(f"Error generating speech: {e}")
209
  return None
210
 
211
+
212
  with gr.Blocks(title="Orpheus Text-to-Speech") as demo:
213
  with gr.Row():
214
  with gr.Column(scale=1):