SAUL19 committed on
Commit
04db253
·
1 Parent(s): 0b22baf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -18
app.py CHANGED
@@ -77,15 +77,11 @@ def generateAudio(text_to_audio, s3_save_as, key_id):
77
  # Preprocess text and recortar
78
  text = cut_text(text, max_tokens=500)
79
 
80
- # Divide el texto en segmentos de 30 palabras
81
  palabras = text.split()
82
- segmentos = [' '.join(palabras[i:i+30])
83
- for i in range(0, len(palabras), 30)]
84
-
85
- # Generar audio para cada segmento y combinarlos
86
- audio_segments = []
87
- for segment in segmentos:
88
- inputs = processor(text=segment, return_tensors="pt").to(device)
89
  if speaker is not None:
90
  speaker_embeddings = torch.tensor(
91
  embeddings_dataset[speaker]["xvector"]).unsqueeze(0).to(device)
@@ -93,18 +89,42 @@ def generateAudio(text_to_audio, s3_save_as, key_id):
93
  speaker_embeddings = torch.randn((1, 512)).to(device)
94
  speech = model.generate_speech(
95
  inputs["input_ids"], speaker_embeddings, vocoder=vocoder)
96
- audio_segments.append(speech)
97
-
98
- combined_audio = torch.cat(audio_segments, dim=0)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
 
100
- # Crear objeto BytesIO para almacenar el audio
101
- audio_buffer = BytesIO()
102
- sf.write(audio_buffer, combined_audio.cpu().numpy(),
103
- samplerate=16000, format='WAV')
104
- audio_buffer.seek(0)
 
105
 
106
- # Guardar el audio combinado en S3
107
- save_audio_to_s3(audio_buffer)
 
 
108
 
109
  save_text_to_speech(text_to_audio, 2271)
110
  return s3_save_as
@@ -167,6 +187,7 @@ def list_s3_files():
167
 
168
 
169
  demo = gr.Blocks()
 
170
  with demo:
171
 
172
  text = gr.Textbox()
 
77
  # Preprocess text and recortar
78
  text = cut_text(text, max_tokens=500)
79
 
80
+ # Verificar si el texto tiene menos de 30 palabras
81
  palabras = text.split()
82
+ if len(palabras) <= 30:
83
+ # Generar audio para el texto completo
84
+ inputs = processor(text=text, return_tensors="pt").to(device)
 
 
 
 
85
  if speaker is not None:
86
  speaker_embeddings = torch.tensor(
87
  embeddings_dataset[speaker]["xvector"]).unsqueeze(0).to(device)
 
89
  speaker_embeddings = torch.randn((1, 512)).to(device)
90
  speech = model.generate_speech(
91
  inputs["input_ids"], speaker_embeddings, vocoder=vocoder)
92
+ combined_audio = speech
93
+ else:
94
+ # Divide el texto en segmentos de 30 palabras
95
+ segmentos = [' '.join(palabras[i:i+30])
96
+ for i in range(0, len(palabras), 30)]
97
+
98
+ # Generar audio para cada segmento y combinarlos
99
+ audio_segments = []
100
+ for segment in segmentos:
101
+ inputs = processor(
102
+ text=segment, return_tensors="pt").to(device)
103
+ if speaker is not None:
104
+ speaker_embeddings = torch.tensor(
105
+ embeddings_dataset[speaker]["xvector"]).unsqueeze(0).to(device)
106
+ else:
107
+ speaker_embeddings = torch.randn((1, 512)).to(device)
108
+ speech = model.generate_speech(
109
+ inputs["input_ids"], speaker_embeddings, vocoder=vocoder)
110
+ audio_segments.append(speech)
111
+
112
+ if len(audio_segments) > 0:
113
+ combined_audio = torch.cat(audio_segments, dim=0)
114
+ else:
115
+ combined_audio = None
116
 
117
+ if combined_audio is not None:
118
+ # Crear objeto BytesIO para almacenar el audio
119
+ audio_buffer = BytesIO()
120
+ sf.write(audio_buffer, combined_audio.cpu().numpy(),
121
+ samplerate=16000, format='WAV')
122
+ audio_buffer.seek(0)
123
 
124
+ # Guardar el audio combinado en S3
125
+ save_audio_to_s3(audio_buffer)
126
+ else:
127
+ print("File with content null")
128
 
129
  save_text_to_speech(text_to_audio, 2271)
130
  return s3_save_as
 
187
 
188
 
189
  demo = gr.Blocks()
190
+
191
  with demo:
192
 
193
  text = gr.Textbox()