SAUL19 committed on
Commit
fc96cf9
·
1 Parent(s): ea1deaf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -59
app.py CHANGED
@@ -46,83 +46,67 @@ def generateAudio(text_to_audio, s3_save_as, key_id):
46
 
47
  if AWS_ACCESS_KEY_ID != key_id:
48
  return "not permition"
49
-
50
  s3_save_as = '-'.join(s3_save_as.split()) + ".wav"
51
-
52
  def cut_text(text, max_tokens=500):
53
  # Remove non-alphanumeric characters, except periods and commas
54
  text = re.sub(r"[^\w\s.,]", "", text)
55
-
56
  # Replace multiple spaces with a single space
57
  text = re.sub(r"\s{2,}", " ", text)
58
-
59
  # Remove line breaks
60
  text = re.sub(r"\n", " ", text)
61
 
62
  return text
63
 
64
  def save_audio_to_s3(audio):
65
- # Create an instance of the S3 client
66
- s3 = boto3.client('s3',
67
- aws_access_key_id=AWS_ACCESS_KEY_ID,
68
- aws_secret_access_key=AWS_SECRET_ACCESS_KEY)
69
-
70
- # Full path of the file in the bucket
71
- s3_key = "public/" + s3_save_as
72
-
73
- # Upload the audio file to the S3 bucket
74
- s3.upload_fileobj(audio, S3_BUCKET_NAME, s3_key)
 
 
 
 
75
 
76
  def save_text_to_speech(text, speaker=None):
77
  # Preprocess text and recortar
78
  text = cut_text(text, max_tokens=500)
79
-
80
- # Verificar si el texto tiene menos de 30 palabras
81
  palabras = text.split()
82
- if len(palabras) <= 30:
83
- # Generar audio para el texto completo
84
- inputs = processor(text=text, return_tensors="pt").to(device)
 
 
 
85
  if speaker is not None:
86
- speaker_embeddings = torch.tensor(
87
- embeddings_dataset[speaker]["xvector"]).unsqueeze(0).to(device)
88
  else:
89
  speaker_embeddings = torch.randn((1, 512)).to(device)
90
- speech = model.generate_speech(
91
- inputs["input_ids"], speaker_embeddings, vocoder=vocoder)
92
- combined_audio = speech
 
 
 
 
 
 
 
 
 
93
 
94
- # Crear objeto BytesIO para almacenar el audio
95
- audio_buffer = BytesIO()
96
- sf.write(audio_buffer, combined_audio.cpu().numpy(),
97
- samplerate=16000, format='WAV')
98
- audio_buffer.seek(0)
99
-
100
- # Guardar el audio combinado en S3
101
- save_audio_to_s3(audio_buffer)
102
- else:
103
- # Divide el texto en segmentos de 30 palabras
104
- segmentos = [' '.join(palabras[i:i+30])
105
- for i in range(0, len(palabras), 30)]
106
 
107
- # Generar audio para cada segmento y combinarlos
108
- audio_segments = []
109
- for segment in segmentos:
110
- inputs = processor(
111
- text=segment, return_tensors="pt").to(device)
112
- if speaker is not None:
113
- speaker_embeddings = torch.tensor(
114
- embeddings_dataset[speaker]["xvector"]).unsqueeze(0).to(device)
115
- else:
116
- speaker_embeddings = torch.randn((1, 512)).to(device)
117
- speech = model.generate_speech(
118
- inputs["input_ids"], speaker_embeddings, vocoder=vocoder)
119
- audio_segments.append(speech)
120
-
121
- if len(audio_segments) > 0:
122
- combined_audio = torch.cat(audio_segments, dim=0)
123
- else:
124
- combined_audio = None
125
-
126
  save_text_to_speech(text_to_audio, 2271)
127
  return s3_save_as
128
 
@@ -165,9 +149,6 @@ def list_s3_files():
165
  filename = os.path.splitext(filename_ext)[0]
166
  s3audio = 'public/%s.wav' % filename
167
 
168
- print("GENERATING ------------------")
169
- print(filename_ext)
170
-
171
  if check_if_exist(S3_BUCKET_NAME, s3audio):
172
  print('Audio %s already exists!' % s3audio)
173
  else:
@@ -175,7 +156,6 @@ def list_s3_files():
175
  response = s3_client.get_object(Bucket=S3_BUCKET_NAME, Key=KEY)
176
  content = response['Body'].read().decode('utf-8')
177
 
178
- print(content)
179
  if (len(content)):
180
  generateAudio(content, filename, AWS_ACCESS_KEY_ID)
181
  print("SUCCESS " + filename + ".wap")
 
46
 
47
  if AWS_ACCESS_KEY_ID != key_id:
48
  return "not permition"
49
+
50
  s3_save_as = '-'.join(s3_save_as.split()) + ".wav"
51
+
52
  def cut_text(text, max_tokens=500):
53
  # Remove non-alphanumeric characters, except periods and commas
54
  text = re.sub(r"[^\w\s.,]", "", text)
55
+
56
  # Replace multiple spaces with a single space
57
  text = re.sub(r"\s{2,}", " ", text)
58
+
59
  # Remove line breaks
60
  text = re.sub(r"\n", " ", text)
61
 
62
  return text
63
 
64
  def save_audio_to_s3(audio):
65
+ try:
66
+ # Create an instance of the S3 client
67
+ s3 = boto3.client('s3',
68
+ aws_access_key_id=AWS_ACCESS_KEY_ID,
69
+ aws_secret_access_key=AWS_SECRET_ACCESS_KEY)
70
+
71
+ # Full path of the file in the bucket
72
+ s3_key = "public/" + s3_save_as
73
+
74
+ # Upload the audio file to the S3 bucket
75
+ s3.upload_fileobj(audio, S3_BUCKET_NAME, s3_key)
76
+
77
+ Exception:
78
+ print("Error al guardar")
79
 
80
  def save_text_to_speech(text, speaker=None):
81
  # Preprocess text and recortar
82
  text = cut_text(text, max_tokens=500)
83
+
84
+ # Divide el texto en segmentos de 30 palabras
85
  palabras = text.split()
86
+ segmentos = [' '.join(palabras[i:i+30]) for i in range(0, len(palabras), 30)]
87
+
88
+ # Generar audio para cada segmento y combinarlos
89
+ audio_segments = []
90
+ for segment in segmentos:
91
+ inputs = processor(text=segment, return_tensors="pt").to(device)
92
  if speaker is not None:
93
+ speaker_embeddings = torch.tensor(embeddings_dataset[speaker]["xvector"]).unsqueeze(0).to(device)
 
94
  else:
95
  speaker_embeddings = torch.randn((1, 512)).to(device)
96
+ speech = model.generate_speech(inputs["input_ids"], speaker_embeddings, vocoder=vocoder)
97
+ audio_segments.append(speech)
98
+
99
+ combined_audio = torch.cat(audio_segments, dim=0)
100
+
101
+ # Crear objeto BytesIO para almacenar el audio
102
+ audio_buffer = BytesIO()
103
+ sf.write(audio_buffer, combined_audio.cpu().numpy(), samplerate=16000, format='WAV')
104
+ audio_buffer.seek(0)
105
+
106
+ # Guardar el audio combinado en S3
107
+ save_audio_to_s3(audio_buffer)
108
 
 
 
 
 
 
 
 
 
 
 
 
 
109
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  save_text_to_speech(text_to_audio, 2271)
111
  return s3_save_as
112
 
 
149
  filename = os.path.splitext(filename_ext)[0]
150
  s3audio = 'public/%s.wav' % filename
151
 
 
 
 
152
  if check_if_exist(S3_BUCKET_NAME, s3audio):
153
  print('Audio %s already exists!' % s3audio)
154
  else:
 
156
  response = s3_client.get_object(Bucket=S3_BUCKET_NAME, Key=KEY)
157
  content = response['Body'].read().decode('utf-8')
158
 
 
159
  if (len(content)):
160
  generateAudio(content, filename, AWS_ACCESS_KEY_ID)
161
  print("SUCCESS " + filename + ".wap")