SAUL19 committed on
Commit
0b22baf
·
1 Parent(s): 11eb5d0

update app 2

Browse files
Files changed (1) hide show
  1. app.py +88 -20
app.py CHANGED
@@ -5,16 +5,17 @@ import re
5
  from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
6
  from datasets import load_dataset
7
  import torch
8
- import random
9
- import string
10
  import soundfile as sf
11
  import boto3
12
  from io import BytesIO
13
  import os
 
 
14
 
15
  AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID")
16
  AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY")
17
  S3_BUCKET_NAME = os.getenv("BUCKET_NAME")
 
18
 
19
  device = "cuda" if torch.cuda.is_available() else "cpu"
20
  # load the processor
@@ -40,20 +41,21 @@ speakers = {
40
  'slt': 6799 # US female
41
  }
42
 
 
43
  def generateAudio(text_to_audio, s3_save_as, key_id):
44
 
45
  if AWS_ACCESS_KEY_ID != key_id:
46
  return "not permition"
47
-
48
  s3_save_as = '-'.join(s3_save_as.split()) + ".wav"
49
-
50
  def cut_text(text, max_tokens=500):
51
  # Remove non-alphanumeric characters, except periods and commas
52
  text = re.sub(r"[^\w\s.,]", "", text)
53
-
54
  # Replace multiple spaces with a single space
55
  text = re.sub(r"\s{2,}", " ", text)
56
-
57
  # Remove line breaks
58
  text = re.sub(r"\n", " ", text)
59
 
@@ -74,37 +76,103 @@ def generateAudio(text_to_audio, s3_save_as, key_id):
74
  def save_text_to_speech(text, speaker=None):
75
  # Preprocess text and recortar
76
  text = cut_text(text, max_tokens=500)
77
-
78
  # Divide el texto en segmentos de 30 palabras
79
  palabras = text.split()
80
- segmentos = [' '.join(palabras[i:i+30]) for i in range(0, len(palabras), 30)]
81
-
 
82
  # Generar audio para cada segmento y combinarlos
83
  audio_segments = []
84
  for segment in segmentos:
85
  inputs = processor(text=segment, return_tensors="pt").to(device)
86
  if speaker is not None:
87
- speaker_embeddings = torch.tensor(embeddings_dataset[speaker]["xvector"]).unsqueeze(0).to(device)
 
88
  else:
89
  speaker_embeddings = torch.randn((1, 512)).to(device)
90
- speech = model.generate_speech(inputs["input_ids"], speaker_embeddings, vocoder=vocoder)
 
91
  audio_segments.append(speech)
92
-
93
  combined_audio = torch.cat(audio_segments, dim=0)
94
-
95
  # Crear objeto BytesIO para almacenar el audio
96
  audio_buffer = BytesIO()
97
- sf.write(audio_buffer, combined_audio.cpu().numpy(), samplerate=16000, format='WAV')
 
98
  audio_buffer.seek(0)
99
-
100
  # Guardar el audio combinado en S3
101
  save_audio_to_s3(audio_buffer)
102
-
103
-
104
  save_text_to_speech(text_to_audio, 2271)
105
  return s3_save_as
106
-
107
 
108
- iface = gr.Interface(fn=generateAudio, inputs=[Textbox(label="text_to_audio"), Textbox(label="S3url"), Textbox(label="aws_key_id")], outputs="text", title="Text-to-Audio")
109
- iface.launch()
110
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
6
  from datasets import load_dataset
7
  import torch
 
 
8
  import soundfile as sf
9
  import boto3
10
  from io import BytesIO
11
  import os
12
+ import botocore
13
+ from time import sleep
14
 
15
  AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID")
16
  AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY")
17
  S3_BUCKET_NAME = os.getenv("BUCKET_NAME")
18
+ FOLDER = 'public/mdx/'
19
 
20
  device = "cuda" if torch.cuda.is_available() else "cpu"
21
  # load the processor
 
41
  'slt': 6799 # US female
42
  }
43
 
44
+
45
  def generateAudio(text_to_audio, s3_save_as, key_id):
46
 
47
  if AWS_ACCESS_KEY_ID != key_id:
48
  return "not permition"
49
+
50
  s3_save_as = '-'.join(s3_save_as.split()) + ".wav"
51
+
52
  def cut_text(text, max_tokens=500):
53
  # Remove non-alphanumeric characters, except periods and commas
54
  text = re.sub(r"[^\w\s.,]", "", text)
55
+
56
  # Replace multiple spaces with a single space
57
  text = re.sub(r"\s{2,}", " ", text)
58
+
59
  # Remove line breaks
60
  text = re.sub(r"\n", " ", text)
61
 
 
76
  def save_text_to_speech(text, speaker=None):
77
  # Preprocess text and recortar
78
  text = cut_text(text, max_tokens=500)
79
+
80
  # Divide el texto en segmentos de 30 palabras
81
  palabras = text.split()
82
+ segmentos = [' '.join(palabras[i:i+30])
83
+ for i in range(0, len(palabras), 30)]
84
+
85
  # Generar audio para cada segmento y combinarlos
86
  audio_segments = []
87
  for segment in segmentos:
88
  inputs = processor(text=segment, return_tensors="pt").to(device)
89
  if speaker is not None:
90
+ speaker_embeddings = torch.tensor(
91
+ embeddings_dataset[speaker]["xvector"]).unsqueeze(0).to(device)
92
  else:
93
  speaker_embeddings = torch.randn((1, 512)).to(device)
94
+ speech = model.generate_speech(
95
+ inputs["input_ids"], speaker_embeddings, vocoder=vocoder)
96
  audio_segments.append(speech)
97
+
98
  combined_audio = torch.cat(audio_segments, dim=0)
99
+
100
  # Crear objeto BytesIO para almacenar el audio
101
  audio_buffer = BytesIO()
102
+ sf.write(audio_buffer, combined_audio.cpu().numpy(),
103
+ samplerate=16000, format='WAV')
104
  audio_buffer.seek(0)
105
+
106
  # Guardar el audio combinado en S3
107
  save_audio_to_s3(audio_buffer)
108
+
 
109
  save_text_to_speech(text_to_audio, 2271)
110
  return s3_save_as
 
111
 
 
 
112
 
113
def check_if_exist(bucket_name, key):
    """Report whether an object is present in an S3 bucket.

    The object's metadata is loaded through a boto3 S3 resource built from
    the module-level AWS credentials.

    Args:
        bucket_name: Name of the bucket to look in.
        key: Key of the object to probe.

    Returns:
        True when the load succeeds, False when the client error code
        is "404".

    Raises:
        botocore.exceptions.ClientError: For any client error whose code is
            not "404".
    """
    resource = boto3.resource(
        's3',
        aws_access_key_id=AWS_ACCESS_KEY_ID,
        aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
    )
    candidate = resource.Object(bucket_name, key)

    try:
        candidate.load()
    except botocore.exceptions.ClientError as err:
        # Only a "404" means the object is missing; everything else is a
        # genuine failure and is re-raised unchanged.
        if err.response['Error']['Code'] != "404":
            raise
        return False
    return True
130
+
131
+
132
def list_s3_files():
    """Create the missing audio file for each object stored under ``FOLDER``.

    Every object in ``S3_BUCKET_NAME`` whose key starts with ``FOLDER`` is
    mapped to the audio key ``public/<basename without extension>.wav``.
    When ``check_if_exist`` reports that audio key as missing and the source
    object carries a ``titulo`` metadata entry, ``generateAudio`` is called
    with that title and the base name.

    Returns:
        str: The progress messages (also printed to stdout) joined by
        newlines, so the Gradio component wired to this callback has
        something to display. Empty when no object matched the prefix.

    Raises:
        botocore.exceptions.ClientError: Propagated from ``check_if_exist``
            (non-"404" errors) or from the ``head_object`` call.
    """
    s3_client = boto3.client(
        's3',
        aws_access_key_id=AWS_ACCESS_KEY_ID,
        aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
    )
    s3 = boto3.resource(
        's3',
        aws_access_key_id=AWS_ACCESS_KEY_ID,
        aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
    )
    bucket = s3.Bucket(S3_BUCKET_NAME)

    messages = []

    def report(message):
        # Keep the console output and also collect it for the return value.
        print(message)
        messages.append(str(message))

    for obj in bucket.objects.filter(Prefix=FOLDER):
        report(obj.key)

        filename = os.path.splitext(os.path.basename(obj.key))[0]
        if not filename:
            # A key ending in "/" (e.g. an entry for the prefix itself) has
            # an empty base name; there is nothing to synthesize for it.
            continue

        s3audio = f'public/{filename}.wav'

        if check_if_exist(S3_BUCKET_NAME, s3audio):
            report(f'Audio {s3audio} already exists!')
        else:
            response = s3_client.head_object(
                Bucket=S3_BUCKET_NAME, Key=obj.key)
            metadata = response['Metadata']
            report(metadata)
            if 'titulo' in metadata:
                report('Has titulo, ready to create Audio!')
                report(f'Start creating audio.. {s3audio} ')
                generateAudio(metadata['titulo'], filename, AWS_ACCESS_KEY_ID)
            else:
                report('There is NOT resume, skipping..')

        # Short pause between objects.
        # NOTE(review): the original nesting of this sleep was not
        # recoverable from the diff view - confirm it belongs per iteration.
        sleep(0.5)

    return "\n".join(messages)
167
+
168
+
169
# Gradio UI: a textbox plus a button whose click runs list_s3_files and
# routes the callback's return value to the textbox.
with gr.Blocks() as demo:
    text = gr.Textbox()
    bimage = gr.Button("Generate Blog Images for PineSearch!")
    bimage.click(list_s3_files, outputs=text)

demo.launch()