Mishmosh committed on
Commit
0476324
·
1 Parent(s): 9ec9b9a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -26
app.py CHANGED
@@ -273,36 +273,15 @@ print(summarized_text)
273
  # number_of_sentences-=1
274
  #print(summarized_text_list_list)
275
  #print(number_of_sentences)
276
- #text to speech
277
- #!pip install git+https://github.com/huggingface/transformers.git
278
- #!pip install datasets sentencepiece
279
- import torch
280
- #import soundfile as sf
281
- #from IPython.display import Audio
282
- from datasets import load_dataset
283
- from transformers import pipeline
284
  from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech
 
285
  processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
286
  model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
287
- #text = "The future belongs to those who believe in the beauty of their dreams."
288
- #text = (summarized_text_list_list)
289
- text = (summarized_text)
290
-
291
- #inputs = processor(text=summarized_text_list_list, return_tensors="pt")
292
- #inputs = processor("Michelletest", return_tensors="pt")
293
- inputs = processor(text, return_tensors="pt")
294
- from datasets import load_dataset
295
- embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
296
 
297
- import torch
298
- speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
299
- spectrogram = model.generate_speech(inputs["input_ids"], speaker_embeddings)
300
- from transformers import SpeechT5HifiGan
301
- vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
302
- with torch.no_grad():
303
- speech = vocoder(spectrogram)
304
- speech = model.generate_speech(inputs["input_ids"], speaker_embeddings, vocoder=vocoder)
305
- Audio(speech, rate=16000)
306
 
307
 
308
 
 
273
  # number_of_sentences-=1
274
  #print(summarized_text_list_list)
275
  #print(number_of_sentences)
276
+ # Step 2: Text to Speech (TTS)
 
 
 
 
 
 
 
277
  from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech
278
+
279
  processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
280
  model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
 
 
 
 
 
 
 
 
 
281
 
282
+ # Convert summarized text to speech
283
+ inputs = processor(summarized_text, return_tensors="pt")
284
+ spectrogram = model.generate_speech(inputs["input_ids"])
 
 
 
 
 
 
285
 
286
 
287