fahadqazi committed on
Commit
cec247a
·
verified ·
1 Parent(s): 98f7469

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -26
app.py CHANGED
@@ -5,10 +5,11 @@ import spaces
5
  import os
6
  import numpy as np
7
  import re
8
- from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
9
  from speechbrain.pretrained import EncoderClassifier
10
  from datasets import load_dataset
11
 
 
12
  device = "cuda" if torch.cuda.is_available() else "cpu"
13
 
14
  def load_models_and_data():
@@ -20,38 +21,17 @@ def load_models_and_data():
20
 
21
  model = SpeechT5ForTextToSpeech.from_pretrained("fahadqazi/testts1234").to(device)
22
  vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan").to(device)
23
-
24
- spk_model_name = "speechbrain/spkrec-xvect-voxceleb"
25
- speaker_model = EncoderClassifier.from_hparams(
26
- source=spk_model_name,
27
- run_opts={"device": device},
28
- savedir=os.path.join("/tmp", spk_model_name),
29
- )
30
-
31
- # Load a sample from a dataset for default embedding
32
- dataset = load_dataset("erenfazlioglu/turkishvoicedataset", split="train")
33
- example = dataset[304]
34
-
35
- return model, processor, vocoder, speaker_model, example
36
-
37
- model, processor, vocoder, speaker_model, default_example = load_models_and_data()
38
 
39
- # def create_speaker_embedding(waveform):
40
- # with torch.no_grad():
41
- # speaker_embeddings = speaker_model.encode_batch(torch.tensor(waveform).unsqueeze(0).to(device))
42
- # speaker_embeddings = torch.nn.functional.normalize(speaker_embeddings, dim=2)
43
- # speaker_embeddings = speaker_embeddings.squeeze()
44
- # return speaker_embeddings
45
 
46
- # def prepare_default_embedding(example):
47
- # audio = example["audio"]
48
- # return create_speaker_embedding(audio["array"])
49
 
50
  embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
51
  speaker_embeddings = embeddings_dataset[7306]["xvector"]
52
  speaker_embeddings = torch.tensor(speaker_embeddings).to(device)
53
 
54
- default_embedding = speaker_embeddings #prepare_default_embedding(default_example)
55
 
56
  # replacements = [
57
  # ("â", "a"), # Long a
 
5
  import os
6
  import numpy as np
7
  import re
8
+ from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan, AutoTokenizer
9
  from speechbrain.pretrained import EncoderClassifier
10
  from datasets import load_dataset
11
 
12
+
13
  device = "cuda" if torch.cuda.is_available() else "cpu"
14
 
15
  def load_models_and_data():
 
21
 
22
  model = SpeechT5ForTextToSpeech.from_pretrained("fahadqazi/testts1234").to(device)
23
  vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan").to(device)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
+
26
+ return model, processor, vocoder
 
 
 
 
27
 
28
+ model, processor, vocoder = load_models_and_data()
 
 
29
 
30
  embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
31
  speaker_embeddings = embeddings_dataset[7306]["xvector"]
32
  speaker_embeddings = torch.tensor(speaker_embeddings).to(device)
33
 
34
+ default_embedding = speaker_embeddings
35
 
36
  # replacements = [
37
  # ("â", "a"), # Long a