guangyil committed
Commit 3043708 · verified · 1 Parent(s): 5cf1b23

Update app.py
Files changed (1):
  app.py  +15 -7
app.py CHANGED

@@ -26,8 +26,14 @@ from huggingface_hub import hf_hub_download
 
 from infer import load_model, eval_model
 from spkr import SpeakerEmbedding
+
+@spaces.GPU
+def spkr_model_init():
+    spkr_model = SpeakerEmbedding(device="cpu")
+    return spkr_model
 
-spkr_model = SpeakerEmbedding(device="cpu")
+spkr_model = spkr_model_init()
+spkr_model.to("cuda")
 model, tokenizer, tokenizer_voila, model_type = load_model("maitrix-org/Voila-chat", "maitrix-org/Voila-Tokenizer")
 default_ref_file = "examples/character_ref_emb_demo.pkl"
 default_ref_name = "Homer Simpson"
@@ -45,9 +51,7 @@ million_voice_ref_emb_mask_list = pickle.load(open(million_voice_ref_file, "rb"))
 
 def get_ref_embs(ref_audio):
     wav, sr = torchaudio.load(ref_audio)
-    spkr_model.to("cuda")
     ref_embs = spkr_model(wav, sr).cpu()
-    spkr_model.to("cpu")
     return ref_embs
 
 def delete_directory(request: gr.Request):
@@ -69,8 +73,10 @@ def call_bot(history, ref_embs, request: gr.Request):
     }
     formated_history["conversations"].append({"from": "assistant"})
     print(formated_history)
-    ref_embs = torch.tensor(ref_embs, dtype=torch.float32, device="cuda")
-    ref_embs_mask = torch.tensor([1], device="cuda")
+    ref_embs = torch.tensor(ref_embs, dtype=torch.float32, device="cpu")
+    ref_embs_mask = torch.tensor([1], device="cpu")
+    ref_embs.to("cuda")
+    ref_embs_mask.to("cuda")
     out = eval_model(model, tokenizer, tokenizer_voila, model_type, "chat_aiao", formated_history, ref_embs, ref_embs_mask, max_new_tokens=512)
     if 'audio' in out:
         wav, sr = out['audio']
@@ -93,8 +99,10 @@ def run_tts(text, ref_embs):
         "conversations": [{'from': "user", 'text': text}],
     }
     formated_history["conversations"].append({"from": "assistant"})
-    ref_embs = torch.tensor(ref_embs, dtype=torch.float32, device="cuda")
-    ref_embs_mask = torch.tensor([1], device="cuda")
+    ref_embs = torch.tensor(ref_embs, dtype=torch.float32, device="cpu")
+    ref_embs_mask = torch.tensor([1], device="cpu")
+    ref_embs.to("cuda")
+    ref_embs_mask.to("cuda")
     out = eval_model(model, tokenizer, tokenizer_voila, model_type, "chat_tts", formated_history, ref_embs, ref_embs_mask, max_new_tokens=512)
     if 'audio' in out:
         wav, sr = out['audio']
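
Background on the decorator this commit introduces: @spaces.GPU comes from the spaces package available on Hugging Face ZeroGPU Spaces, where a CUDA device is attached to the process only while a decorated function is executing. The sketch below shows that common usage pattern under this assumption; the model and function names are illustrative placeholders, not code from this repository.

import spaces   # provided in Hugging Face ZeroGPU Spaces
import torch

# Build the model on CPU at import time; ZeroGPU attaches a GPU only inside
# functions decorated with @spaces.GPU.
model = torch.nn.Linear(16, 16)

@spaces.GPU  # a GPU is allocated for the duration of each call
def run_inference(x: torch.Tensor) -> torch.Tensor:
    model.to("cuda")                # nn.Module.to() moves the weights in place
    with torch.no_grad():
        y = model(x.to("cuda"))     # Tensor.to() returns a CUDA copy of the input
    return y.cpu()                  # hand results back on the CPU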
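
One PyTorch detail that is easy to miss when reading the new ref_embs lines: torch.Tensor.to() returns a new tensor rather than moving the original in place, so the result has to be rebound to take effect. A minimal standalone illustration (assumes a CUDA device is available; not code from this repository):

import torch

ref_embs = torch.zeros(4, dtype=torch.float32, device="cpu")

ref_embs.to("cuda")              # returns a CUDA copy, which is discarded here
print(ref_embs.device)           # still "cpu"

ref_embs = ref_embs.to("cuda")   # rebinding keeps the tensor that lives on the GPU
print(ref_embs.device)           # "cuda:0"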