justus-tobias committed on
Commit
d8213a7
·
1 Parent(s): 2321d9d
Files changed (1) hide show
  1. app.py +8 -4
app.py CHANGED
@@ -11,10 +11,10 @@ mimi_weight = hf_hub_download(loaders.DEFAULT_REPO, loaders.MIMI_NAME)
11
  moshi_weight = hf_hub_download(loaders.DEFAULT_REPO, loaders.MOSHI_NAME)
12
 
13
 
14
-
15
  def compute_codes(wav):
16
  """wav = torch.randn(1, 1, 24000 * 10) # should be [B, C=1, T]"""
17
- mimi = loaders.get_mimi(mimi_weight, device='cpu')
18
  mimi.set_num_codebooks(8) # up to 32 for mimi, but limited to 8 for moshi.
19
 
20
  with torch.no_grad():
@@ -31,12 +31,16 @@ def compute_codes(wav):
31
 
32
 
33
  @spaces.GPU
34
- def process_wav_new(all_codes):
35
  """wav = torch.randn(1, 1, 24000 * 10) # should be [B, C=1, T]"""
36
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
 
37
  mimi = loaders.get_mimi(mimi_weight, device='cpu')
38
  mimi.set_num_codebooks(8) # up to 32 for mimi, but limited to 8 for moshi.
39
  mimi.to(device)
 
 
40
  moshi = loaders.get_moshi_lm(moshi_weight, device='cpu')
41
  moshi.to(device) # Move to GPU after loading
42
  lm_gen = LMGen(moshi, temp=0.8, temp_text=0.7) # this handles sampling params etc.
@@ -109,7 +113,7 @@ def process_audio(audio, instream):
109
  print("WAV COMBINED")
110
 
111
  mimi_codes = compute_codes(combined_wav)
112
- outwav = process_wav_new(mimi_codes)
113
 
114
  return gr.update(value=None), (24000, outwav.squeeze().cpu().numpy()), stream
115
 
 
11
  moshi_weight = hf_hub_download(loaders.DEFAULT_REPO, loaders.MOSHI_NAME)
12
 
13
 
14
+ @spaces.GPU
15
  def compute_codes(wav):
16
  """wav = torch.randn(1, 1, 24000 * 10) # should be [B, C=1, T]"""
17
+ mimi = loaders.get_mimi(mimi_weight)
18
  mimi.set_num_codebooks(8) # up to 32 for mimi, but limited to 8 for moshi.
19
 
20
  with torch.no_grad():
 
31
 
32
 
33
  @spaces.GPU
34
+ def generate_reponse(all_codes):
35
  """wav = torch.randn(1, 1, 24000 * 10) # should be [B, C=1, T]"""
36
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
37
+
38
+ # Set up Mimi
39
  mimi = loaders.get_mimi(mimi_weight, device='cpu')
40
  mimi.set_num_codebooks(8) # up to 32 for mimi, but limited to 8 for moshi.
41
  mimi.to(device)
42
+
43
+ # Set up Moshi/LM Gen
44
  moshi = loaders.get_moshi_lm(moshi_weight, device='cpu')
45
  moshi.to(device) # Move to GPU after loading
46
  lm_gen = LMGen(moshi, temp=0.8, temp_text=0.7) # this handles sampling params etc.
 
113
  print("WAV COMBINED")
114
 
115
  mimi_codes = compute_codes(combined_wav)
116
+ outwav = generate_reponse(mimi_codes)
117
 
118
  return gr.update(value=None), (24000, outwav.squeeze().cpu().numpy()), stream
119