Spaces:

justus-tobias
/

Moshi

Paused

justus-tobias committed on Sep 27, 2024

Commit

d8213a7

1 Parent(s): 2321d9d

added GPU

Files changed (1) hide show

app.py CHANGED Viewed

@@ -11,10 +11,10 @@ mimi_weight = hf_hub_download(loaders.DEFAULT_REPO, loaders.MIMI_NAME)
 moshi_weight = hf_hub_download(loaders.DEFAULT_REPO, loaders.MOSHI_NAME)
 def compute_codes(wav):
     """wav = torch.randn(1, 1, 24000 * 10)  # should be [B, C=1, T]"""
-    mimi = loaders.get_mimi(mimi_weight, device='cpu')
     mimi.set_num_codebooks(8)  # up to 32 for mimi, but limited to 8 for moshi.
     with torch.no_grad():
@@ -31,12 +31,16 @@ def compute_codes(wav):
 @spaces.GPU
-def process_wav_new(all_codes):
     """wav = torch.randn(1, 1, 24000 * 10)  # should be [B, C=1, T]"""
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     mimi = loaders.get_mimi(mimi_weight, device='cpu')
     mimi.set_num_codebooks(8)  # up to 32 for mimi, but limited to 8 for moshi.
     mimi.to(device)
     moshi = loaders.get_moshi_lm(moshi_weight, device='cpu')
     moshi.to(device)  # Move to GPU after loading
     lm_gen = LMGen(moshi, temp=0.8, temp_text=0.7)  # this handles sampling params etc.
@@ -109,7 +113,7 @@ def process_audio(audio, instream):
     print("WAV COMBINED")
     mimi_codes = compute_codes(combined_wav)
-    outwav = process_wav_new(mimi_codes)
     return gr.update(value=None), (24000, outwav.squeeze().cpu().numpy()), stream

 moshi_weight = hf_hub_download(loaders.DEFAULT_REPO, loaders.MOSHI_NAME)
+@spaces.GPU
 def compute_codes(wav):
     """wav = torch.randn(1, 1, 24000 * 10)  # should be [B, C=1, T]"""
+    mimi = loaders.get_mimi(mimi_weight)
     mimi.set_num_codebooks(8)  # up to 32 for mimi, but limited to 8 for moshi.
     with torch.no_grad():
 @spaces.GPU
+def generate_reponse(all_codes):
     """wav = torch.randn(1, 1, 24000 * 10)  # should be [B, C=1, T]"""
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    # Set up Mimi
     mimi = loaders.get_mimi(mimi_weight, device='cpu')
     mimi.set_num_codebooks(8)  # up to 32 for mimi, but limited to 8 for moshi.
     mimi.to(device)
+    # Set up Moshi/LM Gen
     moshi = loaders.get_moshi_lm(moshi_weight, device='cpu')
     moshi.to(device)  # Move to GPU after loading
     lm_gen = LMGen(moshi, temp=0.8, temp_text=0.7)  # this handles sampling params etc.
     print("WAV COMBINED")
     mimi_codes = compute_codes(combined_wav)
+    outwav = generate_reponse(mimi_codes)
     return gr.update(value=None), (24000, outwav.squeeze().cpu().numpy()), stream