Spaces:
Paused
Paused
Commit
·
5b7c5f9
1
Parent(s):
8d2f8c3
tested error handling
Browse files
app.py
CHANGED
@@ -35,17 +35,21 @@ def process_wav_new(wav):
|
|
35 |
lm_gen = LMGen(moshi, temp=0.8, temp_text=0.7) # this handles sampling params etc.
|
36 |
|
37 |
out_wav_chunks = []
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
|
|
|
|
|
|
|
|
49 |
|
50 |
return torch.cat(out_wav_chunks, dim=-1)
|
51 |
|
|
|
35 |
lm_gen = LMGen(moshi, temp=0.8, temp_text=0.7) # this handles sampling params etc.
|
36 |
|
37 |
out_wav_chunks = []
|
38 |
+
|
39 |
+
try:
|
40 |
+
# Now we will stream over both Moshi I/O, and decode on the fly with Mimi.
|
41 |
+
with torch.no_grad(), lm_gen.streaming(1), mimi.streaming(1):
|
42 |
+
for idx, code in enumerate(all_codes):
|
43 |
+
# print("CODE: ", code.shape)
|
44 |
+
tokens_out = lm_gen.step(code.to(device))
|
45 |
+
# tokens_out is [B, 1 + 8, 1], with tokens_out[:, 1] representing the text token.
|
46 |
+
if tokens_out is not None:
|
47 |
+
wav_chunk = mimi.decode(tokens_out[:, 1:])
|
48 |
+
out_wav_chunks.append(wav_chunk)
|
49 |
+
print(idx, end='\r')
|
50 |
+
# out_wav = torch.cat(out_wav_chunks, dim=-1)
|
51 |
+
except gr.Error:
|
52 |
+
return torch.cat(out_wav_chunks, dim=-1)
|
53 |
|
54 |
return torch.cat(out_wav_chunks, dim=-1)
|
55 |
|