lym0302 committed
Commit 46ab663 · 1 Parent(s): 146e231

device->cpu
mmaudio/ext/rotary_embeddings.py CHANGED
@@ -16,7 +16,8 @@ def compute_rope_rotations(length: int,
                            device: Union[torch.device, str] = 'cpu') -> Tensor:
     assert dim % 2 == 0
 
-    with torch.amp.autocast(device_type='cuda', enabled=False):
+    # with torch.amp.autocast(device_type='cuda', enabled=False):
+    with torch.amp.autocast(device_type=device, enabled=False):
         pos = torch.arange(length, dtype=torch.float32, device=device)
         freqs = 1.0 / (theta**(torch.arange(0, dim, 2, dtype=torch.float32, device=device) / dim))
         freqs *= freq_scaling
@@ -27,8 +28,9 @@
     return rot
 
 
-def apply_rope(x: Tensor, rot: Tensor) -> tuple[Tensor, Tensor]:
-    with torch.amp.autocast(device_type='cuda', enabled=False):
+def apply_rope(x: Tensor, rot: Tensor, device: Union[torch.device, str] = 'cpu') -> tuple[Tensor, Tensor]:
+    # with torch.amp.autocast(device_type='cuda', enabled=False):
+    with torch.amp.autocast(device_type=device, enabled=False):
         _x = x.float()
         _x = _x.view(*_x.shape[:-1], -1, 1, 2)
         x_out = rot[..., 0] * _x[..., 0] + rot[..., 1] * _x[..., 1]
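The patch replaces the hard-coded device_type='cuda' with the device argument both functions now receive. One caveat worth noting: torch.amp.autocast expects device_type as a string ('cpu' or 'cuda'), so a caller holding a torch.device object would need to normalize it before passing it through. A minimal sketch of such a shim (the as_device_type helper is hypothetical, not part of the commit):

import torch
from typing import Union

def as_device_type(device: Union[torch.device, str]) -> str:
    # Normalize a torch.device or a plain string to the 'cpu'/'cuda'
    # string that torch.amp.autocast(device_type=...) accepts.
    return device.type if isinstance(device, torch.device) else device

# Usage against the patched functions:
assert as_device_type(torch.device('cpu')) == 'cpu'
assert as_device_type('cuda') == 'cuda'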
mmaudio/ext/synchformer/synchformer.py CHANGED
@@ -41,14 +41,14 @@ class Synchformer(nn.Module):
         return super().load_state_dict(sd, strict)
 
 
-if __name__ == "__main__":
-    model = Synchformer().cuda().eval()
-    sd = torch.load('./ext_weights/synchformer_state_dict.pth', weights_only=True)
-    model.load_state_dict(sd)
-
-    vid = torch.randn(2, 7, 16, 3, 224, 224).cuda()
-    features = model.extract_vfeats(vid, for_loop=False).detach().cpu()
-    print(features.shape)
+# if __name__ == "__main__":
+#     model = Synchformer().cuda().eval()
+#     sd = torch.load('./ext_weights/synchformer_state_dict.pth', weights_only=True)
+#     model.load_state_dict(sd)
+
+#     vid = torch.randn(2, 7, 16, 3, 224, 224).cuda()
+#     features = model.extract_vfeats(vid, for_loop=False).detach().cpu()
+#     print(features.shape)
 
 # extract and save the state dict only
 # sd = torch.load('./ext_weights/sync_model_audioset.pt')['model']
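Rather than commenting out the CUDA-only smoke test entirely, a device-agnostic variant would keep it runnable on CPU hosts. A minimal sketch reusing the paths and calls from the block above (an assumption, not part of the commit):

import torch
from mmaudio.ext.synchformer.synchformer import Synchformer

if __name__ == "__main__":
    # Fall back to CPU when no GPU is present.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = Synchformer().to(device).eval()
    sd = torch.load('./ext_weights/synchformer_state_dict.pth',
                    map_location=device, weights_only=True)
    model.load_state_dict(sd)

    vid = torch.randn(2, 7, 16, 3, 224, 224, device=device)
    features = model.extract_vfeats(vid, for_loop=False).detach().cpu()
    print(features.shape)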