diff --git a/README.md b/README.md index 6c2e823d2c11f8bd341796712252d0beceb93bfd..4d0561e803c34dc86855ced8fdaa54ca836ef8e2 100644 --- a/README.md +++ b/README.md @@ -2,9 +2,6 @@ tags: - kernel --- - -![Status](https://hubwebhook.dholtz.com/shield?repo=kernels-community/activation) - ## Activation Activation kernels from [vLLM](https://github.com/vllm-project/vllm/blob/main/csrc/activation_kernels.cu). \ No newline at end of file diff --git a/build.toml b/build.toml index 8014f840f597230d5503d6cc9e6125318bd18052..7da9d632a70edb0699eb77f097b9b1a5ae573c48 100644 --- a/build.toml +++ b/build.toml @@ -8,8 +8,7 @@ src = [ ] [kernel.activation] -#language = "cuda-hipify" -#rocm-archs = [ "gfx906", "gfx908", "gfx90a", "gfx940", "gfx941", "gfx942", "gfx1030", "gfx1100", "gfx1101" ] +cuda-capabilities = [ "7.0", "7.2", "7.5", "8.0", "8.6", "8.7", "8.9", "9.0" ] src = [ "activation/activation_kernels.cu", "activation/cuda_compat.h", diff --git a/build/torch25-cxx11-cu118-x86_64-linux/activation/__init__.py b/build/torch25-cxx11-cu118-x86_64-linux/activation/__init__.py index ddb37490dad9d8ffcbeb13ed06b33f03fef8ed78..71e0b01a03416c783f2bd67fc30d7ac86aee8764 100644 --- a/build/torch25-cxx11-cu118-x86_64-linux/activation/__init__.py +++ b/build/torch25-cxx11-cu118-x86_64-linux/activation/__init__.py @@ -1,8 +1,15 @@ import torch -from ._ops import ops +try: + from ._ops import ops +except ImportError as e: + # Fallback for local development. + try: + import _activation -from . import layers + ops = torch.ops._activition + except ImportError: + raise e def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: @@ -38,15 +45,3 @@ def gelu_new(out: torch.Tensor, x: torch.Tensor) -> None: def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: ops.gelu_quick(out, x) return out - - -__all__ = [ - "silu_and_mul", - "gelu_and_mul", - "gelu_tanh_and_mul", - "fatrelu_and_mul", - "gelu_fast", - "gelu_new", - "gelu_quick", - "layers", -] diff --git a/build/torch25-cxx11-cu118-x86_64-linux/activation/_activation_78448fa.abi3.so b/build/torch25-cxx11-cu118-x86_64-linux/activation/_activation_78448fa.abi3.so deleted file mode 100755 index d3e4c45501c4bf4d1bb9c69451e5330263f00f53..0000000000000000000000000000000000000000 --- a/build/torch25-cxx11-cu118-x86_64-linux/activation/_activation_78448fa.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:323dbf69b89390fd46b207abc1314a4cbe27491e1bb9f026c840bc3bff43b7d3 -size 2447952 diff --git a/build/torch25-cxx11-cu118-x86_64-linux/activation/_activation_o63kkyjirmkf4.abi3.so b/build/torch25-cxx11-cu118-x86_64-linux/activation/_activation_o63kkyjirmkf4.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..1442eb73f43c4768d2b8f9b916943743256af106 --- /dev/null +++ b/build/torch25-cxx11-cu118-x86_64-linux/activation/_activation_o63kkyjirmkf4.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d50cdabfbed1df74e921ac34ff00bca0555977b14ef8082ddae7b1f30985a494 +size 2370160 diff --git a/build/torch25-cxx11-cu118-x86_64-linux/activation/_ops.py b/build/torch25-cxx11-cu118-x86_64-linux/activation/_ops.py index 9599b1a485532e5c16dfb1bb9228c701ad4260a6..0ee3c40cd38699ce7744be53b0531fe4fa505996 100644 --- a/build/torch25-cxx11-cu118-x86_64-linux/activation/_ops.py +++ b/build/torch25-cxx11-cu118-x86_64-linux/activation/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _activation_78448fa -ops = torch.ops._activation_78448fa +from . 
import _activation_o63kkyjirmkf4 +ops = torch.ops._activation_o63kkyjirmkf4 def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. """ - return f"_activation_78448fa::{op_name}" \ No newline at end of file + return f"_activation_o63kkyjirmkf4::{op_name}" \ No newline at end of file diff --git a/build/torch25-cxx11-cu118-x86_64-linux/activation/layers.py b/build/torch25-cxx11-cu118-x86_64-linux/activation/layers.py deleted file mode 100644 index 99c129e3b1c9ed4c18166d5b5d67eb08f137a27f..0000000000000000000000000000000000000000 --- a/build/torch25-cxx11-cu118-x86_64-linux/activation/layers.py +++ /dev/null @@ -1,65 +0,0 @@ -import torch -import torch.nn as nn - -from ._ops import ops - - -class SiluAndMul(nn.Module): - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.silu_and_mul(out, x) - return out - - -class GeluAndMul(nn.Module): - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.gelu_and_mul(out, x) - return out - - -class GeluTanhAndMul(nn.Module): - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.gelu_tanh_and_mul(out, x) - return out - - -class FatreluAndMul(nn.Module): - def __init__(self, threshold: float = 0.0): - super().__init__() - self.threshold = threshold - - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.fatrelu_and_mul(out, x, self.threshold) - return out - - -class FastGELU(nn.Module): - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_fast(out, x) - return out - - -class NewGELU(nn.Module): - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_new(out, x) - return out - - -class QuickGELU(nn.Module): - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_quick(out, x) - return out diff --git a/build/torch25-cxx11-cu121-x86_64-linux/activation/__init__.py b/build/torch25-cxx11-cu121-x86_64-linux/activation/__init__.py index ddb37490dad9d8ffcbeb13ed06b33f03fef8ed78..71e0b01a03416c783f2bd67fc30d7ac86aee8764 100644 --- a/build/torch25-cxx11-cu121-x86_64-linux/activation/__init__.py +++ b/build/torch25-cxx11-cu121-x86_64-linux/activation/__init__.py @@ -1,8 +1,15 @@ import torch -from ._ops import ops +try: + from ._ops import ops +except ImportError as e: + # Fallback for local development. + try: + import _activation -from . 
import layers + ops = torch.ops._activition + except ImportError: + raise e def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: @@ -38,15 +45,3 @@ def gelu_new(out: torch.Tensor, x: torch.Tensor) -> None: def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: ops.gelu_quick(out, x) return out - - -__all__ = [ - "silu_and_mul", - "gelu_and_mul", - "gelu_tanh_and_mul", - "fatrelu_and_mul", - "gelu_fast", - "gelu_new", - "gelu_quick", - "layers", -] diff --git a/build/torch25-cxx11-cu121-x86_64-linux/activation/_activation_78448fa.abi3.so b/build/torch25-cxx11-cu121-x86_64-linux/activation/_activation_78448fa.abi3.so deleted file mode 100755 index d25d6d4e382c656ac56b436cb0a4babe828b75aa..0000000000000000000000000000000000000000 --- a/build/torch25-cxx11-cu121-x86_64-linux/activation/_activation_78448fa.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6146ac6e77cbd458560bf67c46d93217833f2caf08260cc80a4aa62ba5645ee9 -size 2471056 diff --git a/build/torch25-cxx11-cu121-x86_64-linux/activation/_activation_vrl36m2ejer54.abi3.so b/build/torch25-cxx11-cu121-x86_64-linux/activation/_activation_vrl36m2ejer54.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..e6b6260c4378717d4369cd2577a00350960e317c --- /dev/null +++ b/build/torch25-cxx11-cu121-x86_64-linux/activation/_activation_vrl36m2ejer54.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bd0709ef09c8f0c18d1dc4a36c8096c59459bece61f5f5dbea95d1e73f54d44 +size 2393264 diff --git a/build/torch25-cxx11-cu121-x86_64-linux/activation/_ops.py b/build/torch25-cxx11-cu121-x86_64-linux/activation/_ops.py index 9599b1a485532e5c16dfb1bb9228c701ad4260a6..cd5eee20b4c9a70e4f59e7198a0a28c5dfa06244 100644 --- a/build/torch25-cxx11-cu121-x86_64-linux/activation/_ops.py +++ b/build/torch25-cxx11-cu121-x86_64-linux/activation/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _activation_78448fa -ops = torch.ops._activation_78448fa +from . import _activation_vrl36m2ejer54 +ops = torch.ops._activation_vrl36m2ejer54 def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. 
""" - return f"_activation_78448fa::{op_name}" \ No newline at end of file + return f"_activation_vrl36m2ejer54::{op_name}" \ No newline at end of file diff --git a/build/torch25-cxx11-cu121-x86_64-linux/activation/layers.py b/build/torch25-cxx11-cu121-x86_64-linux/activation/layers.py deleted file mode 100644 index 99c129e3b1c9ed4c18166d5b5d67eb08f137a27f..0000000000000000000000000000000000000000 --- a/build/torch25-cxx11-cu121-x86_64-linux/activation/layers.py +++ /dev/null @@ -1,65 +0,0 @@ -import torch -import torch.nn as nn - -from ._ops import ops - - -class SiluAndMul(nn.Module): - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.silu_and_mul(out, x) - return out - - -class GeluAndMul(nn.Module): - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.gelu_and_mul(out, x) - return out - - -class GeluTanhAndMul(nn.Module): - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.gelu_tanh_and_mul(out, x) - return out - - -class FatreluAndMul(nn.Module): - def __init__(self, threshold: float = 0.0): - super().__init__() - self.threshold = threshold - - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.fatrelu_and_mul(out, x, self.threshold) - return out - - -class FastGELU(nn.Module): - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_fast(out, x) - return out - - -class NewGELU(nn.Module): - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_new(out, x) - return out - - -class QuickGELU(nn.Module): - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_quick(out, x) - return out diff --git a/build/torch25-cxx11-cu124-x86_64-linux/activation/__init__.py b/build/torch25-cxx11-cu124-x86_64-linux/activation/__init__.py index ddb37490dad9d8ffcbeb13ed06b33f03fef8ed78..71e0b01a03416c783f2bd67fc30d7ac86aee8764 100644 --- a/build/torch25-cxx11-cu124-x86_64-linux/activation/__init__.py +++ b/build/torch25-cxx11-cu124-x86_64-linux/activation/__init__.py @@ -1,8 +1,15 @@ import torch -from ._ops import ops +try: + from ._ops import ops +except ImportError as e: + # Fallback for local development. + try: + import _activation -from . 
import layers + ops = torch.ops._activition + except ImportError: + raise e def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: @@ -38,15 +45,3 @@ def gelu_new(out: torch.Tensor, x: torch.Tensor) -> None: def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: ops.gelu_quick(out, x) return out - - -__all__ = [ - "silu_and_mul", - "gelu_and_mul", - "gelu_tanh_and_mul", - "fatrelu_and_mul", - "gelu_fast", - "gelu_new", - "gelu_quick", - "layers", -] diff --git a/build/torch25-cxx11-cu124-x86_64-linux/activation/_activation_78448fa.abi3.so b/build/torch25-cxx11-cu124-x86_64-linux/activation/_activation_78448fa.abi3.so deleted file mode 100755 index b3e629bc4a0200b8cb36a2483ba17e085137ef0c..0000000000000000000000000000000000000000 --- a/build/torch25-cxx11-cu124-x86_64-linux/activation/_activation_78448fa.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:28eea3907055742f99bc9d7d4260add848adc2f6464e97029f37cd42a5c6bd0a -size 2509832 diff --git a/build/torch25-cxx11-cu124-x86_64-linux/activation/_activation_va3moa75vw7c2.abi3.so b/build/torch25-cxx11-cu124-x86_64-linux/activation/_activation_va3moa75vw7c2.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..2bfbc3667e483ce8441b310889b63614133f6334 --- /dev/null +++ b/build/torch25-cxx11-cu124-x86_64-linux/activation/_activation_va3moa75vw7c2.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8353447f64e7d2df1a6a341d9c53bced53abef267f079923ae774170d0d57c53 +size 2427936 diff --git a/build/torch25-cxx11-cu124-x86_64-linux/activation/_ops.py b/build/torch25-cxx11-cu124-x86_64-linux/activation/_ops.py index 9599b1a485532e5c16dfb1bb9228c701ad4260a6..05fa036d769ea858865c92b972c68a158899eca1 100644 --- a/build/torch25-cxx11-cu124-x86_64-linux/activation/_ops.py +++ b/build/torch25-cxx11-cu124-x86_64-linux/activation/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _activation_78448fa -ops = torch.ops._activation_78448fa +from . import _activation_va3moa75vw7c2 +ops = torch.ops._activation_va3moa75vw7c2 def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. 
""" - return f"_activation_78448fa::{op_name}" \ No newline at end of file + return f"_activation_va3moa75vw7c2::{op_name}" \ No newline at end of file diff --git a/build/torch25-cxx11-cu124-x86_64-linux/activation/layers.py b/build/torch25-cxx11-cu124-x86_64-linux/activation/layers.py deleted file mode 100644 index 99c129e3b1c9ed4c18166d5b5d67eb08f137a27f..0000000000000000000000000000000000000000 --- a/build/torch25-cxx11-cu124-x86_64-linux/activation/layers.py +++ /dev/null @@ -1,65 +0,0 @@ -import torch -import torch.nn as nn - -from ._ops import ops - - -class SiluAndMul(nn.Module): - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.silu_and_mul(out, x) - return out - - -class GeluAndMul(nn.Module): - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.gelu_and_mul(out, x) - return out - - -class GeluTanhAndMul(nn.Module): - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.gelu_tanh_and_mul(out, x) - return out - - -class FatreluAndMul(nn.Module): - def __init__(self, threshold: float = 0.0): - super().__init__() - self.threshold = threshold - - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.fatrelu_and_mul(out, x, self.threshold) - return out - - -class FastGELU(nn.Module): - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_fast(out, x) - return out - - -class NewGELU(nn.Module): - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_new(out, x) - return out - - -class QuickGELU(nn.Module): - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_quick(out, x) - return out diff --git a/build/torch25-cxx98-cu118-x86_64-linux/activation/__init__.py b/build/torch25-cxx98-cu118-x86_64-linux/activation/__init__.py index ddb37490dad9d8ffcbeb13ed06b33f03fef8ed78..71e0b01a03416c783f2bd67fc30d7ac86aee8764 100644 --- a/build/torch25-cxx98-cu118-x86_64-linux/activation/__init__.py +++ b/build/torch25-cxx98-cu118-x86_64-linux/activation/__init__.py @@ -1,8 +1,15 @@ import torch -from ._ops import ops +try: + from ._ops import ops +except ImportError as e: + # Fallback for local development. + try: + import _activation -from . 
import layers + ops = torch.ops._activition + except ImportError: + raise e def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: @@ -38,15 +45,3 @@ def gelu_new(out: torch.Tensor, x: torch.Tensor) -> None: def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: ops.gelu_quick(out, x) return out - - -__all__ = [ - "silu_and_mul", - "gelu_and_mul", - "gelu_tanh_and_mul", - "fatrelu_and_mul", - "gelu_fast", - "gelu_new", - "gelu_quick", - "layers", -] diff --git a/build/torch25-cxx98-cu118-x86_64-linux/activation/_activation_78448fa.abi3.so b/build/torch25-cxx98-cu118-x86_64-linux/activation/_activation_78448fa.abi3.so deleted file mode 100755 index 348e0c950321396fd29fbb0d64996501c804e43d..0000000000000000000000000000000000000000 --- a/build/torch25-cxx98-cu118-x86_64-linux/activation/_activation_78448fa.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d5609ad07903b98c83c297bfb64f0d944df5edfe1c611fee23ec6c8fbd952604 -size 2440392 diff --git a/build/torch25-cxx98-cu118-x86_64-linux/activation/_activation_qr3gs3eckeig4.abi3.so b/build/torch25-cxx98-cu118-x86_64-linux/activation/_activation_qr3gs3eckeig4.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..fc35d5f1bdad2b294897ace78c80c2913e8bd793 --- /dev/null +++ b/build/torch25-cxx98-cu118-x86_64-linux/activation/_activation_qr3gs3eckeig4.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df184a6315118d787a1bd6b435cb45f1ca7828445a1f1c0e55c57645cfbba43a +size 2362600 diff --git a/build/torch25-cxx98-cu118-x86_64-linux/activation/_ops.py b/build/torch25-cxx98-cu118-x86_64-linux/activation/_ops.py index 9599b1a485532e5c16dfb1bb9228c701ad4260a6..11bf3a1e13ad4f53348d55ce0b8e727307534d67 100644 --- a/build/torch25-cxx98-cu118-x86_64-linux/activation/_ops.py +++ b/build/torch25-cxx98-cu118-x86_64-linux/activation/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _activation_78448fa -ops = torch.ops._activation_78448fa +from . import _activation_qr3gs3eckeig4 +ops = torch.ops._activation_qr3gs3eckeig4 def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. 
""" - return f"_activation_78448fa::{op_name}" \ No newline at end of file + return f"_activation_qr3gs3eckeig4::{op_name}" \ No newline at end of file diff --git a/build/torch25-cxx98-cu118-x86_64-linux/activation/layers.py b/build/torch25-cxx98-cu118-x86_64-linux/activation/layers.py deleted file mode 100644 index 99c129e3b1c9ed4c18166d5b5d67eb08f137a27f..0000000000000000000000000000000000000000 --- a/build/torch25-cxx98-cu118-x86_64-linux/activation/layers.py +++ /dev/null @@ -1,65 +0,0 @@ -import torch -import torch.nn as nn - -from ._ops import ops - - -class SiluAndMul(nn.Module): - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.silu_and_mul(out, x) - return out - - -class GeluAndMul(nn.Module): - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.gelu_and_mul(out, x) - return out - - -class GeluTanhAndMul(nn.Module): - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.gelu_tanh_and_mul(out, x) - return out - - -class FatreluAndMul(nn.Module): - def __init__(self, threshold: float = 0.0): - super().__init__() - self.threshold = threshold - - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.fatrelu_and_mul(out, x, self.threshold) - return out - - -class FastGELU(nn.Module): - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_fast(out, x) - return out - - -class NewGELU(nn.Module): - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_new(out, x) - return out - - -class QuickGELU(nn.Module): - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_quick(out, x) - return out diff --git a/build/torch25-cxx98-cu121-x86_64-linux/activation/__init__.py b/build/torch25-cxx98-cu121-x86_64-linux/activation/__init__.py index ddb37490dad9d8ffcbeb13ed06b33f03fef8ed78..71e0b01a03416c783f2bd67fc30d7ac86aee8764 100644 --- a/build/torch25-cxx98-cu121-x86_64-linux/activation/__init__.py +++ b/build/torch25-cxx98-cu121-x86_64-linux/activation/__init__.py @@ -1,8 +1,15 @@ import torch -from ._ops import ops +try: + from ._ops import ops +except ImportError as e: + # Fallback for local development. + try: + import _activation -from . 
import layers + ops = torch.ops._activition + except ImportError: + raise e def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: @@ -38,15 +45,3 @@ def gelu_new(out: torch.Tensor, x: torch.Tensor) -> None: def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: ops.gelu_quick(out, x) return out - - -__all__ = [ - "silu_and_mul", - "gelu_and_mul", - "gelu_tanh_and_mul", - "fatrelu_and_mul", - "gelu_fast", - "gelu_new", - "gelu_quick", - "layers", -] diff --git a/build/torch25-cxx98-cu121-x86_64-linux/activation/_activation_78448fa.abi3.so b/build/torch25-cxx98-cu121-x86_64-linux/activation/_activation_78448fa.abi3.so deleted file mode 100755 index 2d296b21c2864cc6292a53fbdf34aabc07f2ee89..0000000000000000000000000000000000000000 --- a/build/torch25-cxx98-cu121-x86_64-linux/activation/_activation_78448fa.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7e6475ed603ad2cb565bd19ad2554484bd6c00d0d3f02decff60f2285df2546f -size 2463232 diff --git a/build/torch25-cxx98-cu121-x86_64-linux/activation/_activation_p7gbzt25w3zg2.abi3.so b/build/torch25-cxx98-cu121-x86_64-linux/activation/_activation_p7gbzt25w3zg2.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..e24099a45ad0e0ab481769c24c749947c164b07c --- /dev/null +++ b/build/torch25-cxx98-cu121-x86_64-linux/activation/_activation_p7gbzt25w3zg2.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccb13cfc2e45cf483e8b9f77f1760f28b48bcf185508d51b32d45bc759c4e8bb +size 2385440 diff --git a/build/torch25-cxx98-cu121-x86_64-linux/activation/_ops.py b/build/torch25-cxx98-cu121-x86_64-linux/activation/_ops.py index 9599b1a485532e5c16dfb1bb9228c701ad4260a6..fbe888c4ec71536f08f7de2d823b75d9bbac0173 100644 --- a/build/torch25-cxx98-cu121-x86_64-linux/activation/_ops.py +++ b/build/torch25-cxx98-cu121-x86_64-linux/activation/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _activation_78448fa -ops = torch.ops._activation_78448fa +from . import _activation_p7gbzt25w3zg2 +ops = torch.ops._activation_p7gbzt25w3zg2 def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. 
""" - return f"_activation_78448fa::{op_name}" \ No newline at end of file + return f"_activation_p7gbzt25w3zg2::{op_name}" \ No newline at end of file diff --git a/build/torch25-cxx98-cu121-x86_64-linux/activation/layers.py b/build/torch25-cxx98-cu121-x86_64-linux/activation/layers.py deleted file mode 100644 index 99c129e3b1c9ed4c18166d5b5d67eb08f137a27f..0000000000000000000000000000000000000000 --- a/build/torch25-cxx98-cu121-x86_64-linux/activation/layers.py +++ /dev/null @@ -1,65 +0,0 @@ -import torch -import torch.nn as nn - -from ._ops import ops - - -class SiluAndMul(nn.Module): - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.silu_and_mul(out, x) - return out - - -class GeluAndMul(nn.Module): - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.gelu_and_mul(out, x) - return out - - -class GeluTanhAndMul(nn.Module): - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.gelu_tanh_and_mul(out, x) - return out - - -class FatreluAndMul(nn.Module): - def __init__(self, threshold: float = 0.0): - super().__init__() - self.threshold = threshold - - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.fatrelu_and_mul(out, x, self.threshold) - return out - - -class FastGELU(nn.Module): - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_fast(out, x) - return out - - -class NewGELU(nn.Module): - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_new(out, x) - return out - - -class QuickGELU(nn.Module): - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_quick(out, x) - return out diff --git a/build/torch25-cxx98-cu124-x86_64-linux/activation/__init__.py b/build/torch25-cxx98-cu124-x86_64-linux/activation/__init__.py index ddb37490dad9d8ffcbeb13ed06b33f03fef8ed78..71e0b01a03416c783f2bd67fc30d7ac86aee8764 100644 --- a/build/torch25-cxx98-cu124-x86_64-linux/activation/__init__.py +++ b/build/torch25-cxx98-cu124-x86_64-linux/activation/__init__.py @@ -1,8 +1,15 @@ import torch -from ._ops import ops +try: + from ._ops import ops +except ImportError as e: + # Fallback for local development. + try: + import _activation -from . 
import layers + ops = torch.ops._activition + except ImportError: + raise e def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: @@ -38,15 +45,3 @@ def gelu_new(out: torch.Tensor, x: torch.Tensor) -> None: def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: ops.gelu_quick(out, x) return out - - -__all__ = [ - "silu_and_mul", - "gelu_and_mul", - "gelu_tanh_and_mul", - "fatrelu_and_mul", - "gelu_fast", - "gelu_new", - "gelu_quick", - "layers", -] diff --git a/build/torch25-cxx98-cu124-x86_64-linux/activation/_activation_78448fa.abi3.so b/build/torch25-cxx98-cu124-x86_64-linux/activation/_activation_78448fa.abi3.so deleted file mode 100755 index d1d17f12ba992ef1267c24b25d05513453b44f8d..0000000000000000000000000000000000000000 --- a/build/torch25-cxx98-cu124-x86_64-linux/activation/_activation_78448fa.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a0767f6dba00c543d3cb77e2044bccd32ef569abc55b921231112c8a1ddfb187 -size 2502088 diff --git a/build/torch25-cxx98-cu124-x86_64-linux/activation/_activation_jg7yaigtn7wco.abi3.so b/build/torch25-cxx98-cu124-x86_64-linux/activation/_activation_jg7yaigtn7wco.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..dc1fd3cc2e28051b8a263a5baf287b794d741f41 --- /dev/null +++ b/build/torch25-cxx98-cu124-x86_64-linux/activation/_activation_jg7yaigtn7wco.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f8048853e8cb06e8574a9a9497800d2be438f7989d79f44dcf2e0ced38a75a9 +size 2420192 diff --git a/build/torch25-cxx98-cu124-x86_64-linux/activation/_ops.py b/build/torch25-cxx98-cu124-x86_64-linux/activation/_ops.py index 9599b1a485532e5c16dfb1bb9228c701ad4260a6..0c022ad4f749236320938c6ae13079a445e860ba 100644 --- a/build/torch25-cxx98-cu124-x86_64-linux/activation/_ops.py +++ b/build/torch25-cxx98-cu124-x86_64-linux/activation/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _activation_78448fa -ops = torch.ops._activation_78448fa +from . import _activation_jg7yaigtn7wco +ops = torch.ops._activation_jg7yaigtn7wco def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. 
""" - return f"_activation_78448fa::{op_name}" \ No newline at end of file + return f"_activation_jg7yaigtn7wco::{op_name}" \ No newline at end of file diff --git a/build/torch25-cxx98-cu124-x86_64-linux/activation/layers.py b/build/torch25-cxx98-cu124-x86_64-linux/activation/layers.py deleted file mode 100644 index 99c129e3b1c9ed4c18166d5b5d67eb08f137a27f..0000000000000000000000000000000000000000 --- a/build/torch25-cxx98-cu124-x86_64-linux/activation/layers.py +++ /dev/null @@ -1,65 +0,0 @@ -import torch -import torch.nn as nn - -from ._ops import ops - - -class SiluAndMul(nn.Module): - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.silu_and_mul(out, x) - return out - - -class GeluAndMul(nn.Module): - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.gelu_and_mul(out, x) - return out - - -class GeluTanhAndMul(nn.Module): - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.gelu_tanh_and_mul(out, x) - return out - - -class FatreluAndMul(nn.Module): - def __init__(self, threshold: float = 0.0): - super().__init__() - self.threshold = threshold - - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.fatrelu_and_mul(out, x, self.threshold) - return out - - -class FastGELU(nn.Module): - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_fast(out, x) - return out - - -class NewGELU(nn.Module): - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_new(out, x) - return out - - -class QuickGELU(nn.Module): - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_quick(out, x) - return out diff --git a/build/torch26-cxx11-cu118-x86_64-linux/activation/__init__.py b/build/torch26-cxx11-cu118-x86_64-linux/activation/__init__.py index ddb37490dad9d8ffcbeb13ed06b33f03fef8ed78..71e0b01a03416c783f2bd67fc30d7ac86aee8764 100644 --- a/build/torch26-cxx11-cu118-x86_64-linux/activation/__init__.py +++ b/build/torch26-cxx11-cu118-x86_64-linux/activation/__init__.py @@ -1,8 +1,15 @@ import torch -from ._ops import ops +try: + from ._ops import ops +except ImportError as e: + # Fallback for local development. + try: + import _activation -from . 
import layers + ops = torch.ops._activition + except ImportError: + raise e def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: @@ -38,15 +45,3 @@ def gelu_new(out: torch.Tensor, x: torch.Tensor) -> None: def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: ops.gelu_quick(out, x) return out - - -__all__ = [ - "silu_and_mul", - "gelu_and_mul", - "gelu_tanh_and_mul", - "fatrelu_and_mul", - "gelu_fast", - "gelu_new", - "gelu_quick", - "layers", -] diff --git a/build/torch26-cxx11-cu118-x86_64-linux/activation/_activation_78448fa.abi3.so b/build/torch26-cxx11-cu118-x86_64-linux/activation/_activation_78448fa.abi3.so deleted file mode 100755 index 3ce2339b2c3a1a531db79f5667dc40c514ab5241..0000000000000000000000000000000000000000 --- a/build/torch26-cxx11-cu118-x86_64-linux/activation/_activation_78448fa.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e0c04d860454cc565113a3c93ff755fe9cbba0578c4604b89ad89e47c2503932 -size 2448056 diff --git a/build/torch26-cxx11-cu118-x86_64-linux/activation/_activation_ncisyrun7guwk.abi3.so b/build/torch26-cxx11-cu118-x86_64-linux/activation/_activation_ncisyrun7guwk.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..951cd024fc65dcaf31f0ce123d7c56538db90255 --- /dev/null +++ b/build/torch26-cxx11-cu118-x86_64-linux/activation/_activation_ncisyrun7guwk.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cde5439e78ba0e1aaa1937d798b214b46d38cbab8e4384b93a22239fed1a4dd4 +size 2370264 diff --git a/build/torch26-cxx11-cu118-x86_64-linux/activation/_ops.py b/build/torch26-cxx11-cu118-x86_64-linux/activation/_ops.py index 9599b1a485532e5c16dfb1bb9228c701ad4260a6..f4538ecbd1302013d2026d413f07fefa1e3ed1ba 100644 --- a/build/torch26-cxx11-cu118-x86_64-linux/activation/_ops.py +++ b/build/torch26-cxx11-cu118-x86_64-linux/activation/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _activation_78448fa -ops = torch.ops._activation_78448fa +from . import _activation_ncisyrun7guwk +ops = torch.ops._activation_ncisyrun7guwk def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. 
""" - return f"_activation_78448fa::{op_name}" \ No newline at end of file + return f"_activation_ncisyrun7guwk::{op_name}" \ No newline at end of file diff --git a/build/torch26-cxx11-cu118-x86_64-linux/activation/layers.py b/build/torch26-cxx11-cu118-x86_64-linux/activation/layers.py deleted file mode 100644 index 99c129e3b1c9ed4c18166d5b5d67eb08f137a27f..0000000000000000000000000000000000000000 --- a/build/torch26-cxx11-cu118-x86_64-linux/activation/layers.py +++ /dev/null @@ -1,65 +0,0 @@ -import torch -import torch.nn as nn - -from ._ops import ops - - -class SiluAndMul(nn.Module): - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.silu_and_mul(out, x) - return out - - -class GeluAndMul(nn.Module): - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.gelu_and_mul(out, x) - return out - - -class GeluTanhAndMul(nn.Module): - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.gelu_tanh_and_mul(out, x) - return out - - -class FatreluAndMul(nn.Module): - def __init__(self, threshold: float = 0.0): - super().__init__() - self.threshold = threshold - - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.fatrelu_and_mul(out, x, self.threshold) - return out - - -class FastGELU(nn.Module): - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_fast(out, x) - return out - - -class NewGELU(nn.Module): - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_new(out, x) - return out - - -class QuickGELU(nn.Module): - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_quick(out, x) - return out diff --git a/build/torch26-cxx11-cu124-x86_64-linux/activation/__init__.py b/build/torch26-cxx11-cu124-x86_64-linux/activation/__init__.py index ddb37490dad9d8ffcbeb13ed06b33f03fef8ed78..71e0b01a03416c783f2bd67fc30d7ac86aee8764 100644 --- a/build/torch26-cxx11-cu124-x86_64-linux/activation/__init__.py +++ b/build/torch26-cxx11-cu124-x86_64-linux/activation/__init__.py @@ -1,8 +1,15 @@ import torch -from ._ops import ops +try: + from ._ops import ops +except ImportError as e: + # Fallback for local development. + try: + import _activation -from . 
import layers + ops = torch.ops._activition + except ImportError: + raise e def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: @@ -38,15 +45,3 @@ def gelu_new(out: torch.Tensor, x: torch.Tensor) -> None: def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: ops.gelu_quick(out, x) return out - - -__all__ = [ - "silu_and_mul", - "gelu_and_mul", - "gelu_tanh_and_mul", - "fatrelu_and_mul", - "gelu_fast", - "gelu_new", - "gelu_quick", - "layers", -] diff --git a/build/torch26-cxx11-cu124-x86_64-linux/activation/_activation_78448fa.abi3.so b/build/torch26-cxx11-cu124-x86_64-linux/activation/_activation_78448fa.abi3.so deleted file mode 100755 index bf4346f6bcf6cfe9721c2f5facae07130c46de7d..0000000000000000000000000000000000000000 --- a/build/torch26-cxx11-cu124-x86_64-linux/activation/_activation_78448fa.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:48d7b0d190af1dd0366dbaeb0690b9c7cd1dfdc9aeda9b0b23bce56c70f5cbae -size 2509928 diff --git a/build/torch26-cxx11-cu124-x86_64-linux/activation/_activation_ochhfvlnc3vyc.abi3.so b/build/torch26-cxx11-cu124-x86_64-linux/activation/_activation_ochhfvlnc3vyc.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..7cc13c8b18b95f7a24b708e41f20daa15cf8e5ba --- /dev/null +++ b/build/torch26-cxx11-cu124-x86_64-linux/activation/_activation_ochhfvlnc3vyc.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6bd20d411c51fc8729b15cab6a60c5c9185222474aa035489e1bff299d76682 +size 2428040 diff --git a/build/torch26-cxx11-cu124-x86_64-linux/activation/_ops.py b/build/torch26-cxx11-cu124-x86_64-linux/activation/_ops.py index 9599b1a485532e5c16dfb1bb9228c701ad4260a6..fc135b9b87ed568acd3b7ae002760780202297ab 100644 --- a/build/torch26-cxx11-cu124-x86_64-linux/activation/_ops.py +++ b/build/torch26-cxx11-cu124-x86_64-linux/activation/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _activation_78448fa -ops = torch.ops._activation_78448fa +from . import _activation_ochhfvlnc3vyc +ops = torch.ops._activation_ochhfvlnc3vyc def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. 
""" - return f"_activation_78448fa::{op_name}" \ No newline at end of file + return f"_activation_ochhfvlnc3vyc::{op_name}" \ No newline at end of file diff --git a/build/torch26-cxx11-cu124-x86_64-linux/activation/layers.py b/build/torch26-cxx11-cu124-x86_64-linux/activation/layers.py deleted file mode 100644 index 99c129e3b1c9ed4c18166d5b5d67eb08f137a27f..0000000000000000000000000000000000000000 --- a/build/torch26-cxx11-cu124-x86_64-linux/activation/layers.py +++ /dev/null @@ -1,65 +0,0 @@ -import torch -import torch.nn as nn - -from ._ops import ops - - -class SiluAndMul(nn.Module): - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.silu_and_mul(out, x) - return out - - -class GeluAndMul(nn.Module): - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.gelu_and_mul(out, x) - return out - - -class GeluTanhAndMul(nn.Module): - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.gelu_tanh_and_mul(out, x) - return out - - -class FatreluAndMul(nn.Module): - def __init__(self, threshold: float = 0.0): - super().__init__() - self.threshold = threshold - - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.fatrelu_and_mul(out, x, self.threshold) - return out - - -class FastGELU(nn.Module): - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_fast(out, x) - return out - - -class NewGELU(nn.Module): - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_new(out, x) - return out - - -class QuickGELU(nn.Module): - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_quick(out, x) - return out diff --git a/build/torch26-cxx11-cu126-aarch64-linux/activation/__init__.py b/build/torch26-cxx11-cu126-aarch64-linux/activation/__init__.py deleted file mode 100644 index ddb37490dad9d8ffcbeb13ed06b33f03fef8ed78..0000000000000000000000000000000000000000 --- a/build/torch26-cxx11-cu126-aarch64-linux/activation/__init__.py +++ /dev/null @@ -1,52 +0,0 @@ -import torch - -from ._ops import ops - -from . 
import layers - - -def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: - ops.silu_and_mul(out, x) - return out - - -def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: - ops.gelu_and_mul(out, x) - return out - - -def gelu_tanh_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: - ops.gelu_tanh_and_mul(out, x) - return out - - -def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0) -> None: - ops.fatrelu_and_mul(out, x, threshold) - return out - - -def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None: - ops.gelu_fast(out, x) - return out - - -def gelu_new(out: torch.Tensor, x: torch.Tensor) -> None: - ops.gelu_new(out, x) - return out - - -def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: - ops.gelu_quick(out, x) - return out - - -__all__ = [ - "silu_and_mul", - "gelu_and_mul", - "gelu_tanh_and_mul", - "fatrelu_and_mul", - "gelu_fast", - "gelu_new", - "gelu_quick", - "layers", -] diff --git a/build/torch26-cxx11-cu126-aarch64-linux/activation/_activation_bbdc1b4_dirty.abi3.so b/build/torch26-cxx11-cu126-aarch64-linux/activation/_activation_bbdc1b4_dirty.abi3.so deleted file mode 100755 index 9b07e72a35a8215840e52e262d1593822cd2b869..0000000000000000000000000000000000000000 --- a/build/torch26-cxx11-cu126-aarch64-linux/activation/_activation_bbdc1b4_dirty.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:558e4499ad3c09d02633488cfdc802a228b78a8cd51d963c92239d44744298c7 -size 2631936 diff --git a/build/torch26-cxx11-cu126-aarch64-linux/activation/_ops.py b/build/torch26-cxx11-cu126-aarch64-linux/activation/_ops.py deleted file mode 100644 index 6244e4a74379142bc0652a345e44f788bbf5308d..0000000000000000000000000000000000000000 --- a/build/torch26-cxx11-cu126-aarch64-linux/activation/_ops.py +++ /dev/null @@ -1,9 +0,0 @@ -import torch -from . import _activation_bbdc1b4_dirty -ops = torch.ops._activation_bbdc1b4_dirty - -def add_op_namespace_prefix(op_name: str): - """ - Prefix op by namespace. 
- """ - return f"_activation_bbdc1b4_dirty::{op_name}" \ No newline at end of file diff --git a/build/torch26-cxx11-cu126-aarch64-linux/activation/layers.py b/build/torch26-cxx11-cu126-aarch64-linux/activation/layers.py deleted file mode 100644 index 99c129e3b1c9ed4c18166d5b5d67eb08f137a27f..0000000000000000000000000000000000000000 --- a/build/torch26-cxx11-cu126-aarch64-linux/activation/layers.py +++ /dev/null @@ -1,65 +0,0 @@ -import torch -import torch.nn as nn - -from ._ops import ops - - -class SiluAndMul(nn.Module): - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.silu_and_mul(out, x) - return out - - -class GeluAndMul(nn.Module): - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.gelu_and_mul(out, x) - return out - - -class GeluTanhAndMul(nn.Module): - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.gelu_tanh_and_mul(out, x) - return out - - -class FatreluAndMul(nn.Module): - def __init__(self, threshold: float = 0.0): - super().__init__() - self.threshold = threshold - - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.fatrelu_and_mul(out, x, self.threshold) - return out - - -class FastGELU(nn.Module): - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_fast(out, x) - return out - - -class NewGELU(nn.Module): - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_new(out, x) - return out - - -class QuickGELU(nn.Module): - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_quick(out, x) - return out diff --git a/build/torch26-cxx11-cu126-x86_64-linux/activation/__init__.py b/build/torch26-cxx11-cu126-x86_64-linux/activation/__init__.py index ddb37490dad9d8ffcbeb13ed06b33f03fef8ed78..71e0b01a03416c783f2bd67fc30d7ac86aee8764 100644 --- a/build/torch26-cxx11-cu126-x86_64-linux/activation/__init__.py +++ b/build/torch26-cxx11-cu126-x86_64-linux/activation/__init__.py @@ -1,8 +1,15 @@ import torch -from ._ops import ops +try: + from ._ops import ops +except ImportError as e: + # Fallback for local development. + try: + import _activation -from . 
import layers + ops = torch.ops._activition + except ImportError: + raise e def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: @@ -38,15 +45,3 @@ def gelu_new(out: torch.Tensor, x: torch.Tensor) -> None: def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: ops.gelu_quick(out, x) return out - - -__all__ = [ - "silu_and_mul", - "gelu_and_mul", - "gelu_tanh_and_mul", - "fatrelu_and_mul", - "gelu_fast", - "gelu_new", - "gelu_quick", - "layers", -] diff --git a/build/torch26-cxx11-cu126-x86_64-linux/activation/_activation_78448fa.abi3.so b/build/torch26-cxx11-cu126-x86_64-linux/activation/_activation_78448fa.abi3.so deleted file mode 100755 index 9ce2fd90f00d4e060a60a9ac438ad3cca0d91112..0000000000000000000000000000000000000000 --- a/build/torch26-cxx11-cu126-x86_64-linux/activation/_activation_78448fa.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:11a11d0f4119edc5c637bab04ebd5669750a0e4f4000f58ab1bf5be2d8d9ab0b -size 2518568 diff --git a/build/torch26-cxx11-cu126-x86_64-linux/activation/_activation_u6vnqubnicksq.abi3.so b/build/torch26-cxx11-cu126-x86_64-linux/activation/_activation_u6vnqubnicksq.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..ed72afe1b709df6b64ae4daf96dfacf5397334e1 --- /dev/null +++ b/build/torch26-cxx11-cu126-x86_64-linux/activation/_activation_u6vnqubnicksq.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41c18b20c2bf8c49d2d3088a9bc1aad4293df0b57eafc9b141a9e8e595fe551a +size 2436672 diff --git a/build/torch26-cxx11-cu126-x86_64-linux/activation/_ops.py b/build/torch26-cxx11-cu126-x86_64-linux/activation/_ops.py index 9599b1a485532e5c16dfb1bb9228c701ad4260a6..03feb54a67ee96a3181145a654e9c1d3432d3c83 100644 --- a/build/torch26-cxx11-cu126-x86_64-linux/activation/_ops.py +++ b/build/torch26-cxx11-cu126-x86_64-linux/activation/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _activation_78448fa -ops = torch.ops._activation_78448fa +from . import _activation_u6vnqubnicksq +ops = torch.ops._activation_u6vnqubnicksq def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. 
""" - return f"_activation_78448fa::{op_name}" \ No newline at end of file + return f"_activation_u6vnqubnicksq::{op_name}" \ No newline at end of file diff --git a/build/torch26-cxx11-cu126-x86_64-linux/activation/layers.py b/build/torch26-cxx11-cu126-x86_64-linux/activation/layers.py deleted file mode 100644 index 99c129e3b1c9ed4c18166d5b5d67eb08f137a27f..0000000000000000000000000000000000000000 --- a/build/torch26-cxx11-cu126-x86_64-linux/activation/layers.py +++ /dev/null @@ -1,65 +0,0 @@ -import torch -import torch.nn as nn - -from ._ops import ops - - -class SiluAndMul(nn.Module): - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.silu_and_mul(out, x) - return out - - -class GeluAndMul(nn.Module): - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.gelu_and_mul(out, x) - return out - - -class GeluTanhAndMul(nn.Module): - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.gelu_tanh_and_mul(out, x) - return out - - -class FatreluAndMul(nn.Module): - def __init__(self, threshold: float = 0.0): - super().__init__() - self.threshold = threshold - - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.fatrelu_and_mul(out, x, self.threshold) - return out - - -class FastGELU(nn.Module): - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_fast(out, x) - return out - - -class NewGELU(nn.Module): - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_new(out, x) - return out - - -class QuickGELU(nn.Module): - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_quick(out, x) - return out diff --git a/build/torch26-cxx98-cu118-x86_64-linux/activation/__init__.py b/build/torch26-cxx98-cu118-x86_64-linux/activation/__init__.py index ddb37490dad9d8ffcbeb13ed06b33f03fef8ed78..71e0b01a03416c783f2bd67fc30d7ac86aee8764 100644 --- a/build/torch26-cxx98-cu118-x86_64-linux/activation/__init__.py +++ b/build/torch26-cxx98-cu118-x86_64-linux/activation/__init__.py @@ -1,8 +1,15 @@ import torch -from ._ops import ops +try: + from ._ops import ops +except ImportError as e: + # Fallback for local development. + try: + import _activation -from . 
import layers + ops = torch.ops._activition + except ImportError: + raise e def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: @@ -38,15 +45,3 @@ def gelu_new(out: torch.Tensor, x: torch.Tensor) -> None: def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: ops.gelu_quick(out, x) return out - - -__all__ = [ - "silu_and_mul", - "gelu_and_mul", - "gelu_tanh_and_mul", - "fatrelu_and_mul", - "gelu_fast", - "gelu_new", - "gelu_quick", - "layers", -] diff --git a/build/torch26-cxx98-cu118-x86_64-linux/activation/_activation_2vn6ty3gfqfb6.abi3.so b/build/torch26-cxx98-cu118-x86_64-linux/activation/_activation_2vn6ty3gfqfb6.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..d9c4fba021babdd0966b329da77216730a212c19 --- /dev/null +++ b/build/torch26-cxx98-cu118-x86_64-linux/activation/_activation_2vn6ty3gfqfb6.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfbcd5da358cd5cb7982d19c8880cf4db6f08b46622a7a953f755ad59e4e1492 +size 2362752 diff --git a/build/torch26-cxx98-cu118-x86_64-linux/activation/_activation_78448fa.abi3.so b/build/torch26-cxx98-cu118-x86_64-linux/activation/_activation_78448fa.abi3.so deleted file mode 100755 index e5810575cad84c728e3e7e44091d9a28467d76c6..0000000000000000000000000000000000000000 --- a/build/torch26-cxx98-cu118-x86_64-linux/activation/_activation_78448fa.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:56dcc985761e309cbef3fc2a201f26e800583128d6e5a3fc1b23800fb0b8b48c -size 2440544 diff --git a/build/torch26-cxx98-cu118-x86_64-linux/activation/_ops.py b/build/torch26-cxx98-cu118-x86_64-linux/activation/_ops.py index 9599b1a485532e5c16dfb1bb9228c701ad4260a6..8ec67ec6be213233dc83cb83dcd9e3d8cade5a98 100644 --- a/build/torch26-cxx98-cu118-x86_64-linux/activation/_ops.py +++ b/build/torch26-cxx98-cu118-x86_64-linux/activation/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _activation_78448fa -ops = torch.ops._activation_78448fa +from . import _activation_2vn6ty3gfqfb6 +ops = torch.ops._activation_2vn6ty3gfqfb6 def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. 
""" - return f"_activation_78448fa::{op_name}" \ No newline at end of file + return f"_activation_2vn6ty3gfqfb6::{op_name}" \ No newline at end of file diff --git a/build/torch26-cxx98-cu118-x86_64-linux/activation/layers.py b/build/torch26-cxx98-cu118-x86_64-linux/activation/layers.py deleted file mode 100644 index 99c129e3b1c9ed4c18166d5b5d67eb08f137a27f..0000000000000000000000000000000000000000 --- a/build/torch26-cxx98-cu118-x86_64-linux/activation/layers.py +++ /dev/null @@ -1,65 +0,0 @@ -import torch -import torch.nn as nn - -from ._ops import ops - - -class SiluAndMul(nn.Module): - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.silu_and_mul(out, x) - return out - - -class GeluAndMul(nn.Module): - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.gelu_and_mul(out, x) - return out - - -class GeluTanhAndMul(nn.Module): - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.gelu_tanh_and_mul(out, x) - return out - - -class FatreluAndMul(nn.Module): - def __init__(self, threshold: float = 0.0): - super().__init__() - self.threshold = threshold - - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.fatrelu_and_mul(out, x, self.threshold) - return out - - -class FastGELU(nn.Module): - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_fast(out, x) - return out - - -class NewGELU(nn.Module): - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_new(out, x) - return out - - -class QuickGELU(nn.Module): - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_quick(out, x) - return out diff --git a/build/torch26-cxx98-cu124-x86_64-linux/activation/__init__.py b/build/torch26-cxx98-cu124-x86_64-linux/activation/__init__.py index ddb37490dad9d8ffcbeb13ed06b33f03fef8ed78..71e0b01a03416c783f2bd67fc30d7ac86aee8764 100644 --- a/build/torch26-cxx98-cu124-x86_64-linux/activation/__init__.py +++ b/build/torch26-cxx98-cu124-x86_64-linux/activation/__init__.py @@ -1,8 +1,15 @@ import torch -from ._ops import ops +try: + from ._ops import ops +except ImportError as e: + # Fallback for local development. + try: + import _activation -from . 
import layers + ops = torch.ops._activition + except ImportError: + raise e def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: @@ -38,15 +45,3 @@ def gelu_new(out: torch.Tensor, x: torch.Tensor) -> None: def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: ops.gelu_quick(out, x) return out - - -__all__ = [ - "silu_and_mul", - "gelu_and_mul", - "gelu_tanh_and_mul", - "fatrelu_and_mul", - "gelu_fast", - "gelu_new", - "gelu_quick", - "layers", -] diff --git a/build/torch26-cxx98-cu124-x86_64-linux/activation/_activation_78448fa.abi3.so b/build/torch26-cxx98-cu124-x86_64-linux/activation/_activation_78448fa.abi3.so deleted file mode 100755 index a61e6f40574131ce5866efe651db62af196eebe2..0000000000000000000000000000000000000000 --- a/build/torch26-cxx98-cu124-x86_64-linux/activation/_activation_78448fa.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:03c5f08322796d0736024412babe5d7f13bb1126387976ae12a80485a40d3883 -size 2502240 diff --git a/build/torch26-cxx98-cu124-x86_64-linux/activation/_activation_myvteedxdpqc6.abi3.so b/build/torch26-cxx98-cu124-x86_64-linux/activation/_activation_myvteedxdpqc6.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..486879e6269e0c7d0763ac11d82a100dec1b1b91 --- /dev/null +++ b/build/torch26-cxx98-cu124-x86_64-linux/activation/_activation_myvteedxdpqc6.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1bc928823117c800904bcd3492bf1a0c65a32f6d8a842dc039f55e29831ab49 +size 2420344 diff --git a/build/torch26-cxx98-cu124-x86_64-linux/activation/_ops.py b/build/torch26-cxx98-cu124-x86_64-linux/activation/_ops.py index 9599b1a485532e5c16dfb1bb9228c701ad4260a6..0f4e8d31b42ed7be77b8eaef9aa29251327009bf 100644 --- a/build/torch26-cxx98-cu124-x86_64-linux/activation/_ops.py +++ b/build/torch26-cxx98-cu124-x86_64-linux/activation/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _activation_78448fa -ops = torch.ops._activation_78448fa +from . import _activation_myvteedxdpqc6 +ops = torch.ops._activation_myvteedxdpqc6 def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. 
""" - return f"_activation_78448fa::{op_name}" \ No newline at end of file + return f"_activation_myvteedxdpqc6::{op_name}" \ No newline at end of file diff --git a/build/torch26-cxx98-cu124-x86_64-linux/activation/layers.py b/build/torch26-cxx98-cu124-x86_64-linux/activation/layers.py deleted file mode 100644 index 99c129e3b1c9ed4c18166d5b5d67eb08f137a27f..0000000000000000000000000000000000000000 --- a/build/torch26-cxx98-cu124-x86_64-linux/activation/layers.py +++ /dev/null @@ -1,65 +0,0 @@ -import torch -import torch.nn as nn - -from ._ops import ops - - -class SiluAndMul(nn.Module): - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.silu_and_mul(out, x) - return out - - -class GeluAndMul(nn.Module): - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.gelu_and_mul(out, x) - return out - - -class GeluTanhAndMul(nn.Module): - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.gelu_tanh_and_mul(out, x) - return out - - -class FatreluAndMul(nn.Module): - def __init__(self, threshold: float = 0.0): - super().__init__() - self.threshold = threshold - - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.fatrelu_and_mul(out, x, self.threshold) - return out - - -class FastGELU(nn.Module): - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_fast(out, x) - return out - - -class NewGELU(nn.Module): - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_new(out, x) - return out - - -class QuickGELU(nn.Module): - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_quick(out, x) - return out diff --git a/build/torch26-cxx98-cu126-aarch64-linux/activation/__init__.py b/build/torch26-cxx98-cu126-aarch64-linux/activation/__init__.py deleted file mode 100644 index ddb37490dad9d8ffcbeb13ed06b33f03fef8ed78..0000000000000000000000000000000000000000 --- a/build/torch26-cxx98-cu126-aarch64-linux/activation/__init__.py +++ /dev/null @@ -1,52 +0,0 @@ -import torch - -from ._ops import ops - -from . 
import layers - - -def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: - ops.silu_and_mul(out, x) - return out - - -def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: - ops.gelu_and_mul(out, x) - return out - - -def gelu_tanh_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: - ops.gelu_tanh_and_mul(out, x) - return out - - -def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0) -> None: - ops.fatrelu_and_mul(out, x, threshold) - return out - - -def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None: - ops.gelu_fast(out, x) - return out - - -def gelu_new(out: torch.Tensor, x: torch.Tensor) -> None: - ops.gelu_new(out, x) - return out - - -def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: - ops.gelu_quick(out, x) - return out - - -__all__ = [ - "silu_and_mul", - "gelu_and_mul", - "gelu_tanh_and_mul", - "fatrelu_and_mul", - "gelu_fast", - "gelu_new", - "gelu_quick", - "layers", -] diff --git a/build/torch26-cxx98-cu126-aarch64-linux/activation/_activation_bbdc1b4_dirty.abi3.so b/build/torch26-cxx98-cu126-aarch64-linux/activation/_activation_bbdc1b4_dirty.abi3.so deleted file mode 100755 index b19813010320a7b7df823587e9cf0b78a0c7f760..0000000000000000000000000000000000000000 --- a/build/torch26-cxx98-cu126-aarch64-linux/activation/_activation_bbdc1b4_dirty.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f6afd50526ff4221cddd52cb947900cdf6bb95ad0a6bffcd1a86bda4d3f52349 -size 2628128 diff --git a/build/torch26-cxx98-cu126-aarch64-linux/activation/_ops.py b/build/torch26-cxx98-cu126-aarch64-linux/activation/_ops.py deleted file mode 100644 index 6244e4a74379142bc0652a345e44f788bbf5308d..0000000000000000000000000000000000000000 --- a/build/torch26-cxx98-cu126-aarch64-linux/activation/_ops.py +++ /dev/null @@ -1,9 +0,0 @@ -import torch -from . import _activation_bbdc1b4_dirty -ops = torch.ops._activation_bbdc1b4_dirty - -def add_op_namespace_prefix(op_name: str): - """ - Prefix op by namespace. 
- """ - return f"_activation_bbdc1b4_dirty::{op_name}" \ No newline at end of file diff --git a/build/torch26-cxx98-cu126-aarch64-linux/activation/layers.py b/build/torch26-cxx98-cu126-aarch64-linux/activation/layers.py deleted file mode 100644 index 99c129e3b1c9ed4c18166d5b5d67eb08f137a27f..0000000000000000000000000000000000000000 --- a/build/torch26-cxx98-cu126-aarch64-linux/activation/layers.py +++ /dev/null @@ -1,65 +0,0 @@ -import torch -import torch.nn as nn - -from ._ops import ops - - -class SiluAndMul(nn.Module): - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.silu_and_mul(out, x) - return out - - -class GeluAndMul(nn.Module): - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.gelu_and_mul(out, x) - return out - - -class GeluTanhAndMul(nn.Module): - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.gelu_tanh_and_mul(out, x) - return out - - -class FatreluAndMul(nn.Module): - def __init__(self, threshold: float = 0.0): - super().__init__() - self.threshold = threshold - - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.fatrelu_and_mul(out, x, self.threshold) - return out - - -class FastGELU(nn.Module): - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_fast(out, x) - return out - - -class NewGELU(nn.Module): - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_new(out, x) - return out - - -class QuickGELU(nn.Module): - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_quick(out, x) - return out diff --git a/build/torch26-cxx98-cu126-x86_64-linux/activation/__init__.py b/build/torch26-cxx98-cu126-x86_64-linux/activation/__init__.py index ddb37490dad9d8ffcbeb13ed06b33f03fef8ed78..71e0b01a03416c783f2bd67fc30d7ac86aee8764 100644 --- a/build/torch26-cxx98-cu126-x86_64-linux/activation/__init__.py +++ b/build/torch26-cxx98-cu126-x86_64-linux/activation/__init__.py @@ -1,8 +1,15 @@ import torch -from ._ops import ops +try: + from ._ops import ops +except ImportError as e: + # Fallback for local development. + try: + import _activation -from . 
import layers + ops = torch.ops._activation + except ImportError: + raise e def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: @@ -38,15 +45,3 @@ def gelu_new(out: torch.Tensor, x: torch.Tensor) -> None: def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: ops.gelu_quick(out, x) return out - - -__all__ = [ - "silu_and_mul", - "gelu_and_mul", - "gelu_tanh_and_mul", - "fatrelu_and_mul", - "gelu_fast", - "gelu_new", - "gelu_quick", - "layers", -] diff --git a/build/torch26-cxx98-cu126-x86_64-linux/activation/_activation_78448fa.abi3.so b/build/torch26-cxx98-cu126-x86_64-linux/activation/_activation_78448fa.abi3.so deleted file mode 100755 index 5967a78d0929f911fb8a64d92bf379f6258edc47..0000000000000000000000000000000000000000 --- a/build/torch26-cxx98-cu126-x86_64-linux/activation/_activation_78448fa.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f6eae5c895c564fbd2524ce488f4e91e65dc63402cd41a8bc74474b7437b2e62 -size 2506784 diff --git a/build/torch26-cxx98-cu126-x86_64-linux/activation/_activation_rbswus6emrhm2.abi3.so b/build/torch26-cxx98-cu126-x86_64-linux/activation/_activation_rbswus6emrhm2.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..42ad3291140b013999afb683ec84c23804759e46 --- /dev/null +++ b/build/torch26-cxx98-cu126-x86_64-linux/activation/_activation_rbswus6emrhm2.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:474727e434a9cd4ec984a6da7124992ead4ca0fefce9581d0fd503e36c065aed +size 2424888 diff --git a/build/torch26-cxx98-cu126-x86_64-linux/activation/_ops.py b/build/torch26-cxx98-cu126-x86_64-linux/activation/_ops.py index 9599b1a485532e5c16dfb1bb9228c701ad4260a6..c6d4e4c91a867d657f287510c40366bccef86c94 100644 --- a/build/torch26-cxx98-cu126-x86_64-linux/activation/_ops.py +++ b/build/torch26-cxx98-cu126-x86_64-linux/activation/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _activation_78448fa -ops = torch.ops._activation_78448fa +from . import _activation_rbswus6emrhm2 +ops = torch.ops._activation_rbswus6emrhm2 def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. 
""" - return f"_activation_78448fa::{op_name}" \ No newline at end of file + return f"_activation_rbswus6emrhm2::{op_name}" \ No newline at end of file diff --git a/build/torch26-cxx98-cu126-x86_64-linux/activation/layers.py b/build/torch26-cxx98-cu126-x86_64-linux/activation/layers.py deleted file mode 100644 index 99c129e3b1c9ed4c18166d5b5d67eb08f137a27f..0000000000000000000000000000000000000000 --- a/build/torch26-cxx98-cu126-x86_64-linux/activation/layers.py +++ /dev/null @@ -1,65 +0,0 @@ -import torch -import torch.nn as nn - -from ._ops import ops - - -class SiluAndMul(nn.Module): - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.silu_and_mul(out, x) - return out - - -class GeluAndMul(nn.Module): - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.gelu_and_mul(out, x) - return out - - -class GeluTanhAndMul(nn.Module): - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.gelu_tanh_and_mul(out, x) - return out - - -class FatreluAndMul(nn.Module): - def __init__(self, threshold: float = 0.0): - super().__init__() - self.threshold = threshold - - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.fatrelu_and_mul(out, x, self.threshold) - return out - - -class FastGELU(nn.Module): - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_fast(out, x) - return out - - -class NewGELU(nn.Module): - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_new(out, x) - return out - - -class QuickGELU(nn.Module): - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_quick(out, x) - return out diff --git a/build/torch27-cxx11-cu118-x86_64-linux/activation/__init__.py b/build/torch27-cxx11-cu118-x86_64-linux/activation/__init__.py deleted file mode 100644 index ddb37490dad9d8ffcbeb13ed06b33f03fef8ed78..0000000000000000000000000000000000000000 --- a/build/torch27-cxx11-cu118-x86_64-linux/activation/__init__.py +++ /dev/null @@ -1,52 +0,0 @@ -import torch - -from ._ops import ops - -from . 
import layers - - -def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: - ops.silu_and_mul(out, x) - return out - - -def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: - ops.gelu_and_mul(out, x) - return out - - -def gelu_tanh_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: - ops.gelu_tanh_and_mul(out, x) - return out - - -def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0) -> None: - ops.fatrelu_and_mul(out, x, threshold) - return out - - -def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None: - ops.gelu_fast(out, x) - return out - - -def gelu_new(out: torch.Tensor, x: torch.Tensor) -> None: - ops.gelu_new(out, x) - return out - - -def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: - ops.gelu_quick(out, x) - return out - - -__all__ = [ - "silu_and_mul", - "gelu_and_mul", - "gelu_tanh_and_mul", - "fatrelu_and_mul", - "gelu_fast", - "gelu_new", - "gelu_quick", - "layers", -] diff --git a/build/torch27-cxx11-cu118-x86_64-linux/activation/_activation_78448fa.abi3.so b/build/torch27-cxx11-cu118-x86_64-linux/activation/_activation_78448fa.abi3.so deleted file mode 100755 index 0b9f449b2c357ba217eefceca0f50ffc270df387..0000000000000000000000000000000000000000 --- a/build/torch27-cxx11-cu118-x86_64-linux/activation/_activation_78448fa.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f8086b2d9e0f2db80385b83e0bc28f8d158725d002e1613e1a46a87732197e9f -size 2448152 diff --git a/build/torch27-cxx11-cu118-x86_64-linux/activation/_ops.py b/build/torch27-cxx11-cu118-x86_64-linux/activation/_ops.py deleted file mode 100644 index 9599b1a485532e5c16dfb1bb9228c701ad4260a6..0000000000000000000000000000000000000000 --- a/build/torch27-cxx11-cu118-x86_64-linux/activation/_ops.py +++ /dev/null @@ -1,9 +0,0 @@ -import torch -from . import _activation_78448fa -ops = torch.ops._activation_78448fa - -def add_op_namespace_prefix(op_name: str): - """ - Prefix op by namespace. 
- """ - return f"_activation_78448fa::{op_name}" \ No newline at end of file diff --git a/build/torch27-cxx11-cu118-x86_64-linux/activation/layers.py b/build/torch27-cxx11-cu118-x86_64-linux/activation/layers.py deleted file mode 100644 index 99c129e3b1c9ed4c18166d5b5d67eb08f137a27f..0000000000000000000000000000000000000000 --- a/build/torch27-cxx11-cu118-x86_64-linux/activation/layers.py +++ /dev/null @@ -1,65 +0,0 @@ -import torch -import torch.nn as nn - -from ._ops import ops - - -class SiluAndMul(nn.Module): - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.silu_and_mul(out, x) - return out - - -class GeluAndMul(nn.Module): - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.gelu_and_mul(out, x) - return out - - -class GeluTanhAndMul(nn.Module): - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.gelu_tanh_and_mul(out, x) - return out - - -class FatreluAndMul(nn.Module): - def __init__(self, threshold: float = 0.0): - super().__init__() - self.threshold = threshold - - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.fatrelu_and_mul(out, x, self.threshold) - return out - - -class FastGELU(nn.Module): - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_fast(out, x) - return out - - -class NewGELU(nn.Module): - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_new(out, x) - return out - - -class QuickGELU(nn.Module): - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_quick(out, x) - return out diff --git a/build/torch27-cxx11-cu126-aarch64-linux/activation/__init__.py b/build/torch27-cxx11-cu126-aarch64-linux/activation/__init__.py deleted file mode 100644 index ddb37490dad9d8ffcbeb13ed06b33f03fef8ed78..0000000000000000000000000000000000000000 --- a/build/torch27-cxx11-cu126-aarch64-linux/activation/__init__.py +++ /dev/null @@ -1,52 +0,0 @@ -import torch - -from ._ops import ops - -from . 
import layers - - -def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: - ops.silu_and_mul(out, x) - return out - - -def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: - ops.gelu_and_mul(out, x) - return out - - -def gelu_tanh_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: - ops.gelu_tanh_and_mul(out, x) - return out - - -def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0) -> None: - ops.fatrelu_and_mul(out, x, threshold) - return out - - -def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None: - ops.gelu_fast(out, x) - return out - - -def gelu_new(out: torch.Tensor, x: torch.Tensor) -> None: - ops.gelu_new(out, x) - return out - - -def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: - ops.gelu_quick(out, x) - return out - - -__all__ = [ - "silu_and_mul", - "gelu_and_mul", - "gelu_tanh_and_mul", - "fatrelu_and_mul", - "gelu_fast", - "gelu_new", - "gelu_quick", - "layers", -] diff --git a/build/torch27-cxx11-cu126-aarch64-linux/activation/_activation_bbdc1b4_dirty.abi3.so b/build/torch27-cxx11-cu126-aarch64-linux/activation/_activation_bbdc1b4_dirty.abi3.so deleted file mode 100755 index a26b59f33b9ede8cc6088d362932a3a950705ef9..0000000000000000000000000000000000000000 --- a/build/torch27-cxx11-cu126-aarch64-linux/activation/_activation_bbdc1b4_dirty.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4210a6598f00b8921ecba1a0e24603eb05437a876ca1f473d2641e11d9a67ece -size 2632160 diff --git a/build/torch27-cxx11-cu126-aarch64-linux/activation/_ops.py b/build/torch27-cxx11-cu126-aarch64-linux/activation/_ops.py deleted file mode 100644 index 6244e4a74379142bc0652a345e44f788bbf5308d..0000000000000000000000000000000000000000 --- a/build/torch27-cxx11-cu126-aarch64-linux/activation/_ops.py +++ /dev/null @@ -1,9 +0,0 @@ -import torch -from . import _activation_bbdc1b4_dirty -ops = torch.ops._activation_bbdc1b4_dirty - -def add_op_namespace_prefix(op_name: str): - """ - Prefix op by namespace. 
- """ - return f"_activation_bbdc1b4_dirty::{op_name}" \ No newline at end of file diff --git a/build/torch27-cxx11-cu126-aarch64-linux/activation/layers.py b/build/torch27-cxx11-cu126-aarch64-linux/activation/layers.py deleted file mode 100644 index 99c129e3b1c9ed4c18166d5b5d67eb08f137a27f..0000000000000000000000000000000000000000 --- a/build/torch27-cxx11-cu126-aarch64-linux/activation/layers.py +++ /dev/null @@ -1,65 +0,0 @@ -import torch -import torch.nn as nn - -from ._ops import ops - - -class SiluAndMul(nn.Module): - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.silu_and_mul(out, x) - return out - - -class GeluAndMul(nn.Module): - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.gelu_and_mul(out, x) - return out - - -class GeluTanhAndMul(nn.Module): - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.gelu_tanh_and_mul(out, x) - return out - - -class FatreluAndMul(nn.Module): - def __init__(self, threshold: float = 0.0): - super().__init__() - self.threshold = threshold - - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.fatrelu_and_mul(out, x, self.threshold) - return out - - -class FastGELU(nn.Module): - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_fast(out, x) - return out - - -class NewGELU(nn.Module): - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_new(out, x) - return out - - -class QuickGELU(nn.Module): - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_quick(out, x) - return out diff --git a/build/torch27-cxx11-cu126-x86_64-linux/activation/__init__.py b/build/torch27-cxx11-cu126-x86_64-linux/activation/__init__.py deleted file mode 100644 index ddb37490dad9d8ffcbeb13ed06b33f03fef8ed78..0000000000000000000000000000000000000000 --- a/build/torch27-cxx11-cu126-x86_64-linux/activation/__init__.py +++ /dev/null @@ -1,52 +0,0 @@ -import torch - -from ._ops import ops - -from . 
import layers - - -def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: - ops.silu_and_mul(out, x) - return out - - -def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: - ops.gelu_and_mul(out, x) - return out - - -def gelu_tanh_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: - ops.gelu_tanh_and_mul(out, x) - return out - - -def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0) -> None: - ops.fatrelu_and_mul(out, x, threshold) - return out - - -def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None: - ops.gelu_fast(out, x) - return out - - -def gelu_new(out: torch.Tensor, x: torch.Tensor) -> None: - ops.gelu_new(out, x) - return out - - -def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: - ops.gelu_quick(out, x) - return out - - -__all__ = [ - "silu_and_mul", - "gelu_and_mul", - "gelu_tanh_and_mul", - "fatrelu_and_mul", - "gelu_fast", - "gelu_new", - "gelu_quick", - "layers", -] diff --git a/build/torch27-cxx11-cu126-x86_64-linux/activation/_activation_78448fa.abi3.so b/build/torch27-cxx11-cu126-x86_64-linux/activation/_activation_78448fa.abi3.so deleted file mode 100755 index 565f878ddcee0b2d24c30526b0975f5195e2806a..0000000000000000000000000000000000000000 --- a/build/torch27-cxx11-cu126-x86_64-linux/activation/_activation_78448fa.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:22ed530294eb70c8261e581615bd9da0d2dc1ba8c3f0dcc3696cff9be62580cb -size 2518600 diff --git a/build/torch27-cxx11-cu126-x86_64-linux/activation/_ops.py b/build/torch27-cxx11-cu126-x86_64-linux/activation/_ops.py deleted file mode 100644 index 9599b1a485532e5c16dfb1bb9228c701ad4260a6..0000000000000000000000000000000000000000 --- a/build/torch27-cxx11-cu126-x86_64-linux/activation/_ops.py +++ /dev/null @@ -1,9 +0,0 @@ -import torch -from . import _activation_78448fa -ops = torch.ops._activation_78448fa - -def add_op_namespace_prefix(op_name: str): - """ - Prefix op by namespace. 
- """ - return f"_activation_78448fa::{op_name}" \ No newline at end of file diff --git a/build/torch27-cxx11-cu126-x86_64-linux/activation/layers.py b/build/torch27-cxx11-cu126-x86_64-linux/activation/layers.py deleted file mode 100644 index 99c129e3b1c9ed4c18166d5b5d67eb08f137a27f..0000000000000000000000000000000000000000 --- a/build/torch27-cxx11-cu126-x86_64-linux/activation/layers.py +++ /dev/null @@ -1,65 +0,0 @@ -import torch -import torch.nn as nn - -from ._ops import ops - - -class SiluAndMul(nn.Module): - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.silu_and_mul(out, x) - return out - - -class GeluAndMul(nn.Module): - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.gelu_and_mul(out, x) - return out - - -class GeluTanhAndMul(nn.Module): - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.gelu_tanh_and_mul(out, x) - return out - - -class FatreluAndMul(nn.Module): - def __init__(self, threshold: float = 0.0): - super().__init__() - self.threshold = threshold - - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.fatrelu_and_mul(out, x, self.threshold) - return out - - -class FastGELU(nn.Module): - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_fast(out, x) - return out - - -class NewGELU(nn.Module): - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_new(out, x) - return out - - -class QuickGELU(nn.Module): - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_quick(out, x) - return out diff --git a/build/torch27-cxx11-cu128-aarch64-linux/activation/__init__.py b/build/torch27-cxx11-cu128-aarch64-linux/activation/__init__.py deleted file mode 100644 index ddb37490dad9d8ffcbeb13ed06b33f03fef8ed78..0000000000000000000000000000000000000000 --- a/build/torch27-cxx11-cu128-aarch64-linux/activation/__init__.py +++ /dev/null @@ -1,52 +0,0 @@ -import torch - -from ._ops import ops - -from . 
import layers - - -def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: - ops.silu_and_mul(out, x) - return out - - -def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: - ops.gelu_and_mul(out, x) - return out - - -def gelu_tanh_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: - ops.gelu_tanh_and_mul(out, x) - return out - - -def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0) -> None: - ops.fatrelu_and_mul(out, x, threshold) - return out - - -def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None: - ops.gelu_fast(out, x) - return out - - -def gelu_new(out: torch.Tensor, x: torch.Tensor) -> None: - ops.gelu_new(out, x) - return out - - -def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: - ops.gelu_quick(out, x) - return out - - -__all__ = [ - "silu_and_mul", - "gelu_and_mul", - "gelu_tanh_and_mul", - "fatrelu_and_mul", - "gelu_fast", - "gelu_new", - "gelu_quick", - "layers", -] diff --git a/build/torch27-cxx11-cu128-aarch64-linux/activation/_activation_bbdc1b4_dirty.abi3.so b/build/torch27-cxx11-cu128-aarch64-linux/activation/_activation_bbdc1b4_dirty.abi3.so deleted file mode 100755 index df74edb6da28103fb058acf976502b52d10294c8..0000000000000000000000000000000000000000 --- a/build/torch27-cxx11-cu128-aarch64-linux/activation/_activation_bbdc1b4_dirty.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d669f5fb8675b5a8f7511a16c6c61148d3701169bf5c5469159b12001eacfbac -size 3418712 diff --git a/build/torch27-cxx11-cu128-aarch64-linux/activation/_ops.py b/build/torch27-cxx11-cu128-aarch64-linux/activation/_ops.py deleted file mode 100644 index 6244e4a74379142bc0652a345e44f788bbf5308d..0000000000000000000000000000000000000000 --- a/build/torch27-cxx11-cu128-aarch64-linux/activation/_ops.py +++ /dev/null @@ -1,9 +0,0 @@ -import torch -from . import _activation_bbdc1b4_dirty -ops = torch.ops._activation_bbdc1b4_dirty - -def add_op_namespace_prefix(op_name: str): - """ - Prefix op by namespace. 
- """ - return f"_activation_bbdc1b4_dirty::{op_name}" \ No newline at end of file diff --git a/build/torch27-cxx11-cu128-aarch64-linux/activation/layers.py b/build/torch27-cxx11-cu128-aarch64-linux/activation/layers.py deleted file mode 100644 index 99c129e3b1c9ed4c18166d5b5d67eb08f137a27f..0000000000000000000000000000000000000000 --- a/build/torch27-cxx11-cu128-aarch64-linux/activation/layers.py +++ /dev/null @@ -1,65 +0,0 @@ -import torch -import torch.nn as nn - -from ._ops import ops - - -class SiluAndMul(nn.Module): - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.silu_and_mul(out, x) - return out - - -class GeluAndMul(nn.Module): - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.gelu_and_mul(out, x) - return out - - -class GeluTanhAndMul(nn.Module): - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.gelu_tanh_and_mul(out, x) - return out - - -class FatreluAndMul(nn.Module): - def __init__(self, threshold: float = 0.0): - super().__init__() - self.threshold = threshold - - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.fatrelu_and_mul(out, x, self.threshold) - return out - - -class FastGELU(nn.Module): - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_fast(out, x) - return out - - -class NewGELU(nn.Module): - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_new(out, x) - return out - - -class QuickGELU(nn.Module): - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_quick(out, x) - return out diff --git a/build/torch27-cxx11-cu128-x86_64-linux/activation/__init__.py b/build/torch27-cxx11-cu128-x86_64-linux/activation/__init__.py deleted file mode 100644 index ddb37490dad9d8ffcbeb13ed06b33f03fef8ed78..0000000000000000000000000000000000000000 --- a/build/torch27-cxx11-cu128-x86_64-linux/activation/__init__.py +++ /dev/null @@ -1,52 +0,0 @@ -import torch - -from ._ops import ops - -from . 
import layers - - -def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: - ops.silu_and_mul(out, x) - return out - - -def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: - ops.gelu_and_mul(out, x) - return out - - -def gelu_tanh_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: - ops.gelu_tanh_and_mul(out, x) - return out - - -def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0) -> None: - ops.fatrelu_and_mul(out, x, threshold) - return out - - -def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None: - ops.gelu_fast(out, x) - return out - - -def gelu_new(out: torch.Tensor, x: torch.Tensor) -> None: - ops.gelu_new(out, x) - return out - - -def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: - ops.gelu_quick(out, x) - return out - - -__all__ = [ - "silu_and_mul", - "gelu_and_mul", - "gelu_tanh_and_mul", - "fatrelu_and_mul", - "gelu_fast", - "gelu_new", - "gelu_quick", - "layers", -] diff --git a/build/torch27-cxx11-cu128-x86_64-linux/activation/_activation_78448fa.abi3.so b/build/torch27-cxx11-cu128-x86_64-linux/activation/_activation_78448fa.abi3.so deleted file mode 100755 index 682fe4248f9bfea5d25d30b6f4cbcd7590a5c3d3..0000000000000000000000000000000000000000 --- a/build/torch27-cxx11-cu128-x86_64-linux/activation/_activation_78448fa.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5d168f3ecfc9539e9a2f0af0a5f533bd958682efd1cc5bd716a964d8f1b6f679 -size 3331432 diff --git a/build/torch27-cxx11-cu128-x86_64-linux/activation/_ops.py b/build/torch27-cxx11-cu128-x86_64-linux/activation/_ops.py deleted file mode 100644 index 9599b1a485532e5c16dfb1bb9228c701ad4260a6..0000000000000000000000000000000000000000 --- a/build/torch27-cxx11-cu128-x86_64-linux/activation/_ops.py +++ /dev/null @@ -1,9 +0,0 @@ -import torch -from . import _activation_78448fa -ops = torch.ops._activation_78448fa - -def add_op_namespace_prefix(op_name: str): - """ - Prefix op by namespace. 
- """ - return f"_activation_78448fa::{op_name}" \ No newline at end of file diff --git a/build/torch27-cxx11-cu128-x86_64-linux/activation/layers.py b/build/torch27-cxx11-cu128-x86_64-linux/activation/layers.py deleted file mode 100644 index 99c129e3b1c9ed4c18166d5b5d67eb08f137a27f..0000000000000000000000000000000000000000 --- a/build/torch27-cxx11-cu128-x86_64-linux/activation/layers.py +++ /dev/null @@ -1,65 +0,0 @@ -import torch -import torch.nn as nn - -from ._ops import ops - - -class SiluAndMul(nn.Module): - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.silu_and_mul(out, x) - return out - - -class GeluAndMul(nn.Module): - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.gelu_and_mul(out, x) - return out - - -class GeluTanhAndMul(nn.Module): - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.gelu_tanh_and_mul(out, x) - return out - - -class FatreluAndMul(nn.Module): - def __init__(self, threshold: float = 0.0): - super().__init__() - self.threshold = threshold - - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.fatrelu_and_mul(out, x, self.threshold) - return out - - -class FastGELU(nn.Module): - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_fast(out, x) - return out - - -class NewGELU(nn.Module): - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_new(out, x) - return out - - -class QuickGELU(nn.Module): - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_quick(out, x) - return out diff --git a/flake.lock b/flake.lock deleted file mode 100644 index ec3949cdc453e65c36919cc70ff2d8eeff85797f..0000000000000000000000000000000000000000 --- a/flake.lock +++ /dev/null @@ -1,117 +0,0 @@ -{ - "nodes": { - "flake-compat": { - "locked": { - "lastModified": 1733328505, - "narHash": "sha256-NeCCThCEP3eCl2l/+27kNNK7QrwZB1IJCrXfrbv5oqU=", - "owner": "edolstra", - "repo": "flake-compat", - "rev": "ff81ac966bb2cae68946d5ed5fc4994f96d0ffec", - "type": "github" - }, - "original": { - "owner": "edolstra", - "repo": "flake-compat", - "type": "github" - } - }, - "flake-utils": { - "inputs": { - "systems": "systems" - }, - "locked": { - "lastModified": 1731533236, - "narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=", - "owner": "numtide", - "repo": "flake-utils", - "rev": "11707dc2f618dd54ca8739b309ec4fc024de578b", - "type": "github" - }, - "original": { - "owner": "numtide", - "repo": "flake-utils", - "type": "github" - } - }, - "kernel-builder": { - "inputs": { - "flake-compat": "flake-compat", - "flake-utils": "flake-utils", - "nixpkgs": "nixpkgs", - "rocm-nix": "rocm-nix" - }, - "locked": { - "lastModified": 1744976941, - "narHash": "sha256-+csrhVaT6Mj2j1FM7P2BDITvf1Xwj2AKdMm0IKZK340=", - "owner": "huggingface", - "repo": "kernel-builder", - "rev": "0a278c2e9aaf6003a4ec6fe35c7158624762de5a", - "type": "github" - }, - "original": { - "owner": "huggingface", - "repo": "kernel-builder", - "type": "github" - } - }, - "nixpkgs": { - "locked": { - "lastModified": 1743559129, - "narHash": 
"sha256-7gpAWsENV3tY2HmeHYQ2MoQxGpys+jQWnkS/BHAMXVk=", - "owner": "nixos", - "repo": "nixpkgs", - "rev": "adae22bea8bcc0aa2fd6e8732044660fb7755f5e", - "type": "github" - }, - "original": { - "owner": "nixos", - "ref": "nixos-unstable-small", - "repo": "nixpkgs", - "type": "github" - } - }, - "rocm-nix": { - "inputs": { - "nixpkgs": [ - "kernel-builder", - "nixpkgs" - ] - }, - "locked": { - "lastModified": 1743085847, - "narHash": "sha256-uWG29p+nhZmGRV1LffWwRGjwtPIXeu1F0YTQbXgB+GU=", - "owner": "huggingface", - "repo": "rocm-nix", - "rev": "245cdc9bfb4bfafa818711c5f5e0b889afe1ba39", - "type": "github" - }, - "original": { - "owner": "huggingface", - "repo": "rocm-nix", - "type": "github" - } - }, - "root": { - "inputs": { - "kernel-builder": "kernel-builder" - } - }, - "systems": { - "locked": { - "lastModified": 1681028828, - "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", - "owner": "nix-systems", - "repo": "default", - "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", - "type": "github" - }, - "original": { - "owner": "nix-systems", - "repo": "default", - "type": "github" - } - } - }, - "root": "root", - "version": 7 -} diff --git a/flake.nix b/flake.nix index 54ac44c0698d43fb86a123430f5e9d2e9bcda8ea..f2e6b1f85c809079eb671a60228bbab307616f1a 100644 --- a/flake.nix +++ b/flake.nix @@ -2,7 +2,7 @@ description = "Flake for activation kernels"; inputs = { - kernel-builder.url = "github:huggingface/kernel-builder"; + kernel-builder.url = "git+ssh://git@github.com/huggingface/kernel-builder"; }; outputs = @@ -10,8 +10,5 @@ self, kernel-builder, }: - kernel-builder.lib.genFlakeOutputs { - path = ./.; - rev = self.shortRev or self.dirtyShortRev or self.lastModifiedDate; - }; + kernel-builder.lib.genFlakeOutputs ./.; } diff --git a/tests/kernels/test_activation.py b/tests/kernels/test_activation.py index 5d6aa773c9abcb5d3c0d61646e465aae9951966d..2f67a94f73db9ac8ed8d0c6a4b642702284ced9c 100644 --- a/tests/kernels/test_activation.py +++ b/tests/kernels/test_activation.py @@ -71,34 +71,28 @@ def test_act_and_mul( torch_fn = silu_and_mul fn = activation.silu_and_mul op = activation.ops.silu_and_mul - layer = activation.layers.SiluAndMul() elif activation_name == "gelu": torch_fn = lambda x: gelu_and_mul(x, "none") fn = activation.gelu_and_mul op = activation.ops.gelu_and_mul - layer = activation.layers.GeluAndMul() elif activation_name == "gelu_tanh": torch_fn = lambda x: gelu_and_mul(x, "tanh") fn = activation.gelu_tanh_and_mul op = activation.ops.gelu_tanh_and_mul - layer = activation.layers.GeluTanhAndMul() elif activation_name == "fatrelu": threshold = random.uniform(0, 1) torch_fn = lambda x: fatrelu_and_mul(x, threshold) fn = lambda out, x: activation.fatrelu_and_mul(out, x, threshold) op = activation.ops.fatrelu_and_mul - layer = activation.layers.FatreluAndMul(threshold) out_shape = x.shape[:-1] + (x.shape[-1] // 2,) out = torch.empty(out_shape, dtype=x.dtype, device=x.device) out = fn(out, x) - mod_out = layer(x) ref_out = torch_fn(x) # The SiLU, GELU and FatReLU implementations are equivalent to the native # PyTorch implementations, so we can do exact comparison. 
torch.testing.assert_close(out, ref_out, atol=0.0, rtol=0.0) - torch.testing.assert_close(mod_out, ref_out, atol=0.0, rtol=0.0) d = x.shape[-1] // 2 output_shape = x.shape[:-1] + (d,) @@ -112,24 +106,9 @@ def test_act_and_mul( @pytest.mark.parametrize( "activation_fns", [ - ( - gelu_fast, - activation.gelu_fast, - activation.ops.gelu_fast, - activation.layers.FastGELU, - ), - ( - gelu_new, - activation.gelu_new, - activation.ops.gelu_new, - activation.layers.NewGELU, - ), - ( - gelu_quick, - activation.gelu_quick, - activation.ops.gelu_quick, - activation.layers.QuickGELU, - ), + (gelu_fast, activation.gelu_fast, activation.ops.gelu_fast), + (gelu_new, activation.gelu_new, activation.ops.gelu_new), + (gelu_quick, activation.gelu_quick, activation.ops.gelu_quick), ], ) @pytest.mark.parametrize("num_tokens", NUM_TOKENS) @@ -149,17 +128,12 @@ def test_activation( torch.manual_seed(seed) torch.set_default_device(device) x = torch.randn(num_tokens, d, dtype=dtype) - torch_fn, fn, op, cls = activation_fns - layer = cls() + torch_fn, fn, op = activation_fns out = fn(torch.empty_like(x), x) - layer_out = layer(x) ref_out = torch_fn(x) torch.testing.assert_close( out, ref_out, atol=get_default_atol(out), rtol=get_default_rtol(out) ) - torch.testing.assert_close( - out, layer_out, atol=get_default_atol(out), rtol=get_default_rtol(out) - ) out = torch.empty_like(x) opcheck(op, (out, x)) diff --git a/torch-ext/activation/__init__.py b/torch-ext/activation/__init__.py index ddb37490dad9d8ffcbeb13ed06b33f03fef8ed78..71e0b01a03416c783f2bd67fc30d7ac86aee8764 100644 --- a/torch-ext/activation/__init__.py +++ b/torch-ext/activation/__init__.py @@ -1,8 +1,15 @@ import torch -from ._ops import ops +try: + from ._ops import ops +except ImportError as e: + # Fallback for local development. + try: + import _activation -from . 
import layers + ops = torch.ops._activation + except ImportError: + raise e def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: @@ -38,15 +45,3 @@ def gelu_new(out: torch.Tensor, x: torch.Tensor) -> None: def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None: ops.gelu_quick(out, x) return out - - -__all__ = [ - "silu_and_mul", - "gelu_and_mul", - "gelu_tanh_and_mul", - "fatrelu_and_mul", - "gelu_fast", - "gelu_new", - "gelu_quick", - "layers", -] diff --git a/torch-ext/activation/layers.py b/torch-ext/activation/layers.py deleted file mode 100644 index 99c129e3b1c9ed4c18166d5b5d67eb08f137a27f..0000000000000000000000000000000000000000 --- a/torch-ext/activation/layers.py +++ /dev/null @@ -1,65 +0,0 @@ -import torch -import torch.nn as nn - -from ._ops import ops - - -class SiluAndMul(nn.Module): - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.silu_and_mul(out, x) - return out - - -class GeluAndMul(nn.Module): - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.gelu_and_mul(out, x) - return out - - -class GeluTanhAndMul(nn.Module): - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.gelu_tanh_and_mul(out, x) - return out - - -class FatreluAndMul(nn.Module): - def __init__(self, threshold: float = 0.0): - super().__init__() - self.threshold = threshold - - def forward(self, x: torch.Tensor): - d = x.shape[-1] // 2 - output_shape = x.shape[:-1] + (d,) - out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.fatrelu_and_mul(out, x, self.threshold) - return out - - -class FastGELU(nn.Module): - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_fast(out, x) - return out - - -class NewGELU(nn.Module): - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_new(out, x) - return out - - -class QuickGELU(nn.Module): - def forward(self, x: torch.Tensor) -> torch.Tensor: - out = torch.empty_like(x) - ops.gelu_quick(out, x) - return out
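
With the `layers` module removed, the functional wrappers kept by this diff are the remaining public surface: each takes a caller-allocated output tensor plus the input. A minimal usage sketch follows, assuming one of the wheels above is importable as `activation`, a CUDA device is available, and a hidden size of 1024 (the module name, shapes, and dtype here are illustrative, not fixed by this repo).

```python
import torch

import activation  # assumption: one of the built wheels above is on the Python path

# Gated activations read an input whose last dimension holds two halves and
# write an output of half that width: out = act(x[..., :d]) * x[..., d:]
# (following the convention of the vLLM kernels these ops wrap).
x = torch.randn(8, 2 * 1024, dtype=torch.float16, device="cuda")
out = torch.empty(8, 1024, dtype=x.dtype, device=x.device)
activation.silu_and_mul(out, x)

# Plain GELU variants are element-wise and keep the input shape.
y = torch.randn(8, 1024, dtype=torch.float16, device="cuda")
gelu_out = activation.gelu_quick(torch.empty_like(y), y)
```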