Upload custom kernels

Browse files

Files changed (7) hide show

build/torch-universal/liger_kernels/__init__.py +2 -29
build/torch-universal/liger_kernels/_ops.py +2 -2
build/torch-universal/liger_kernels/layers.py +42 -0
build/torch-universal/liger_kernels/rms_norm.py +1 -40
torch-ext/liger_kernels/__init__.py +2 -29
torch-ext/liger_kernels/layers.py +42 -0
torch-ext/liger_kernels/rms_norm.py +1 -40

build/torch-universal/liger_kernels/__init__.py CHANGED Viewed

@@ -1,30 +1,3 @@
-from .cross_entropy import LigerCrossEntropyFunction
-from .fused_linear_cross_entropy import LigerFusedLinearCrossEntropyFunction
-from .dyt import LigerDyTFunction
-from .geglu import LigerGELUMulFunction
-from .group_norm import LigerGroupNormFunction
-from .kl_div import LigerKLDivLossFunction
-from .layer_norm import LigerLayerNormFunction
-from .qwen2vl_mrope import LigerQwen2VLMRopeFunction
-from .rms_norm import LigerRMSNormFunction, LigerRMSNorm
-from .jsd import LigerJSDFunction
-from .rope import LigerRopeFunction
-from .swiglu import LigerSiLUMulFunction
-from .tvd import LigerTVDLossFunction
-__all__ = [
-    "LigerCrossEntropyFunction",
-    "LigerFusedLinearCrossEntropyFunction",
-    "LigerDyTFunction",
-    "LigerGELUMulFunction",
-    "LigerGroupNormFunction",
-    "LigerKLDivLossFunction",
-    "LigerLayerNormFunction",
-    "LigerQwen2VLMRopeFunction",
-    "LigerRMSNormFunction",
-    "LigerRMSNorm",
-    "LigerJSDFunction",
-    "LigerRopeFunction",
-    "LigerSiLUMulFunction",
-    "LigerTVDLossFunction",
-]


+from . import layers
+
+__all__ = ["layers"]

build/torch-universal/liger_kernels/_ops.py CHANGED Viewed

@@ -1,8 +1,8 @@
 import torch
-ops = torch.ops._liger_kernels_20250507091026
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_liger_kernels_20250507091026::{op_name}"

 import torch
+ops = torch.ops._liger_kernels_20250507091553
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
+    return f"_liger_kernels_20250507091553::{op_name}"

build/torch-universal/liger_kernels/layers.py ADDED Viewed

@@ -0,0 +1,42 @@

+import torch
+from .rms_norm import LigerRMSNormFunction
+class LigerRMSNorm(torch.nn.Module):
+    """
+    RMSNorm module that uses the optimized LigerRMSNormFunction.
+    Args:
+        hidden_size (int): The size of the hidden dimension.
+        eps (float, optional): The epsilon value for numerical stability. Defaults to 1e-6.
+        offset (float, optional): Offset value to shift the weight tensor. Defaults to 0.0.
+        casting_mode (str, optional): The casting mode to use. Defaults to "llama".
+        in_place (bool, optional): Whether to modify dY in-place to store dX during backward. Defaults to True.
+    """
+    weight: torch.Tensor
+    variance_epsilon: float
+    offset: float = 0
+    casting_mode: str = "llama"
+    in_place: bool = True
+    def forward(self, hidden_states):
+        """
+        Apply RMS normalization to the input tensor.
+        Args:
+            hidden_states (torch.Tensor): Input tensor of shape (B, T, H) or (BxT, H)
+        Returns:
+            torch.Tensor: Normalized tensor of the same shape as input
+        """
+        return LigerRMSNormFunction.apply(
+            hidden_states,
+            self.weight,
+            self.variance_epsilon,
+            self.offset,
+            self.casting_mode,
+            self.in_place
+        )
+__all__ = ["LigerRMSNorm"]

build/torch-universal/liger_kernels/rms_norm.py CHANGED Viewed

@@ -362,43 +362,4 @@ class LigerRMSNormFunction(torch.autograd.Function):
             ctx.num_warps,
             ctx.in_place,
         )
-        return dX, dW, None, None, None, None
-class LigerRMSNorm(torch.nn.Module):
-    """
-    RMSNorm module that uses the optimized LigerRMSNormFunction.
-    Args:
-        hidden_size (int): The size of the hidden dimension.
-        eps (float, optional): The epsilon value for numerical stability. Defaults to 1e-6.
-        offset (float, optional): Offset value to shift the weight tensor. Defaults to 0.0.
-        casting_mode (str, optional): The casting mode to use. Defaults to "llama".
-        in_place (bool, optional): Whether to modify dY in-place to store dX during backward. Defaults to True.
-    """
-    weight: torch.Tensor
-    variance_epsilon: float
-    offset: float = 0
-    casting_mode: str = "llama"
-    in_place: bool = True
-    def forward(self, hidden_states):
-        """
-        Apply RMS normalization to the input tensor.
-        Args:
-            hidden_states (torch.Tensor): Input tensor of shape (B, T, H) or (BxT, H)
-        Returns:
-            torch.Tensor: Normalized tensor of the same shape as input
-        """
-        return LigerRMSNormFunction.apply(
-            hidden_states,
-            self.weight,
-            self.variance_epsilon,
-            self.offset,
-            self.casting_mode,
-            self.in_place
-        )

             ctx.num_warps,
             ctx.in_place,
         )
+        return dX, dW, None, None, None, None

torch-ext/liger_kernels/__init__.py CHANGED Viewed

@@ -1,30 +1,3 @@
-from .cross_entropy import LigerCrossEntropyFunction
-from .fused_linear_cross_entropy import LigerFusedLinearCrossEntropyFunction
-from .dyt import LigerDyTFunction
-from .geglu import LigerGELUMulFunction
-from .group_norm import LigerGroupNormFunction
-from .kl_div import LigerKLDivLossFunction
-from .layer_norm import LigerLayerNormFunction
-from .qwen2vl_mrope import LigerQwen2VLMRopeFunction
-from .rms_norm import LigerRMSNormFunction, LigerRMSNorm
-from .jsd import LigerJSDFunction
-from .rope import LigerRopeFunction
-from .swiglu import LigerSiLUMulFunction
-from .tvd import LigerTVDLossFunction
-__all__ = [
-    "LigerCrossEntropyFunction",
-    "LigerFusedLinearCrossEntropyFunction",
-    "LigerDyTFunction",
-    "LigerGELUMulFunction",
-    "LigerGroupNormFunction",
-    "LigerKLDivLossFunction",
-    "LigerLayerNormFunction",
-    "LigerQwen2VLMRopeFunction",
-    "LigerRMSNormFunction",
-    "LigerRMSNorm",
-    "LigerJSDFunction",
-    "LigerRopeFunction",
-    "LigerSiLUMulFunction",
-    "LigerTVDLossFunction",
-]


+from . import layers
+
+__all__ = ["layers"]

torch-ext/liger_kernels/layers.py ADDED Viewed

@@ -0,0 +1,42 @@

+import torch
+from .rms_norm import LigerRMSNormFunction
+class LigerRMSNorm(torch.nn.Module):
+    """
+    RMSNorm module that uses the optimized LigerRMSNormFunction.
+    Args:
+        hidden_size (int): The size of the hidden dimension.
+        eps (float, optional): The epsilon value for numerical stability. Defaults to 1e-6.
+        offset (float, optional): Offset value to shift the weight tensor. Defaults to 0.0.
+        casting_mode (str, optional): The casting mode to use. Defaults to "llama".
+        in_place (bool, optional): Whether to modify dY in-place to store dX during backward. Defaults to True.
+    """
+    weight: torch.Tensor
+    variance_epsilon: float
+    offset: float = 0
+    casting_mode: str = "llama"
+    in_place: bool = True
+    def forward(self, hidden_states):
+        """
+        Apply RMS normalization to the input tensor.
+        Args:
+            hidden_states (torch.Tensor): Input tensor of shape (B, T, H) or (BxT, H)
+        Returns:
+            torch.Tensor: Normalized tensor of the same shape as input
+        """
+        return LigerRMSNormFunction.apply(
+            hidden_states,
+            self.weight,
+            self.variance_epsilon,
+            self.offset,
+            self.casting_mode,
+            self.in_place
+        )
+__all__ = ["LigerRMSNorm"]

torch-ext/liger_kernels/rms_norm.py CHANGED Viewed

@@ -362,43 +362,4 @@ class LigerRMSNormFunction(torch.autograd.Function):
             ctx.num_warps,
             ctx.in_place,
         )
-        return dX, dW, None, None, None, None
-class LigerRMSNorm(torch.nn.Module):
-    """
-    RMSNorm module that uses the optimized LigerRMSNormFunction.
-    Args:
-        hidden_size (int): The size of the hidden dimension.
-        eps (float, optional): The epsilon value for numerical stability. Defaults to 1e-6.
-        offset (float, optional): Offset value to shift the weight tensor. Defaults to 0.0.
-        casting_mode (str, optional): The casting mode to use. Defaults to "llama".
-        in_place (bool, optional): Whether to modify dY in-place to store dX during backward. Defaults to True.
-    """
-    weight: torch.Tensor
-    variance_epsilon: float
-    offset: float = 0
-    casting_mode: str = "llama"
-    in_place: bool = True
-    def forward(self, hidden_states):
-        """
-        Apply RMS normalization to the input tensor.
-        Args:
-            hidden_states (torch.Tensor): Input tensor of shape (B, T, H) or (BxT, H)
-        Returns:
-            torch.Tensor: Normalized tensor of the same shape as input
-        """
-        return LigerRMSNormFunction.apply(
-            hidden_states,
-            self.weight,
-            self.variance_epsilon,
-            self.offset,
-            self.casting_mode,
-            self.in_place
-        )

             ctx.num_warps,
             ctx.in_place,
         )
+        return dX, dW, None, None, None, None