Commit cf68df1
1 Parent: 605f22e

feat: add cuda build

This view is limited to 50 files because it contains too many changes. See the raw diff for the full change set.
- build.toml +15 -1
 - build/torch26-cxx11-cu118-x86_64-linux/activation/__init__.py +30 -0
 - build/{torch26-cxx11-rocm62-x86_64-linux/activation/_activation_f3b99fb_dirty.abi3.so → torch26-cxx11-cu118-x86_64-linux/activation/_activation_605f22e_dirty.abi3.so} +2 -2
 - build/torch26-cxx11-cu118-x86_64-linux/activation/_ops.py +9 -0
 - build/torch26-cxx11-cu118-x86_64-linux/activation/layers.py +46 -0
 - build/torch26-cxx11-cu118-x86_64-linux/activation/poly_norm.py +41 -0
 - build/torch26-cxx11-cu118-x86_64-linux/activation/rms_norm.py +34 -0
 - build/torch26-cxx11-cu124-x86_64-linux/activation/__init__.py +30 -0
 - build/{torch27-cxx11-rocm63-x86_64-linux/activation/_activation_f3b99fb_dirty.abi3.so → torch26-cxx11-cu124-x86_64-linux/activation/_activation_605f22e_dirty.abi3.so} +2 -2
 - build/torch26-cxx11-cu124-x86_64-linux/activation/_ops.py +9 -0
 - build/torch26-cxx11-cu124-x86_64-linux/activation/layers.py +46 -0
 - build/torch26-cxx11-cu124-x86_64-linux/activation/poly_norm.py +41 -0
 - build/torch26-cxx11-cu124-x86_64-linux/activation/rms_norm.py +34 -0
 - build/torch26-cxx11-cu126-x86_64-linux/activation/__init__.py +30 -0
 - build/torch26-cxx11-cu126-x86_64-linux/activation/_activation_605f22e_dirty.abi3.so +3 -0
 - build/torch26-cxx11-cu126-x86_64-linux/activation/_ops.py +9 -0
 - build/torch26-cxx11-cu126-x86_64-linux/activation/layers.py +46 -0
 - build/torch26-cxx11-cu126-x86_64-linux/activation/poly_norm.py +41 -0
 - build/torch26-cxx11-cu126-x86_64-linux/activation/rms_norm.py +34 -0
 - build/torch26-cxx11-rocm62-x86_64-linux/activation/__init__.py +0 -0
 - build/torch26-cxx11-rocm62-x86_64-linux/activation/_activation_605f22e_dirty.abi3.so +3 -0
 - build/torch26-cxx11-rocm62-x86_64-linux/activation/_ops.py +3 -3
 - build/torch26-cxx11-rocm62-x86_64-linux/activation/layers.py +0 -0
 - build/torch26-cxx11-rocm62-x86_64-linux/activation/poly_norm.py +0 -0
 - build/torch26-cxx11-rocm62-x86_64-linux/activation/rms_norm.py +0 -0
 - build/torch26-cxx98-cu118-x86_64-linux/activation/__init__.py +30 -0
 - build/torch26-cxx98-cu118-x86_64-linux/activation/_activation_605f22e_dirty.abi3.so +3 -0
 - build/torch26-cxx98-cu118-x86_64-linux/activation/_ops.py +9 -0
 - build/torch26-cxx98-cu118-x86_64-linux/activation/layers.py +46 -0
 - build/torch26-cxx98-cu118-x86_64-linux/activation/poly_norm.py +41 -0
 - build/torch26-cxx98-cu118-x86_64-linux/activation/rms_norm.py +34 -0
 - build/torch26-cxx98-cu124-x86_64-linux/activation/__init__.py +30 -0
 - build/torch26-cxx98-cu124-x86_64-linux/activation/_activation_605f22e_dirty.abi3.so +3 -0
 - build/torch26-cxx98-cu124-x86_64-linux/activation/_ops.py +9 -0
 - build/torch26-cxx98-cu124-x86_64-linux/activation/layers.py +46 -0
 - build/torch26-cxx98-cu124-x86_64-linux/activation/poly_norm.py +41 -0
 - build/torch26-cxx98-cu124-x86_64-linux/activation/rms_norm.py +34 -0
 - build/torch26-cxx98-cu126-x86_64-linux/activation/__init__.py +30 -0
 - build/torch26-cxx98-cu126-x86_64-linux/activation/_activation_605f22e_dirty.abi3.so +3 -0
 - build/torch26-cxx98-cu126-x86_64-linux/activation/_ops.py +9 -0
 - build/torch26-cxx98-cu126-x86_64-linux/activation/layers.py +46 -0
 - build/torch26-cxx98-cu126-x86_64-linux/activation/poly_norm.py +41 -0
 - build/torch26-cxx98-cu126-x86_64-linux/activation/rms_norm.py +34 -0
 - build/torch27-cxx11-cu118-x86_64-linux/activation/__init__.py +30 -0
 - build/torch27-cxx11-cu118-x86_64-linux/activation/_activation_605f22e_dirty.abi3.so +3 -0
 - build/torch27-cxx11-cu118-x86_64-linux/activation/_ops.py +9 -0
 - build/torch27-cxx11-cu118-x86_64-linux/activation/layers.py +46 -0
 - build/torch27-cxx11-cu118-x86_64-linux/activation/poly_norm.py +41 -0
 - build/torch27-cxx11-cu118-x86_64-linux/activation/rms_norm.py +34 -0
 - build/torch27-cxx11-cu126-x86_64-linux/activation/__init__.py +30 -0
 
    	
build.toml (CHANGED)

@@ -10,7 +10,7 @@ src = [

 [kernel.activation]
 backend = "rocm"
-rocm-archs = [ "gfx90a" ]
+rocm-archs = [ "gfx90a", "gfx942" ]
 src = [
   "activation/poly_norm.cu",
   "activation/rms_norm.cu",
@@ -21,3 +21,17 @@ src = [
   "activation/atomic_utils.h",
 ]
 depends = [ "torch" ]
+
+[kernel.activation_cuda]
+backend = "cuda"
+src = [
+  "activation/poly_norm.cu",
+  "activation/rms_norm.cu",
+  "activation/cuda_compat.h",
+  "activation/block_reduce.h",
+  "activation/dispatch_utils.h",
+  "activation/assert_utils.h",
+  "activation/atomic_utils.h",
+]
+depends = ["torch"]
+
build/torch26-cxx11-cu118-x86_64-linux/activation/__init__.py (ADDED)

@@ -0,0 +1,30 @@
+import torch
+
+from . import layers
+from ._ops import ops
+from .poly_norm import PolyNormFunction
+from .rms_norm import RMSNormFunction
+
+
+def poly_norm(
+    x: torch.Tensor,
+    weight: torch.Tensor,
+    bias: torch.Tensor,
+    eps: float = 1e-6,
+) -> None:
+    return PolyNormFunction.apply(x, weight, bias, eps)
+
+
+def rms_norm(
+    x: torch.Tensor,
+    weight: torch.Tensor,
+    eps: float = 1e-6,
+) -> None:
+    return RMSNormFunction.apply(x, weight, eps)
+
+
+__all__ = [
+    "poly_norm",
+    "layers",
+    "ops",
+]
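A minimal usage sketch of the functional API added above (the importable package name `activation`, the tensor shapes, and the CUDA device are assumptions for illustration; the weight and bias shapes follow the defaults used in layers.py):

    import torch
    import activation  # assumed import name for this kernel build

    x = torch.randn(4, 1024, device="cuda")
    rms_w = torch.ones(1024, device="cuda", requires_grad=True)
    poly_w = torch.full((3,), 1 / 3, device="cuda", requires_grad=True)
    poly_b = torch.zeros(1, device="cuda", requires_grad=True)

    y = activation.rms_norm(x, rms_w, eps=1e-6)             # dispatches to RMSNormFunction
    z = activation.poly_norm(y, poly_w, poly_b, eps=1e-6)   # dispatches to PolyNormFunction
    z.sum().backward()                                      # gradients flow through the custom backward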
    	
build/{torch26-cxx11-rocm62-x86_64-linux/activation/_activation_f3b99fb_dirty.abi3.so → torch26-cxx11-cu118-x86_64-linux/activation/_activation_605f22e_dirty.abi3.so} (RENAMED)

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:b2fdb7378a1c907c3ff3ad0a5134a0a8ce4a464196404436470d7b4eb77ec305
+size 2957296
build/torch26-cxx11-cu118-x86_64-linux/activation/_ops.py (ADDED)

@@ -0,0 +1,9 @@
+import torch
+from . import _activation_605f22e_dirty
+ops = torch.ops._activation_605f22e_dirty
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix op by namespace.
+    """
+    return f"_activation_605f22e_dirty::{op_name}"
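An illustrative sketch, not part of the commit, of what the helper above produces; the `ops` handle is the same torch.ops namespace the Function classes call into (the import path is an assumption):

    from activation._ops import ops, add_op_namespace_prefix  # assumed import path

    print(add_op_namespace_prefix("poly_norm"))
    # "_activation_605f22e_dirty::poly_norm"

    # The same operator is reachable as an attribute of the loaded extension's
    # torch.ops namespace, e.g. ops.poly_norm(output, input, weight, bias, eps).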
    	
build/torch26-cxx11-cu118-x86_64-linux/activation/layers.py (ADDED)

@@ -0,0 +1,46 @@
+import torch
+import torch.nn as nn
+from torch.nn import init
+
+from .poly_norm import PolyNormFunction
+from .rms_norm import RMSNormFunction
+
+
+class PolyNorm(nn.Module):
+    def __init__(self, eps=1e-6, dtype: torch.dtype = torch.float32):
+        super().__init__()
+        self.weight = torch.nn.Parameter(torch.ones(3, dtype=dtype) / 3)
+        self.bias = torch.nn.Parameter(torch.zeros(1, dtype=dtype))
+        self.eps = eps
+
+    def forward(
+        self,
+        x: torch.Tensor,
+    ):
+        return PolyNormFunction.apply(x, self.weight, self.bias, self.eps)
+
+    def reset_parameters(self) -> None:
+        """
+        Resets parameters based on their initialization used in __init__.
+        """
+        init.ones_(self.weight)
+        init.zeros_(self.bias)
+
+
+class RMSNorm(nn.Module):
+    def __init__(self, dim: int, eps=1e-6, dtype: torch.dtype = torch.float32):
+        super().__init__()
+        self.weight = torch.nn.Parameter(torch.ones(dim, dtype=dtype))
+        self.eps = eps
+
+    def forward(
+        self,
+        x: torch.Tensor,
+    ):
+        return RMSNormFunction.apply(x, self.weight, self.eps)
+
+    def reset_parameters(self) -> None:
+        """
+        Resets parameters based on their initialization used in __init__.
+        """
+        init.ones_(self.weight)
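A brief sketch showing the module wrappers above in use; the hidden size, device, and the `activation` package name are assumptions for illustration:

    import torch
    from activation.layers import PolyNorm, RMSNorm  # assumed import path

    hidden = 1024
    rms = RMSNorm(dim=hidden).cuda()   # per-feature scale, initialized to ones
    poly = PolyNorm().cuda()           # three polynomial weights (init 1/3 each) plus a scalar bias

    x = torch.randn(2, 16, hidden, device="cuda")
    out = poly(rms(x))                 # both forwards dispatch to the compiled kernels
    print(out.shape)                   # torch.Size([2, 16, 1024])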
    	
build/torch26-cxx11-cu118-x86_64-linux/activation/poly_norm.py (ADDED)

@@ -0,0 +1,41 @@
+import torch
+
+from ._ops import ops
+
+
+# Inherit from Function
+class PolyNormFunction(torch.autograd.Function):
+    # Note that forward, setup_context, and backward are @staticmethods
+    @staticmethod
+    def forward(input, weight, bias, eps):
+        output = torch.empty_like(input)
+        ops.poly_norm(output, input, weight, bias, eps)
+        return output
+
+    @staticmethod
+    # inputs is a Tuple of all of the inputs passed to forward.
+    # output is the output of the forward().
+    def setup_context(ctx, inputs, output):
+        input, weight, bias, eps = inputs
+        ctx.save_for_backward(input, weight)
+        ctx.eps = eps
+
+    # This function has only a single output, so it gets only one gradient
+    @staticmethod
+    def backward(ctx, output_grad):
+        input, weight = ctx.saved_tensors
+        eps = ctx.eps
+
+        input_grad = torch.empty_like(input) if ctx.needs_input_grad[0] else None
+        weight_grad = torch.empty_like(weight) if ctx.needs_input_grad[1] else None
+        bias_grad = (
+            torch.empty(1, dtype=weight.dtype, device=weight.device)
+            if ctx.needs_input_grad[2]
+            else None
+        )
+
+        ops.poly_norm_backward(
+            input_grad, weight_grad, bias_grad, output_grad, input, weight, eps
+        )
+
+        return input_grad, weight_grad, bias_grad, None
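As an illustration only, not part of the commit: the custom backward above can be compared against numerical gradients with torch.autograd.gradcheck. This assumes the compiled kernels accept float64 inputs, which gradcheck needs for a stable finite-difference check:

    import torch
    from activation.poly_norm import PolyNormFunction  # assumed import path

    x = torch.randn(2, 8, dtype=torch.float64, device="cuda", requires_grad=True)
    w = torch.randn(3, dtype=torch.float64, device="cuda", requires_grad=True)
    b = torch.randn(1, dtype=torch.float64, device="cuda", requires_grad=True)

    # Compares ops.poly_norm_backward against finite-difference gradients.
    torch.autograd.gradcheck(PolyNormFunction.apply, (x, w, b, 1e-6))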
    	
build/torch26-cxx11-cu118-x86_64-linux/activation/rms_norm.py (ADDED)

@@ -0,0 +1,34 @@
+import torch
+
+from ._ops import ops
+
+
+# Inherit from Function
+class RMSNormFunction(torch.autograd.Function):
+    # Note that forward, setup_context, and backward are @staticmethods
+    @staticmethod
+    def forward(input, weight, eps):
+        output = torch.empty_like(input)
+        ops.rms_norm(output, input, weight, eps)
+        return output
+
+    @staticmethod
+    # inputs is a Tuple of all of the inputs passed to forward.
+    # output is the output of the forward().
+    def setup_context(ctx, inputs, output):
+        input, weight, eps = inputs
+        ctx.save_for_backward(input, weight)
+        ctx.eps = eps
+
+    # This function has only a single output, so it gets only one gradient
+    @staticmethod
+    def backward(ctx, output_grad):
+        input, weight = ctx.saved_tensors
+        eps = ctx.eps
+
+        input_grad = torch.empty_like(input) if ctx.needs_input_grad[0] else None
+        weight_grad = torch.empty_like(weight) if ctx.needs_input_grad[1] else None
+
+        ops.rms_norm_backward(input_grad, weight_grad, output_grad, input, weight, eps)
+
+        return input_grad, weight_grad, None
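A sketch of a quick numerical sanity check, an assumption for illustration rather than part of the commit: the op's output should match a plain PyTorch RMSNorm, y = x * rsqrt(mean(x^2) + eps) * weight:

    import torch
    from activation.rms_norm import RMSNormFunction  # assumed import path

    x = torch.randn(4, 512, device="cuda")
    w = torch.ones(512, device="cuda")
    eps = 1e-6

    ref = x * torch.rsqrt(x.pow(2).mean(dim=-1, keepdim=True) + eps) * w
    out = RMSNormFunction.apply(x, w, eps)
    torch.testing.assert_close(out, ref, rtol=1e-4, atol=1e-4)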
    	
build/torch26-cxx11-cu124-x86_64-linux/activation/__init__.py (ADDED)

@@ -0,0 +1,30 @@
+import torch
+
+from . import layers
+from ._ops import ops
+from .poly_norm import PolyNormFunction
+from .rms_norm import RMSNormFunction
+
+
+def poly_norm(
+    x: torch.Tensor,
+    weight: torch.Tensor,
+    bias: torch.Tensor,
+    eps: float = 1e-6,
+) -> None:
+    return PolyNormFunction.apply(x, weight, bias, eps)
+
+
+def rms_norm(
+    x: torch.Tensor,
+    weight: torch.Tensor,
+    eps: float = 1e-6,
+) -> None:
+    return RMSNormFunction.apply(x, weight, eps)
+
+
+__all__ = [
+    "poly_norm",
+    "layers",
+    "ops",
+]
build/{torch27-cxx11-rocm63-x86_64-linux/activation/_activation_f3b99fb_dirty.abi3.so → torch26-cxx11-cu124-x86_64-linux/activation/_activation_605f22e_dirty.abi3.so} (RENAMED)

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:5baac6228e04fbb209cbc90a24702c14f4eb52d2698cea12a766d77412622096
+size 2981880
build/torch26-cxx11-cu124-x86_64-linux/activation/_ops.py (ADDED)

@@ -0,0 +1,9 @@
+import torch
+from . import _activation_605f22e_dirty
+ops = torch.ops._activation_605f22e_dirty
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix op by namespace.
+    """
+    return f"_activation_605f22e_dirty::{op_name}"
build/torch26-cxx11-cu124-x86_64-linux/activation/layers.py (ADDED)

@@ -0,0 +1,46 @@
+import torch
+import torch.nn as nn
+from torch.nn import init
+
+from .poly_norm import PolyNormFunction
+from .rms_norm import RMSNormFunction
+
+
+class PolyNorm(nn.Module):
+    def __init__(self, eps=1e-6, dtype: torch.dtype = torch.float32):
+        super().__init__()
+        self.weight = torch.nn.Parameter(torch.ones(3, dtype=dtype) / 3)
+        self.bias = torch.nn.Parameter(torch.zeros(1, dtype=dtype))
+        self.eps = eps
+
+    def forward(
+        self,
+        x: torch.Tensor,
+    ):
+        return PolyNormFunction.apply(x, self.weight, self.bias, self.eps)
+
+    def reset_parameters(self) -> None:
+        """
+        Resets parameters based on their initialization used in __init__.
+        """
+        init.ones_(self.weight)
+        init.zeros_(self.bias)
+
+
+class RMSNorm(nn.Module):
+    def __init__(self, dim: int, eps=1e-6, dtype: torch.dtype = torch.float32):
+        super().__init__()
+        self.weight = torch.nn.Parameter(torch.ones(dim, dtype=dtype))
+        self.eps = eps
+
+    def forward(
+        self,
+        x: torch.Tensor,
+    ):
+        return RMSNormFunction.apply(x, self.weight, self.eps)
+
+    def reset_parameters(self) -> None:
+        """
+        Resets parameters based on their initialization used in __init__.
+        """
+        init.ones_(self.weight)
build/torch26-cxx11-cu124-x86_64-linux/activation/poly_norm.py (ADDED)

@@ -0,0 +1,41 @@
+import torch
+
+from ._ops import ops
+
+
+# Inherit from Function
+class PolyNormFunction(torch.autograd.Function):
+    # Note that forward, setup_context, and backward are @staticmethods
+    @staticmethod
+    def forward(input, weight, bias, eps):
+        output = torch.empty_like(input)
+        ops.poly_norm(output, input, weight, bias, eps)
+        return output
+
+    @staticmethod
+    # inputs is a Tuple of all of the inputs passed to forward.
+    # output is the output of the forward().
+    def setup_context(ctx, inputs, output):
+        input, weight, bias, eps = inputs
+        ctx.save_for_backward(input, weight)
+        ctx.eps = eps
+
+    # This function has only a single output, so it gets only one gradient
+    @staticmethod
+    def backward(ctx, output_grad):
+        input, weight = ctx.saved_tensors
+        eps = ctx.eps
+
+        input_grad = torch.empty_like(input) if ctx.needs_input_grad[0] else None
+        weight_grad = torch.empty_like(weight) if ctx.needs_input_grad[1] else None
+        bias_grad = (
+            torch.empty(1, dtype=weight.dtype, device=weight.device)
+            if ctx.needs_input_grad[2]
+            else None
+        )
+
+        ops.poly_norm_backward(
+            input_grad, weight_grad, bias_grad, output_grad, input, weight, eps
+        )
+
+        return input_grad, weight_grad, bias_grad, None
build/torch26-cxx11-cu124-x86_64-linux/activation/rms_norm.py (ADDED)

@@ -0,0 +1,34 @@
+import torch
+
+from ._ops import ops
+
+
+# Inherit from Function
+class RMSNormFunction(torch.autograd.Function):
+    # Note that forward, setup_context, and backward are @staticmethods
+    @staticmethod
+    def forward(input, weight, eps):
+        output = torch.empty_like(input)
+        ops.rms_norm(output, input, weight, eps)
+        return output
+
+    @staticmethod
+    # inputs is a Tuple of all of the inputs passed to forward.
+    # output is the output of the forward().
+    def setup_context(ctx, inputs, output):
+        input, weight, eps = inputs
+        ctx.save_for_backward(input, weight)
+        ctx.eps = eps
+
+    # This function has only a single output, so it gets only one gradient
+    @staticmethod
+    def backward(ctx, output_grad):
+        input, weight = ctx.saved_tensors
+        eps = ctx.eps
+
+        input_grad = torch.empty_like(input) if ctx.needs_input_grad[0] else None
+        weight_grad = torch.empty_like(weight) if ctx.needs_input_grad[1] else None
+
+        ops.rms_norm_backward(input_grad, weight_grad, output_grad, input, weight, eps)
+
+        return input_grad, weight_grad, None
build/torch26-cxx11-cu126-x86_64-linux/activation/__init__.py (ADDED)

@@ -0,0 +1,30 @@
+import torch
+
+from . import layers
+from ._ops import ops
+from .poly_norm import PolyNormFunction
+from .rms_norm import RMSNormFunction
+
+
+def poly_norm(
+    x: torch.Tensor,
+    weight: torch.Tensor,
+    bias: torch.Tensor,
+    eps: float = 1e-6,
+) -> None:
+    return PolyNormFunction.apply(x, weight, bias, eps)
+
+
+def rms_norm(
+    x: torch.Tensor,
+    weight: torch.Tensor,
+    eps: float = 1e-6,
+) -> None:
+    return RMSNormFunction.apply(x, weight, eps)
+
+
+__all__ = [
+    "poly_norm",
+    "layers",
+    "ops",
+]
build/torch26-cxx11-cu126-x86_64-linux/activation/_activation_605f22e_dirty.abi3.so (ADDED)

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2d5c0095b931923008435d361c1871e97ff2ef04100e93205f09e65316f307f3
+size 2994704
build/torch26-cxx11-cu126-x86_64-linux/activation/_ops.py (ADDED)

@@ -0,0 +1,9 @@
+import torch
+from . import _activation_605f22e_dirty
+ops = torch.ops._activation_605f22e_dirty
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix op by namespace.
+    """
+    return f"_activation_605f22e_dirty::{op_name}"
build/torch26-cxx11-cu126-x86_64-linux/activation/layers.py (ADDED)

@@ -0,0 +1,46 @@
+import torch
+import torch.nn as nn
+from torch.nn import init
+
+from .poly_norm import PolyNormFunction
+from .rms_norm import RMSNormFunction
+
+
+class PolyNorm(nn.Module):
+    def __init__(self, eps=1e-6, dtype: torch.dtype = torch.float32):
+        super().__init__()
+        self.weight = torch.nn.Parameter(torch.ones(3, dtype=dtype) / 3)
+        self.bias = torch.nn.Parameter(torch.zeros(1, dtype=dtype))
+        self.eps = eps
+
+    def forward(
+        self,
+        x: torch.Tensor,
+    ):
+        return PolyNormFunction.apply(x, self.weight, self.bias, self.eps)
+
+    def reset_parameters(self) -> None:
+        """
+        Resets parameters based on their initialization used in __init__.
+        """
+        init.ones_(self.weight)
+        init.zeros_(self.bias)
+
+
+class RMSNorm(nn.Module):
+    def __init__(self, dim: int, eps=1e-6, dtype: torch.dtype = torch.float32):
+        super().__init__()
+        self.weight = torch.nn.Parameter(torch.ones(dim, dtype=dtype))
+        self.eps = eps
+
+    def forward(
+        self,
+        x: torch.Tensor,
+    ):
+        return RMSNormFunction.apply(x, self.weight, self.eps)
+
+    def reset_parameters(self) -> None:
+        """
+        Resets parameters based on their initialization used in __init__.
+        """
+        init.ones_(self.weight)
build/torch26-cxx11-cu126-x86_64-linux/activation/poly_norm.py
ADDED
@@ -0,0 +1,41 @@
+import torch
+
+from ._ops import ops
+
+
+# Inherit from Function
+class PolyNormFunction(torch.autograd.Function):
+    # Note that forward, setup_context, and backward are @staticmethods
+    @staticmethod
+    def forward(input, weight, bias, eps):
+        output = torch.empty_like(input)
+        ops.poly_norm(output, input, weight, bias, eps)
+        return output
+
+    @staticmethod
+    # inputs is a Tuple of all of the inputs passed to forward.
+    # output is the output of the forward().
+    def setup_context(ctx, inputs, output):
+        input, weight, bias, eps = inputs
+        ctx.save_for_backward(input, weight)
+        ctx.eps = eps
+
+    # This function has only a single output, so it gets only one gradient
+    @staticmethod
+    def backward(ctx, output_grad):
+        input, weight = ctx.saved_tensors
+        eps = ctx.eps
+
+        input_grad = torch.empty_like(input) if ctx.needs_input_grad[0] else None
+        weight_grad = torch.empty_like(weight) if ctx.needs_input_grad[1] else None
+        bias_grad = (
+            torch.empty(1, dtype=weight.dtype, device=weight.device)
+            if ctx.needs_input_grad[2]
+            else None
+        )
+
+        ops.poly_norm_backward(
+            input_grad, weight_grad, bias_grad, output_grad, input, weight, eps
+        )
+
+        return input_grad, weight_grad, bias_grad, None
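A minimal sketch of how this custom autograd Function could be exercised and its analytic gradients checked against finite differences. The import path, tensor shapes, and the assumption that the compiled kernel accepts float64 inputs are illustrative and not part of this commit:

import torch
from activation.poly_norm import PolyNormFunction  # assumes the build directory is importable as `activation`

x = torch.randn(2, 8, 64, dtype=torch.float64, device="cuda", requires_grad=True)
weight = torch.randn(3, dtype=torch.float64, device="cuda", requires_grad=True)   # 3 weights, as in layers.py
bias = torch.randn(1, dtype=torch.float64, device="cuda", requires_grad=True)     # scalar bias, as in layers.py

# Forward through the fused kernel.
out = PolyNormFunction.apply(x, weight, bias, 1e-6)

# Compare poly_norm_backward against numerical gradients (only meaningful if the op supports float64).
torch.autograd.gradcheck(
    lambda x, w, b: PolyNormFunction.apply(x, w, b, 1e-6),
    (x, weight, bias),
)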
    	
build/torch26-cxx11-cu126-x86_64-linux/activation/rms_norm.py
ADDED
@@ -0,0 +1,34 @@
+import torch
+
+from ._ops import ops
+
+
+# Inherit from Function
+class RMSNormFunction(torch.autograd.Function):
+    # Note that forward, setup_context, and backward are @staticmethods
+    @staticmethod
+    def forward(input, weight, eps):
+        output = torch.empty_like(input)
+        ops.rms_norm(output, input, weight, eps)
+        return output
+
+    @staticmethod
+    # inputs is a Tuple of all of the inputs passed to forward.
+    # output is the output of the forward().
+    def setup_context(ctx, inputs, output):
+        input, weight, eps = inputs
+        ctx.save_for_backward(input, weight)
+        ctx.eps = eps
+
+    # This function has only a single output, so it gets only one gradient
+    @staticmethod
+    def backward(ctx, output_grad):
+        input, weight = ctx.saved_tensors
+        eps = ctx.eps
+
+        input_grad = torch.empty_like(input) if ctx.needs_input_grad[0] else None
+        weight_grad = torch.empty_like(weight) if ctx.needs_input_grad[1] else None
+
+        ops.rms_norm_backward(input_grad, weight_grad, output_grad, input, weight, eps)
+
+        return input_grad, weight_grad, None
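For a quick sanity check, the fused op can be compared against a plain PyTorch reference. The formula below is the standard RMSNorm (normalize by the root mean square over the last dimension, then scale by weight); that this is what ops.rms_norm computes is an assumption, not something stated in the diff:

import torch

def rms_norm_reference(x: torch.Tensor, weight: torch.Tensor, eps: float = 1e-6) -> torch.Tensor:
    # Standard RMSNorm: x * rsqrt(mean(x^2 over last dim) + eps), scaled per feature.
    rms = torch.rsqrt(x.pow(2).mean(dim=-1, keepdim=True) + eps)
    return x * rms * weight

# Hypothetical comparison against the fused kernel (requires the compiled extension and a GPU):
# from activation import rms_norm
# x = torch.randn(4, 1024, device="cuda", dtype=torch.float16)
# w = torch.ones(1024, device="cuda", dtype=torch.float16)
# torch.testing.assert_close(rms_norm(x, w), rms_norm_reference(x, w), rtol=1e-2, atol=1e-2)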
    	
build/torch26-cxx11-rocm62-x86_64-linux/activation/__init__.py
CHANGED
File without changes
    	
build/torch26-cxx11-rocm62-x86_64-linux/activation/_activation_605f22e_dirty.abi3.so
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5c29fccf3f62ac3e3b7ff59e898d31ae38f3484bfe762f6767b8bc8cedf1af01
+size 2660632
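The shared objects are stored as Git LFS pointers, so only the sha256 oid and the byte size live in the repository. After `git lfs pull`, the local binary can be checked against the recorded pointer; the path below is just the one from this hunk:

import hashlib
from pathlib import Path

so_path = Path("build/torch26-cxx11-rocm62-x86_64-linux/activation/_activation_605f22e_dirty.abi3.so")
data = so_path.read_bytes()

# Both expected values come from the LFS pointer above.
assert len(data) == 2660632
assert hashlib.sha256(data).hexdigest() == "5c29fccf3f62ac3e3b7ff59e898d31ae38f3484bfe762f6767b8bc8cedf1af01"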
    	
build/torch26-cxx11-rocm62-x86_64-linux/activation/_ops.py
CHANGED
@@ -1,9 +1,9 @@
 import torch
-from . import _activation_f3b99fb_dirty
-ops = torch.ops._activation_f3b99fb_dirty
+from . import _activation_605f22e_dirty
+ops = torch.ops._activation_605f22e_dirty
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_activation_f3b99fb_dirty::{op_name}"
+    return f"_activation_605f22e_dirty::{op_name}"
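The _ops shim pins the Python wrappers to one specific compiled library by baking the commit hash into the extension name, and add_op_namespace_prefix just returns the fully qualified op name in that namespace. A small illustration, assuming the build directory is on sys.path so the package imports as `activation`:

from activation._ops import ops, add_op_namespace_prefix

# Qualified name as registered by the rebuilt extension.
assert add_op_namespace_prefix("poly_norm") == "_activation_605f22e_dirty::poly_norm"

# `ops` is the same namespace the wrappers call into, e.g.
# ops.rms_norm(out, x, weight, eps) now dispatches to the ROCm 6.2 binary added above.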
    	
build/torch26-cxx11-rocm62-x86_64-linux/activation/layers.py
CHANGED
File without changes
    	
build/torch26-cxx11-rocm62-x86_64-linux/activation/poly_norm.py
CHANGED
File without changes
    	
build/torch26-cxx11-rocm62-x86_64-linux/activation/rms_norm.py
CHANGED
File without changes
    	
build/torch26-cxx98-cu118-x86_64-linux/activation/__init__.py
ADDED
@@ -0,0 +1,30 @@
+import torch
+
+from . import layers
+from ._ops import ops
+from .poly_norm import PolyNormFunction
+from .rms_norm import RMSNormFunction
+
+
+def poly_norm(
+    x: torch.Tensor,
+    weight: torch.Tensor,
+    bias: torch.Tensor,
+    eps: float = 1e-6,
+) -> None:
+    return PolyNormFunction.apply(x, weight, bias, eps)
+
+
+def rms_norm(
+    x: torch.Tensor,
+    weight: torch.Tensor,
+    eps: float = 1e-6,
+) -> None:
+    return RMSNormFunction.apply(x, weight, eps)
+
+
+__all__ = [
+    "poly_norm",
+    "layers",
+    "ops",
+]
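A usage sketch of the functional wrappers: despite the `-> None` annotations in the file above, both functions return the tensor produced by the corresponding Function.apply. It assumes one of these CUDA build variants is importable as `activation` and a GPU is available:

import torch
import activation  # one of the build/torch26-*-cu*/ directories on sys.path

x = torch.randn(8, 4096, device="cuda", dtype=torch.float16)

# RMSNorm: per-feature scale over the last dimension.
w = torch.ones(4096, device="cuda", dtype=torch.float16)
y = activation.rms_norm(x, w, eps=1e-6)

# PolyNorm: three weights plus a scalar bias, matching the defaults in layers.py.
pw = torch.full((3,), 1.0 / 3.0, device="cuda", dtype=torch.float16)
pb = torch.zeros(1, device="cuda", dtype=torch.float16)
z = activation.poly_norm(x, pw, pb, eps=1e-6)

print(y.shape, z.shape)  # both torch.Size([8, 4096])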
    	
build/torch26-cxx98-cu118-x86_64-linux/activation/_activation_605f22e_dirty.abi3.so
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:176b8610ed2b9650c68347ec2f1d9e99b653170b4fd4f6f3540731f3fd78e98b
+size 2949936
    	
build/torch26-cxx98-cu118-x86_64-linux/activation/_ops.py
ADDED
@@ -0,0 +1,9 @@
+import torch
+from . import _activation_605f22e_dirty
+ops = torch.ops._activation_605f22e_dirty
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix op by namespace.
+    """
+    return f"_activation_605f22e_dirty::{op_name}"
    	
build/torch26-cxx98-cu118-x86_64-linux/activation/layers.py
ADDED
@@ -0,0 +1,46 @@
+import torch
+import torch.nn as nn
+from torch.nn import init
+
+from .poly_norm import PolyNormFunction
+from .rms_norm import RMSNormFunction
+
+
+class PolyNorm(nn.Module):
+    def __init__(self, eps=1e-6, dtype: torch.dtype = torch.float32):
+        super().__init__()
+        self.weight = torch.nn.Parameter(torch.ones(3, dtype=dtype) / 3)
+        self.bias = torch.nn.Parameter(torch.zeros(1, dtype=dtype))
+        self.eps = eps
+
+    def forward(
+        self,
+        x: torch.Tensor,
+    ):
+        return PolyNormFunction.apply(x, self.weight, self.bias, self.eps)
+
+    def reset_parameters(self) -> None:
+        """
+        Resets parameters based on their initialization used in __init__.
+        """
+        init.ones_(self.weight)
+        init.zeros_(self.bias)
+
+
+class RMSNorm(nn.Module):
+    def __init__(self, dim: int, eps=1e-6, dtype: torch.dtype = torch.float32):
+        super().__init__()
+        self.weight = torch.nn.Parameter(torch.ones(dim, dtype=dtype))
+        self.eps = eps
+
+    def forward(
+        self,
+        x: torch.Tensor,
+    ):
+        return RMSNormFunction.apply(x, self.weight, self.eps)
+
+    def reset_parameters(self) -> None:
+        """
+        Resets parameters based on their initialization used in __init__.
+        """
+        init.ones_(self.weight)
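The layers module wraps the same Functions as drop-in nn.Module norms. A brief sketch of using them inside a block; the Block class, dimensions, and dtype are illustrative, and it assumes the package imports as `activation` and runs on hardware the kernels support:

import torch
import torch.nn as nn
from activation.layers import PolyNorm, RMSNorm

class Block(nn.Module):
    def __init__(self, dim: int = 1024):
        super().__init__()
        self.norm = RMSNorm(dim, eps=1e-6, dtype=torch.float16)  # learnable per-feature scale
        self.act = PolyNorm(eps=1e-6, dtype=torch.float16)       # 3 weights + scalar bias, as initialized above
        self.proj = nn.Linear(dim, dim, dtype=torch.float16)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.proj(self.act(self.norm(x)))

block = Block().cuda()
out = block(torch.randn(2, 16, 1024, device="cuda", dtype=torch.float16))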
    	
build/torch26-cxx98-cu118-x86_64-linux/activation/poly_norm.py
ADDED
@@ -0,0 +1,41 @@
+import torch
+
+from ._ops import ops
+
+
+# Inherit from Function
+class PolyNormFunction(torch.autograd.Function):
+    # Note that forward, setup_context, and backward are @staticmethods
+    @staticmethod
+    def forward(input, weight, bias, eps):
+        output = torch.empty_like(input)
+        ops.poly_norm(output, input, weight, bias, eps)
+        return output
+
+    @staticmethod
+    # inputs is a Tuple of all of the inputs passed to forward.
+    # output is the output of the forward().
+    def setup_context(ctx, inputs, output):
+        input, weight, bias, eps = inputs
+        ctx.save_for_backward(input, weight)
+        ctx.eps = eps
+
+    # This function has only a single output, so it gets only one gradient
+    @staticmethod
+    def backward(ctx, output_grad):
+        input, weight = ctx.saved_tensors
+        eps = ctx.eps
+
+        input_grad = torch.empty_like(input) if ctx.needs_input_grad[0] else None
+        weight_grad = torch.empty_like(weight) if ctx.needs_input_grad[1] else None
+        bias_grad = (
+            torch.empty(1, dtype=weight.dtype, device=weight.device)
+            if ctx.needs_input_grad[2]
+            else None
+        )
+
+        ops.poly_norm_backward(
+            input_grad, weight_grad, bias_grad, output_grad, input, weight, eps
+        )
+
+        return input_grad, weight_grad, bias_grad, None
    	
build/torch26-cxx98-cu118-x86_64-linux/activation/rms_norm.py
ADDED
@@ -0,0 +1,34 @@
+import torch
+
+from ._ops import ops
+
+
+# Inherit from Function
+class RMSNormFunction(torch.autograd.Function):
+    # Note that forward, setup_context, and backward are @staticmethods
+    @staticmethod
+    def forward(input, weight, eps):
+        output = torch.empty_like(input)
+        ops.rms_norm(output, input, weight, eps)
+        return output
+
+    @staticmethod
+    # inputs is a Tuple of all of the inputs passed to forward.
+    # output is the output of the forward().
+    def setup_context(ctx, inputs, output):
+        input, weight, eps = inputs
+        ctx.save_for_backward(input, weight)
+        ctx.eps = eps
+
+    # This function has only a single output, so it gets only one gradient
+    @staticmethod
+    def backward(ctx, output_grad):
+        input, weight = ctx.saved_tensors
+        eps = ctx.eps
+
+        input_grad = torch.empty_like(input) if ctx.needs_input_grad[0] else None
+        weight_grad = torch.empty_like(weight) if ctx.needs_input_grad[1] else None
+
+        ops.rms_norm_backward(input_grad, weight_grad, output_grad, input, weight, eps)
+
+        return input_grad, weight_grad, None
    	
build/torch26-cxx98-cu124-x86_64-linux/activation/__init__.py
ADDED
@@ -0,0 +1,30 @@
+import torch
+
+from . import layers
+from ._ops import ops
+from .poly_norm import PolyNormFunction
+from .rms_norm import RMSNormFunction
+
+
+def poly_norm(
+    x: torch.Tensor,
+    weight: torch.Tensor,
+    bias: torch.Tensor,
+    eps: float = 1e-6,
+) -> None:
+    return PolyNormFunction.apply(x, weight, bias, eps)
+
+
+def rms_norm(
+    x: torch.Tensor,
+    weight: torch.Tensor,
+    eps: float = 1e-6,
+) -> None:
+    return RMSNormFunction.apply(x, weight, eps)
+
+
+__all__ = [
+    "poly_norm",
+    "layers",
+    "ops",
+]
    	
build/torch26-cxx98-cu124-x86_64-linux/activation/_activation_605f22e_dirty.abi3.so
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8075bbb5b339e0305d353003eb86a2b6a4d8a468907d821cefbed29e6e439c19
+size 2974640
    	
build/torch26-cxx98-cu124-x86_64-linux/activation/_ops.py
ADDED
@@ -0,0 +1,9 @@
+import torch
+from . import _activation_605f22e_dirty
+ops = torch.ops._activation_605f22e_dirty
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix op by namespace.
+    """
+    return f"_activation_605f22e_dirty::{op_name}"
    	
build/torch26-cxx98-cu124-x86_64-linux/activation/layers.py
ADDED
@@ -0,0 +1,46 @@
+import torch
+import torch.nn as nn
+from torch.nn import init
+
+from .poly_norm import PolyNormFunction
+from .rms_norm import RMSNormFunction
+
+
+class PolyNorm(nn.Module):
+    def __init__(self, eps=1e-6, dtype: torch.dtype = torch.float32):
+        super().__init__()
+        self.weight = torch.nn.Parameter(torch.ones(3, dtype=dtype) / 3)
+        self.bias = torch.nn.Parameter(torch.zeros(1, dtype=dtype))
+        self.eps = eps
+
+    def forward(
+        self,
+        x: torch.Tensor,
+    ):
+        return PolyNormFunction.apply(x, self.weight, self.bias, self.eps)
+
+    def reset_parameters(self) -> None:
+        """
+        Resets parameters based on their initialization used in __init__.
+        """
+        init.ones_(self.weight)
+        init.zeros_(self.bias)
+
+
+class RMSNorm(nn.Module):
+    def __init__(self, dim: int, eps=1e-6, dtype: torch.dtype = torch.float32):
+        super().__init__()
+        self.weight = torch.nn.Parameter(torch.ones(dim, dtype=dtype))
+        self.eps = eps
+
+    def forward(
+        self,
+        x: torch.Tensor,
+    ):
+        return RMSNormFunction.apply(x, self.weight, self.eps)
+
+    def reset_parameters(self) -> None:
+        """
+        Resets parameters based on their initialization used in __init__.
+        """
+        init.ones_(self.weight)
    	
build/torch26-cxx98-cu124-x86_64-linux/activation/poly_norm.py
ADDED
@@ -0,0 +1,41 @@
+import torch
+
+from ._ops import ops
+
+
+# Inherit from Function
+class PolyNormFunction(torch.autograd.Function):
+    # Note that forward, setup_context, and backward are @staticmethods
+    @staticmethod
+    def forward(input, weight, bias, eps):
+        output = torch.empty_like(input)
+        ops.poly_norm(output, input, weight, bias, eps)
+        return output
+
+    @staticmethod
+    # inputs is a Tuple of all of the inputs passed to forward.
+    # output is the output of the forward().
+    def setup_context(ctx, inputs, output):
+        input, weight, bias, eps = inputs
+        ctx.save_for_backward(input, weight)
+        ctx.eps = eps
+
+    # This function has only a single output, so it gets only one gradient
+    @staticmethod
+    def backward(ctx, output_grad):
+        input, weight = ctx.saved_tensors
+        eps = ctx.eps
+
+        input_grad = torch.empty_like(input) if ctx.needs_input_grad[0] else None
+        weight_grad = torch.empty_like(weight) if ctx.needs_input_grad[1] else None
+        bias_grad = (
+            torch.empty(1, dtype=weight.dtype, device=weight.device)
+            if ctx.needs_input_grad[2]
+            else None
+        )
+
+        ops.poly_norm_backward(
+            input_grad, weight_grad, bias_grad, output_grad, input, weight, eps
+        )
+
+        return input_grad, weight_grad, bias_grad, None
    	
build/torch26-cxx98-cu124-x86_64-linux/activation/rms_norm.py
ADDED
@@ -0,0 +1,34 @@
+import torch
+
+from ._ops import ops
+
+
+# Inherit from Function
+class RMSNormFunction(torch.autograd.Function):
+    # Note that forward, setup_context, and backward are @staticmethods
+    @staticmethod
+    def forward(input, weight, eps):
+        output = torch.empty_like(input)
+        ops.rms_norm(output, input, weight, eps)
+        return output
+
+    @staticmethod
+    # inputs is a Tuple of all of the inputs passed to forward.
+    # output is the output of the forward().
+    def setup_context(ctx, inputs, output):
+        input, weight, eps = inputs
+        ctx.save_for_backward(input, weight)
+        ctx.eps = eps
+
+    # This function has only a single output, so it gets only one gradient
+    @staticmethod
+    def backward(ctx, output_grad):
+        input, weight = ctx.saved_tensors
+        eps = ctx.eps
+
+        input_grad = torch.empty_like(input) if ctx.needs_input_grad[0] else None
+        weight_grad = torch.empty_like(weight) if ctx.needs_input_grad[1] else None
+
+        ops.rms_norm_backward(input_grad, weight_grad, output_grad, input, weight, eps)
+
+        return input_grad, weight_grad, None
    	
build/torch26-cxx98-cu126-x86_64-linux/activation/__init__.py
ADDED
@@ -0,0 +1,30 @@
+import torch
+
+from . import layers
+from ._ops import ops
+from .poly_norm import PolyNormFunction
+from .rms_norm import RMSNormFunction
+
+
+def poly_norm(
+    x: torch.Tensor,
+    weight: torch.Tensor,
+    bias: torch.Tensor,
+    eps: float = 1e-6,
+) -> None:
+    return PolyNormFunction.apply(x, weight, bias, eps)
+
+
+def rms_norm(
+    x: torch.Tensor,
+    weight: torch.Tensor,
+    eps: float = 1e-6,
+) -> None:
+    return RMSNormFunction.apply(x, weight, eps)
+
+
+__all__ = [
+    "poly_norm",
+    "layers",
+    "ops",
+]
    	
build/torch26-cxx98-cu126-x86_64-linux/activation/_activation_605f22e_dirty.abi3.so
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:614ef2bf9867f65bf8e09d861def1c554d384676aa58dfbfd73bf96241cb7171
+size 2987456
    	
build/torch26-cxx98-cu126-x86_64-linux/activation/_ops.py
ADDED
@@ -0,0 +1,9 @@
+import torch
+from . import _activation_605f22e_dirty
+ops = torch.ops._activation_605f22e_dirty
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix op by namespace.
+    """
+    return f"_activation_605f22e_dirty::{op_name}"
    	
build/torch26-cxx98-cu126-x86_64-linux/activation/layers.py
ADDED
@@ -0,0 +1,46 @@
+import torch
+import torch.nn as nn
+from torch.nn import init
+
+from .poly_norm import PolyNormFunction
+from .rms_norm import RMSNormFunction
+
+
+class PolyNorm(nn.Module):
+    def __init__(self, eps=1e-6, dtype: torch.dtype = torch.float32):
+        super().__init__()
+        self.weight = torch.nn.Parameter(torch.ones(3, dtype=dtype) / 3)
+        self.bias = torch.nn.Parameter(torch.zeros(1, dtype=dtype))
+        self.eps = eps
+
+    def forward(
+        self,
+        x: torch.Tensor,
+    ):
+        return PolyNormFunction.apply(x, self.weight, self.bias, self.eps)
+
+    def reset_parameters(self) -> None:
+        """
+        Resets parameters based on their initialization used in __init__.
+        """
+        init.ones_(self.weight)
+        init.zeros_(self.bias)
+
+
+class RMSNorm(nn.Module):
+    def __init__(self, dim: int, eps=1e-6, dtype: torch.dtype = torch.float32):
+        super().__init__()
+        self.weight = torch.nn.Parameter(torch.ones(dim, dtype=dtype))
+        self.eps = eps
+
+    def forward(
+        self,
+        x: torch.Tensor,
+    ):
+        return RMSNormFunction.apply(x, self.weight, self.eps)
+
+    def reset_parameters(self) -> None:
+        """
+        Resets parameters based on their initialization used in __init__.
+        """
+        init.ones_(self.weight)
    	
build/torch26-cxx98-cu126-x86_64-linux/activation/poly_norm.py
ADDED
@@ -0,0 +1,41 @@
+import torch
+
+from ._ops import ops
+
+
+# Inherit from Function
+class PolyNormFunction(torch.autograd.Function):
+    # Note that forward, setup_context, and backward are @staticmethods
+    @staticmethod
+    def forward(input, weight, bias, eps):
+        output = torch.empty_like(input)
+        ops.poly_norm(output, input, weight, bias, eps)
+        return output
+
+    @staticmethod
+    # inputs is a Tuple of all of the inputs passed to forward.
+    # output is the output of the forward().
+    def setup_context(ctx, inputs, output):
+        input, weight, bias, eps = inputs
+        ctx.save_for_backward(input, weight)
+        ctx.eps = eps
+
+    # This function has only a single output, so it gets only one gradient
+    @staticmethod
+    def backward(ctx, output_grad):
+        input, weight = ctx.saved_tensors
+        eps = ctx.eps
+
+        input_grad = torch.empty_like(input) if ctx.needs_input_grad[0] else None
+        weight_grad = torch.empty_like(weight) if ctx.needs_input_grad[1] else None
+        bias_grad = (
+            torch.empty(1, dtype=weight.dtype, device=weight.device)
+            if ctx.needs_input_grad[2]
+            else None
+        )
+
+        ops.poly_norm_backward(
+            input_grad, weight_grad, bias_grad, output_grad, input, weight, eps
+        )
+
+        return input_grad, weight_grad, bias_grad, None
    	
build/torch26-cxx98-cu126-x86_64-linux/activation/rms_norm.py ADDED
@@ -0,0 +1,34 @@
+import torch
+
+from ._ops import ops
+
+
+# Inherit from Function
+class RMSNormFunction(torch.autograd.Function):
+    # Note that forward, setup_context, and backward are @staticmethods
+    @staticmethod
+    def forward(input, weight, eps):
+        output = torch.empty_like(input)
+        ops.rms_norm(output, input, weight, eps)
+        return output
+
+    @staticmethod
+    # inputs is a Tuple of all of the inputs passed to forward.
+    # output is the output of the forward().
+    def setup_context(ctx, inputs, output):
+        input, weight, eps = inputs
+        ctx.save_for_backward(input, weight)
+        ctx.eps = eps
+
+    # This function has only a single output, so it gets only one gradient
+    @staticmethod
+    def backward(ctx, output_grad):
+        input, weight = ctx.saved_tensors
+        eps = ctx.eps
+
+        input_grad = torch.empty_like(input) if ctx.needs_input_grad[0] else None
+        weight_grad = torch.empty_like(weight) if ctx.needs_input_grad[1] else None
+
+        ops.rms_norm_backward(input_grad, weight_grad, output_grad, input, weight, eps)
+
+        return input_grad, weight_grad, None
    	
build/torch27-cxx11-cu118-x86_64-linux/activation/__init__.py ADDED
@@ -0,0 +1,30 @@
+import torch
+
+from . import layers
+from ._ops import ops
+from .poly_norm import PolyNormFunction
+from .rms_norm import RMSNormFunction
+
+
+def poly_norm(
+    x: torch.Tensor,
+    weight: torch.Tensor,
+    bias: torch.Tensor,
+    eps: float = 1e-6,
+) -> torch.Tensor:
+    return PolyNormFunction.apply(x, weight, bias, eps)
+
+
+def rms_norm(
+    x: torch.Tensor,
+    weight: torch.Tensor,
+    eps: float = 1e-6,
+) -> torch.Tensor:
+    return RMSNormFunction.apply(x, weight, eps)
+
+
+__all__ = [
+    "poly_norm",
+    "layers",
+    "ops",
+]
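The __init__.py above exposes poly_norm and rms_norm as thin functional wrappers around the autograd Functions. A minimal usage sketch, assuming the built package for this variant is importable as `activation`, a CUDA device is available, and the tensor shapes are illustrative only:

    import torch
    import activation  # package name assumed from the build layout above

    x = torch.randn(4, 1024, device="cuda", requires_grad=True)
    w = torch.ones(1024, device="cuda", requires_grad=True)

    # Functional RMSNorm: forward runs the CUDA kernel, backward is wired up
    # through RMSNormFunction.setup_context / backward.
    y = activation.rms_norm(x, w, eps=1e-6)
    y.sum().backward()
    print(x.grad.shape, w.grad.shape)

    # PolyNorm takes a 3-element weight and a scalar bias (see layers.py).
    pw = (torch.ones(3, device="cuda") / 3).requires_grad_()
    pb = torch.zeros(1, device="cuda", requires_grad=True)
    z = activation.poly_norm(x, pw, pb, eps=1e-6)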
    	
build/torch27-cxx11-cu118-x86_64-linux/activation/_activation_605f22e_dirty.abi3.so ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:654d16d66565185dfd1a6f16e0b24d8fff83e12558c8862c322734e6b52e5cc0
+size 2957448
    	
build/torch27-cxx11-cu118-x86_64-linux/activation/_ops.py ADDED
@@ -0,0 +1,9 @@
+import torch
+from . import _activation_605f22e_dirty
+ops = torch.ops._activation_605f22e_dirty
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix op by namespace.
+    """
+    return f"_activation_605f22e_dirty::{op_name}"
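_ops.py binds the versioned extension module into torch.ops and provides a helper for building fully qualified op names. As the autograd wrappers show, the kernels use an out-parameter convention: the caller allocates the output buffer. A small sketch, again assuming the package imports as `activation`:

    import torch
    from activation._ops import ops, add_op_namespace_prefix

    x = torch.randn(2, 512, device="cuda")
    w = torch.ones(512, device="cuda")
    out = torch.empty_like(x)       # caller-allocated output buffer
    ops.rms_norm(out, x, w, 1e-6)   # kernel writes the result into `out`

    # Fully qualified op name, e.g. for torch.library registrations:
    print(add_op_namespace_prefix("rms_norm"))  # "_activation_605f22e_dirty::rms_norm"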
    	
build/torch27-cxx11-cu118-x86_64-linux/activation/layers.py ADDED
@@ -0,0 +1,46 @@
+import torch
+import torch.nn as nn
+from torch.nn import init
+
+from .poly_norm import PolyNormFunction
+from .rms_norm import RMSNormFunction
+
+
+class PolyNorm(nn.Module):
+    def __init__(self, eps=1e-6, dtype: torch.dtype = torch.float32):
+        super().__init__()
+        self.weight = torch.nn.Parameter(torch.ones(3, dtype=dtype) / 3)
+        self.bias = torch.nn.Parameter(torch.zeros(1, dtype=dtype))
+        self.eps = eps
+
+    def forward(
+        self,
+        x: torch.Tensor,
+    ):
+        return PolyNormFunction.apply(x, self.weight, self.bias, self.eps)
+
+    def reset_parameters(self) -> None:
+        """
+        Resets parameters based on their initialization used in __init__.
+        """
+        init.ones_(self.weight)
+        init.zeros_(self.bias)
+
+
+class RMSNorm(nn.Module):
+    def __init__(self, dim: int, eps=1e-6, dtype: torch.dtype = torch.float32):
+        super().__init__()
+        self.weight = torch.nn.Parameter(torch.ones(dim, dtype=dtype))
+        self.eps = eps
+
+    def forward(
+        self,
+        x: torch.Tensor,
+    ):
+        return RMSNormFunction.apply(x, self.weight, self.eps)
+
+    def reset_parameters(self) -> None:
+        """
+        Resets parameters based on their initialization used in __init__.
+        """
+        init.ones_(self.weight)
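layers.py wraps the same autograd Functions as nn.Module classes with learnable parameters: PolyNorm carries a 3-element weight plus a scalar bias, RMSNorm a per-feature weight. A short sketch of dropping them into a model, under the same assumption that the package imports as `activation`:

    import torch
    import torch.nn as nn
    from activation.layers import PolyNorm, RMSNorm

    model = nn.Sequential(
        nn.Linear(512, 512),
        PolyNorm(eps=1e-6),      # learnable 3-term weight and bias, as defined above
        nn.Linear(512, 512),
        RMSNorm(512, eps=1e-6),  # per-feature RMS normalization
    ).cuda()

    x = torch.randn(8, 512, device="cuda")
    loss = model(x).pow(2).mean()
    loss.backward()              # gradients flow through the custom CUDA kernels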
    	
build/torch27-cxx11-cu118-x86_64-linux/activation/poly_norm.py ADDED
@@ -0,0 +1,41 @@
+import torch
+
+from ._ops import ops
+
+
+# Inherit from Function
+class PolyNormFunction(torch.autograd.Function):
+    # Note that forward, setup_context, and backward are @staticmethods
+    @staticmethod
+    def forward(input, weight, bias, eps):
+        output = torch.empty_like(input)
+        ops.poly_norm(output, input, weight, bias, eps)
+        return output
+
+    @staticmethod
+    # inputs is a Tuple of all of the inputs passed to forward.
+    # output is the output of the forward().
+    def setup_context(ctx, inputs, output):
+        input, weight, bias, eps = inputs
+        ctx.save_for_backward(input, weight)
+        ctx.eps = eps
+
+    # This function has only a single output, so it gets only one gradient
+    @staticmethod
+    def backward(ctx, output_grad):
+        input, weight = ctx.saved_tensors
+        eps = ctx.eps
+
+        input_grad = torch.empty_like(input) if ctx.needs_input_grad[0] else None
+        weight_grad = torch.empty_like(weight) if ctx.needs_input_grad[1] else None
+        bias_grad = (
+            torch.empty(1, dtype=weight.dtype, device=weight.device)
+            if ctx.needs_input_grad[2]
+            else None
+        )
+
+        ops.poly_norm_backward(
+            input_grad, weight_grad, bias_grad, output_grad, input, weight, eps
+        )
+
+        return input_grad, weight_grad, bias_grad, None
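Note how backward() consults ctx.needs_input_grad before allocating each gradient buffer, so the kernel only computes gradients that are actually requested. A sketch illustrating this behaviour, assuming the package imports as `activation`:

    import torch
    from activation.poly_norm import PolyNormFunction

    x = torch.randn(4, 256, device="cuda")                    # no grad requested for x
    w = (torch.ones(3, device="cuda") / 3).requires_grad_()
    b = torch.zeros(1, device="cuda", requires_grad=True)

    out = PolyNormFunction.apply(x, w, b, 1e-6)
    out.sum().backward()

    # Only the weight and bias buffers are allocated and filled; x.grad stays None.
    print(x.grad, w.grad.shape, b.grad.shape)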
    	
build/torch27-cxx11-cu118-x86_64-linux/activation/rms_norm.py ADDED
@@ -0,0 +1,34 @@
+import torch
+
+from ._ops import ops
+
+
+# Inherit from Function
+class RMSNormFunction(torch.autograd.Function):
+    # Note that forward, setup_context, and backward are @staticmethods
+    @staticmethod
+    def forward(input, weight, eps):
+        output = torch.empty_like(input)
+        ops.rms_norm(output, input, weight, eps)
+        return output
+
+    @staticmethod
+    # inputs is a Tuple of all of the inputs passed to forward.
+    # output is the output of the forward().
+    def setup_context(ctx, inputs, output):
+        input, weight, eps = inputs
+        ctx.save_for_backward(input, weight)
+        ctx.eps = eps
+
+    # This function has only a single output, so it gets only one gradient
+    @staticmethod
+    def backward(ctx, output_grad):
+        input, weight = ctx.saved_tensors
+        eps = ctx.eps
+
+        input_grad = torch.empty_like(input) if ctx.needs_input_grad[0] else None
+        weight_grad = torch.empty_like(weight) if ctx.needs_input_grad[1] else None
+
+        ops.rms_norm_backward(input_grad, weight_grad, output_grad, input, weight, eps)
+
+        return input_grad, weight_grad, None
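For sanity-checking a freshly built variant, the CUDA kernel can be compared against a plain PyTorch reference. This sketch assumes the kernel implements the usual RMSNorm definition (normalize by the root mean square over the last dimension, then scale by weight) and that the package imports as `activation`:

    import torch
    from activation import rms_norm  # functional wrapper from __init__.py

    def rms_norm_ref(x, weight, eps=1e-6):
        # Plain-PyTorch reference: x / rms(x) * weight over the last dimension.
        rms = torch.sqrt(x.pow(2).mean(dim=-1, keepdim=True) + eps)
        return x / rms * weight

    x = torch.randn(4, 1024, device="cuda")
    w = torch.randn(1024, device="cuda")
    torch.testing.assert_close(
        rms_norm(x, w, 1e-6), rms_norm_ref(x, w, 1e-6), rtol=1e-3, atol=1e-3
    )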
    	
build/torch27-cxx11-cu126-x86_64-linux/activation/__init__.py ADDED
@@ -0,0 +1,30 @@
+import torch
+
+from . import layers
+from ._ops import ops
+from .poly_norm import PolyNormFunction
+from .rms_norm import RMSNormFunction
+
+
+def poly_norm(
+    x: torch.Tensor,
+    weight: torch.Tensor,
+    bias: torch.Tensor,
+    eps: float = 1e-6,
+) -> torch.Tensor:
+    return PolyNormFunction.apply(x, weight, bias, eps)
+
+
+def rms_norm(
+    x: torch.Tensor,
+    weight: torch.Tensor,
+    eps: float = 1e-6,
+) -> torch.Tensor:
+    return RMSNormFunction.apply(x, weight, eps)
+
+
+__all__ = [
+    "poly_norm",
+    "layers",
+    "ops",
+]