	Update eva_clip/eva_vit_model.py
Replaces the try/except timm import with local copies of drop_path, to_2tuple, and trunc_normal_, removing the module's dependency on timm's layer helpers.

eva_clip/eva_vit_model.py  CHANGED  (+90 -5)
@@ -4,14 +4,13 @@
 import math
 import os
 from functools import partial
+from itertools import repeat
+import collections.abc
 import torch
 import torch.nn as nn
+import warnings
 import torch.nn.functional as F
-try:
-    from timm.models.layers import drop_path, to_2tuple, trunc_normal_
-except:
-    from timm.layers import drop_path, to_2tuple, trunc_normal_
-
+
 from .transformer import PatchDropout
 from .rope import VisionRotaryEmbedding, VisionRotaryEmbeddingFast
 
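Note on the import change above: the try/except existed because newer timm releases moved these helpers from timm.models.layers to timm.layers. The hunk below instead vendors drop_path, to_2tuple, and trunc_normal_ into the module itself, so the file no longer depends on which timm version is installed. A minimal sketch of how one could check the vendored to_2tuple against a timm build (illustrative only, not part of the commit; assumes this file imports as eva_clip.eva_vit_model):

    from eva_clip.eva_vit_model import to_2tuple  # vendored copy
    try:
        from timm.models.layers import to_2tuple as timm_to_2tuple  # older timm
    except ImportError:
        from timm.layers import to_2tuple as timm_to_2tuple         # newer timm

    # Scalars are repeated into a pair; iterables pass through unchanged.
    assert to_2tuple(224) == timm_to_2tuple(224) == (224, 224)
    assert to_2tuple((16, 16)) == timm_to_2tuple((16, 16)) == (16, 16)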
@@ -30,6 +29,92 @@ try:
 except:
     XFORMERS_IS_AVAILBLE = False
 
+
+def _ntuple(n):
+    def parse(x):
+        if isinstance(x, collections.abc.Iterable):
+            return x
+        return tuple(repeat(x, n))
+    return parse
+
+to_2tuple = _ntuple(2)
+
+def _no_grad_trunc_normal_(tensor, mean, std, a, b):
+    # Cut & paste from PyTorch official master until it's in a few official releases - RW
+    # Method based on https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf
+    def norm_cdf(x):
+        # Computes standard normal cumulative distribution function
+        return (1. + math.erf(x / math.sqrt(2.))) / 2.
+
+    if (mean < a - 2 * std) or (mean > b + 2 * std):
+        warnings.warn("mean is more than 2 std from [a, b] in nn.init.trunc_normal_. "
+                      "The distribution of values may be incorrect.",
+                      stacklevel=2)
+
+    with torch.no_grad():
+        # Values are generated by using a truncated uniform distribution and
+        # then using the inverse CDF for the normal distribution.
+        # Get upper and lower cdf values
+        l = norm_cdf((a - mean) / std)
+        u = norm_cdf((b - mean) / std)
+
+        # Uniformly fill tensor with values from [l, u], then translate to
+        # [2l-1, 2u-1].
+        tensor.uniform_(2 * l - 1, 2 * u - 1)
+
+        # Use inverse cdf transform for normal distribution to get truncated
+        # standard normal
+        tensor.erfinv_()
+
+        # Transform to proper mean, std
+        tensor.mul_(std * math.sqrt(2.))
+        tensor.add_(mean)
+
+        # Clamp to ensure it's in the proper range
+        tensor.clamp_(min=a, max=b)
+        return tensor
+
+
+def trunc_normal_(tensor, mean=0., std=1., a=-2., b=2.):
+    # type: (Tensor, float, float, float, float) -> Tensor
+    r"""Fills the input Tensor with values drawn from a truncated
+    normal distribution. The values are effectively drawn from the
+    normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)`
+    with values outside :math:`[a, b]` redrawn until they are within
+    the bounds. The method used for generating the random values works
+    best when :math:`a \leq \text{mean} \leq b`.
+    Args:
+        tensor: an n-dimensional `torch.Tensor`
+        mean: the mean of the normal distribution
+        std: the standard deviation of the normal distribution
+        a: the minimum cutoff value
+        b: the maximum cutoff value
+    Examples:
+        >>> w = torch.empty(3, 5)
+        >>> nn.init.trunc_normal_(w)
+    """
+    return _no_grad_trunc_normal_(tensor, mean, std, a, b)
+
+def drop_path(x, drop_prob: float = 0., training: bool = False, scale_by_keep: bool = True):
+    """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
+
+    This is the same as the DropConnect impl I created for EfficientNet, etc networks, however,
+    the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
+    See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for
+    changing the layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use
+    'survival rate' as the argument.
+
+    """
+    if drop_prob == 0. or not training:
+        return x
+    keep_prob = 1 - drop_prob
+    shape = (x.shape[0],) + (1,) * (x.ndim - 1)  # work with diff dim tensors, not just 2D ConvNets
+    random_tensor = x.new_empty(shape).bernoulli_(keep_prob)
+    if keep_prob > 0.0 and scale_by_keep:
+        random_tensor.div_(keep_prob)
+    return x * random_tensor
+
+
 class DropPath(nn.Module):
     """Drop paths (Stochastic Depth) per sample  (when applied in main path of residual blocks).
     """
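The two larger vendored helpers can be sanity-checked numerically. trunc_normal_ uses the inverse-CDF trick: it computes l = Phi((a - mean)/std) and u = Phi((b - mean)/std), fills the tensor uniformly on [2l - 1, 2u - 1], applies erfinv_, then rescales by std * sqrt(2) and shifts by mean, so every value lands in [a, b]. drop_path zeroes whole samples with probability drop_prob and, with scale_by_keep, rescales the survivors by 1/keep_prob so the expected activation is unchanged. A minimal sketch of such a check (not part of the commit; assumes this file imports as eva_clip.eva_vit_model, and sample sizes and tolerances are illustrative):

    import torch
    from eva_clip.eva_vit_model import trunc_normal_, drop_path  # vendored copies

    torch.manual_seed(0)

    # trunc_normal_: values must respect the cutoffs; for the default N(0, 1)
    # truncated to [-2, 2], the sample mean should sit near 0.
    w = torch.empty(10000)
    trunc_normal_(w, mean=0., std=1., a=-2., b=2.)
    assert w.min().item() >= -2.0 and w.max().item() <= 2.0
    assert abs(w.mean().item()) < 0.05  # loose statistical tolerance

    # drop_path: identity outside training; in training, each sample row is
    # either all zeros or scaled by 1/keep_prob, preserving the mean.
    x = torch.ones(4096, 8)
    assert torch.equal(drop_path(x, drop_prob=0.3, training=False), x)
    y = drop_path(x, drop_prob=0.3, training=True)
    assert abs(y.mean().item() - 1.0) < 0.05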