Commit 
							
							·
						
						040a791
	
1
								Parent(s):
							
							4f344d1
								
Update README.md
Browse files
    	
        README.md
    CHANGED
    
    | 
         @@ -13,224 +13,8 @@ inference: false 
     | 
|
| 13 | 
         | 
| 14 | 
         
             
            model-index:
         
     | 
| 15 | 
         
             
            - name: SantaCoder-metal
         
     | 
| 16 | 
         
            -
              results:
         
     | 
| 17 | 
         
            -
              - task:
         
     | 
| 18 | 
         
            -
                  type: text-generation
         
     | 
| 19 | 
         
            -
                dataset:
         
     | 
| 20 | 
         
            -
                  type: nuprl/MultiPL-E
         
     | 
| 21 | 
         
            -
                  name: MultiPL HumanEval (Python)
         
     | 
| 22 | 
         
            -
                metrics:
         
     | 
| 23 | 
         
            -
                - name: pass@1
         
     | 
| 24 | 
         
            -
                  type: pass@1
         
     | 
| 25 | 
         
            -
                  value: 0.18
         
     | 
| 26 | 
         
            -
                  verified: false
         
     | 
| 27 | 
         
            -
                - name: pass@10
         
     | 
| 28 | 
         
            -
                  type: pass@10
         
     | 
| 29 | 
         
            -
                  value: 0.29
         
     | 
| 30 | 
         
            -
                  verified: false
         
     | 
| 31 | 
         
            -
                - name: pass@100
         
     | 
| 32 | 
         
            -
                  type: pass@100
         
     | 
| 33 | 
         
            -
                  value: 0.49
         
     | 
| 34 | 
         
            -
                  verified: false
         
     | 
| 35 | 
         
            -
              - task:
         
     | 
| 36 | 
         
            -
                  type: text-generation
         
     | 
| 37 | 
         
            -
                dataset:
         
     | 
| 38 | 
         
            -
                  type: nuprl/MultiPL-E
         
     | 
| 39 | 
         
            -
                  name: MultiPL MBPP (Python)
         
     | 
| 40 | 
         
            -
                metrics:
         
     | 
| 41 | 
         
            -
                - name: pass@1
         
     | 
| 42 | 
         
            -
                  type: pass@1
         
     | 
| 43 | 
         
            -
                  value: 0.35
         
     | 
| 44 | 
         
            -
                  verified: false
         
     | 
| 45 | 
         
            -
                - name: pass@10
         
     | 
| 46 | 
         
            -
                  type: pass@10
         
     | 
| 47 | 
         
            -
                  value: 0.58
         
     | 
| 48 | 
         
            -
                  verified: false
         
     | 
| 49 | 
         
            -
                - name: pass@100
         
     | 
| 50 | 
         
            -
                  type: pass@100
         
     | 
| 51 | 
         
            -
                  value: 0.77
         
     | 
| 52 | 
         
            -
                  verified: false
         
     | 
| 53 | 
         
            -
              - task:
         
     | 
| 54 | 
         
            -
                  type: text-generation
         
     | 
| 55 | 
         
            -
                dataset:
         
     | 
| 56 | 
         
            -
                  type: nuprl/MultiPL-E
         
     | 
| 57 | 
         
            -
                  name: MultiPL HumanEval (JavaScript)
         
     | 
| 58 | 
         
            -
                metrics:
         
     | 
| 59 | 
         
            -
                - name: pass@1
         
     | 
| 60 | 
         
            -
                  type: pass@1
         
     | 
| 61 | 
         
            -
                  value: 0.16
         
     | 
| 62 | 
         
            -
                  verified: false
         
     | 
| 63 | 
         
            -
                - name: pass@10
         
     | 
| 64 | 
         
            -
                  type: pass@10
         
     | 
| 65 | 
         
            -
                  value: 0.27
         
     | 
| 66 | 
         
            -
                  verified: false
         
     | 
| 67 | 
         
            -
                - name: pass@100
         
     | 
| 68 | 
         
            -
                  type: pass@100
         
     | 
| 69 | 
         
            -
                  value: 0.47
         
     | 
| 70 | 
         
            -
                  verified: false
         
     | 
| 71 | 
         
            -
              - task:
         
     | 
| 72 | 
         
            -
                  type: text-generation
         
     | 
| 73 | 
         
            -
                dataset:
         
     | 
| 74 | 
         
            -
                  type: nuprl/MultiPL-E
         
     | 
| 75 | 
         
            -
                  name: MultiPL MBPP (JavaScript)
         
     | 
| 76 | 
         
            -
                metrics:
         
     | 
| 77 | 
         
            -
                - name: pass@1
         
     | 
| 78 | 
         
            -
                  type: pass@1
         
     | 
| 79 | 
         
            -
                  value: 0.28
         
     | 
| 80 | 
         
            -
                  verified: false
         
     | 
| 81 | 
         
            -
                - name: pass@10
         
     | 
| 82 | 
         
            -
                  type: pass@10
         
     | 
| 83 | 
         
            -
                  value: 0.51
         
     | 
| 84 | 
         
            -
                  verified: false
         
     | 
| 85 | 
         
            -
                - name: pass@100
         
     | 
| 86 | 
         
            -
                  type: pass@100
         
     | 
| 87 | 
         
            -
                  value: 0.70
         
     | 
| 88 | 
         
            -
                  verified: false
         
     | 
| 89 | 
         
            -
              - task:
         
     | 
| 90 | 
         
            -
                  type: text-generation
         
     | 
| 91 | 
         
            -
                dataset:
         
     | 
| 92 | 
         
            -
                  type: nuprl/MultiPL-E
         
     | 
| 93 | 
         
            -
                  name: MultiPL HumanEval (Java)
         
     | 
| 94 | 
         
            -
                metrics:
         
     | 
| 95 | 
         
            -
                - name: pass@1
         
     | 
| 96 | 
         
            -
                  type: pass@1
         
     | 
| 97 | 
         
            -
                  value: 0.15
         
     | 
| 98 | 
         
            -
                  verified: false
         
     | 
| 99 | 
         
            -
                - name: pass@10
         
     | 
| 100 | 
         
            -
                  type: pass@10
         
     | 
| 101 | 
         
            -
                  value: 0.26
         
     | 
| 102 | 
         
            -
                  verified: false
         
     | 
| 103 | 
         
            -
                - name: pass@100
         
     | 
| 104 | 
         
            -
                  type: pass@100
         
     | 
| 105 | 
         
            -
                  value: 0.41
         
     | 
| 106 | 
         
            -
                  verified: false
         
     | 
| 107 | 
         
            -
              - task:
         
     | 
| 108 | 
         
            -
                  type: text-generation
         
     | 
| 109 | 
         
            -
                dataset:
         
     | 
| 110 | 
         
            -
                  type: nuprl/MultiPL-E
         
     | 
| 111 | 
         
            -
                  name: MultiPL MBPP (Java)
         
     | 
| 112 | 
         
            -
                metrics:
         
     | 
| 113 | 
         
            -
                - name: pass@1
         
     | 
| 114 | 
         
            -
                  type: pass@1
         
     | 
| 115 | 
         
            -
                  value: 0.28
         
     | 
| 116 | 
         
            -
                  verified: false
         
     | 
| 117 | 
         
            -
                - name: pass@10
         
     | 
| 118 | 
         
            -
                  type: pass@10
         
     | 
| 119 | 
         
            -
                  value: 0.44
         
     | 
| 120 | 
         
            -
                  verified: false
         
     | 
| 121 | 
         
            -
                - name: pass@100
         
     | 
| 122 | 
         
            -
                  type: pass@100
         
     | 
| 123 | 
         
            -
                  value: 0.59
         
     | 
| 124 | 
         
            -
                  verified: false
         
     | 
| 125 | 
         
            -
              - task:
         
     | 
| 126 | 
         
            -
                  type: text-generation
         
     | 
| 127 | 
         
            -
                dataset:
         
     | 
| 128 | 
         
            -
                  type: loubnabnl/humaneval_infilling
         
     | 
| 129 | 
         
            -
                  name: HumanEval FIM (Python)
         
     | 
| 130 | 
         
            -
                metrics:
         
     | 
| 131 | 
         
            -
                - name: single_line
         
     | 
| 132 | 
         
            -
                  type: exact_match
         
     | 
| 133 | 
         
            -
                  value: 0.44
         
     | 
| 134 | 
         
            -
                  verified: false
         
     | 
| 135 | 
         
            -
              - task:
         
     | 
| 136 | 
         
            -
                  type: text-generation
         
     | 
| 137 | 
         
            -
                dataset:
         
     | 
| 138 | 
         
            -
                  type: nuprl/MultiPL-E
         
     | 
| 139 | 
         
            -
                  name: MultiPL HumanEval FIM (Java)
         
     | 
| 140 | 
         
            -
                metrics:
         
     | 
| 141 | 
         
            -
                - name: single_line
         
     | 
| 142 | 
         
            -
                  type: exact_match
         
     | 
| 143 | 
         
            -
                  value: 0.62
         
     | 
| 144 | 
         
            -
                  verified: false
         
     | 
| 145 | 
         
            -
              - task:
         
     | 
| 146 | 
         
            -
                  type: text-generation
         
     | 
| 147 | 
         
            -
                dataset:
         
     | 
| 148 | 
         
            -
                  type: nuprl/MultiPL-E
         
     | 
| 149 | 
         
            -
                  name: MultiPL HumanEval FIM (JavaScript)
         
     | 
| 150 | 
         
            -
                metrics:
         
     | 
| 151 | 
         
            -
                - name: single_line
         
     | 
| 152 | 
         
            -
                  type: exact_match
         
     | 
| 153 | 
         
            -
                  value: 0.60
         
     | 
| 154 | 
         
            -
                  verified: false
         
     | 
| 155 | 
         
            -
              - task:
         
     | 
| 156 | 
         
            -
                  type: text-generation
         
     | 
| 157 | 
         
            -
                dataset:
         
     | 
| 158 | 
         
            -
                  type: code_x_glue_ct_code_to_text
         
     | 
| 159 | 
         
            -
                  name: CodeXGLUE code-to-text (Python)
         
     | 
| 160 | 
         
            -
                metrics:
         
     | 
| 161 | 
         
            -
                - name: BLEU
         
     | 
| 162 | 
         
            -
                  type: bleu
         
     | 
| 163 | 
         
            -
                  value: 18.13
         
     | 
| 164 | 
         
            -
                  verified: false
         
     | 
| 165 | 
         
            -
            ---
         
     | 
| 166 | 
         | 
| 167 | 
         
             
            # 🎅🎸SantaCoder-metal
         
     | 
| 168 | 
         | 
| 169 | 
         
            -
             
     | 
| 170 | 
         
            -
             
     | 
| 171 | 
         
            -
            Play with the model on the [SantaCoder Space Demo](https://huggingface.co/spaces/bigcode/santacoder-demo).
         
     | 
| 172 | 
         
            -
             
     | 
| 173 | 
         
            -
            #  Table of Contents
         
     | 
| 174 | 
         
            -
             
     | 
| 175 | 
         
            -
            1. [Model Summary](#model-summary)
         
     | 
| 176 | 
         
            -
            2. [Use](#use)
         
     | 
| 177 | 
         
            -
            3. [Limitations](#limitations)
         
     | 
| 178 | 
         
            -
            4. [Training](#training)
         
     | 
| 179 | 
         
            -
            5. [License](#license)
         
     | 
| 180 | 
         
            -
            6. [Citation](#citation)
         
     | 
| 181 | 
         
            -
             
     | 
| 182 | 
         
            -
            # Model Summary
         
     | 
| 183 | 
         
            -
             
     | 
| 184 | 
         
            -
            This is the same model as [SantaCoder](https://huggingface.co/bigcode/santacoder) but it can be loaded with transformers >=4.28.1 to use the GPTBigCode architecture.
         
     | 
| 185 | 
         
            -
            We refer the reader to the [SantaCoder model page](https://huggingface.co/bigcode/santacoder) for full documentation about this model.
         
     | 
| 186 | 
         
            -
             
     | 
| 187 | 
         
            -
             
     | 
| 188 | 
         
            -
            - **Repository:** [bigcode/Megatron-LM](https://github.com/bigcode-project/Megatron-LM)
         
     | 
| 189 | 
         
            -
            - **Project Website:** [bigcode-project.org](https://www.bigcode-project.org)
         
     | 
| 190 | 
         
            -
            - **Paper:** [🎅SantaCoder: Don't reach for the stars!🌟](https://t.co/YV3pzUbYOr)
         
     | 
| 191 | 
         
            -
            - **Point of Contact:** [contact@bigcode-project.org](mailto:contact@bigcode-project.org)
         
     | 
| 192 | 
         
            -
            - **Languages:** Python, Java, and JavaScript
         
     | 
| 193 | 
         
            -
             
     | 
| 194 | 
         
            -
            There are two versions (branches) of the model:
         
     | 
| 195 | 
         
            -
            * `main`: Uses the `gpt_bigcode` model. [Requires the bigcode fork of transformers](https://github.com/bigcode-project/transformers).
         
     | 
| 196 | 
         
            -
            * `main_custom`: Packaged with its modeling code. Requires `transformers>=4.27`.
         
     | 
| 197 | 
         
            -
              Alternatively, it can run on older versions by setting the configuration parameter `activation_function = "gelu_pytorch_tanh"`.
         
     | 
| 198 | 
         
            -
             
     | 
| 199 | 
         
            -
            # Use
         
     | 
| 200 | 
         
            -
             
     | 
| 201 | 
         
            -
            ## Intended use
         
     | 
| 202 | 
         
            -
             
     | 
| 203 | 
         
            -
            The model was trained on GitHub code. As such it is _not_ an instruction model and commands like "Write a function that computes the square root." do not work well.
         
     | 
| 204 | 
         
            -
            You should phrase commands like they occur in source code such as comments (e.g. `# the following function computes the sqrt`) or write a function signature and docstring and let the model complete the function body.
         
     | 
| 205 | 
         
            -
             
     | 
| 206 | 
         
            -
            ### Attribution & Other Requirements
         
     | 
| 207 | 
         
            -
             
     | 
| 208 | 
         
            -
            The pretraining dataset of the model was filtered for permissive licenses only. Nevertheless, the model can generate source code verbatim from the dataset. The code's license might require attribution and/or other specific requirements that must be respected. We provide a [search index](https://huggingface.co/spaces/bigcode/santacoder-search) that lets you search through the pretraining data to identify where generated code came from and apply the proper attribution to your code.
         
     | 
| 209 | 
         
            -
             
     | 
| 210 | 
         
            -
            # Limitations
         
     | 
| 211 | 
         
            -
             
     | 
| 212 | 
         
            -
            The model has been trained on source code in Python, Java, and JavaScript. The predominant natural language in the source code is English, although other languages are also present. As such, the model is capable of generating code snippets given some context, but the generated code is not guaranteed to work as intended. It can be inefficient and may contain bugs or exploits.
         
     | 
| 213 | 
         
            -
             
     | 
| 214 | 
         
            -
            # Training
         
     | 
| 215 | 
         
            -
             
     | 
| 216 | 
         
            -
            ## Model
         
     | 
| 217 | 
         
            -
             
     | 
| 218 | 
         
            -
            - **Architecture:** GPT-2 model with multi-query attention and Fill-in-the-Middle objective
         
     | 
| 219 | 
         
            -
            - **Pretraining steps:** 600K
         
     | 
| 220 | 
         
            -
            - **Pretraining tokens:** 236 billion
         
     | 
| 221 | 
         
            -
            - **Precision:** float16
         
     | 
| 222 | 
         
            -
             
     | 
| 223 | 
         
            -
            ## Hardware
         
     | 
| 224 | 
         
            -
             
     | 
| 225 | 
         
            -
            - **GPUs:** 96 Tesla V100
         
     | 
| 226 | 
         
            -
            - **Training time:** 6.2 days
         
     | 
| 227 | 
         
            -
            - **Total FLOPS:** 2.1 x 10^21
         
     | 
| 228 | 
         
            -
             
     | 
| 229 | 
         
            -
            ## Software
         
     | 
| 230 | 
         
            -
             
     | 
| 231 | 
         
            -
            - **Orchestration:** [Megatron-LM](https://github.com/bigcode-project/Megatron-LM)
         
     | 
| 232 | 
         
            -
            - **Neural networks:** [PyTorch](https://github.com/pytorch/pytorch)
         
     | 
| 233 | 
         
            -
            - **FP16 if applicable:** [apex](https://github.com/NVIDIA/apex)
         
     | 
| 234 | 
         | 
| 235 | 
         
            -
            # License
         
     | 
| 236 | 
         
            -
            The model is licensed under the CodeML Open RAIL-M v0.1 license. You can find the full license [here](https://huggingface.co/spaces/bigcode/license).
         
     | 
| 
         | 
|
| 13 | 
         | 
| 14 | 
         
             
            model-index:
         
     | 
| 15 | 
         
             
            - name: SantaCoder-metal
         
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 16 | 
         | 
| 17 | 
         
             
            # 🎅🎸SantaCoder-metal
         
     | 
| 18 | 
         | 
| 19 | 
         
            +
            This is a quantized gpt_bigcode/santacoder model for mlc-llm, for running gpt_bigcode models on Apple GPUs (Metal).
         
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 20 | 
         | 
| 
         | 
|
| 
         |