Bram Vanroy committed · Commit 08f6f54 · 1 Parent(s): ff5f71b

update models.json

Files changed (1)
  1. evals/models.json +21 -45
evals/models.json CHANGED
@@ -3,120 +3,96 @@
         "compute_dtype": "bfloat16",
         "model_name": "yhavinga/gpt-neo-1.3B-dutch",
         "num_parameters": 1315575808,
-        "quantization": "8-bit",
-        "model_type": "pretrained",
-        "dutch_coverage": "pretrained"
+        "quantization": "8-bit"
     },
     "gpt-neo-125m-dutch": {
         "compute_dtype": "bfloat16",
         "model_name": "yhavinga/gpt-neo-125M-dutch",
         "num_parameters": 125198592,
-        "quantization": "8-bit",
-        "model_type": "pretrained",
-        "dutch_coverage": "pretrained"
+        "quantization": "8-bit"
     },
     "gpt2-large-dutch": {
         "compute_dtype": "bfloat16",
         "model_name": "yhavinga/gpt2-large-dutch",
         "num_parameters": 774030080,
-        "quantization": "8-bit",
-        "model_type": "pretrained",
-        "dutch_coverage": "pretrained"
+        "quantization": "8-bit"
     },
     "gpt2-medium-dutch": {
         "compute_dtype": "bfloat16",
         "model_name": "yhavinga/gpt2-medium-dutch",
         "num_parameters": 354823168,
-        "quantization": "8-bit",
-        "model_type": "pretrained",
-        "dutch_coverage": "pretrained"
+        "quantization": "8-bit"
     },
     "llama-2-13b-chat-dutch": {
         "compute_dtype": "bfloat16",
         "model_name": "BramVanroy/Llama-2-13b-chat-dutch",
         "num_parameters": 13015864320,
-        "quantization": "8-bit",
-        "model_type": "instruction-tuned",
-        "dutch_coverage": "fine-tuned"
+        "quantization": "8-bit"
    },
     "llama-2-13b-chat-hf": {
         "compute_dtype": "bfloat16",
         "model_name": "meta-llama/Llama-2-13b-chat-hf",
         "num_parameters": 13015864320,
-        "quantization": "8-bit",
-        "model_type": "instruction-tuned",
-        "dutch_coverage": "none"
+        "quantization": "8-bit"
     },
     "llama-2-13b-hf": {
         "compute_dtype": "bfloat16",
         "model_name": "meta-llama/Llama-2-13b-hf",
         "num_parameters": 13015864320,
-        "quantization": "8-bit",
-        "model_type": "pretrained",
-        "dutch_coverage": "none"
+        "quantization": "8-bit"
     },
     "llama-2-7b-chat-hf": {
         "compute_dtype": "bfloat16",
         "model_name": "meta-llama/Llama-2-7b-chat-hf",
         "num_parameters": 6738415616,
-        "quantization": "8-bit",
-        "model_type": "instruction-tuned",
-        "dutch_coverage": "none"
+        "quantization": "8-bit"
     },
     "llama-2-7b-hf": {
         "compute_dtype": "bfloat16",
         "model_name": "meta-llama/Llama-2-7b-hf",
         "num_parameters": 6738415616,
-        "quantization": "8-bit",
-        "model_type": "pretrained",
-        "dutch_coverage": "none"
+        "quantization": "8-bit"
     },
     "llama2-13b-ft-mc4_nl_cleaned_tiny": {
         "compute_dtype": "bfloat16",
         "model_name": "BramVanroy/llama2-13b-ft-mc4_nl_cleaned_tiny",
         "num_parameters": 13015864320,
-        "quantization": "8-bit",
-        "model_type": "fine-tuned",
-        "dutch_coverage": "fine-tuned"
+        "quantization": "8-bit"
     },
     "mistral-7b-v0.1": {
         "compute_dtype": "bfloat16",
         "model_name": "mistralai/Mistral-7B-v0.1",
         "num_parameters": 7241732096,
-        "quantization": "8-bit",
-        "model_type": "pretrained",
-        "dutch_coverage": "none"
+        "quantization": "8-bit"
+    },
+    "mixtral-8x7b-v0.1": {
+        "compute_dtype": "auto",
+        "model_name": "mistralai/Mixtral-8x7B-v0.1",
+        "num_parameters": 46702792704,
+        "quantization": null
     },
     "neural-chat-7b-v3-1": {
         "compute_dtype": "bfloat16",
         "model_name": "Intel/neural-chat-7b-v3-1",
         "num_parameters": 7241732096,
-        "quantization": "8-bit",
-        "model_type": "RL-tuned",
-        "dutch_coverage": "none"
+        "quantization": "8-bit"
     },
     "orca-2-13b": {
         "compute_dtype": "bfloat16",
         "model_name": "microsoft/Orca-2-13b",
         "num_parameters": 13015895040,
-        "quantization": "8-bit",
-        "model_type": "fine-tuned",
-        "dutch_coverage": "none"
+        "quantization": "8-bit"
     },
     "orca-2-7b": {
         "compute_dtype": "bfloat16",
         "model_name": "microsoft/Orca-2-7b",
         "num_parameters": 6738440192,
-        "quantization": "8-bit",
-        "model_type": "fine-tuned",
-        "dutch_coverage": "none"
+        "quantization": "8-bit"
     },
     "zephyr-7b-beta": {
         "compute_dtype": "bfloat16",
         "model_name": "HuggingFaceH4/zephyr-7b-beta",
         "num_parameters": 7241732096,
-        "quantization": "8-bit",
-        "model_type": "RL-tuned",
-        "dutch_coverage": "none"
+        "quantization": "8-bit"
     }
 }
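After this change each entry keeps only "compute_dtype", "model_name", "num_parameters", and "quantization"; the "model_type" and "dutch_coverage" fields are dropped, and a new "mixtral-8x7b-v0.1" entry is added with "compute_dtype": "auto" and "quantization": null. Below is a minimal sketch of how such an entry could be turned into model-loading keyword arguments by a Python consumer; the build_load_kwargs helper is hypothetical and not part of this repository, and the kwarg names are only examples of what a loader like transformers' from_pretrained might accept.

```python
import json

def build_load_kwargs(entry: dict) -> dict:
    """Hypothetical mapping from one models.json entry to loader kwargs."""
    kwargs = {"torch_dtype": entry.get("compute_dtype", "auto")}
    if entry.get("quantization") == "8-bit":
        # 8-bit loading would typically go through bitsandbytes
        # (e.g. BitsAndBytesConfig(load_in_8bit=True)); shown here as a flag.
        kwargs["load_in_8bit"] = True
    return kwargs

# Sample entries mirroring the new schema in evals/models.json.
models = json.loads("""
{
    "mistral-7b-v0.1": {
        "compute_dtype": "bfloat16",
        "model_name": "mistralai/Mistral-7B-v0.1",
        "num_parameters": 7241732096,
        "quantization": "8-bit"
    },
    "mixtral-8x7b-v0.1": {
        "compute_dtype": "auto",
        "model_name": "mistralai/Mixtral-8x7B-v0.1",
        "num_parameters": 46702792704,
        "quantization": null
    }
}
""")

for short_name, entry in models.items():
    print(short_name, entry["model_name"], build_load_kwargs(entry))
```

With these two sample entries the sketch yields {"torch_dtype": "bfloat16", "load_in_8bit": True} for mistral-7b-v0.1 and only {"torch_dtype": "auto"} for mixtral-8x7b-v0.1, reflecting that the new Mixtral entry is listed without quantization.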