Delta-Vector committed on
Commit
60cfb46
·
verified ·
1 Parent(s): 528d5a0

Create README.md

Browse files
Files changed (1) hide show
  1. README.md +281 -0
README.md ADDED
@@ -0,0 +1,281 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - roleplay
4
+ - creative-writing
5
+ - merge
6
+ - mergekit
7
+ - kto
8
+ - rl
9
+ - finetune
10
+ datasets:
11
+ - Delta-Vector/Tauri-IFeval-Dans-Tulu-KTO
12
+ - Delta-Vector/Tauri-Opus-Accepted-GPT-Rejected-Opus-Writing-Prompts
13
+ - Delta-Vector/Tauri-KTO-Instruct-Mix
14
+ - Delta-Vector/Tauri-Purpura-Arkhaios-CC-KTO
15
+ base_model:
16
+ - Delta-Vector/Archaeo-32B
17
+ pipeline_tag: text-generation
18
+ library_name: transformers
19
+ ---
20
+ ```
21
+ __~a~_
22
+ ~~; ~_
23
+ _ ~ ~_ _
24
+ '_\;__._._._._._._] ~_._._._._._.__;/_`
25
+ '(/'/'/'/'|'|'|'| ( )|'|'|'|'\'\'\'\)'
26
+ (/ / / /, | | | |(/ \) | | | ,\ \ \ \)
27
+ (/ / / / / | | | ~(/ \) ~ | | \ \ \ \ \)
28
+ (/ / / / / ~ ~ ~ (/ \) ~ ~ \ \ \ \ \)
29
+ (/ / / / ~ / (||)| ~ \ \ \ \)
30
+ ~ / / ~ M /||\M ~ \ \ ~
31
+ ~ ~ /||\ ~ ~
32
+ //||\\
33
+ //||\\
34
+ //||\\
35
+ '/||\' "Archaeopteryx"
36
+ ```
37
+
38
+ <html lang="en">
39
+ <head>
40
+ <meta charset="UTF-8">
41
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
42
+ <style>
43
+ @import url('https://fonts.googleapis.com/css2?family=VT323&display=swap');
44
+ body {
45
+ background: #0a0017;
46
+ margin: 0;
47
+ padding: 20px;
48
+ font-family: 'VT323', monospace;
49
+ color: #ff00aa;
50
+ text-shadow: 0 0 8px #ff00aa;
51
+ animation: glitch-flicker 0.2s infinite alternate;
52
+ }
53
+ @keyframes glitch-flicker {
54
+ 0% { text-shadow: 0 0 5px #ff00aa, 0 0 15px #ff00aa; }
55
+ 100% { text-shadow: 0 0 8px #ff0066, 0 0 18px #ff0066; }
56
+ }
57
+ .crt-container {
58
+ padding: 10px;
59
+ max-width: 900px;
60
+ margin: auto;
61
+ }
62
+ .crt-case {
63
+ background: linear-gradient(135deg, #130021, #20002c);
64
+ border-radius: 10px;
65
+ padding: 15px;
66
+ box-shadow:
67
+ inset 2px 2px 10px rgba(255,0,170,0.5),
68
+ 2px 2px 5px rgba(255,0,170,0.3),
69
+ 0 0 25px rgba(255,0,170,0.2);
70
+ }
71
+ .crt-screen {
72
+ background: #0c011a;
73
+ padding: 20px;
74
+ border-radius: 10px;
75
+ box-shadow:
76
+ inset 0 0 25px rgba(255,0,170,0.3),
77
+ 0 0 15px rgba(255,0,170,0.7);
78
+ filter: contrast(1.2) brightness(1.2);
79
+ text-shadow: 0px 0px 5px #ff00aa;
80
+ animation: glow-pulse 3s infinite alternate;
81
+ }
82
+ @keyframes glow-pulse {
83
+ 0% { box-shadow: inset 0 0 20px rgba(255,0,170,0.3), 0 0 15px rgba(255,0,170,0.3); }
84
+ 100% { box-shadow: inset 0 0 30px rgba(255,0,170,0.5), 0 0 25px rgba(255,0,170,0.5); }
85
+ }
86
+ h2 {
87
+ color: #ff33cc;
88
+ text-align: center;
89
+ font-size: 28px;
90
+ text-shadow:
91
+ 0 0 8px #ff33cc,
92
+ 0 0 18px #ff0044;
93
+ }
94
+ pre {
95
+ background: rgba(255,0,170,0.1);
96
+ padding: 10px;
97
+ border-radius: 10px;
98
+ color: #ff66cc;
99
+ font-size: 14px;
100
+ box-shadow: inset 0 0 10px rgba(255,0,170,0.5);
101
+ }
102
+ .glitch {
103
+ animation: text-glitch 0.5s infinite alternate;
104
+ }
105
+ @keyframes text-glitch {
106
+ 0% { transform: translateX(-2px); text-shadow: 0 0 5px #ff0066, 0 0 10px #ff33cc; }
107
+ 100% { transform: translateX(2px); text-shadow: 0 0 8px #ff00aa, 0 0 20px #ff0099; }
108
+ }
109
+ .neon-link {
110
+ color: #ff66cc;
111
+ text-decoration: none;
112
+ transition: text-shadow 0.3s ease;
113
+ }
114
+ .neon-link:hover {
115
+ text-shadow: 0px 0px 15px #ff66cc, 0 0 25px rgba(255,0,170,0.5);
116
+ }
117
+ .ascii-art {
118
+ text-align: center;
119
+ font-size: 12px;
120
+ color: #ff33cc;
121
+ text-shadow: 0px 0px 5px #ff00ff;
122
+ margin-bottom: 20px;
123
+ }
124
+ .quantso-container {
125
+ display: flex;
126
+ justify-content: center;
127
+ gap: 20px;
128
+ margin-top: 20px;
129
+ }
130
+ .quantso-box {
131
+ background: rgba(255,0,170,0.1);
132
+ padding: 15px;
133
+ border-radius: 10px;
134
+ text-align: center;
135
+ box-shadow: inset 0 0 10px rgba(255,0,170,0.5);
136
+ flex: 1;
137
+ max-width: 150px;
138
+ }
139
+ </style>
140
+ </head>
141
+ <body>
142
+ <div class="crt-container">
143
+ <div class="crt-case">
144
+ <div class="crt-screen">
145
+ <p>A series of merges made for roleplaying &amp; creative writing. This model is an RL train on top of Archaeo, a merge using Hamanasu-Magnum &amp; Kunou, trained with Axolotl on 8xH200s.</p>
146
+ <h3>ChatML formatting</h3>
147
+ <pre>
148
+ """<|im_start|>system
149
+ system prompt<|im_end|>
150
+ <|im_start|>user
151
+ Hi there!<|im_end|>
152
+ <|im_start|>assistant
153
+ Nice to meet you!<|im_end|>
154
+ <|im_start|>user
155
+ Can I ask a question?<|im_end|>
156
+ <|im_start|>assistant
157
+ """
158
+ </pre>
159
+ <h3>Axolotl Configuration</h3>
160
+ <pre>
161
+ base_model: ./model
162
+
163
+ plugins:
164
+ - axolotl.integrations.liger.LigerPlugin
165
+ - axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin
166
+ liger_rope: true
167
+ liger_rms_norm: true
168
+ liger_layer_norm: true
169
+ liger_glu_activation: true
170
+ liger_fused_linear_cross_entropy: true
171
+ cut_cross_entropy: false
172
+
173
+ load_in_8bit: false
174
+ load_in_4bit: false
175
+ strict: false
176
+
177
+ rl: kto
178
+ kto_undesirable_weight: 1.0
179
+
180
+ datasets:
181
+ - path: Delta-Vector/Tauri-Opus-Accepted-GPT-Rejected-Opus-Writing-Prompts
182
+ split: train
183
+ type: chatml.argilla
184
+ - path: Delta-Vector/Tauri-IFeval-Dans-Tulu-KTO
185
+ split: train
186
+ type: chatml.argilla
187
+ - path: Delta-Vector/Tauri-KTO-Instruct-Mix
188
+ split: train
189
+ type: chatml.argilla
190
+ - path: Delta-Vector/Tauri-Purpura-Arkhaios-CC-KTO
191
+ split: train
192
+ type: chatml.argilla
193
+ dataset_prepared_path: last_run_prepared
194
+ val_set_size: 0.0
195
+ output_dir: ./archaeo-kto-v2
196
+ remove_unused_columns: false
197
+
198
+ #@lora_mlp_kernel: true
199
+ #lora_qkv_kernel: true
200
+ #lora_o_kernel: true
201
+
202
+
203
+ adapter: lora
204
+ lora_model_dir:
205
+
206
+ sequence_len: 8192
207
+ pad_to_sequence_len: false
208
+
209
+ lora_r: 64
210
+ lora_alpha: 32
211
+ lora_dropout: 0.0
212
+ lora_target_linear: true
213
+ lora_fan_in_fan_out:
214
+ lora_target_modules:
215
+ - gate_proj
216
+ - down_proj
217
+ - up_proj
218
+ - q_proj
219
+ - v_proj
220
+ - k_proj
221
+ - o_proj
222
+
223
+ wandb_project: Francois-V2
224
+ wandb_entity:
225
+ wandb_watch:
226
+ wandb_name: Archaeo-32b-KTO
227
+ wandb_log_model:
228
+
229
+ gradient_accumulation_steps: 4
230
+ micro_batch_size: 4
231
+ num_epochs: 1
232
+ optimizer: paged_ademamix_8bit
233
+ lr_scheduler: constant_with_warmup
234
+ learning_rate: 5e-6
235
+ max_grad_norm: 0.001
236
+
237
+ train_on_inputs: false
238
+ group_by_length: false
239
+ bf16: auto
240
+ fp16:
241
+ tf32: true
242
+
243
+ gradient_checkpointing: true
244
+ gradient_checkpointing_kwargs:
245
+ use_reentrant: true
246
+ early_stopping_patience:
247
+ resume_from_checkpoint:
248
+ local_rank:
249
+ logging_steps: 1
250
+ xformers_attention:
251
+ flash_attention: true
252
+
253
+ warmup_steps: 100
254
+ evals_per_epoch: 4
255
+ eval_table_size:
256
+ eval_max_new_tokens: 128
257
+ saves_per_epoch: 1
258
+ debug:
259
+ deepspeed: ./deepspeed_configs/zero3_bf16.json
260
+ weight_decay: 0.0025
261
+ fsdp:
262
+ fsdp_config:
263
+ </pre>
264
+ <h3>Quants:</h3>
265
+ <div class="quantso-container">
266
+ <div class="quantso-box">
267
+ <strong>GGUF</strong><br>
268
+ <a class="neon-link" href="#">https://huggingface.co/mradermacher/Axo-Merge-Archaeo-V2-Lora-GGUF</a>
269
+ </div>
270
+ <div class="quantso-box">
271
+ <strong>EXL2</strong><br>
272
+ <a class="neon-link" href="#">Delta-Vector/Archaeo-32B-KTO-exl2</a>
273
+ </div>
274
+ </div>
275
+ <h3>Credits</h3>
276
+ <p>Thank you to: Kubernetes-bad, LucyKnada, Kalomaze, Alicat, Intervitens, Samantha Twinkman, Tav, Trappu & The rest of Anthracite</p>
277
+ </div>
278
+ </div>
279
+ </div>
280
+ </body>
281
+ </html>