Marko Tasic committed
Commit c2fc44c
1 Parent(s): 9b182f7

out/pretrain-core/final
out/pretrain-core/final/hyperparameters.yaml
ADDED
@@ -0,0 +1,76 @@
+model_name: tangled-alpha-0.1-core
+model_config:
+  name: tangled-alpha-0.1-core
+  hf_config: {}
+  block_size: 131072
+  n_layer: 32
+  n_embd: 512
+  vocab_size: 32064
+  padding_multiple: 512
+  padded_vocab_size: 32064
+  norm_class_name: RMSNorm
+  norm_eps: 1.0e-05
+  norm_qk: false
+  post_attention_norm: false
+  post_mlp_norm: false
+  parallel_residual: false
+  shared_attention_norm: false
+  n_head: 4
+  head_size: 128
+  n_query_groups: 4
+  attn_bias: false
+  rope_base: 500000
+  rotary_percentage: 1.0
+  rope_condense_ratio: 1
+  rope_adjustments:
+    factor: 32.0
+    low_freq_factor: 1.0
+    high_freq_factor: 4.0
+    original_max_seq_len: 8192
+  intermediate_size: 2688
+  bias: false
+  mlp_class_name: LLaMAMLP
+  gelu_approximate: none
+  n_expert: 0
+  n_expert_per_token: 0
+  scale_embeddings: false
+  lm_head_bias: false
+out_dir: ../out/pretrain-core
+precision: bf16-true
+resume: auto
+data:
+  class_path: litgpt.data.LitData
+  init_args:
+    data_path: ../core-data-0-8192-2000/
+    seed: 42
+    num_workers: 32
+train:
+  save_interval: 100
+  log_interval: 1
+  global_batch_size: 512
+  micro_batch_size: 2
+  lr_warmup_steps: 200
+  max_tokens: 7318364160
+  max_seq_length: 8192
+  tie_embeddings: true
+  max_norm: 1.0
+  min_lr: 1.0e-05
+eval:
+  interval: 50
+  max_iters: 100
+  initial_validation: false
+  final_validation: true
+  evaluate_example: first
+optimizer:
+  class_path: grokadamw.GrokAdamW
+  init_args:
+    lr: 0.0001
+    weight_decay: 0.01
+    betas:
+    - 0.9
+    - 0.999
+devices: auto
+num_nodes: 1
+tokenizer_dir: ..
+logger_name: wandb
+seed: 23
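Some quick schedule arithmetic falls out of the train section above. The following is a minimal sketch in plain Python (no dependencies), assuming each global batch is fully packed to max_seq_length, which is how litgpt's packed pretraining data behaves:

# Back-of-envelope step budget implied by hyperparameters.yaml above.
global_batch_size = 512
max_seq_length = 8192
max_tokens = 7_318_364_160

tokens_per_step = global_batch_size * max_seq_length  # 4,194,304 tokens/step
total_steps = max_tokens / tokens_per_step            # ~1,745 optimizer steps
print(tokens_per_step, round(total_steps))

So the run amounts to roughly 1,745 optimizer steps, of which the first 200 (lr_warmup_steps) are warmup.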
out/pretrain-core/final/lit_model.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:be0538fc89f974444adc06478e996ec1649bc85720e177e8830eddbc286a1a20
+size 1457331426
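The three lines above are only a Git LFS pointer; the 1.4 GB weight blob lives in LFS storage. A minimal sketch for verifying a pulled copy against the recorded oid and size (the local path mirrors this repo's layout and is an assumption):

# Hypothetical integrity check for the LFS-tracked lit_model.pth.
import hashlib
import os

path = "out/pretrain-core/final/lit_model.pth"  # assumed local checkout path
expected_oid = "be0538fc89f974444adc06478e996ec1649bc85720e177e8830eddbc286a1a20"
expected_size = 1457331426

assert os.path.getsize(path) == expected_size, "size mismatch (pointer not pulled?)"

h = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # stream in 1 MiB chunks
        h.update(chunk)
assert h.hexdigest() == expected_oid, "sha256 mismatch"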
out/pretrain-core/final/model_config.yaml
ADDED
@@ -0,0 +1,40 @@
+attention_logit_softcapping: null
+attention_scores_scalar: null
+attn_bias: false
+bias: false
+block_size: 131072
+final_logit_softcapping: null
+gelu_approximate: none
+head_size: 128
+hf_config: {}
+intermediate_size: 2688
+lm_head_bias: false
+mlp_class_name: LLaMAMLP
+n_embd: 512
+n_expert: 0
+n_expert_per_token: 0
+n_head: 4
+n_layer: 32
+n_query_groups: 4
+name: tangled-alpha-0.1-core
+norm_class_name: RMSNorm
+norm_eps: 1.0e-05
+norm_qk: false
+padded_vocab_size: 32064
+padding_multiple: 512
+parallel_residual: false
+post_attention_norm: false
+post_mlp_norm: false
+rope_adjustments:
+  factor: 32.0
+  high_freq_factor: 4.0
+  low_freq_factor: 1.0
+  original_max_seq_len: 8192
+rope_base: 500000
+rope_condense_ratio: 1
+rotary_percentage: 1.0
+scale_embeddings: false
+shared_attention_norm: false
+sliding_window_layer_placing: null
+sliding_window_size: null
+vocab_size: 32064
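model_config.yaml is litgpt's architecture description for the checkpoint. A minimal loading sketch, assuming litgpt is installed and exports GPT and Config; whether the final checkpoint is a raw state dict or wrapped under a "model" key is an assumption handled for both cases:

# Sketch: rebuild the model from model_config.yaml and load the weights.
import torch
from litgpt import GPT, Config

config = Config.from_file("out/pretrain-core/final/model_config.yaml")
model = GPT(config)

ckpt = torch.load("out/pretrain-core/final/lit_model.pth", map_location="cpu")
state_dict = ckpt.get("model", ckpt)  # unwrap if saved as {"model": ...}
model.load_state_dict(state_dict)
model.eval()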
out/pretrain-core/final/tokenizer.json
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:83b2d408ebeae398f24964d4e7ce0c847cd7ff554519941355641c7d0f68b09b
+size 1845893
out/pretrain-core/final/tokenizer_config.json
ADDED
@@ -0,0 +1,194 @@
+{
+  "add_bos_token": false,
+  "add_eos_token": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": false
+    },
+    "32000": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32001": {
+      "content": "<|assistant|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32002": {
+      "content": "<|placeholder1|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32003": {
+      "content": "<|placeholder2|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32004": {
+      "content": "<|placeholder3|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32005": {
+      "content": "<|placeholder4|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32006": {
+      "content": "<|system|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32007": {
+      "content": "<|end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32008": {
+      "content": "<|placeholder5|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32009": {
+      "content": "<|placeholder6|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32010": {
+      "content": "<|user|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32011": {
+      "content": "<tools>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32012": {
+      "content": "</tools>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32013": {
+      "content": "<tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32014": {
+      "content": "</tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32015": {
+      "content": "<tool_response>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32016": {
+      "content": "</tool_response>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32017": {
+      "content": "<think>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32018": {
+      "content": "</think>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "chat_template": "{% for message in messages %}{% if message['role'] == 'system' and message['content'] %}{{'<|system|>' + message['content'] + '<|end|>'}}{% elif message['role'] == 'user' %}{{'<|user|>' + message['content'] + '<|end|>'}}{% elif message['role'] == 'assistant' %}{{'<|assistant|>' + message['content'] + '<|end|>'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>' }}{% else %}{{ eos_token }}{% endif %}",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|endoftext|>",
+  "legacy": false,
+  "model_max_length": 131072,
+  "pad_token": "<|endoftext|>",
+  "padding_side": "left",
+  "sp_model_kwargs": {},
+  "tokenizer_class": "LlamaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false
+}
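The chat_template above is Phi-3-style: each message is wrapped in its role token and closed with <|end|>, and generation prompts end with <|assistant|>. A sketch of rendering it through Hugging Face transformers, assuming the LFS-tracked tokenizer.json has been pulled into the directory:

# Sketch: apply the chat_template from tokenizer_config.json above.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("out/pretrain-core/final")
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello!"},
]
text = tok.apply_chat_template(messages, tokenize=False,
                               add_generation_prompt=True)
print(text)
# -> <|system|>You are a helpful assistant.<|end|><|user|>Hello!<|end|><|assistant|>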