George-API committed
Commit 18257ed · verified · 1 Parent(s): ff20385

Upload transformers_config.json with huggingface_hub
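As the commit message says, the file was pushed with the huggingface_hub client. A minimal sketch of such an upload using the library's upload_file API (the repo_id below is a placeholder, not taken from this page):

# Minimal upload sketch; repo_id is a hypothetical placeholder.
from huggingface_hub import upload_file

upload_file(
    path_or_fileobj="transformers_config.json",  # local file to push
    path_in_repo="transformers_config.json",     # destination path in the repo
    repo_id="George-API/example-repo",           # placeholder, not from this page
    commit_message="Upload transformers_config.json with huggingface_hub",
)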

Files changed (1)
  1. transformers_config.json +4 -3
transformers_config.json CHANGED
@@ -1,6 +1,6 @@
 {
   "model_config": {
-    "model_name_or_path": "unsloth/DeepSeek-R1-Distill-Qwen-14B-bnb-4bit",
+    "model_name_or_path": "unsloth/DeepSeek-R1-Distill-Qwen-14B-unsloth-bnb-4bit",
     "use_cache": false,
     "rope_scaling": {
       "type": "dynamic",
@@ -9,7 +9,7 @@
   },
   "training_config": {
     "num_train_epochs": 3,
-    "per_device_train_batch_size": 4,
+    "per_device_train_batch_size": 6,
     "gradient_accumulation_steps": 4,
     "learning_rate": 2e-5,
     "lr_scheduler_type": "cosine",
@@ -38,7 +38,8 @@
     "attn_implementation": "eager",
     "use_flash_attention": false,
     "memory_optimization": {
-      "expandable_segments": true
+      "expandable_segments": true,
+      "max_memory_fraction": 0.95
     }
   },
   "quantization_config": {