Spaces:

George-API
/

qwen4bit

Sleeping

George-API committed on Mar 11

Commit

0a1769d

verified ·

1 Parent(s): d625213

Upload transformers_config.json with huggingface_hub

Files changed (1) hide show

transformers_config.json CHANGED Viewed

@@ -70,5 +70,50 @@
     "pre_tokenized": true,
     "input_ids_field": "input_ids",
     "skip_tokenization": true
   }
 }

     "pre_tokenized": true,
     "input_ids_field": "input_ids",
     "skip_tokenization": true
+  },
+  "deepspeed_config": {
+    "zero_optimization": {
+      "stage": 2,
+      "offload_optimizer": {
+        "device": "cpu",
+        "pin_memory": true
+      },
+      "contiguous_gradients": true,
+      "overlap_comm": true,
+      "reduce_scatter": true,
+      "reduce_bucket_size": 5e8,
+      "allgather_bucket_size": 5e8
+    },
+    "gradient_accumulation_steps": 4,
+    "gradient_clipping": 0.3,
+    "fp16": {
+      "enabled": true,
+      "loss_scale": 0,
+      "loss_scale_window": 1000,
+      "initial_scale_power": 16,
+      "hysteresis": 2,
+      "min_loss_scale": 1
+    },
+    "optimizer": {
+      "type": "AdamW",
+      "params": {
+        "lr": 2e-5,
+        "betas": [0.9, 0.999],
+        "eps": 1e-8,
+        "weight_decay": 0.01
+      }
+    },
+    "activation_checkpointing": {
+      "partition_activations": true,
+      "cpu_checkpointing": true,
+      "contiguous_memory_optimization": true,
+      "number_checkpoints": null,
+      "synchronize_checkpoint_boundary": false,
+      "profile": false
+    },
+    "steps_per_print": 10,
+    "train_batch_size": "auto",
+    "train_micro_batch_size_per_gpu": "auto",
+    "wall_clock_breakdown": false
   }
 }