Spaces:

George-API
/

qwen4bit

Sleeping

George-API committed on Mar 15

Commit

18257ed

verified ·

1 Parent(s): ff20385

Upload transformers_config.json with huggingface_hub

Files changed (1) hide show

transformers_config.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "model_config": {
-    "model_name_or_path": "unsloth/DeepSeek-R1-Distill-Qwen-14B-bnb-4bit",
     "use_cache": false,
     "rope_scaling": {
       "type": "dynamic",
@@ -9,7 +9,7 @@
   },
   "training_config": {
     "num_train_epochs": 3,
-    "per_device_train_batch_size": 4,
     "gradient_accumulation_steps": 4,
     "learning_rate": 2e-5,
     "lr_scheduler_type": "cosine",
@@ -38,7 +38,8 @@
     "attn_implementation": "eager",
     "use_flash_attention": false,
     "memory_optimization": {
-      "expandable_segments": true
     }
   },
   "quantization_config": {

 {
   "model_config": {
+    "model_name_or_path": "unsloth/DeepSeek-R1-Distill-Qwen-14B-unsloth-bnb-4bit",
     "use_cache": false,
     "rope_scaling": {
       "type": "dynamic",
   },
   "training_config": {
     "num_train_epochs": 3,
+    "per_device_train_batch_size": 6,
     "gradient_accumulation_steps": 4,
     "learning_rate": 2e-5,
     "lr_scheduler_type": "cosine",
     "attn_implementation": "eager",
     "use_flash_attention": false,
     "memory_optimization": {
+      "expandable_segments": true,
+      "max_memory_fraction": 0.95
     }
   },
   "quantization_config": {