wadhma committed on
Commit
4ffb6b9
·
verified ·
1 Parent(s): 6fb411a

Upload sft model from experiment sft_test_with_validation_tracking

Browse files
Files changed (2) hide show
  1. README.md +1 -1
  2. model.safetensors +1 -1
README.md CHANGED
@@ -10,7 +10,7 @@ This model was created as part of the **sft_test_with_validation_tracking** expe
10
 
11
  ## Training Configuration
12
 
13
- {"model_name_or_path": "Qwen/Qwen2.5-1.5B-Instruct", "trust_remote_code": true, "stage": "sft", "do_train": true, "finetuning_type": "full", "deepspeed": "/datastor1/mwadhwa/code/skill-factory/thirdparty/LLaMA-Factory/examples/deepspeed/ds_z3_config.json", "dataset": "TAUR_dev__D_SFTv1_C_cd3arg_Qwen2_5_1_5B_MockSearchV2_7_24_25", "template": "qwen", "cutoff_len": 16384, "max_samples": 1000, "overwrite_cache": true, "preprocessing_num_workers": 16, "output_dir": "/datastor1/mwadhwa/tmp/sf/llamafactory/checkpoints", "logging_steps": 10, "save_steps": 100000, "plot_loss": true, "overwrite_output_dir": true, "per_device_train_batch_size": 1, "gradient_accumulation_steps": 1, "learning_rate": 1e-06, "num_train_epochs": 2, "lr_scheduler_type": "cosine", "warmup_ratio": 0.05, "weight_decay": 0.0001, "adam_beta1": 0.9, "adam_beta2": 0.95, "bf16": true, "ddp_timeout": 180000000, "gradient_checkpointing": true, "save_only_model": true, "enable_masked_ranges": false, "val_size": 0.1, "do_eval": true, "eval_strategy": "steps", "eval_steps": 20, "per_device_eval_batch_size": 1, "report_to": "wandb", "compute_custom_metrics": true}
14
 
15
  ## Experiment Tracking
16
 
 
10
 
11
  ## Training Configuration
12
 
13
+ {"model_name_or_path": "Qwen/Qwen2.5-1.5B-Instruct", "trust_remote_code": true, "stage": "sft", "do_train": true, "finetuning_type": "full", "deepspeed": "/datastor1/mwadhwa/code/skill-factory/thirdparty/LLaMA-Factory/examples/deepspeed/ds_z3_config.json", "dataset": "TAUR_dev__D_SFTv1_C_cd3arg_Qwen2_5_1_5B_MockSearchV2_7_24_25", "template": "qwen", "cutoff_len": 16384, "max_samples": 4000, "overwrite_cache": true, "preprocessing_num_workers": 16, "output_dir": "/datastor1/mwadhwa/tmp/sf/llamafactory/checkpoints", "logging_steps": 10, "save_steps": 100000, "plot_loss": true, "overwrite_output_dir": true, "per_device_train_batch_size": 1, "gradient_accumulation_steps": 1, "learning_rate": 1e-06, "num_train_epochs": 2, "lr_scheduler_type": "cosine", "warmup_ratio": 0.05, "weight_decay": 0.0001, "adam_beta1": 0.9, "adam_beta2": 0.95, "bf16": true, "ddp_timeout": 180000000, "gradient_checkpointing": true, "save_only_model": true, "enable_masked_ranges": false, "val_size": 0.1, "do_eval": true, "eval_strategy": "steps", "eval_steps": 50, "per_device_eval_batch_size": 1, "report_to": "wandb", "compute_custom_metrics": true}
14
 
15
  ## Experiment Tracking
16
 
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d3afe5819266640cf0c634de0ce3aba19e624899d729e605c2812270f6b7f8d3
3
  size 3087467144
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49b654dcb2c4eb1e405f69c6322c8f85eddac2f694e30e29f4c680455a1e32a4
3
  size 3087467144