Vinitha2004's picture
Upload distilled Qwen2.5-Coder-3B model with knowledge distillation
eee74b7 verified
{
"epoch": 1,
"checkpoint_type": "epoch",
"is_best": false,
"validation_metrics": {
"total": 1.6648216817606247,
"distill": 0.4663306248399861,
"super": 24.436151396669747
},
"training_metrics": {
"total": 1.6754714963588506,
"distill": 0.48240755922164047,
"super": 24.34368594805605
},
"hyperparameters": {
"temperature": 2.0,
"alpha": 0.95,
"learning_rate": 0.001,
"batch_size": 1,
"gradient_accumulation_steps": 16
},
"best_val_loss_so_far": null,
"best_epoch_so_far": null,
"patience_counter": 0,
"save_time": "2025-06-13 17:57:10"
}