Vinitha2004's picture
Upload distilled Qwen2.5-Coder-3B model with knowledge distillation
eee74b7 verified
{
"epoch": 2,
"checkpoint_type": "epoch",
"is_best": false,
"validation_metrics": {
"total": 1.6648264206669057,
"distill": 0.46633759959448906,
"super": 24.436113647189778
},
"training_metrics": {
"total": 1.6754815220601948,
"distill": 0.48241927724853495,
"super": 24.343663812838038
},
"hyperparameters": {
"temperature": 2.0,
"alpha": 0.95,
"learning_rate": 0.001,
"batch_size": 1,
"gradient_accumulation_steps": 16
},
"best_val_loss_so_far": 1.6648216817606247,
"best_epoch_so_far": 1,
"patience_counter": 0,
"save_time": "2025-06-13 18:30:28"
}