{
  "dataset_reader": {
    "type": "qasper",
    "for_training": true,
    "max_document_length": 15360,
    "transformer_model_name": ".../qasper-ckpt/qasper_trained_led_base_hf_serialized/"
  },
  "model": {
    "type": "qasper_baseline",
    "attention_dropout": 0.1,
    "attention_window_size": 1536,
    "gradient_checkpointing": true,
    "transformer_model_name": ".../qasper-ckpt/qasper_trained_led_base_hf_serialized/",
    "use_evidence_scaffold": true
  },
  "train_data_path": ".../ours_led_qa_binary/ours_qa_train.json",
  "validation_data_path": ".../ours_led_qa_binary/ours_qa_dev.json",
  "trainer": {
    "callbacks": [
      {
        "type": "tensorboard"
      }
    ],
    "cuda_device": 0,
    "enable_default_callbacks": false,
    "grad_clipping": 1,
    "learning_rate_scheduler": {
      "type": "slanted_triangular",
      "cut_frac": 0.1,
      "num_epochs": 5,
      "num_steps_per_epoch": 255
    },
    "num_epochs": 5,
    "num_gradient_accumulation_steps": 16,
    "optimizer": {
      "type": "adam",
      "lr": 3e-05
    },
    "patience": 2,
    "use_amp": true,
    "validation_metric": "+answer_f1"
  },
  "vocabulary": {
    "type": "empty"
  },
  "data_loader": {
    "batch_size": 1
  },
  "pytorch_seed": 15371,
  "validation_dataset_reader": {
    "type": "qasper",
    "for_training": false,
    "max_document_length": 15360,
    "transformer_model_name": ".../qasper-ckpt/qasper_trained_led_base_hf_serialized/"
  }
}