multicite-qa-qasper / config.json
anlausch's picture
Update config.json
3771176
{
"dataset_reader": {
"type": "qasper",
"for_training": true,
"max_document_length": 15360,
"transformer_model_name": ".../qasper-ckpt/qasper_trained_led_base_hf_serialized/"
},
"model": {
"type": "qasper_baseline",
"attention_dropout": 0.1,
"attention_window_size": 1536,
"gradient_checkpointing": true,
"transformer_model_name": ".../qasper-ckpt/qasper_trained_led_base_hf_serialized/",
"use_evidence_scaffold": true
},
"train_data_path": ".../ours_led_qa_binary/ours_qa_train.json",
"validation_data_path": ".../ours_led_qa_binary/ours_qa_dev.json",
"trainer": {
"callbacks": [
{
"type": "tensorboard"
}
],
"cuda_device": 0,
"enable_default_callbacks": false,
"grad_clipping": 1,
"learning_rate_scheduler": {
"type": "slanted_triangular",
"cut_frac": 0.1,
"num_epochs": 5,
"num_steps_per_epoch": 255
},
"num_epochs": 5,
"num_gradient_accumulation_steps": 16,
"optimizer": {
"type": "adam",
"lr": 3e-05
},
"patience": 2,
"use_amp": true,
"validation_metric": "+answer_f1"
},
"vocabulary": {
"type": "empty"
},
"data_loader": {
"batch_size": 1
},
"pytorch_seed": 15371,
"validation_dataset_reader": {
"type": "qasper",
"for_training": false,
"max_document_length": 15360,
"transformer_model_name": ".../qasper-ckpt/qasper_trained_led_base_hf_serialized/"
}
}