##################
# Trainer settings
##################
# Format: one setting per line, a KEY followed by its value.
# Keys listed without a value (e.g. RESUME) are presumably boolean flags
# that are enabled by their presence -- TODO confirm against the config parser.

MODEL MeetingNet_Transformer
TASK HMNet
CRITERION MLECriterion

SEED 1033
RESUME

MAX_NUM_EPOCHS 20
SAVE_PER_UPDATE_NUM 400
UPDATES_PER_EPOCH 2000

# The actual learning rate will be multiplied by the number of GPUs.
# NOTE(review): NO_AUTO_LR_SCALING is set below, which presumably disables
# that scaling -- confirm which of the two statements is current.
OPTIMIZER RAdam
NO_AUTO_LR_SCALING
START_LEARNING_RATE 1e-3
LR_SCHEDULER LnrWrmpInvSqRtDcyScheduler
WARMUP_STEPS 16000
WARMUP_INIT_LR 1e-4
WARMUP_END_LR 1e-3

# The actual start learning rate equals START_LEARNING_RATE * GRADIENT_ACCUMULATE_STEP.
# The model will be updated after every MINI_BATCH * GRADIENT_ACCUMULATE_STEP samples
# (MINI_BATCH is defined under "Task settings").
GRADIENT_ACCUMULATE_STEP 20

GRAD_CLIPPING 2

##################
# Task settings
##################

# This is the path relative to the directory where this conf file is located.
# NOTE(review): presumably USE_REL_DATA_PATH makes the *_FILE paths below
# resolve against that directory -- confirm against the data loader.
# It is not a good idea to keep data together with code.
# TODO: Are we able to provide a list of directory paths in TRAIN_FILE?
USE_REL_DATA_PATH
TRAIN_FILE ../ExampleRawData/meeting_summarization/AMI_proprec/train_ami.json
DEV_FILE ../ExampleRawData/meeting_summarization/AMI_proprec/valid_ami.json
TEST_FILE ../ExampleRawData/meeting_summarization/AMI_proprec/test_ami.json
ROLE_DICT_FILE ../ExampleRawData/meeting_summarization/role_dict_ext.json

MINI_BATCH 1
MAX_PADDING_RATIO 1
BATCH_READ_AHEAD 10
DOC_SHUFFLE_BUF_SIZE 10
SAMPLE_SHUFFLE_BUFFER_SIZE 10
BATCH_SHUFFLE_BUFFER_SIZE 10

MAX_TRANSCRIPT_WORD 8300
MAX_SENT_LEN 30
MAX_SENT_NUM 300

##################
# Model settings
##################

DROPOUT 0.1
VOCAB_DIM 512
ROLE_SIZE 32
ROLE_DIM 16
POS_DIM 16
ENT_DIM 16

USE_ROLE
USE_POSENT

USE_BOS_TOKEN
USE_EOS_TOKEN

TRANSFORMER_EMBED_DROPOUT 0.1
TRANSFORMER_RESIDUAL_DROPOUT 0.1
TRANSFORMER_ATTENTION_DROPOUT 0.1
TRANSFORMER_LAYER 6
TRANSFORMER_HEAD 8
TRANSFORMER_POS_DISCOUNT 80

# Pretrained tokenizer and initial model checkpoint; both paths are written
# relative (../), like the data paths under "Task settings".
PRE_TOKENIZER TransfoXLTokenizer
PRE_TOKENIZER_PATH ../ExampleInitModel/transfo-xl-wt103
PYLEARN_MODEL ../ExampleInitModel/HMNet-pretrained

##################
# Tokenizer settings
##################

EXTRA_IDS 1000

##################
# Decoding settings
##################

BEAM_WIDTH 6
MAX_GEN_LENGTH 512
MIN_GEN_LENGTH 320
EVAL_TOKENIZED
EVAL_LOWERCASE
NO_REPEAT_NGRAM_SIZE 3