| config: conf/train_rawnet3.yaml | |
| print_config: false | |
| log_level: INFO | |
| drop_last_iter: true | |
| dry_run: false | |
| iterator_type: category | |
| valid_iterator_type: sequence | |
| output_dir: exp/spk_train_rawnet3_raw_sp | |
| ngpu: 1 | |
| seed: 0 | |
| num_workers: 6 | |
| num_att_plot: 0 | |
| dist_backend: nccl | |
| dist_init_method: env:// | |
| dist_world_size: 4 | |
| dist_rank: 0 | |
| local_rank: 0 | |
| dist_master_addr: localhost | |
| dist_master_port: 40409 | |
| dist_launcher: null | |
| multiprocessing_distributed: true | |
| unused_parameters: false | |
| sharded_ddp: false | |
| cudnn_enabled: true | |
| cudnn_benchmark: true | |
| cudnn_deterministic: false | |
| collect_stats: false | |
| write_collected_feats: false | |
| max_epoch: 40 | |
| patience: null | |
| val_scheduler_criterion: | |
| - valid | |
| - loss | |
| early_stopping_criterion: | |
| - valid | |
| - loss | |
| - min | |
| best_model_criterion: | |
| - - valid | |
| - eer | |
| - min | |
| keep_nbest_models: 3 | |
| nbest_averaging_interval: 0 | |
| grad_clip: 9999 | |
| grad_clip_type: 2.0 | |
| grad_noise: false | |
| accum_grad: 1 | |
| no_forward_run: false | |
| resume: true | |
| train_dtype: float32 | |
| use_amp: true | |
| log_interval: 100 | |
| use_matplotlib: true | |
| use_tensorboard: true | |
| create_graph_in_tensorboard: false | |
| use_wandb: false | |
| wandb_project: null | |
| wandb_id: null | |
| wandb_entity: null | |
| wandb_name: null | |
| wandb_model_log_interval: -1 | |
| detect_anomaly: false | |
| use_lora: false | |
| save_lora_only: true | |
| lora_conf: {} | |
| pretrain_path: null | |
| init_param: [] | |
| ignore_init_mismatch: false | |
| freeze_param: [] | |
| num_iters_per_epoch: null | |
| batch_size: 512 | |
| valid_batch_size: 40 | |
| batch_bins: 1000000 | |
| valid_batch_bins: null | |
| train_shape_file: | |
| - exp/spk_stats_16k_sp/train/speech_shape | |
| valid_shape_file: | |
| - exp/spk_stats_16k_sp/valid/speech_shape | |
| batch_type: folded | |
| valid_batch_type: null | |
| fold_length: | |
| - 120000 | |
| sort_in_batch: descending | |
| shuffle_within_batch: false | |
| sort_batch: descending | |
| multiple_iterator: false | |
| chunk_length: 500 | |
| chunk_shift_ratio: 0.5 | |
| num_cache_chunks: 1024 | |
| chunk_excluded_key_prefixes: [] | |
| chunk_default_fs: null | |
| train_data_path_and_name_and_type: | |
| - - dump/raw/voxblink_full_sp/wav.scp | |
| - speech | |
| - sound | |
| - - dump/raw/voxblink_full_sp/utt2spk | |
| - spk_labels | |
| - text | |
| valid_data_path_and_name_and_type: | |
| - - dump/raw/voxceleb1_test/trial.scp | |
| - speech | |
| - sound | |
| - - dump/raw/voxceleb1_test/trial2.scp | |
| - speech2 | |
| - sound | |
| - - dump/raw/voxceleb1_test/trial_label | |
| - spk_labels | |
| - text | |
| allow_variable_data_keys: false | |
| max_cache_size: 0.0 | |
| max_cache_fd: 32 | |
| allow_multi_rates: false | |
| valid_max_cache_size: null | |
| exclude_weight_decay: false | |
| exclude_weight_decay_conf: {} | |
| optim: adam | |
| optim_conf: | |
| lr: 0.001 | |
| weight_decay: 5.0e-05 | |
| amsgrad: false | |
| scheduler: cosineannealingwarmuprestarts | |
| scheduler_conf: | |
| first_cycle_steps: 84560 | |
| cycle_mult: 1.0 | |
| max_lr: 0.001 | |
| min_lr: 5.0e-06 | |
| warmup_steps: 1000 | |
| gamma: 0.75 | |
| init: null | |
| use_preprocessor: true | |
| input_size: null | |
| target_duration: 3.0 | |
| spk2utt: dump/raw/voxblink_full_sp/spk2utt | |
| spk_num: 114201 | |
| sample_rate: 16000 | |
| num_eval: 10 | |
| rir_scp: '' | |
| model_conf: | |
| extract_feats_in_collect_stats: false | |
| frontend: asteroid_frontend | |
| frontend_conf: | |
| sinc_stride: 16 | |
| sinc_kernel_size: 251 | |
| sinc_filters: 256 | |
| preemph_coef: 0.97 | |
| log_term: 1.0e-06 | |
| specaug: null | |
| specaug_conf: {} | |
| normalize: null | |
| normalize_conf: {} | |
| encoder: rawnet3 | |
| encoder_conf: | |
| model_scale: 8 | |
| ndim: 1024 | |
| output_size: 1536 | |
| pooling: chn_attn_stat | |
| pooling_conf: {} | |
| projector: rawnet3 | |
| projector_conf: | |
| output_size: 192 | |
| preprocessor: spk | |
| preprocessor_conf: | |
| target_duration: 3.0 | |
| sample_rate: 16000 | |
| num_eval: 5 | |
| noise_apply_prob: 0.5 | |
| noise_info: | |
| - - 1.0 | |
| - dump/raw/musan_speech.scp | |
| - - 4 | |
| - 7 | |
| - - 13 | |
| - 20 | |
| - - 1.0 | |
| - dump/raw/musan_noise.scp | |
| - - 1 | |
| - 1 | |
| - - 0 | |
| - 15 | |
| - - 1.0 | |
| - dump/raw/musan_music.scp | |
| - - 1 | |
| - 1 | |
| - - 5 | |
| - 15 | |
| rir_apply_prob: 0.5 | |
| rir_scp: dump/raw/rirs.scp | |
| loss: aamsoftmax_sc_topk | |
| loss_conf: | |
| margin: 0.3 | |
| scale: 30 | |
| K: 3 | |
| mp: 0.06 | |
| k_top: 5 | |
| required: | |
| - output_dir | |
| version: '202310' | |
| distributed: true | |