# @package _group_

# NOTE(review): this file had been collapsed onto two physical lines, so
# everything after the first '#' was a comment. The nesting below was rebuilt
# from key order and from the interpolation paths the file itself uses
# (checkpoint.*, model.*, hydra.job.*) -- confirm against the consumer schema.
#
# Values written as ??? are left unset here and have to be supplied by the
# caller (presumably the OmegaConf mandatory-value marker -- verify).

common:
  fp16: true
  log_format: json
  log_interval: 200
  seed: 1337
  tensorboard_logdir: tblog

checkpoint:
  save_dir: ???
  save_interval: 4
  keep_last_epochs: 4
  save_interval_updates: 20000
  keep_interval_updates: -1
  keep_interval_updates_pattern: 50000
  # no_epoch_checkpoints: true

distributed_training:
  ddp_backend: no_c10d
  distributed_backend: 'nccl'
  distributed_world_size: 8
  nprocs_per_node: 8
  find_unused_parameters: true

task:
  _name: denoising
  data: ???
  mask: 0.15

dataset:
  num_workers: 6
  max_tokens: 1400000
  skip_invalid_size_inputs_valid_test: true
  # Validation cadence follows the checkpoint cadence configured above.
  validate_interval: ${checkpoint.save_interval}
  validate_interval_updates: ${checkpoint.save_interval_updates}
  required_batch_size_multiple: 1

criterion:
  _name: sc2t
  pred_masked_weight: 1.0
  pred_nomask_weight: 0.0
  loss_weights: [10]
  label_smoothing: 0.1
  text_weight: 0.1

optimization:
  max_update: 400000
  lr: [0.0005]
  clip_norm: 10.0

optimizer:
  _name: adam
  adam_betas: (0.9,0.98)
  adam_eps: 1e-06
  weight_decay: 0.01

lr_scheduler:
  _name: polynomial_decay
  warmup_updates: 32000

model:
  _name: stbert
  label_rate: ???
  skip_masked: false
  skip_nomask: false
  mask_prob: 0.80
  extractor_mode: default
  conv_feature_layers: '[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2'
  final_dim: 256
  encoder_layers: 6
  encoder_attention_heads: 8
  decoder_layerdrop: 0.05
  dropout_input: 0.1
  dropout_features: 0.1
  dropout: 0.1
  attention_dropout: 0.1
  feature_grad_mult: 0.1
  untie_final_proj: true
  activation_dropout: 0.0
  use_rel_pos_enc: true
  add_code_encoder: true
  add_adaptor: false
  # Most text_transformer settings are interpolated from model.* keys.
  # activation_fn, adaptive_input, checkpoint_activations, quant_noise_pq and
  # encoder_layerdrop are referenced but not set in this file -- presumably
  # they come from the model's own defaults; verify.
  text_transformer:
    activation_fn: ${model.activation_fn}
    dropout: ${model.dropout}
    attention_dropout: ${model.attention_dropout}
    activation_dropout: ${model.activation_dropout}
    adaptive_input: ${model.adaptive_input}
    max_source_positions: 3000
    checkpoint_activations: ${model.checkpoint_activations}
    no_scale_embedding: false
    layernorm_embedding: false
    quant_noise:
      pq: ${model.quant_noise_pq}
    encoder:
      embed_dim: 768
      ffn_embed_dim: 3072
      layers: 6
      attention_heads: 8
      normalize_before: false
      learned_pos: true
      layerdrop: ${model.encoder_layerdrop}

hydra:
  job:
    config:
      override_dirname:
        kv_sep: '-'
        item_sep: '__'
        # Overrides for these keys are left out of override_dirname.
        exclude_keys:
          - run
          - task.data
          - task.label_dir
  run:
    dir: ???
  sweep:
    dir: ???
    # Sweep sub-directory is "<config name>__<override dirname>".
    subdir: ${hydra.job.config_name}__${hydra.job.override_dirname}