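# NOTE(review): the next three lines look like web-page status residue, not
# configuration content; kept as comments so they cannot be parsed as keys.
# Spaces:
# Runtime error
# Runtime error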
# @package _group_
# General run settings: precision, logging format/cadence, RNG seed, and
# the TensorBoard log directory.
common:
  fp16: true
  log_format: json
  log_interval: 200
  seed: 1337
  tensorboard_logdir: tblog

# Checkpoint saving policy. `save_dir` is left as `???` and must be supplied
# by the caller (e.g. as a command-line override).
checkpoint:
  save_dir: ???
  save_interval: 4
  keep_last_epochs: 4
  save_interval_updates: 20000
  keep_interval_updates: -1
  keep_interval_updates_pattern: 50000
  # no_epoch_checkpoints: true

# Distributed training layout: 8 processes in total, 8 per node.
distributed_training:
  ddp_backend: no_c10d
  distributed_backend: 'nccl'
  distributed_world_size: 8
  nprocs_per_node: 8
  find_unused_parameters: true

# Task selection. `data` is left as `???` and must be supplied by the caller.
task:
  _name: denoising
  data: ???
  mask: 0.15

# Data loading and batching. The two validate_* intervals are interpolated
# from the checkpoint section, so validation follows the checkpoint cadence.
dataset:
  num_workers: 6
  max_tokens: 1400000
  skip_invalid_size_inputs_valid_test: true
  validate_interval: ${checkpoint.save_interval}
  validate_interval_updates: ${checkpoint.save_interval_updates}
  required_batch_size_multiple: 1

# Training criterion and its loss weighting terms.
criterion:
  _name: sc2t
  pred_masked_weight: 1.0
  pred_nomask_weight: 0.0
  loss_weights: [10,]
  label_smoothing: 0.1
  text_weight: 0.1

# Optimization schedule: update budget, learning rate list, gradient clipping.
optimization:
  max_update: 400000
  lr: [0.0005]
  clip_norm: 10.0

# Optimizer choice and hyper-parameters. `adam_betas` is kept in its original
# parenthesised form exactly as written.
optimizer:
  _name: adam
  adam_betas: (0.9,0.98)
  adam_eps: 1e-06
  weight_decay: 0.01

# Learning-rate scheduler selection and warmup length (in updates).
lr_scheduler:
  _name: polynomial_decay
  warmup_updates: 32000

# Model architecture. `label_rate` is left as `???` and must be supplied by
# the caller.
model:
  _name: stbert
  label_rate: ???
  skip_masked: false
  skip_nomask: false
  mask_prob: 0.80
  extractor_mode: default
  conv_feature_layers: '[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2'
  final_dim: 256
  encoder_layers: 6
  encoder_attention_heads: 8
  decoder_layerdrop: 0.05
  dropout_input: 0.1
  dropout_features: 0.1
  dropout: 0.1
  attention_dropout: 0.1
  feature_grad_mult: 0.1
  untie_final_proj: true
  activation_dropout: 0.0
  use_rel_pos_enc: true
  add_code_encoder: true
  add_adaptor: false

  # Nested sub-config that mostly mirrors the model-level values through
  # interpolation.
  # NOTE(review): the nesting below was reconstructed after the original
  # indentation was lost; confirm that `quant_noise` and `encoder` belong
  # under `text_transformer`.
  # NOTE(review): model.activation_fn, model.adaptive_input,
  # model.checkpoint_activations, model.quant_noise_pq and
  # model.encoder_layerdrop are referenced here but not set in this file;
  # presumably they resolve from defaults defined elsewhere - verify.
  text_transformer:
    activation_fn: ${model.activation_fn}
    dropout: ${model.dropout}
    attention_dropout: ${model.attention_dropout}
    activation_dropout: ${model.activation_dropout}
    adaptive_input: ${model.adaptive_input}
    max_source_positions: 3000
    checkpoint_activations: ${model.checkpoint_activations}
    no_scale_embedding: false
    layernorm_embedding: false
    quant_noise:
      pq: ${model.quant_noise_pq}
    encoder:
      embed_dim: 768
      ffn_embed_dim: 3072
      layers: 6
      attention_heads: 8
      normalize_before: false
      learned_pos: true
      layerdrop: ${model.encoder_layerdrop}

# Hydra runtime settings: how the override-derived directory name is built,
# and where single runs and sweeps write their output. Both `dir` values are
# left as `???` and must be supplied by the caller.
hydra:
  job:
    config:
      override_dirname:
        kv_sep: '-'
        item_sep: '__'
        # Overrides listed here are left out of the generated directory name.
        exclude_keys:
          - run
          - task.data
          - task.label_dir
  run:
    dir: ???
  sweep:
    dir: ???
    subdir: ${hydra.job.config_name}__${hydra.job.override_dirname}