Update LOLAConfig to add the router auxiliary loss coefficient (`router_aux_loss_coef`, default 0.01)
Browse files
configuration_lola_gpt2.py
CHANGED
|
@@ -47,6 +47,7 @@ class LOLAConfig(PretrainedConfig):
|
|
| 47 |
reorder_and_upcast_attn=False,
|
| 48 |
num_experts=16,
|
| 49 |
topk=1,
|
|
|
|
| 50 |
**kwargs,
|
| 51 |
):
|
| 52 |
self.vocab_size = vocab_size
|
|
@@ -75,6 +76,7 @@ class LOLAConfig(PretrainedConfig):
|
|
| 75 |
|
| 76 |
self.bos_token_id = bos_token_id
|
| 77 |
self.eos_token_id = eos_token_id
|
|
|
|
| 78 |
|
| 79 |
super().__init__(bos_token_id=bos_token_id, eos_token_id=eos_token_id, **kwargs)
|
| 80 |
|
|
|
|
| 47 |
reorder_and_upcast_attn=False,
|
| 48 |
num_experts=16,
|
| 49 |
topk=1,
|
| 50 |
+
router_aux_loss_coef=0.01,
|
| 51 |
**kwargs,
|
| 52 |
):
|
| 53 |
self.vocab_size = vocab_size
|
|
|
|
| 76 |
|
| 77 |
self.bos_token_id = bos_token_id
|
| 78 |
self.eos_token_id = eos_token_id
|
| 79 |
+
self.router_aux_loss_coef = router_aux_loss_coef
|
| 80 |
|
| 81 |
super().__init__(bos_token_id=bos_token_id, eos_token_id=eos_token_id, **kwargs)
|
| 82 |
|