Debito committed · verified
Commit aec13a2 · 1 Parent(s): 055a9c8

Delete config.py

Files changed (1)
  1. config.py +0 -44
config.py DELETED
@@ -1,44 +0,0 @@
-# =============================================================================
-# core/config.py
-# =============================================================================
-import torch
-from dataclasses import dataclass
-from typing import Dict, List, Optional
-
-@dataclass
-class MambaConfig:
-    # Model architecture
-    vocab_size: int = 50257
-    d_model: int = 1024
-    n_layers: int = 12
-    d_inner: int = 2048
-    d_state: int = 16
-    d_conv: int = 4
-    dt_rank: Optional[int] = None
-    bias: bool = False
-    conv_bias: bool = True
-
-    # Training
-    max_seq_len: int = 2048
-    batch_size: int = 8
-    learning_rate: float = 1e-4
-    weight_decay: float = 0.1
-    warmup_steps: int = 1000
-    max_steps: int = 100000
-
-    # Swarm specific
-    num_specialists: int = 100
-    specialist_domains: List[str] = None
-    shared_embedding: bool = True
-    hierarchical_sharing: bool = True
-
-    # Hardware
-    device: str = "cuda" if torch.cuda.is_available() else "cpu"
-    dtype: torch.dtype = torch.float16
-
-    def __post_init__(self):
-        if self.dt_rank is None:
-            self.dt_rank = max(16, self.d_model // 16)
-        if self.specialist_domains is None:
-            self.specialist_domains = [f"domain_{i}" for i in range(self.num_specialists)]
-
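For reference, a minimal usage sketch (not part of this commit) showing how the deleted MambaConfig would be constructed and how its __post_init__ fills in the derived fields; the import path is an assumption based on the core/config.py header above, and the printed values follow from the class defaults.

from core.config import MambaConfig  # assumed import path, per the file header above

cfg = MambaConfig()

# __post_init__ derives dt_rank from d_model: max(16, 1024 // 16) == 64
print(cfg.dt_rank)                   # 64
# and generates one domain name per specialist: domain_0 ... domain_99
print(len(cfg.specialist_domains))   # 100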