Upload upload_to_hf.py

upload_to_hf.py ADDED (+235 -0)
@@ -0,0 +1,235 @@
# upload_to_hf.py - Script to upload your Mamba Swarm to HuggingFace

import os
import shutil
import json

from huggingface_hub import HfApi, upload_folder
def prepare_model_repo():
    """Prepare model repository structure for HuggingFace"""

    # Create required files for HuggingFace model
    model_files = {
        "README.md": create_model_readme(),
        "config.json": create_model_config(),
        "requirements.txt": create_requirements(),
        "modeling_mamba_swarm.py": create_modeling_file(),
    }

    # Create model repo directory
    os.makedirs("hf_model_repo", exist_ok=True)

    # Copy your mamba_swarm code
    shutil.copytree("mamba_swarm", "hf_model_repo/mamba_swarm", dirs_exist_ok=True)

    # Create HuggingFace specific files
    for filename, content in model_files.items():
        with open(f"hf_model_repo/{filename}", "w") as f:
            f.write(content)

    print("Model repository prepared!")
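# Resulting layout of hf_model_repo/ (for reference):
#
#   hf_model_repo/
#   ├── README.md
#   ├── config.json
#   ├── requirements.txt
#   ├── modeling_mamba_swarm.py
#   └── mamba_swarm/          <- copied package source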
def create_model_readme():
    return """---
license: apache-2.0
language:
- en
pipeline_tag: text-generation
tags:
- mamba
- swarm
- routing
- language-model
---

# Mamba Swarm: Dynamic Routing Language Model

A novel architecture combining 100 specialized Mamba encoders with dynamic routing and aggregation for efficient language modeling.

## Architecture

- **100 Mamba Encoders**: Specialized domain experts
- **Dynamic Router**: Selects relevant encoders per input
- **Aggregation Layer**: Combines encoder outputs
- **Mamba Decoder**: Generates final responses

## Usage

```python
from transformers import AutoTokenizer
from mamba_swarm import MambaSwarmEngine

# Load the model
model = MambaSwarmEngine.from_pretrained("your-username/mamba-swarm-model")
tokenizer = AutoTokenizer.from_pretrained("your-username/mamba-swarm-model")

# Generate text
input_text = "Explain quantum computing"
inputs = tokenizer(input_text, return_tensors="pt")
outputs = model.generate(**inputs, max_length=100)
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(response)
```

## Training

This model uses a three-phase training approach:
1. Collective pre-training on general data
2. Domain specialization for encoder groups
3. End-to-end coordination training

## Performance

- **Parameters**: ~7B total (100 × 70M encoders)
- **Domains**: Medical, Legal, Code, Science, General
- **Routing Efficiency**: Only 10-20% of encoders active per query

## Citation

```
@misc{mamba-swarm-2025,
  title={Mamba Swarm: Dynamic Routing for Efficient Language Modeling},
  author={Your Name},
  year={2025}
}
```
"""
def create_model_config():
    config = {
        "model_type": "mamba_swarm",
        "architectures": ["MambaSwarmForCausalLM"],
        "num_encoders": 100,
        "encoder_config": {
            "d_model": 768,
            "n_layer": 24,
            "vocab_size": 50280,
            "ssm_cfg": {},
            "rms_norm": True,
            "residual_in_fp32": True,
            "fused_add_norm": True
        },
        "router_config": {
            "top_k": 10,
            "routing_strategy": "content_based"
        },
        "aggregator_config": {
            "method": "weighted_sum",
            "attention_heads": 8
        },
        "torch_dtype": "float16",
        "use_cache": True
    }
    return json.dumps(config, indent=2)
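# Note: to make the custom classes loadable straight from the Hub with
# trust_remote_code=True, config.json would also need an "auto_map" entry
# pointing at the modeling file created below (a sketch, not emitted by
# this script):
#
#   "auto_map": {
#       "AutoConfig": "modeling_mamba_swarm.MambaSwarmConfig",
#       "AutoModelForCausalLM": "modeling_mamba_swarm.MambaSwarmForCausalLM"
#   }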
def create_requirements():
    return """torch>=2.0.0
transformers>=4.35.0
mamba-ssm>=1.2.0
causal-conv1d>=1.2.0
numpy>=1.21.0
scipy>=1.7.0
triton>=2.0.0
einops>=0.6.1
packaging>=20.0
"""
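# Note: mamba-ssm and causal-conv1d compile CUDA kernels at install time,
# so a GPU environment with a CUDA toolchain is assumed; installation is
# typically `pip install -r requirements.txt`.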
def create_modeling_file():
    return """# modeling_mamba_swarm.py - HuggingFace integration

from transformers import PreTrainedModel, PretrainedConfig
from transformers.modeling_outputs import CausalLMOutputWithPast
import torch
import torch.nn as nn

class MambaSwarmConfig(PretrainedConfig):
    model_type = "mamba_swarm"

    def __init__(
        self,
        num_encoders=100,
        encoder_config=None,
        router_config=None,
        aggregator_config=None,
        **kwargs
    ):
        self.num_encoders = num_encoders
        self.encoder_config = encoder_config or {}
        self.router_config = router_config or {}
        self.aggregator_config = aggregator_config or {}
        super().__init__(**kwargs)

class MambaSwarmForCausalLM(PreTrainedModel):
    config_class = MambaSwarmConfig

    def __init__(self, config):
        super().__init__(config)

        # Import your actual implementation
        from mamba_swarm.system.swarm_engine import MambaSwarmEngine

        self.swarm_engine = MambaSwarmEngine(config)

    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        labels=None,
        **kwargs
    ):
        # Your forward pass implementation
        outputs = self.swarm_engine(input_ids, attention_mask)

        loss = None
        if labels is not None:
            # Standard causal-LM loss: shift so each position predicts the next token
            shift_logits = outputs.logits[..., :-1, :].contiguous()
            shift_labels = labels[..., 1:].contiguous()
            loss_fct = nn.CrossEntropyLoss()
            loss = loss_fct(shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1))

        return CausalLMOutputWithPast(
            loss=loss,
            logits=outputs.logits,
            # The engine may not expose a KV cache; fall back to None
            past_key_values=getattr(outputs, "past_key_values", None),
        )

    def generate(self, *args, **kwargs):
        return self.swarm_engine.generate(*args, **kwargs)

    @classmethod
    def from_pretrained(cls, model_name_or_path, *model_args, **kwargs):
        # Custom loading logic if needed
        return super().from_pretrained(model_name_or_path, *model_args, **kwargs)
"""
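# With an "auto_map" entry like the one sketched above present in config.json,
# users could load the model without installing this package first (a sketch;
# the repo id is the placeholder used throughout this script):
#
#   from transformers import AutoModelForCausalLM
#   model = AutoModelForCausalLM.from_pretrained(
#       "your-username/mamba-swarm-model", trust_remote_code=True
#   )
#
# Inside a local session the classes can instead be registered explicitly:
#
#   from transformers import AutoConfig, AutoModelForCausalLM
#   AutoConfig.register("mamba_swarm", MambaSwarmConfig)
#   AutoModelForCausalLM.register(MambaSwarmConfig, MambaSwarmForCausalLM)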
def upload_model():
    """Upload model code to HuggingFace"""
    api = HfApi()

    # Create the repository first (no-op if it already exists)
    api.create_repo(
        repo_id="your-username/mamba-swarm-model",  # Replace with your username
        repo_type="model",
        exist_ok=True
    )

    # Upload model repository
    upload_folder(
        folder_path="hf_model_repo",
        repo_id="your-username/mamba-swarm-model",  # Replace with your username
        repo_type="model",
        commit_message="Initial upload of Mamba Swarm model"
    )

    print("Model uploaded successfully!")
def upload_weights():
    """Upload model weights separately"""
    # This assumes you have trained weights in checkpoints/
    api = HfApi()

    # Create the repository first (no-op if it already exists)
    api.create_repo(
        repo_id="your-username/mamba-swarm-weights",  # Replace with your username
        repo_type="model",
        exist_ok=True
    )

    upload_folder(
        folder_path="checkpoints",
        repo_id="your-username/mamba-swarm-weights",  # Replace with your username
        repo_type="model",
        commit_message="Upload trained model weights"
    )

    print("Weights uploaded successfully!")
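# Both upload helpers assume you are already authenticated, e.g. via
# `huggingface-cli login` or an HF_TOKEN environment variable; HfApi picks
# the stored token up automatically. A token can also be passed explicitly:
#
#   api = HfApi(token="hf_...")  # sketch; substitute your own access token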
if __name__ == "__main__":
    prepare_model_repo()
    upload_model()
    # upload_weights()  # Uncomment when you have trained weights
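# Example invocation (assumes the mamba_swarm/ package sits next to this
# script, plus a checkpoints/ directory if uploading weights):
#
#   python upload_to_hf.py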