# upload_to_hf.py - Script to upload your Mamba Swarm to HuggingFace

import os
import shutil
import json

from huggingface_hub import HfApi, upload_folder

# Import the actual model classes (only needed for the optional checks below;
# guarded so a fresh checkout can run prepare_model_repo first)
try:
    from modeling_mamba_swarm import MambaSwarmForCausalLM, MambaSwarmConfig
except ImportError:
    MambaSwarmForCausalLM = MambaSwarmConfig = None
def prepare_model_repo():
    """Prepare model repository structure for HuggingFace"""

    # Create required files for the HuggingFace model repo
    model_files = {
        "README.md": create_model_readme(),
        "config.json": create_model_config(),
        "requirements.txt": create_requirements(),
        "modeling_mamba_swarm.py": create_modeling_file(),
    }

    # Create model repo directory
    os.makedirs("hf_model_repo", exist_ok=True)

    # Copy your mamba_swarm code
    shutil.copytree("mamba_swarm", "hf_model_repo/mamba_swarm", dirs_exist_ok=True)

    # Create HuggingFace-specific files
    for filename, content in model_files.items():
        with open(f"hf_model_repo/{filename}", "w") as f:
            f.write(content)

    print("Model repository prepared!")
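# For reference, the layout produced by prepare_model_repo() looks like:
#
#   hf_model_repo/
#   ├── README.md
#   ├── config.json
#   ├── requirements.txt
#   ├── modeling_mamba_swarm.py
#   └── mamba_swarm/              # copied implementation package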
def create_model_readme():
    return """---
license: apache-2.0
language:
- en
pipeline_tag: text-generation
tags:
- mamba
- swarm
- routing
- language-model
---

# Mamba Swarm: Dynamic Routing Language Model

A novel architecture combining 100 specialized Mamba encoders with dynamic routing and aggregation for efficient language modeling.

## Architecture

- **100 Mamba Encoders**: Specialized domain experts
- **Dynamic Router**: Selects relevant encoders per input (see the sketch below)
- **Aggregation Layer**: Combines encoder outputs
- **Mamba Decoder**: Generates final responses
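
The snippet below is an illustrative sketch of content-based top-k routing (not the exact implementation in this repository): a gating layer scores all 100 encoders for a pooled input representation, and only the top-k are activated and mixed.

```python
import torch
import torch.nn as nn

class TopKRouter(nn.Module):
    # Illustrative only: score every encoder, keep the k best per input
    def __init__(self, d_model=768, num_encoders=100, top_k=10):
        super().__init__()
        self.gate = nn.Linear(d_model, num_encoders)
        self.top_k = top_k

    def forward(self, pooled):                       # pooled: (batch, d_model)
        scores = self.gate(pooled)                   # (batch, num_encoders)
        weights, indices = scores.topk(self.top_k)   # top-k scores and encoder ids
        weights = torch.softmax(weights, dim=-1)     # normalized mixing weights
        return weights, indices
```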
## Usage

```python
from transformers import AutoTokenizer
from modeling_mamba_swarm import MambaSwarmForCausalLM

# Load the model
model = MambaSwarmForCausalLM.from_pretrained("your-username/mamba-swarm-model")
tokenizer = AutoTokenizer.from_pretrained("your-username/mamba-swarm-model")

# Generate text
input_text = "Explain quantum computing"
inputs = tokenizer(input_text, return_tensors="pt")
outputs = model.generate(**inputs, max_length=100)
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(response)
```
## Training

This model uses a three-phase training approach:

1. Collective pre-training on general data
2. Domain specialization for encoder groups
3. End-to-end coordination training

## Performance

- **Parameters**: ~7B total (100 × 70M encoders)
- **Domains**: Medical, Legal, Code, Science, General
- **Routing Efficiency**: Only 10-20% of encoders active per query

## Citation

```
@misc{mamba-swarm-2025,
  title={Mamba Swarm: Dynamic Routing for Efficient Language Modeling},
  author={Your Name},
  year={2025}
}
```
"""
def create_model_config():
    config = {
        "model_type": "mamba_swarm",
        "architectures": ["MambaSwarmForCausalLM"],
        "num_encoders": 100,
        "encoder_config": {
            "d_model": 768,
            "n_layer": 24,
            "vocab_size": 50280,
            "ssm_cfg": {},
            "rms_norm": True,
            "residual_in_fp32": True,
            "fused_add_norm": True
        },
        "router_config": {
            "top_k": 10,
            "routing_strategy": "content_based"
        },
        "aggregator_config": {
            "method": "weighted_sum",
            "attention_heads": 8
        },
        "torch_dtype": "float16",
        "use_cache": True
    }
    return json.dumps(config, indent=2)
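# Optional sanity check (a sketch, not part of the upload flow): the generated
# config.json should round-trip through the MambaSwarmConfig class imported at
# the top of this script, assuming modeling_mamba_swarm.py is importable.
def check_config_roundtrip():
    cfg = MambaSwarmConfig(**json.loads(create_model_config()))
    assert cfg.num_encoders == 100
    assert cfg.router_config["top_k"] == 10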
def create_requirements():
    return """torch>=2.0.0
transformers>=4.35.0
mamba-ssm>=1.2.0
causal-conv1d>=1.2.0
numpy>=1.21.0
scipy>=1.7.0
triton>=2.0.0
einops>=0.6.1
packaging>=20.0
"""
def create_modeling_file():
    return """# modeling_mamba_swarm.py - HuggingFace integration

from transformers import PreTrainedModel, PretrainedConfig
from transformers.modeling_outputs import CausalLMOutputWithPast
import torch
import torch.nn as nn


class MambaSwarmConfig(PretrainedConfig):
    model_type = "mamba_swarm"

    def __init__(
        self,
        num_encoders=100,
        encoder_config=None,
        router_config=None,
        aggregator_config=None,
        **kwargs
    ):
        self.num_encoders = num_encoders
        self.encoder_config = encoder_config or {}
        self.router_config = router_config or {}
        self.aggregator_config = aggregator_config or {}
        super().__init__(**kwargs)


class MambaSwarmForCausalLM(PreTrainedModel):
    config_class = MambaSwarmConfig

    def __init__(self, config):
        super().__init__(config)
        # Import your actual implementation
        from mamba_swarm.system.swarm_engine import MambaSwarmEngine
        self.swarm_engine = MambaSwarmEngine(config)

    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        labels=None,
        **kwargs
    ):
        # Delegate the forward pass to the swarm engine
        outputs = self.swarm_engine(input_ids, attention_mask)

        loss = None
        if labels is not None:
            # Standard causal LM loss: predict token t+1 from token t
            shift_logits = outputs.logits[..., :-1, :].contiguous()
            shift_labels = labels[..., 1:].contiguous()
            loss_fct = nn.CrossEntropyLoss()
            loss = loss_fct(shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1))

        return CausalLMOutputWithPast(
            loss=loss,
            logits=outputs.logits,
            past_key_values=getattr(outputs, "past_key_values", None),
        )

    def generate(self, *args, **kwargs):
        return self.swarm_engine.generate(*args, **kwargs)

    @classmethod
    def from_pretrained(cls, model_name_or_path, *model_args, **kwargs):
        # Custom loading logic if needed
        return super().from_pretrained(model_name_or_path, *model_args, **kwargs)
"""
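# Optional (a sketch, not part of the original upload flow): if you want the
# transformers Auto* factories to resolve these custom classes when working
# locally, they can be registered explicitly. Assumes modeling_mamba_swarm.py
# is importable so the classes imported at the top of this script are available.
def register_auto_classes():
    from transformers import AutoConfig, AutoModelForCausalLM
    AutoConfig.register("mamba_swarm", MambaSwarmConfig)
    AutoModelForCausalLM.register(MambaSwarmConfig, MambaSwarmForCausalLM)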
def upload_model():
    """Upload model code to HuggingFace"""
    api = HfApi()

    # Create the target repo if it does not exist yet, then upload the folder
    api.create_repo(
        repo_id="your-username/mamba-swarm-model",  # Replace with your username
        repo_type="model",
        exist_ok=True,
    )
    upload_folder(
        folder_path="hf_model_repo",
        repo_id="your-username/mamba-swarm-model",  # Replace with your username
        repo_type="model",
        commit_message="Initial upload of Mamba Swarm model"
    )

    print("Model uploaded successfully!")
def upload_weights():
    """Upload model weights separately"""
    # This assumes you have trained weights in checkpoints/
    api = HfApi()

    api.create_repo(
        repo_id="your-username/mamba-swarm-weights",  # Replace with your username
        repo_type="model",
        exist_ok=True,
    )
    upload_folder(
        folder_path="checkpoints",
        repo_id="your-username/mamba-swarm-weights",  # Replace with your username
        repo_type="model",
        commit_message="Upload trained model weights"
    )

    print("Weights uploaded successfully!")
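# Authentication note (sketch): create_repo/upload_folder need a HuggingFace
# token with write access. Either run `huggingface-cli login` once, or provide
# a token programmatically, e.g. via an HF_TOKEN environment variable as below.
def ensure_authenticated():
    from huggingface_hub import login
    token = os.environ.get("HF_TOKEN")
    if token:
        login(token=token)
    # With no explicit token, huggingface_hub falls back to the cached CLI login.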
if __name__ == "__main__":
    prepare_model_repo()
    upload_model()
    # upload_weights()  # Uncomment when you have trained weights