Upload app.py
app.py CHANGED
@@ -268,13 +268,17 @@ class UltimateModelLoader:
         for model_name, config in self.model_configs.items():
             # Skip resource-intensive models on limited systems
             if not has_gpu and config["params"] > 500_000_000:
+                print(f"⚠️ Skipping {config['display_name']} - too large for CPU ({config['params']:,} > 500M)")
                 continue
-            if memory_gb <
+            if memory_gb < 3 and config["params"] > 150_000_000:
+                print(f"⚠️ Skipping {config['display_name']} - insufficient RAM ({memory_gb:.1f}GB < 3GB for {config['params']:,})")
                 continue
             # More reasonable Mamba filtering - only skip very large models on low memory
             if memory_gb < 12 and "mamba" in model_name.lower() and config["params"] > 1_000_000_000:
+                print(f"⚠️ Skipping {config['display_name']} - large Mamba model needs more RAM")
                 continue
 
+            print(f"✅ Available: {config['display_name']} ({config['params']:,} params)")
             available_models.append((model_name, config))
 
         # Sort by preference and priority
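The filter above relies on has_gpu and memory_gb, which app.py defines before this loop; their definitions are outside this diff. A minimal sketch of how such values are commonly derived, assuming torch and psutil are installed (the helper name detect_resources is hypothetical):

import psutil
import torch

def detect_resources():
    # True when PyTorch can see a CUDA device
    has_gpu = torch.cuda.is_available()
    # Total system RAM in gigabytes
    memory_gb = psutil.virtual_memory().total / (1024 ** 3)
    return has_gpu, memory_gb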
@@ -867,8 +871,10 @@ class UltimateMambaSwarm:
 
         # Generate response
         if self.model_loaded:
+            print(f"🧠 Using actual model inference: {self.model_loader.model_name}")
             response = self._generate_with_ultimate_model(prompt, max_length, temperature, top_p, domain)
         else:
+            print(f"🔄 Using fallback response system (no model loaded)")
             response = self._generate_ultimate_fallback(prompt, domain)
 
         # Quality validation
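The new print calls write to stdout, which is where Hugging Face Spaces surfaces container logs. If levels or timestamps become useful later, the same diagnostics could route through the standard logging module; a sketch under that assumption (log_generation_path is a hypothetical helper, not part of app.py):

import logging

logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
logger = logging.getLogger("mamba_swarm")

def log_generation_path(model_loaded: bool, model_name: str) -> None:
    # Mirrors the branch diagnostics added in the hunk above
    if model_loaded:
        logger.info("Using actual model inference: %s", model_name)
    else:
        logger.info("Using fallback response system (no model loaded)")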
@@ -1378,7 +1384,6 @@ Continued research, development, and practical application will likely yield add
 
 **⚡ Mamba Swarm Performance:**
 - **Architecture**: Mamba Encoder Swarm (CPU Alternative Mode)
-- **Active Model**: {model_info}
 - **Model Size**: {routing_info['model_size'].title()}
 - **Selected Encoders**: {routing_info['total_active']}/100
 - **Hardware**: {self.model_loader.device}
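The bullet list in this hunk sits inside an f-string template, so the {...} placeholders are resolved when the report is built; the commit simply drops the Active Model line. A self-contained rendering sketch (the routing_info and device values below are made up for illustration):

routing_info = {"model_size": "small", "total_active": 8}
device = "cpu"

report = f"""**⚡ Mamba Swarm Performance:**
- **Architecture**: Mamba Encoder Swarm (CPU Alternative Mode)
- **Model Size**: {routing_info['model_size'].title()}
- **Selected Encoders**: {routing_info['total_active']}/100
- **Hardware**: {device}"""
print(report)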