"""
Configuration module for Universal MCP Client - Enhanced for GPT-OSS models with full context support
"""
import os
from dataclasses import dataclass
from typing import Optional, Dict, List
import logging
# Set up enhanced logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
@dataclass
class MCPServerConfig:
"""Configuration for an MCP server connection"""
name: str
url: str
description: str
space_id: Optional[str] = None
class AppConfig:
"""Application configuration settings"""
# HuggingFace Configuration
HF_TOKEN = os.getenv("HF_TOKEN")
# OpenAI GPT OSS Models with enhanced configurations
AVAILABLE_MODELS = {
"openai/gpt-oss-120b": {
"name": "GPT OSS 120B",
"description": "117B parameters, 5.1B active - Production use with reasoning",
"size": "120B",
"context_length": 128000, # Full 128k context length
"supports_reasoning": True,
"supports_tool_calling": True,
"active_params": "5.1B"
},
"openai/gpt-oss-20b": {
"name": "GPT OSS 20B",
"description": "21B parameters, 3.6B active - Lower latency with reasoning",
"size": "20B",
"context_length": 128000, # Full 128k context length
"supports_reasoning": True,
"supports_tool_calling": True,
"active_params": "3.6B"
}
}
# Enhanced Inference Providers supporting GPT OSS models
INFERENCE_PROVIDERS = {
"cerebras": {
"name": "Cerebras",
"description": "World-record inference speeds (2-4k tokens/sec for GPT-OSS)",
"supports_120b": True,
"supports_20b": True,
"endpoint_suffix": "cerebras",
"speed": "Very Fast",
"recommended_for": ["production", "high-throughput"],
"max_context_support": 128000 # Full context support
},
"fireworks-ai": {
"name": "Fireworks AI",
"description": "Fast and reliable inference with excellent reliability",
"supports_120b": True,
"supports_20b": True,
"endpoint_suffix": "fireworks-ai",
"speed": "Fast",
"recommended_for": ["production", "general-use"],
"max_context_support": 128000 # Full context support
},
"together-ai": {
"name": "Together AI",
"description": "Collaborative AI inference with good performance",
"supports_120b": True,
"supports_20b": True,
"endpoint_suffix": "together-ai",
"speed": "Fast",
"recommended_for": ["development", "experimentation"],
"max_context_support": 128000 # Full context support
},
"replicate": {
"name": "Replicate",
"description": "Machine learning deployment platform",
"supports_120b": True,
"supports_20b": True,
"endpoint_suffix": "replicate",
"speed": "Medium",
"recommended_for": ["prototyping", "low-volume"],
"max_context_support": 128000 # Full context support
}
}
# Enhanced Model Configuration for GPT-OSS - Utilizing full context
MAX_TOKENS = 128000 # Full context length for GPT-OSS models
# Response token allocation - increased for longer responses
DEFAULT_MAX_RESPONSE_TOKENS = 16384 # Increased from 8192 for longer responses
MIN_RESPONSE_TOKENS = 4096 # Minimum response size
# Context management - optimized for full 128k usage
SYSTEM_PROMPT_RESERVE = 3000 # Reserve for system prompt (includes MCP tool descriptions)
MCP_TOOLS_RESERVE = 2000 # Additional reserve when MCP servers are enabled
# History management - much larger with 128k context
MAX_HISTORY_MESSAGES = 100 # Increased from 50 for better context retention
DEFAULT_HISTORY_MESSAGES = 50 # Default for good performance
# Reasoning configuration
DEFAULT_REASONING_EFFORT = "medium" # low, medium, high
# UI Configuration
GRADIO_THEME = "ocean"
DEBUG_MODE = True
# MCP Server recommendations
OPTIMAL_MCP_SERVER_COUNT = 6 # Recommended maximum for good performance
WARNING_MCP_SERVER_COUNT = 10 # Show warning if more than this
# File Support
SUPPORTED_IMAGE_EXTENSIONS = ['.png', '.jpg', '.jpeg', '.gif', '.webp', '.bmp', '.svg']
SUPPORTED_AUDIO_EXTENSIONS = ['.mp3', '.wav', '.ogg', '.m4a', '.flac', '.aac', '.opus', '.wma']
SUPPORTED_VIDEO_EXTENSIONS = ['.mp4', '.avi', '.mov', '.mkv', '.webm', '.m4v', '.wmv']
SUPPORTED_DOCUMENT_EXTENSIONS = ['.pdf', '.txt', '.docx', '.md', '.rtf', '.odt']
@classmethod
def get_available_models_for_provider(cls, provider_id: str) -> List[str]:
"""Get models available for a specific provider"""
if provider_id not in cls.INFERENCE_PROVIDERS:
return []
provider = cls.INFERENCE_PROVIDERS[provider_id]
available_models = []
for model_id, model_info in cls.AVAILABLE_MODELS.items():
if model_info["size"] == "120B" and provider["supports_120b"]:
available_models.append(model_id)
elif model_info["size"] == "20B" and provider["supports_20b"]:
available_models.append(model_id)
return available_models
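    # Illustrative example (assuming the provider table above is unchanged):
    #   AppConfig.get_available_models_for_provider("cerebras")
    #   -> ["openai/gpt-oss-120b", "openai/gpt-oss-20b"]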
@classmethod
def get_model_endpoint(cls, model_id: str, provider_id: str) -> str:
"""Get the full model endpoint for HF Inference Providers"""
if provider_id not in cls.INFERENCE_PROVIDERS:
raise ValueError(f"Unknown provider: {provider_id}")
provider = cls.INFERENCE_PROVIDERS[provider_id]
return f"{model_id}:{provider['endpoint_suffix']}"
@classmethod
def get_optimal_context_settings(cls, model_id: str, provider_id: str, mcp_servers_count: int = 0) -> Dict[str, int]:
"""Get optimal context settings for a model/provider combination"""
model_info = cls.AVAILABLE_MODELS.get(model_id, {})
provider_info = cls.INFERENCE_PROVIDERS.get(provider_id, {})
# Get the minimum of model and provider context support
model_context = model_info.get("context_length", 128000)
provider_context = provider_info.get("max_context_support", 128000)
context_length = min(model_context, provider_context)
# Calculate reserves based on MCP server count
system_reserve = cls.SYSTEM_PROMPT_RESERVE
if mcp_servers_count > 0:
# Add extra reserve for MCP tools (roughly 300 tokens per server for tool descriptions)
system_reserve += cls.MCP_TOOLS_RESERVE + (mcp_servers_count * 300)
# Dynamic response token allocation based on available context
if context_length >= 100000:
max_response_tokens = cls.DEFAULT_MAX_RESPONSE_TOKENS # 16384
elif context_length >= 50000:
max_response_tokens = 12288
elif context_length >= 20000:
max_response_tokens = 8192
else:
max_response_tokens = cls.MIN_RESPONSE_TOKENS # 4096
# Calculate available context for history
available_context = context_length - system_reserve - max_response_tokens
# Calculate recommended history limit
# Assume average message is ~200 tokens
avg_message_tokens = 200
recommended_history = min(
cls.MAX_HISTORY_MESSAGES,
available_context // avg_message_tokens
)
return {
"max_context": context_length,
"available_context": available_context,
"max_response_tokens": max_response_tokens,
"system_reserve": system_reserve,
"recommended_history_limit": max(10, recommended_history), # At least 10 messages
"context_utilization": f"{((system_reserve + max_response_tokens) / context_length * 100):.1f}% reserved"
}
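    # Illustrative example (values follow directly from the constants above,
    # assuming two MCP servers are connected):
    #   AppConfig.get_optimal_context_settings("openai/gpt-oss-120b", "cerebras", mcp_servers_count=2)
    #   -> {"max_context": 128000, "available_context": 106016,
    #       "max_response_tokens": 16384, "system_reserve": 5600,
    #       "recommended_history_limit": 100, "context_utilization": "17.2% reserved"}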
@classmethod
    def get_all_media_extensions(cls) -> List[str]:
"""Get all supported media file extensions"""
return (cls.SUPPORTED_IMAGE_EXTENSIONS +
cls.SUPPORTED_AUDIO_EXTENSIONS +
cls.SUPPORTED_VIDEO_EXTENSIONS)
@classmethod
def is_image_file(cls, file_path: str) -> bool:
"""Check if file is an image"""
if not file_path:
return False
        return any(file_path.lower().endswith(ext) for ext in cls.SUPPORTED_IMAGE_EXTENSIONS)
@classmethod
def is_audio_file(cls, file_path: str) -> bool:
"""Check if file is an audio file"""
if not file_path:
return False
        return any(file_path.lower().endswith(ext) for ext in cls.SUPPORTED_AUDIO_EXTENSIONS)
@classmethod
def is_video_file(cls, file_path: str) -> bool:
"""Check if file is a video file"""
if not file_path:
return False
        return any(file_path.lower().endswith(ext) for ext in cls.SUPPORTED_VIDEO_EXTENSIONS)
@classmethod
def is_media_file(cls, file_path: str) -> bool:
"""Check if file is any supported media type"""
if not file_path:
return False
        return any(file_path.lower().endswith(ext) for ext in cls.get_all_media_extensions())
@classmethod
def get_provider_recommendation(cls, use_case: str) -> List[str]:
"""Get recommended providers for specific use cases"""
recommendations = {
"production": ["cerebras", "fireworks-ai"],
"development": ["together-ai", "fireworks-ai"],
"experimentation": ["together-ai", "replicate"],
"high-throughput": ["cerebras"],
"cost-effective": ["together-ai", "replicate"],
"maximum-context": ["cerebras", "fireworks-ai"] # Providers with best context support
}
return recommendations.get(use_case, list(cls.INFERENCE_PROVIDERS.keys()))
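    # Illustrative example:
    #   AppConfig.get_provider_recommendation("production") -> ["cerebras", "fireworks-ai"]
    # An unknown use case falls back to every configured provider id.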
# Check for dependencies
try:
import httpx
HTTPX_AVAILABLE = True
except ImportError:
HTTPX_AVAILABLE = False
logger.warning("httpx not available - file upload functionality limited")
try:
import huggingface_hub
HF_HUB_AVAILABLE = True
except ImportError:
HF_HUB_AVAILABLE = False
logger.warning("huggingface_hub not available - login functionality disabled")
# Enhanced CSS Configuration with better media display
CUSTOM_CSS = """
/* Hide Gradio footer */
footer {
display: none !important;
}
/* Make chatbot expand to fill available space */
.gradio-container {
height: 100vh !important;
}
/* Ensure proper flex layout */
.main-content {
display: flex;
flex-direction: column;
height: 100%;
}
/* Input area stays at bottom with minimal padding */
.input-area {
margin-top: auto;
padding-top: 0.25rem !important;
padding-bottom: 0 !important;
margin-bottom: 0 !important;
}
/* Reduce padding around chatbot */
.chatbot {
margin-bottom: 0 !important;
padding-bottom: 0 !important;
}
/* Provider and model selection styling */
.provider-model-selection {
padding: 10px;
border-radius: 8px;
margin-bottom: 10px;
border-left: 4px solid #007bff;
}
/* Login section styling */
.login-section {
padding: 10px;
border-radius: 8px;
margin-bottom: 10px;
border-left: 4px solid #4caf50;
}
/* Tool usage indicator */
.tool-usage {
background: #fff3cd;
border: 1px solid #ffeaa7;
border-radius: 4px;
padding: 8px;
margin: 4px 0;
}
/* Media display improvements */
.media-container {
max-width: 100%;
border-radius: 8px;
overflow: hidden;
box-shadow: 0 2px 8px rgba(0,0,0,0.1);
}
/* Enhanced audio player styling */
audio {
width: 100%;
max-width: 500px;
height: 54px;
border-radius: 27px;
outline: none;
margin: 10px 0;
}
/* Enhanced video player styling */
video {
width: 100%;
max-width: 700px;
height: auto;
object-fit: contain;
border-radius: 8px;
margin: 10px 0;
box-shadow: 0 4px 6px rgba(0,0,0,0.1);
}
/* Server status indicators */
.server-status {
display: inline-block;
padding: 2px 8px;
border-radius: 12px;
font-size: 12px;
font-weight: bold;
}
.server-status.online {
background: #d4edda;
color: #155724;
}
.server-status.offline {
background: #f8d7da;
color: #721c24;
}
/* Message metadata styling */
.message-metadata {
font-size: 0.85em;
color: #666;
margin-top: 4px;
padding: 4px 8px;
background: #f0f0f0;
border-radius: 4px;
}
"""