""" Configuration module for Universal MCP Client - Enhanced for GPT-OSS models with full context support """ import os from dataclasses import dataclass from typing import Optional, Dict, List import logging # Set up enhanced logging logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') logger = logging.getLogger(__name__) @dataclass class MCPServerConfig: """Configuration for an MCP server connection""" name: str url: str description: str space_id: Optional[str] = None class AppConfig: """Application configuration settings""" # HuggingFace Configuration HF_TOKEN = os.getenv("HF_TOKEN") # OpenAI GPT OSS Models with enhanced configurations AVAILABLE_MODELS = { "openai/gpt-oss-120b": { "name": "GPT OSS 120B", "description": "117B parameters, 5.1B active - Production use with reasoning", "size": "120B", "context_length": 128000, # Full 128k context length "supports_reasoning": True, "supports_tool_calling": True, "active_params": "5.1B" }, "openai/gpt-oss-20b": { "name": "GPT OSS 20B", "description": "21B parameters, 3.6B active - Lower latency with reasoning", "size": "20B", "context_length": 128000, # Full 128k context length "supports_reasoning": True, "supports_tool_calling": True, "active_params": "3.6B" } } # Enhanced Inference Providers supporting GPT OSS models INFERENCE_PROVIDERS = { "cerebras": { "name": "Cerebras", "description": "World-record inference speeds (2-4k tokens/sec for GPT-OSS)", "supports_120b": True, "supports_20b": True, "endpoint_suffix": "cerebras", "speed": "Very Fast", "recommended_for": ["production", "high-throughput"], "max_context_support": 128000 # Full context support }, "fireworks-ai": { "name": "Fireworks AI", "description": "Fast and reliable inference with excellent reliability", "supports_120b": True, "supports_20b": True, "endpoint_suffix": "fireworks-ai", "speed": "Fast", "recommended_for": ["production", "general-use"], "max_context_support": 128000 # Full context support }, "together-ai": { "name": "Together AI", "description": "Collaborative AI inference with good performance", "supports_120b": True, "supports_20b": True, "endpoint_suffix": "together-ai", "speed": "Fast", "recommended_for": ["development", "experimentation"], "max_context_support": 128000 # Full context support }, "replicate": { "name": "Replicate", "description": "Machine learning deployment platform", "supports_120b": True, "supports_20b": True, "endpoint_suffix": "replicate", "speed": "Medium", "recommended_for": ["prototyping", "low-volume"], "max_context_support": 128000 # Full context support } } # Enhanced Model Configuration for GPT-OSS - Utilizing full context MAX_TOKENS = 128000 # Full context length for GPT-OSS models # Response token allocation - increased for longer responses DEFAULT_MAX_RESPONSE_TOKENS = 16384 # Increased from 8192 for longer responses MIN_RESPONSE_TOKENS = 4096 # Minimum response size # Context management - optimized for full 128k usage SYSTEM_PROMPT_RESERVE = 3000 # Reserve for system prompt (includes MCP tool descriptions) MCP_TOOLS_RESERVE = 2000 # Additional reserve when MCP servers are enabled # History management - much larger with 128k context MAX_HISTORY_MESSAGES = 100 # Increased from 50 for better context retention DEFAULT_HISTORY_MESSAGES = 50 # Default for good performance # Reasoning configuration DEFAULT_REASONING_EFFORT = "medium" # low, medium, high # UI Configuration GRADIO_THEME = "ocean" DEBUG_MODE = True # MCP Server recommendations OPTIMAL_MCP_SERVER_COUNT = 6 # Recommended 


class AppConfig:
    """Application configuration settings"""

    # HuggingFace Configuration
    HF_TOKEN = os.getenv("HF_TOKEN")

    # OpenAI GPT OSS models with enhanced configurations
    AVAILABLE_MODELS = {
        "openai/gpt-oss-120b": {
            "name": "GPT OSS 120B",
            "description": "117B parameters, 5.1B active - production use with reasoning",
            "size": "120B",
            "context_length": 128000,  # Full 128k context length
            "supports_reasoning": True,
            "supports_tool_calling": True,
            "active_params": "5.1B"
        },
        "openai/gpt-oss-20b": {
            "name": "GPT OSS 20B",
            "description": "21B parameters, 3.6B active - lower latency with reasoning",
            "size": "20B",
            "context_length": 128000,  # Full 128k context length
            "supports_reasoning": True,
            "supports_tool_calling": True,
            "active_params": "3.6B"
        }
    }

    # Inference providers that support the GPT-OSS models
    INFERENCE_PROVIDERS = {
        "cerebras": {
            "name": "Cerebras",
            "description": "World-record inference speeds (2-4k tokens/sec for GPT-OSS)",
            "supports_120b": True,
            "supports_20b": True,
            "endpoint_suffix": "cerebras",
            "speed": "Very Fast",
            "recommended_for": ["production", "high-throughput"],
            "max_context_support": 128000  # Full context support
        },
        "fireworks-ai": {
            "name": "Fireworks AI",
            "description": "Fast inference with excellent reliability",
            "supports_120b": True,
            "supports_20b": True,
            "endpoint_suffix": "fireworks-ai",
            "speed": "Fast",
            "recommended_for": ["production", "general-use"],
            "max_context_support": 128000  # Full context support
        },
        "together-ai": {
            "name": "Together AI",
            "description": "Collaborative AI inference with good performance",
            "supports_120b": True,
            "supports_20b": True,
            "endpoint_suffix": "together-ai",
            "speed": "Fast",
            "recommended_for": ["development", "experimentation"],
            "max_context_support": 128000  # Full context support
        },
        "replicate": {
            "name": "Replicate",
            "description": "Machine learning deployment platform",
            "supports_120b": True,
            "supports_20b": True,
            "endpoint_suffix": "replicate",
            "speed": "Medium",
            "recommended_for": ["prototyping", "low-volume"],
            "max_context_support": 128000  # Full context support
        }
    }

    # Model configuration for GPT-OSS - utilizing the full context window
    MAX_TOKENS = 128000  # Full context length for GPT-OSS models

    # Response token allocation - increased for longer responses
    DEFAULT_MAX_RESPONSE_TOKENS = 16384  # Increased from 8192 for longer responses
    MIN_RESPONSE_TOKENS = 4096  # Minimum response size

    # Context management - optimized for full 128k usage
    SYSTEM_PROMPT_RESERVE = 3000  # Reserve for system prompt (includes MCP tool descriptions)
    MCP_TOOLS_RESERVE = 2000  # Additional reserve when MCP servers are enabled

    # History management - much larger with 128k context
    MAX_HISTORY_MESSAGES = 100  # Increased from 50 for better context retention
    DEFAULT_HISTORY_MESSAGES = 50  # Default for good performance

    # Reasoning configuration
    DEFAULT_REASONING_EFFORT = "medium"  # low, medium, high

    # UI configuration
    GRADIO_THEME = "ocean"
    DEBUG_MODE = True

    # MCP server recommendations
    OPTIMAL_MCP_SERVER_COUNT = 6  # Recommended maximum for good performance
    WARNING_MCP_SERVER_COUNT = 10  # Show a warning above this many servers

    # File support
    SUPPORTED_IMAGE_EXTENSIONS = ['.png', '.jpg', '.jpeg', '.gif', '.webp', '.bmp', '.svg']
    SUPPORTED_AUDIO_EXTENSIONS = ['.mp3', '.wav', '.ogg', '.m4a', '.flac', '.aac', '.opus', '.wma']
    SUPPORTED_VIDEO_EXTENSIONS = ['.mp4', '.avi', '.mov', '.mkv', '.webm', '.m4v', '.wmv']
    SUPPORTED_DOCUMENT_EXTENSIONS = ['.pdf', '.txt', '.docx', '.md', '.rtf', '.odt']

    @classmethod
    def get_available_models_for_provider(cls, provider_id: str) -> List[str]:
        """Get models available for a specific provider"""
        if provider_id not in cls.INFERENCE_PROVIDERS:
            return []

        provider = cls.INFERENCE_PROVIDERS[provider_id]
        available_models = []

        for model_id, model_info in cls.AVAILABLE_MODELS.items():
            if model_info["size"] == "120B" and provider["supports_120b"]:
                available_models.append(model_id)
            elif model_info["size"] == "20B" and provider["supports_20b"]:
                available_models.append(model_id)

        return available_models

    @classmethod
    def get_model_endpoint(cls, model_id: str, provider_id: str) -> str:
        """Get the full model endpoint for HF Inference Providers"""
        if provider_id not in cls.INFERENCE_PROVIDERS:
            raise ValueError(f"Unknown provider: {provider_id}")

        provider = cls.INFERENCE_PROVIDERS[provider_id]
        return f"{model_id}:{provider['endpoint_suffix']}"

    @classmethod
    def get_optimal_context_settings(cls, model_id: str, provider_id: str,
                                     mcp_servers_count: int = 0) -> Dict[str, Any]:
        """Get optimal context settings for a model/provider combination"""
        model_info = cls.AVAILABLE_MODELS.get(model_id, {})
        provider_info = cls.INFERENCE_PROVIDERS.get(provider_id, {})

        # The usable context is the minimum of what the model and provider support
        model_context = model_info.get("context_length", 128000)
        provider_context = provider_info.get("max_context_support", 128000)
        context_length = min(model_context, provider_context)

        # Calculate reserves based on MCP server count
        system_reserve = cls.SYSTEM_PROMPT_RESERVE
        if mcp_servers_count > 0:
            # Add extra reserve for MCP tools (roughly 300 tokens per server for tool descriptions)
            system_reserve += cls.MCP_TOOLS_RESERVE + (mcp_servers_count * 300)

        # Dynamic response token allocation based on available context
        if context_length >= 100000:
            max_response_tokens = cls.DEFAULT_MAX_RESPONSE_TOKENS  # 16384
        elif context_length >= 50000:
            max_response_tokens = 12288
        elif context_length >= 20000:
            max_response_tokens = 8192
        else:
            max_response_tokens = cls.MIN_RESPONSE_TOKENS  # 4096

        # Calculate the context left over for conversation history
        available_context = context_length - system_reserve - max_response_tokens

        # Recommend a history limit, assuming an average message is ~200 tokens
        avg_message_tokens = 200
        recommended_history = min(
            cls.MAX_HISTORY_MESSAGES,
            available_context // avg_message_tokens
        )

        return {
            "max_context": context_length,
            "available_context": available_context,
            "max_response_tokens": max_response_tokens,
            "system_reserve": system_reserve,
            "recommended_history_limit": max(10, recommended_history),  # At least 10 messages
            "context_utilization": f"{((system_reserve + max_response_tokens) / context_length * 100):.1f}% reserved"
        }

    @classmethod
    def get_all_media_extensions(cls) -> List[str]:
        """Get all supported media file extensions"""
        return (cls.SUPPORTED_IMAGE_EXTENSIONS +
                cls.SUPPORTED_AUDIO_EXTENSIONS +
                cls.SUPPORTED_VIDEO_EXTENSIONS)

    @classmethod
    def is_image_file(cls, file_path: str) -> bool:
        """Check if file is an image"""
        if not file_path:
            return False
        # Match on the file suffix rather than a substring anywhere in the path
        return any(file_path.lower().endswith(ext) for ext in cls.SUPPORTED_IMAGE_EXTENSIONS)

    @classmethod
    def is_audio_file(cls, file_path: str) -> bool:
        """Check if file is an audio file"""
        if not file_path:
            return False
        return any(file_path.lower().endswith(ext) for ext in cls.SUPPORTED_AUDIO_EXTENSIONS)

    @classmethod
    def is_video_file(cls, file_path: str) -> bool:
        """Check if file is a video file"""
        if not file_path:
            return False
        return any(file_path.lower().endswith(ext) for ext in cls.SUPPORTED_VIDEO_EXTENSIONS)

    @classmethod
    def is_media_file(cls, file_path: str) -> bool:
        """Check if file is any supported media type"""
        if not file_path:
            return False
        return any(file_path.lower().endswith(ext) for ext in cls.get_all_media_extensions())

    @classmethod
    def get_provider_recommendation(cls, use_case: str) -> List[str]:
        """Get recommended providers for specific use cases"""
        recommendations = {
            "production": ["cerebras", "fireworks-ai"],
            "development": ["together-ai", "fireworks-ai"],
            "experimentation": ["together-ai", "replicate"],
            "high-throughput": ["cerebras"],
            "cost-effective": ["together-ai", "replicate"],
            "maximum-context": ["cerebras", "fireworks-ai"]  # Providers with best context support
        }
        return recommendations.get(use_case, list(cls.INFERENCE_PROVIDERS.keys()))
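

# Usage sketch for the context helpers above, with 3 MCP servers enabled.
# The numbers follow from the constants defined in AppConfig:
#   system reserve  = 3000 + 2000 + 3 * 300   = 5900 tokens
#   response budget = 16384 tokens (context >= 100k)
#   history budget  = 128000 - 5900 - 16384   = 105716 tokens
#
#   endpoint = AppConfig.get_model_endpoint("openai/gpt-oss-120b", "cerebras")
#   # -> "openai/gpt-oss-120b:cerebras"
#   settings = AppConfig.get_optimal_context_settings(
#       "openai/gpt-oss-120b", "cerebras", mcp_servers_count=3
#   )
#   # settings["system_reserve"]            == 5900
#   # settings["available_context"]         == 105716
#   # settings["recommended_history_limit"] == 100  (capped by MAX_HISTORY_MESSAGES)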


# Check for optional dependencies
try:
    import httpx
    HTTPX_AVAILABLE = True
except ImportError:
    HTTPX_AVAILABLE = False
    logger.warning("httpx not available - file upload functionality limited")

try:
    import huggingface_hub
    HF_HUB_AVAILABLE = True
except ImportError:
    HF_HUB_AVAILABLE = False
    logger.warning("huggingface_hub not available - login functionality disabled")

# Enhanced CSS configuration with better media display
CUSTOM_CSS = """
/* Hide Gradio footer */
footer {
    display: none !important;
}

/* Make chatbot expand to fill available space */
.gradio-container {
    height: 100vh !important;
}

/* Ensure proper flex layout */
.main-content {
    display: flex;
    flex-direction: column;
    height: 100%;
}

/* Input area stays at bottom with minimal padding */
.input-area {
    margin-top: auto;
    padding-top: 0.25rem !important;
    padding-bottom: 0 !important;
    margin-bottom: 0 !important;
}

/* Reduce padding around chatbot */
.chatbot {
    margin-bottom: 0 !important;
    padding-bottom: 0 !important;
}

/* Provider and model selection styling */
.provider-model-selection {
    padding: 10px;
    border-radius: 8px;
    margin-bottom: 10px;
    border-left: 4px solid #007bff;
}

/* Login section styling */
.login-section {
    padding: 10px;
    border-radius: 8px;
    margin-bottom: 10px;
    border-left: 4px solid #4caf50;
}

/* Tool usage indicator */
.tool-usage {
    background: #fff3cd;
    border: 1px solid #ffeaa7;
    border-radius: 4px;
    padding: 8px;
    margin: 4px 0;
}

/* Media display improvements */
.media-container {
    max-width: 100%;
    border-radius: 8px;
    overflow: hidden;
    box-shadow: 0 2px 8px rgba(0,0,0,0.1);
}

/* Enhanced audio player styling */
audio {
    width: 100%;
    max-width: 500px;
    height: 54px;
    border-radius: 27px;
    outline: none;
    margin: 10px 0;
}

/* Enhanced video player styling */
video {
    width: 100%;
    max-width: 700px;
    height: auto;
    object-fit: contain;
    border-radius: 8px;
    margin: 10px 0;
    box-shadow: 0 4px 6px rgba(0,0,0,0.1);
}

/* Server status indicators */
.server-status {
    display: inline-block;
    padding: 2px 8px;
    border-radius: 12px;
    font-size: 12px;
    font-weight: bold;
}

.server-status.online {
    background: #d4edda;
    color: #155724;
}

.server-status.offline {
    background: #f8d7da;
    color: #721c24;
}

/* Message metadata styling */
.message-metadata {
    font-size: 0.85em;
    color: #666;
    margin-top: 4px;
    padding: 4px 8px;
    background: #f0f0f0;
    border-radius: 4px;
}
"""
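

# Quick smoke test: run `python config.py` directly to exercise the helpers
# above. Everything printed is derived from the constants in this file.
if __name__ == "__main__":
    for provider_id in AppConfig.INFERENCE_PROVIDERS:
        models = AppConfig.get_available_models_for_provider(provider_id)
        print(f"{provider_id}: {models}")

    # Endpoint format is "<model_id>:<provider_suffix>"
    print(AppConfig.get_model_endpoint("openai/gpt-oss-120b", "cerebras"))

    settings = AppConfig.get_optimal_context_settings(
        "openai/gpt-oss-120b", "cerebras", mcp_servers_count=2
    )
    print(settings)

    # Suffix matching is case-insensitive; documents do not count as media
    assert AppConfig.is_image_file("photo.PNG")
    assert AppConfig.is_media_file("clip.webm")
    assert not AppConfig.is_media_file("notes.txt")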