"""
Configuration module for Universal MCP Client - Enhanced for GPT-OSS models with full context support
"""
import os
from dataclasses import dataclass
from typing import Any, Dict, List, Optional
import logging
# Set up enhanced logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)


@dataclass
class MCPServerConfig:
    """Configuration for an MCP server connection"""
    name: str
    url: str
    description: str
    space_id: Optional[str] = None


class AppConfig:
"""Application configuration settings"""
# HuggingFace Configuration
HF_TOKEN = os.getenv("HF_TOKEN")
# OpenAI GPT OSS Models with enhanced configurations
AVAILABLE_MODELS = {
"openai/gpt-oss-120b": {
"name": "GPT OSS 120B",
"description": "117B parameters, 5.1B active - Production use with reasoning",
"size": "120B",
"context_length": 128000, # Full 128k context length
"supports_reasoning": True,
"supports_tool_calling": True,
"active_params": "5.1B"
},
"openai/gpt-oss-20b": {
"name": "GPT OSS 20B",
"description": "21B parameters, 3.6B active - Lower latency with reasoning",
"size": "20B",
"context_length": 128000, # Full 128k context length
"supports_reasoning": True,
"supports_tool_calling": True,
"active_params": "3.6B"
}
}

    # Inference providers supporting GPT-OSS models
    INFERENCE_PROVIDERS = {
        "cerebras": {
            "name": "Cerebras",
            "description": "World-record inference speeds (2-4k tokens/sec for GPT-OSS)",
            "supports_120b": True,
            "supports_20b": True,
            "endpoint_suffix": "cerebras",
            "speed": "Very Fast",
            "recommended_for": ["production", "high-throughput"],
            "max_context_support": 128000  # Full context support
        },
        "fireworks-ai": {
            "name": "Fireworks AI",
            "description": "Fast inference with excellent reliability",
            "supports_120b": True,
            "supports_20b": True,
            "endpoint_suffix": "fireworks-ai",
            "speed": "Fast",
            "recommended_for": ["production", "general-use"],
            "max_context_support": 128000  # Full context support
        },
        "together-ai": {
            "name": "Together AI",
            "description": "Collaborative AI inference with good performance",
            "supports_120b": True,
            "supports_20b": True,
            "endpoint_suffix": "together-ai",
            "speed": "Fast",
            "recommended_for": ["development", "experimentation"],
            "max_context_support": 128000  # Full context support
        },
        "replicate": {
            "name": "Replicate",
            "description": "Machine learning deployment platform",
            "supports_120b": True,
            "supports_20b": True,
            "endpoint_suffix": "replicate",
            "speed": "Medium",
            "recommended_for": ["prototyping", "low-volume"],
            "max_context_support": 128000  # Full context support
        }
    }

    # Enhanced model configuration for GPT-OSS - utilizing full context
    MAX_TOKENS = 128000  # Full context length for GPT-OSS models

    # Response token allocation - increased for longer responses
    DEFAULT_MAX_RESPONSE_TOKENS = 16384  # Increased from 8192 for longer responses
    MIN_RESPONSE_TOKENS = 4096  # Minimum response size

    # Context management - optimized for full 128k usage
    SYSTEM_PROMPT_RESERVE = 3000  # Reserve for system prompt (includes MCP tool descriptions)
    MCP_TOOLS_RESERVE = 2000  # Additional reserve when MCP servers are enabled

    # History management - much larger with 128k context
    MAX_HISTORY_MESSAGES = 100  # Increased from 50 for better context retention
    DEFAULT_HISTORY_MESSAGES = 50  # Default for good performance

    # Reasoning configuration
    DEFAULT_REASONING_EFFORT = "medium"  # low, medium, high

    # UI Configuration
    GRADIO_THEME = "ocean"
    DEBUG_MODE = True

    # MCP server recommendations
    OPTIMAL_MCP_SERVER_COUNT = 6  # Recommended maximum for good performance
    WARNING_MCP_SERVER_COUNT = 10  # Show a warning above this count

    # File support
    SUPPORTED_IMAGE_EXTENSIONS = ['.png', '.jpg', '.jpeg', '.gif', '.webp', '.bmp', '.svg']
    SUPPORTED_AUDIO_EXTENSIONS = ['.mp3', '.wav', '.ogg', '.m4a', '.flac', '.aac', '.opus', '.wma']
    SUPPORTED_VIDEO_EXTENSIONS = ['.mp4', '.avi', '.mov', '.mkv', '.webm', '.m4v', '.wmv']
    SUPPORTED_DOCUMENT_EXTENSIONS = ['.pdf', '.txt', '.docx', '.md', '.rtf', '.odt']

    @classmethod
    def get_available_models_for_provider(cls, provider_id: str) -> List[str]:
        """Get models available for a specific provider"""
        if provider_id not in cls.INFERENCE_PROVIDERS:
            return []
        provider = cls.INFERENCE_PROVIDERS[provider_id]
        available_models = []
        for model_id, model_info in cls.AVAILABLE_MODELS.items():
            if model_info["size"] == "120B" and provider["supports_120b"]:
                available_models.append(model_id)
            elif model_info["size"] == "20B" and provider["supports_20b"]:
                available_models.append(model_id)
        return available_models
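
    # e.g. AppConfig.get_available_models_for_provider("cerebras")
    # -> ["openai/gpt-oss-120b", "openai/gpt-oss-20b"], since the entry above
    # sets both supports_120b and supports_20b; an unknown provider id
    # returns [].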

    @classmethod
    def get_model_endpoint(cls, model_id: str, provider_id: str) -> str:
        """Get the full model endpoint for HF Inference Providers"""
        if provider_id not in cls.INFERENCE_PROVIDERS:
            raise ValueError(f"Unknown provider: {provider_id}")
        provider = cls.INFERENCE_PROVIDERS[provider_id]
        return f"{model_id}:{provider['endpoint_suffix']}"

    @classmethod
    def get_optimal_context_settings(cls, model_id: str, provider_id: str, mcp_servers_count: int = 0) -> Dict[str, Any]:
        """Get optimal context settings for a model/provider combination"""
        model_info = cls.AVAILABLE_MODELS.get(model_id, {})
        provider_info = cls.INFERENCE_PROVIDERS.get(provider_id, {})

        # Get the minimum of model and provider context support
        model_context = model_info.get("context_length", 128000)
        provider_context = provider_info.get("max_context_support", 128000)
        context_length = min(model_context, provider_context)

        # Calculate reserves based on MCP server count
        system_reserve = cls.SYSTEM_PROMPT_RESERVE
        if mcp_servers_count > 0:
            # Add extra reserve for MCP tools (roughly 300 tokens per server for tool descriptions)
            system_reserve += cls.MCP_TOOLS_RESERVE + (mcp_servers_count * 300)

        # Dynamic response token allocation based on available context
        if context_length >= 100000:
            max_response_tokens = cls.DEFAULT_MAX_RESPONSE_TOKENS  # 16384
        elif context_length >= 50000:
            max_response_tokens = 12288
        elif context_length >= 20000:
            max_response_tokens = 8192
        else:
            max_response_tokens = cls.MIN_RESPONSE_TOKENS  # 4096

        # Calculate available context for history
        available_context = context_length - system_reserve - max_response_tokens

        # Calculate the recommended history limit, assuming an average message
        # of ~200 tokens
        avg_message_tokens = 200
        recommended_history = min(
            cls.MAX_HISTORY_MESSAGES,
            available_context // avg_message_tokens
        )

        return {
            "max_context": context_length,
            "available_context": available_context,
            "max_response_tokens": max_response_tokens,
            "system_reserve": system_reserve,
            "recommended_history_limit": max(10, recommended_history),  # At least 10 messages
            "context_utilization": f"{((system_reserve + max_response_tokens) / context_length * 100):.1f}% reserved"
        }
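
    # Worked example with the defaults above: a 128k context and two MCP
    # servers give system_reserve = 3000 + 2000 + 2*300 = 5600 tokens,
    # max_response_tokens = 16384, available_context = 128000 - 5600 - 16384
    # = 106016, a recommended_history_limit of min(100, 106016 // 200) = 100
    # messages, and a context_utilization of "17.2% reserved".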

    @classmethod
    def get_all_media_extensions(cls):
        """Get all supported media file extensions"""
        return (cls.SUPPORTED_IMAGE_EXTENSIONS +
                cls.SUPPORTED_AUDIO_EXTENSIONS +
                cls.SUPPORTED_VIDEO_EXTENSIONS)

    @classmethod
    def is_image_file(cls, file_path: str) -> bool:
        """Check if file is an image"""
        if not file_path:
            return False
        return any(file_path.lower().endswith(ext) for ext in cls.SUPPORTED_IMAGE_EXTENSIONS)

    @classmethod
    def is_audio_file(cls, file_path: str) -> bool:
        """Check if file is an audio file"""
        if not file_path:
            return False
        return any(file_path.lower().endswith(ext) for ext in cls.SUPPORTED_AUDIO_EXTENSIONS)

    @classmethod
    def is_video_file(cls, file_path: str) -> bool:
        """Check if file is a video file"""
        if not file_path:
            return False
        return any(file_path.lower().endswith(ext) for ext in cls.SUPPORTED_VIDEO_EXTENSIONS)

    @classmethod
    def is_media_file(cls, file_path: str) -> bool:
        """Check if file is any supported media type"""
        if not file_path:
            return False
        return any(file_path.lower().endswith(ext) for ext in cls.get_all_media_extensions())
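
    # e.g. AppConfig.is_image_file("/tmp/photo.PNG") -> True: matching is a
    # case-insensitive suffix check, so an extension that merely appears
    # somewhere in the path (say, a directory named "clips.mp4") doesn't count.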

    @classmethod
    def get_provider_recommendation(cls, use_case: str) -> List[str]:
        """Get recommended providers for specific use cases"""
        recommendations = {
            "production": ["cerebras", "fireworks-ai"],
            "development": ["together-ai", "fireworks-ai"],
            "experimentation": ["together-ai", "replicate"],
            "high-throughput": ["cerebras"],
            "cost-effective": ["together-ai", "replicate"],
            "maximum-context": ["cerebras", "fireworks-ai"]  # Providers with best context support
        }
        return recommendations.get(use_case, list(cls.INFERENCE_PROVIDERS.keys()))
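
    # e.g. AppConfig.get_provider_recommendation("high-throughput") -> ["cerebras"];
    # an unrecognised use case falls back to every configured provider.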


# Check for dependencies
try:
    import httpx
    HTTPX_AVAILABLE = True
except ImportError:
    HTTPX_AVAILABLE = False
    logger.warning("httpx not available - file upload functionality limited")

try:
    import huggingface_hub
    HF_HUB_AVAILABLE = True
except ImportError:
    HF_HUB_AVAILABLE = False
    logger.warning("huggingface_hub not available - login functionality disabled")

# Enhanced CSS Configuration with better media display
CUSTOM_CSS = """
/* Hide Gradio footer */
footer {
display: none !important;
}
/* Make chatbot expand to fill available space */
.gradio-container {
height: 100vh !important;
}
/* Ensure proper flex layout */
.main-content {
display: flex;
flex-direction: column;
height: 100%;
}
/* Input area stays at bottom with minimal padding */
.input-area {
margin-top: auto;
padding-top: 0.25rem !important;
padding-bottom: 0 !important;
margin-bottom: 0 !important;
}
/* Reduce padding around chatbot */
.chatbot {
margin-bottom: 0 !important;
padding-bottom: 0 !important;
}
/* Provider and model selection styling */
.provider-model-selection {
padding: 10px;
border-radius: 8px;
margin-bottom: 10px;
border-left: 4px solid #007bff;
}
/* Login section styling */
.login-section {
padding: 10px;
border-radius: 8px;
margin-bottom: 10px;
border-left: 4px solid #4caf50;
}
/* Tool usage indicator */
.tool-usage {
background: #fff3cd;
border: 1px solid #ffeaa7;
border-radius: 4px;
padding: 8px;
margin: 4px 0;
}
/* Media display improvements */
.media-container {
max-width: 100%;
border-radius: 8px;
overflow: hidden;
box-shadow: 0 2px 8px rgba(0,0,0,0.1);
}
/* Enhanced audio player styling */
audio {
width: 100%;
max-width: 500px;
height: 54px;
border-radius: 27px;
outline: none;
margin: 10px 0;
}
/* Enhanced video player styling */
video {
width: 100%;
max-width: 700px;
height: auto;
object-fit: contain;
border-radius: 8px;
margin: 10px 0;
box-shadow: 0 4px 6px rgba(0,0,0,0.1);
}
/* Server status indicators */
.server-status {
display: inline-block;
padding: 2px 8px;
border-radius: 12px;
font-size: 12px;
font-weight: bold;
}
.server-status.online {
background: #d4edda;
color: #155724;
}
.server-status.offline {
background: #f8d7da;
color: #721c24;
}
/* Message metadata styling */
.message-metadata {
font-size: 0.85em;
color: #666;
margin-top: 4px;
padding: 4px 8px;
background: #f0f0f0;
border-radius: 4px;
}
"""