""" | |
Configuration module for Universal MCP Client - Enhanced for GPT-OSS models with full context support | |
""" | |
import os | |
from dataclasses import dataclass | |
from typing import Optional, Dict, List | |
import logging | |
# Set up enhanced logging | |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') | |
logger = logging.getLogger(__name__) | |

@dataclass
class MCPServerConfig:
    """Configuration for an MCP server connection"""
    name: str
    url: str
    description: str
    space_id: Optional[str] = None
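
# Illustrative usage only -- the URL and space_id below are hypothetical examples,
# not endpoints defined by this project:
#   server = MCPServerConfig(
#       name="image-tools",
#       url="https://example-space.hf.space/gradio_api/mcp/sse",
#       description="Example MCP server exposing image tools",
#       space_id="example/image-tools",
#   )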

class AppConfig:
    """Application configuration settings"""

    # HuggingFace Configuration
    HF_TOKEN = os.getenv("HF_TOKEN")

    # OpenAI GPT OSS Models with enhanced configurations
    AVAILABLE_MODELS = {
        "openai/gpt-oss-120b": {
            "name": "GPT OSS 120B",
            "description": "117B parameters, 5.1B active - Production use with reasoning",
            "size": "120B",
            "context_length": 128000,  # Full 128k context length
            "supports_reasoning": True,
            "supports_tool_calling": True,
            "active_params": "5.1B"
        },
        "openai/gpt-oss-20b": {
            "name": "GPT OSS 20B",
            "description": "21B parameters, 3.6B active - Lower latency with reasoning",
            "size": "20B",
            "context_length": 128000,  # Full 128k context length
            "supports_reasoning": True,
            "supports_tool_calling": True,
            "active_params": "3.6B"
        }
    }

    # Enhanced Inference Providers supporting GPT OSS models
    INFERENCE_PROVIDERS = {
        "cerebras": {
            "name": "Cerebras",
            "description": "World-record inference speeds (2-4k tokens/sec for GPT-OSS)",
            "supports_120b": True,
            "supports_20b": True,
            "endpoint_suffix": "cerebras",
            "speed": "Very Fast",
            "recommended_for": ["production", "high-throughput"],
            "max_context_support": 128000  # Full context support
        },
        "fireworks-ai": {
            "name": "Fireworks AI",
            "description": "Fast inference with excellent reliability",
            "supports_120b": True,
            "supports_20b": True,
            "endpoint_suffix": "fireworks-ai",
            "speed": "Fast",
            "recommended_for": ["production", "general-use"],
            "max_context_support": 128000  # Full context support
        },
        "together-ai": {
            "name": "Together AI",
            "description": "Collaborative AI inference with good performance",
            "supports_120b": True,
            "supports_20b": True,
            "endpoint_suffix": "together-ai",
            "speed": "Fast",
            "recommended_for": ["development", "experimentation"],
            "max_context_support": 128000  # Full context support
        },
        "replicate": {
            "name": "Replicate",
            "description": "Machine learning deployment platform",
            "supports_120b": True,
            "supports_20b": True,
            "endpoint_suffix": "replicate",
            "speed": "Medium",
            "recommended_for": ["prototyping", "low-volume"],
            "max_context_support": 128000  # Full context support
        }
    }

    # Enhanced Model Configuration for GPT-OSS - utilizing the full context window
    MAX_TOKENS = 128000  # Full context length for GPT-OSS models

    # Response token allocation - increased for longer responses
    DEFAULT_MAX_RESPONSE_TOKENS = 16384  # Increased from 8192 for longer responses
    MIN_RESPONSE_TOKENS = 4096  # Minimum response size

    # Context management - optimized for full 128k usage
    SYSTEM_PROMPT_RESERVE = 3000  # Reserve for system prompt (includes MCP tool descriptions)
    MCP_TOOLS_RESERVE = 2000  # Additional reserve when MCP servers are enabled

    # History management - much larger with 128k context
    MAX_HISTORY_MESSAGES = 100  # Increased from 50 for better context retention
    DEFAULT_HISTORY_MESSAGES = 50  # Default for good performance

    # Reasoning configuration
    DEFAULT_REASONING_EFFORT = "medium"  # low, medium, high

    # UI Configuration
    GRADIO_THEME = "ocean"
    DEBUG_MODE = True

    # MCP Server recommendations
    OPTIMAL_MCP_SERVER_COUNT = 6  # Recommended maximum for good performance
    WARNING_MCP_SERVER_COUNT = 10  # Show warning if more than this

    # File Support
    SUPPORTED_IMAGE_EXTENSIONS = ['.png', '.jpg', '.jpeg', '.gif', '.webp', '.bmp', '.svg']
    SUPPORTED_AUDIO_EXTENSIONS = ['.mp3', '.wav', '.ogg', '.m4a', '.flac', '.aac', '.opus', '.wma']
    SUPPORTED_VIDEO_EXTENSIONS = ['.mp4', '.avi', '.mov', '.mkv', '.webm', '.m4v', '.wmv']
    SUPPORTED_DOCUMENT_EXTENSIONS = ['.pdf', '.txt', '.docx', '.md', '.rtf', '.odt']

    @classmethod
    def get_available_models_for_provider(cls, provider_id: str) -> List[str]:
        """Get models available for a specific provider"""
        if provider_id not in cls.INFERENCE_PROVIDERS:
            return []
        provider = cls.INFERENCE_PROVIDERS[provider_id]
        available_models = []
        for model_id, model_info in cls.AVAILABLE_MODELS.items():
            if model_info["size"] == "120B" and provider["supports_120b"]:
                available_models.append(model_id)
            elif model_info["size"] == "20B" and provider["supports_20b"]:
                available_models.append(model_id)
        return available_models

    @classmethod
    def get_model_endpoint(cls, model_id: str, provider_id: str) -> str:
        """Get the full model endpoint for HF Inference Providers"""
        if provider_id not in cls.INFERENCE_PROVIDERS:
            raise ValueError(f"Unknown provider: {provider_id}")
        provider = cls.INFERENCE_PROVIDERS[provider_id]
        return f"{model_id}:{provider['endpoint_suffix']}"

    @classmethod
    def get_optimal_context_settings(cls, model_id: str, provider_id: str, mcp_servers_count: int = 0) -> Dict[str, int]:
        """Get optimal context settings for a model/provider combination"""
        model_info = cls.AVAILABLE_MODELS.get(model_id, {})
        provider_info = cls.INFERENCE_PROVIDERS.get(provider_id, {})

        # Use the smaller of the model and provider context limits
        model_context = model_info.get("context_length", 128000)
        provider_context = provider_info.get("max_context_support", 128000)
        context_length = min(model_context, provider_context)

        # Calculate reserves based on MCP server count
        system_reserve = cls.SYSTEM_PROMPT_RESERVE
        if mcp_servers_count > 0:
            # Add extra reserve for MCP tools (roughly 300 tokens per server for tool descriptions)
            system_reserve += cls.MCP_TOOLS_RESERVE + (mcp_servers_count * 300)

        # Dynamic response token allocation based on available context
        if context_length >= 100000:
            max_response_tokens = cls.DEFAULT_MAX_RESPONSE_TOKENS  # 16384
        elif context_length >= 50000:
            max_response_tokens = 12288
        elif context_length >= 20000:
            max_response_tokens = 8192
        else:
            max_response_tokens = cls.MIN_RESPONSE_TOKENS  # 4096

        # Calculate available context for history
        available_context = context_length - system_reserve - max_response_tokens

        # Calculate recommended history limit, assuming an average message is ~200 tokens
        avg_message_tokens = 200
        recommended_history = min(
            cls.MAX_HISTORY_MESSAGES,
            available_context // avg_message_tokens
        )

        return {
            "max_context": context_length,
            "available_context": available_context,
            "max_response_tokens": max_response_tokens,
            "system_reserve": system_reserve,
            "recommended_history_limit": max(10, recommended_history),  # At least 10 messages
            "context_utilization": f"{((system_reserve + max_response_tokens) / context_length * 100):.1f}% reserved"
        }
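
    # Worked example (numbers follow directly from the constants above): for
    # "openai/gpt-oss-120b" on "cerebras" with 2 MCP servers, context_length is
    # min(128000, 128000) = 128000, system_reserve is 3000 + 2000 + 2*300 = 5600,
    # max_response_tokens is 16384, so available_context = 128000 - 5600 - 16384
    # = 106016, recommended_history_limit = min(100, 106016 // 200) = 100, and
    # roughly 17.2% of the window is reported as reserved.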

    @classmethod
    def get_all_media_extensions(cls):
        """Get all supported media file extensions"""
        return (cls.SUPPORTED_IMAGE_EXTENSIONS +
                cls.SUPPORTED_AUDIO_EXTENSIONS +
                cls.SUPPORTED_VIDEO_EXTENSIONS)

    @classmethod
    def is_image_file(cls, file_path: str) -> bool:
        """Check if file is an image"""
        if not file_path:
            return False
        # Match on the file suffix rather than a substring anywhere in the path
        return any(file_path.lower().endswith(ext) for ext in cls.SUPPORTED_IMAGE_EXTENSIONS)

    @classmethod
    def is_audio_file(cls, file_path: str) -> bool:
        """Check if file is an audio file"""
        if not file_path:
            return False
        return any(file_path.lower().endswith(ext) for ext in cls.SUPPORTED_AUDIO_EXTENSIONS)

    @classmethod
    def is_video_file(cls, file_path: str) -> bool:
        """Check if file is a video file"""
        if not file_path:
            return False
        return any(file_path.lower().endswith(ext) for ext in cls.SUPPORTED_VIDEO_EXTENSIONS)

    @classmethod
    def is_media_file(cls, file_path: str) -> bool:
        """Check if file is any supported media type"""
        if not file_path:
            return False
        return any(file_path.lower().endswith(ext) for ext in cls.get_all_media_extensions())

    @classmethod
    def get_provider_recommendation(cls, use_case: str) -> List[str]:
        """Get recommended providers for specific use cases"""
        recommendations = {
            "production": ["cerebras", "fireworks-ai"],
            "development": ["together-ai", "fireworks-ai"],
            "experimentation": ["together-ai", "replicate"],
            "high-throughput": ["cerebras"],
            "cost-effective": ["together-ai", "replicate"],
            "maximum-context": ["cerebras", "fireworks-ai"]  # Providers with best context support
        }
        return recommendations.get(use_case, list(cls.INFERENCE_PROVIDERS.keys()))
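
    # Note: an unknown use case falls back to every configured provider, e.g.
    # get_provider_recommendation("something-else") returns all four provider ids.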


# Check for dependencies
try:
    import httpx
    HTTPX_AVAILABLE = True
except ImportError:
    HTTPX_AVAILABLE = False
    logger.warning("httpx not available - file upload functionality limited")

try:
    import huggingface_hub
    HF_HUB_AVAILABLE = True
except ImportError:
    HF_HUB_AVAILABLE = False
    logger.warning("huggingface_hub not available - login functionality disabled")

# Enhanced CSS Configuration with better media display
CUSTOM_CSS = """
/* Hide Gradio footer */
footer {
    display: none !important;
}

/* Make chatbot expand to fill available space */
.gradio-container {
    height: 100vh !important;
}

/* Ensure proper flex layout */
.main-content {
    display: flex;
    flex-direction: column;
    height: 100%;
}

/* Input area stays at bottom with minimal padding */
.input-area {
    margin-top: auto;
    padding-top: 0.25rem !important;
    padding-bottom: 0 !important;
    margin-bottom: 0 !important;
}

/* Reduce padding around chatbot */
.chatbot {
    margin-bottom: 0 !important;
    padding-bottom: 0 !important;
}

/* Provider and model selection styling */
.provider-model-selection {
    padding: 10px;
    border-radius: 8px;
    margin-bottom: 10px;
    border-left: 4px solid #007bff;
}

/* Login section styling */
.login-section {
    padding: 10px;
    border-radius: 8px;
    margin-bottom: 10px;
    border-left: 4px solid #4caf50;
}

/* Tool usage indicator */
.tool-usage {
    background: #fff3cd;
    border: 1px solid #ffeaa7;
    border-radius: 4px;
    padding: 8px;
    margin: 4px 0;
}

/* Media display improvements */
.media-container {
    max-width: 100%;
    border-radius: 8px;
    overflow: hidden;
    box-shadow: 0 2px 8px rgba(0,0,0,0.1);
}

/* Enhanced audio player styling */
audio {
    width: 100%;
    max-width: 500px;
    height: 54px;
    border-radius: 27px;
    outline: none;
    margin: 10px 0;
}

/* Enhanced video player styling */
video {
    width: 100%;
    max-width: 700px;
    height: auto;
    object-fit: contain;
    border-radius: 8px;
    margin: 10px 0;
    box-shadow: 0 4px 6px rgba(0,0,0,0.1);
}

/* Server status indicators */
.server-status {
    display: inline-block;
    padding: 2px 8px;
    border-radius: 12px;
    font-size: 12px;
    font-weight: bold;
}

.server-status.online {
    background: #d4edda;
    color: #155724;
}

.server-status.offline {
    background: #f8d7da;
    color: #721c24;
}

/* Message metadata styling */
.message-metadata {
    font-size: 0.85em;
    color: #666;
    margin-top: 4px;
    padding: 4px 8px;
    background: #f0f0f0;
    border-radius: 4px;
}
"""