"""
Configuration module for Universal MCP Client - Enhanced for GPT-OSS models with full context support
"""
import os
from dataclasses import dataclass
from typing import Optional, Dict, List
import logging
# Set up enhanced logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
@dataclass
class MCPServerConfig:
"""Configuration for an MCP server connection"""
name: str
url: str
description: str
space_id: Optional[str] = None
class AppConfig:
"""Application configuration settings"""
# HuggingFace Configuration
HF_TOKEN = os.getenv("HF_TOKEN")
# OpenAI GPT OSS Models with enhanced configurations
AVAILABLE_MODELS = {
"openai/gpt-oss-120b": {
"name": "GPT OSS 120B",
"description": "117B parameters, 5.1B active - Production use with reasoning",
"size": "120B",
"context_length": 128000, # Full 128k context length
"supports_reasoning": True,
"supports_tool_calling": True,
"active_params": "5.1B"
},
"openai/gpt-oss-20b": {
"name": "GPT OSS 20B",
"description": "21B parameters, 3.6B active - Lower latency with reasoning",
"size": "20B",
"context_length": 128000, # Full 128k context length
"supports_reasoning": True,
"supports_tool_calling": True,
"active_params": "3.6B"
}
}
# Enhanced Inference Providers supporting GPT OSS models
INFERENCE_PROVIDERS = {
"cerebras": {
"name": "Cerebras",
"description": "World-record inference speeds (2-4k tokens/sec for GPT-OSS)",
"supports_120b": True,
"supports_20b": True,
"endpoint_suffix": "cerebras",
"speed": "Very Fast",
"recommended_for": ["production", "high-throughput"],
"max_context_support": 128000 # Full context support
},
"fireworks-ai": {
"name": "Fireworks AI",
"description": "Fast and reliable inference with excellent reliability",
"supports_120b": True,
"supports_20b": True,
"endpoint_suffix": "fireworks-ai",
"speed": "Fast",
"recommended_for": ["production", "general-use"],
"max_context_support": 128000 # Full context support
},
"together-ai": {
"name": "Together AI",
"description": "Collaborative AI inference with good performance",
"supports_120b": True,
"supports_20b": True,
"endpoint_suffix": "together-ai",
"speed": "Fast",
"recommended_for": ["development", "experimentation"],
"max_context_support": 128000 # Full context support
},
"replicate": {
"name": "Replicate",
"description": "Machine learning deployment platform",
"supports_120b": True,
"supports_20b": True,
"endpoint_suffix": "replicate",
"speed": "Medium",
"recommended_for": ["prototyping", "low-volume"],
"max_context_support": 128000 # Full context support
}
}
# Enhanced Model Configuration for GPT-OSS - Utilizing full context
MAX_TOKENS = 128000 # Full context length for GPT-OSS models
# Response token allocation - increased for longer responses
DEFAULT_MAX_RESPONSE_TOKENS = 16384 # Increased from 8192 for longer responses
MIN_RESPONSE_TOKENS = 4096 # Minimum response size
# Context management - optimized for full 128k usage
SYSTEM_PROMPT_RESERVE = 3000 # Reserve for system prompt (includes MCP tool descriptions)
MCP_TOOLS_RESERVE = 2000 # Additional reserve when MCP servers are enabled
# History management - much larger with 128k context
MAX_HISTORY_MESSAGES = 100 # Increased from 50 for better context retention
DEFAULT_HISTORY_MESSAGES = 50 # Default for good performance
# Reasoning configuration
DEFAULT_REASONING_EFFORT = "medium" # low, medium, high
# UI Configuration
GRADIO_THEME = "ocean"
DEBUG_MODE = True
# MCP Server recommendations
OPTIMAL_MCP_SERVER_COUNT = 6 # Recommended maximum for good performance
WARNING_MCP_SERVER_COUNT = 10 # Show warning if more than this
# File Support
SUPPORTED_IMAGE_EXTENSIONS = ['.png', '.jpg', '.jpeg', '.gif', '.webp', '.bmp', '.svg']
SUPPORTED_AUDIO_EXTENSIONS = ['.mp3', '.wav', '.ogg', '.m4a', '.flac', '.aac', '.opus', '.wma']
SUPPORTED_VIDEO_EXTENSIONS = ['.mp4', '.avi', '.mov', '.mkv', '.webm', '.m4v', '.wmv']
SUPPORTED_DOCUMENT_EXTENSIONS = ['.pdf', '.txt', '.docx', '.md', '.rtf', '.odt']
@classmethod
def get_available_models_for_provider(cls, provider_id: str) -> List[str]:
"""Get models available for a specific provider"""
if provider_id not in cls.INFERENCE_PROVIDERS:
return []
provider = cls.INFERENCE_PROVIDERS[provider_id]
available_models = []
for model_id, model_info in cls.AVAILABLE_MODELS.items():
if model_info["size"] == "120B" and provider["supports_120b"]:
available_models.append(model_id)
elif model_info["size"] == "20B" and provider["supports_20b"]:
available_models.append(model_id)
return available_models
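    # Illustrative example (assuming the provider table above is unchanged):
    #   AppConfig.get_available_models_for_provider("cerebras")
    #   -> ["openai/gpt-oss-120b", "openai/gpt-oss-20b"]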
@classmethod
def get_model_endpoint(cls, model_id: str, provider_id: str) -> str:
"""Get the full model endpoint for HF Inference Providers"""
if provider_id not in cls.INFERENCE_PROVIDERS:
raise ValueError(f"Unknown provider: {provider_id}")
provider = cls.INFERENCE_PROVIDERS[provider_id]
return f"{model_id}:{provider['endpoint_suffix']}"
@classmethod
def get_optimal_context_settings(cls, model_id: str, provider_id: str, mcp_servers_count: int = 0) -> Dict[str, int]:
"""Get optimal context settings for a model/provider combination"""
model_info = cls.AVAILABLE_MODELS.get(model_id, {})
provider_info = cls.INFERENCE_PROVIDERS.get(provider_id, {})
# Get the minimum of model and provider context support
model_context = model_info.get("context_length", 128000)
provider_context = provider_info.get("max_context_support", 128000)
context_length = min(model_context, provider_context)
# Calculate reserves based on MCP server count
system_reserve = cls.SYSTEM_PROMPT_RESERVE
if mcp_servers_count > 0:
# Add extra reserve for MCP tools (roughly 300 tokens per server for tool descriptions)
system_reserve += cls.MCP_TOOLS_RESERVE + (mcp_servers_count * 300)
# Dynamic response token allocation based on available context
if context_length >= 100000:
max_response_tokens = cls.DEFAULT_MAX_RESPONSE_TOKENS # 16384
elif context_length >= 50000:
max_response_tokens = 12288
elif context_length >= 20000:
max_response_tokens = 8192
else:
max_response_tokens = cls.MIN_RESPONSE_TOKENS # 4096
# Calculate available context for history
available_context = context_length - system_reserve - max_response_tokens
# Calculate recommended history limit
# Assume average message is ~200 tokens
avg_message_tokens = 200
recommended_history = min(
cls.MAX_HISTORY_MESSAGES,
available_context // avg_message_tokens
)
return {
"max_context": context_length,
"available_context": available_context,
"max_response_tokens": max_response_tokens,
"system_reserve": system_reserve,
"recommended_history_limit": max(10, recommended_history), # At least 10 messages
"context_utilization": f"{((system_reserve + max_response_tokens) / context_length * 100):.1f}% reserved"
}
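    # Illustrative example (values follow directly from the constants above,
    # assuming two MCP servers are connected):
    #   AppConfig.get_optimal_context_settings("openai/gpt-oss-120b", "cerebras", mcp_servers_count=2)
    #   -> {"max_context": 128000, "available_context": 106016,
    #       "max_response_tokens": 16384, "system_reserve": 5600,
    #       "recommended_history_limit": 100, "context_utilization": "17.2% reserved"}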
@classmethod
    def get_all_media_extensions(cls) -> List[str]:
"""Get all supported media file extensions"""
return (cls.SUPPORTED_IMAGE_EXTENSIONS +
cls.SUPPORTED_AUDIO_EXTENSIONS +
cls.SUPPORTED_VIDEO_EXTENSIONS)
@classmethod
def is_image_file(cls, file_path: str) -> bool:
"""Check if file is an image"""
if not file_path:
return False
        return any(file_path.lower().endswith(ext) for ext in cls.SUPPORTED_IMAGE_EXTENSIONS)
@classmethod
def is_audio_file(cls, file_path: str) -> bool:
"""Check if file is an audio file"""
if not file_path:
return False
        return any(file_path.lower().endswith(ext) for ext in cls.SUPPORTED_AUDIO_EXTENSIONS)
@classmethod
def is_video_file(cls, file_path: str) -> bool:
"""Check if file is a video file"""
if not file_path:
return False
        return any(file_path.lower().endswith(ext) for ext in cls.SUPPORTED_VIDEO_EXTENSIONS)
@classmethod
def is_media_file(cls, file_path: str) -> bool:
"""Check if file is any supported media type"""
if not file_path:
return False
        return any(file_path.lower().endswith(ext) for ext in cls.get_all_media_extensions())
@classmethod
def get_provider_recommendation(cls, use_case: str) -> List[str]:
"""Get recommended providers for specific use cases"""
recommendations = {
"production": ["cerebras", "fireworks-ai"],
"development": ["together-ai", "fireworks-ai"],
"experimentation": ["together-ai", "replicate"],
"high-throughput": ["cerebras"],
"cost-effective": ["together-ai", "replicate"],
"maximum-context": ["cerebras", "fireworks-ai"] # Providers with best context support
}
return recommendations.get(use_case, list(cls.INFERENCE_PROVIDERS.keys()))
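    # Illustrative example:
    #   AppConfig.get_provider_recommendation("production") -> ["cerebras", "fireworks-ai"]
    # An unknown use case falls back to every configured provider id.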
# Check for dependencies
try:
import httpx
HTTPX_AVAILABLE = True
except ImportError:
HTTPX_AVAILABLE = False
logger.warning("httpx not available - file upload functionality limited")
try:
import huggingface_hub
HF_HUB_AVAILABLE = True
except ImportError:
HF_HUB_AVAILABLE = False
logger.warning("huggingface_hub not available - login functionality disabled")
# Enhanced CSS Configuration with better media display
CUSTOM_CSS = """
/* Hide Gradio footer */
footer {
display: none !important;
}
/* Make chatbot expand to fill available space */
.gradio-container {
height: 100vh !important;
}
/* Ensure proper flex layout */
.main-content {
display: flex;
flex-direction: column;
height: 100%;
}
/* Input area stays at bottom with minimal padding */
.input-area {
margin-top: auto;
padding-top: 0.25rem !important;
padding-bottom: 0 !important;
margin-bottom: 0 !important;
}
/* Reduce padding around chatbot */
.chatbot {
margin-bottom: 0 !important;
padding-bottom: 0 !important;
}
/* Provider and model selection styling */
.provider-model-selection {
padding: 10px;
border-radius: 8px;
margin-bottom: 10px;
border-left: 4px solid #007bff;
}
/* Login section styling */
.login-section {
padding: 10px;
border-radius: 8px;
margin-bottom: 10px;
border-left: 4px solid #4caf50;
}
/* Tool usage indicator */
.tool-usage {
background: #fff3cd;
border: 1px solid #ffeaa7;
border-radius: 4px;
padding: 8px;
margin: 4px 0;
}
/* Media display improvements */
.media-container {
max-width: 100%;
border-radius: 8px;
overflow: hidden;
box-shadow: 0 2px 8px rgba(0,0,0,0.1);
}
/* Enhanced audio player styling */
audio {
width: 100%;
max-width: 500px;
height: 54px;
border-radius: 27px;
outline: none;
margin: 10px 0;
}
/* Enhanced video player styling */
video {
width: 100%;
max-width: 700px;
height: auto;
object-fit: contain;
border-radius: 8px;
margin: 10px 0;
box-shadow: 0 4px 6px rgba(0,0,0,0.1);
}
/* Server status indicators */
.server-status {
display: inline-block;
padding: 2px 8px;
border-radius: 12px;
font-size: 12px;
font-weight: bold;
}
.server-status.online {
background: #d4edda;
color: #155724;
}
.server-status.offline {
background: #f8d7da;
color: #721c24;
}
/* Message metadata styling */
.message-metadata {
font-size: 0.85em;
color: #666;
margin-top: 4px;
padding: 4px 8px;
background: #f0f0f0;
border-radius: 4px;
}
"""