""" | |
Configuration module for Universal MCP Client - Enhanced for GPT-OSS models with full context support | |
""" | |
import os | |
from dataclasses import dataclass | |
from typing import Optional, Dict, List | |
import logging | |
# Set up enhanced logging | |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') | |
logger = logging.getLogger(__name__) | |

@dataclass
class MCPServerConfig:
    """Configuration for an MCP server connection"""
    name: str
    url: str
    description: str
    space_id: Optional[str] = None
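
# Illustrative usage only -- the URL and space_id below are hypothetical examples,
# not endpoints defined by this project:
#   server = MCPServerConfig(
#       name="image-tools",
#       url="https://example-space.hf.space/gradio_api/mcp/sse",
#       description="Example MCP server exposing image tools",
#       space_id="example/image-tools",
#   )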

class AppConfig:
    """Application configuration settings"""

    # HuggingFace Configuration
    HF_TOKEN = os.getenv("HF_TOKEN")

    # OpenAI GPT OSS Models with enhanced configurations
    AVAILABLE_MODELS = {
        "openai/gpt-oss-120b": {
            "name": "GPT OSS 120B",
            "description": "117B parameters, 5.1B active - Production use with reasoning",
            "size": "120B",
            "context_length": 128000,  # Full 128k context length
            "supports_reasoning": True,
            "supports_tool_calling": True,
            "active_params": "5.1B"
        },
        "openai/gpt-oss-20b": {
            "name": "GPT OSS 20B",
            "description": "21B parameters, 3.6B active - Lower latency with reasoning",
            "size": "20B",
            "context_length": 128000,  # Full 128k context length
            "supports_reasoning": True,
            "supports_tool_calling": True,
            "active_params": "3.6B"
        }
    }

    # Enhanced Inference Providers supporting GPT OSS models
    INFERENCE_PROVIDERS = {
        "cerebras": {
            "name": "Cerebras",
            "description": "World-record inference speeds (2-4k tokens/sec for GPT-OSS)",
            "supports_120b": True,
            "supports_20b": True,
            "endpoint_suffix": "cerebras",
            "speed": "Very Fast",
            "recommended_for": ["production", "high-throughput"],
            "max_context_support": 128000  # Full context support
        },
        "fireworks-ai": {
            "name": "Fireworks AI",
            "description": "Fast inference with excellent reliability",
            "supports_120b": True,
            "supports_20b": True,
            "endpoint_suffix": "fireworks-ai",
            "speed": "Fast",
            "recommended_for": ["production", "general-use"],
            "max_context_support": 128000  # Full context support
        },
        "together-ai": {
            "name": "Together AI",
            "description": "Collaborative AI inference with good performance",
            "supports_120b": True,
            "supports_20b": True,
            "endpoint_suffix": "together-ai",
            "speed": "Fast",
            "recommended_for": ["development", "experimentation"],
            "max_context_support": 128000  # Full context support
        },
        "replicate": {
            "name": "Replicate",
            "description": "Machine learning deployment platform",
            "supports_120b": True,
            "supports_20b": True,
            "endpoint_suffix": "replicate",
            "speed": "Medium",
            "recommended_for": ["prototyping", "low-volume"],
            "max_context_support": 128000  # Full context support
        }
    }

    # Enhanced Model Configuration for GPT-OSS - utilizing the full context window
    MAX_TOKENS = 128000  # Full context length for GPT-OSS models

    # Response token allocation - increased for longer responses
    DEFAULT_MAX_RESPONSE_TOKENS = 16384  # Increased from 8192 for longer responses
    MIN_RESPONSE_TOKENS = 4096  # Minimum response size

    # Context management - optimized for full 128k usage
    SYSTEM_PROMPT_RESERVE = 3000  # Reserve for system prompt (includes MCP tool descriptions)
    MCP_TOOLS_RESERVE = 2000  # Additional reserve when MCP servers are enabled

    # History management - much larger with 128k context
    MAX_HISTORY_MESSAGES = 100  # Increased from 50 for better context retention
    DEFAULT_HISTORY_MESSAGES = 50  # Default for good performance

    # Reasoning configuration
    DEFAULT_REASONING_EFFORT = "medium"  # low, medium, high

    # UI Configuration
    GRADIO_THEME = "ocean"
    DEBUG_MODE = True

    # MCP Server recommendations
    OPTIMAL_MCP_SERVER_COUNT = 6  # Recommended maximum for good performance
    WARNING_MCP_SERVER_COUNT = 10  # Show warning if more than this

    # File Support
    SUPPORTED_IMAGE_EXTENSIONS = ['.png', '.jpg', '.jpeg', '.gif', '.webp', '.bmp', '.svg']
    SUPPORTED_AUDIO_EXTENSIONS = ['.mp3', '.wav', '.ogg', '.m4a', '.flac', '.aac', '.opus', '.wma']
    SUPPORTED_VIDEO_EXTENSIONS = ['.mp4', '.avi', '.mov', '.mkv', '.webm', '.m4v', '.wmv']
    SUPPORTED_DOCUMENT_EXTENSIONS = ['.pdf', '.txt', '.docx', '.md', '.rtf', '.odt']

    @classmethod
    def get_available_models_for_provider(cls, provider_id: str) -> List[str]:
        """Get models available for a specific provider"""
        if provider_id not in cls.INFERENCE_PROVIDERS:
            return []
        provider = cls.INFERENCE_PROVIDERS[provider_id]
        available_models = []
        for model_id, model_info in cls.AVAILABLE_MODELS.items():
            if model_info["size"] == "120B" and provider["supports_120b"]:
                available_models.append(model_id)
            elif model_info["size"] == "20B" and provider["supports_20b"]:
                available_models.append(model_id)
        return available_models

    @classmethod
    def get_model_endpoint(cls, model_id: str, provider_id: str) -> str:
        """Get the full model endpoint for HF Inference Providers"""
        if provider_id not in cls.INFERENCE_PROVIDERS:
            raise ValueError(f"Unknown provider: {provider_id}")
        provider = cls.INFERENCE_PROVIDERS[provider_id]
        return f"{model_id}:{provider['endpoint_suffix']}"

    @classmethod
    def get_optimal_context_settings(cls, model_id: str, provider_id: str, mcp_servers_count: int = 0) -> Dict[str, int]:
        """Get optimal context settings for a model/provider combination"""
        model_info = cls.AVAILABLE_MODELS.get(model_id, {})
        provider_info = cls.INFERENCE_PROVIDERS.get(provider_id, {})

        # Use the smaller of the model and provider context limits
        model_context = model_info.get("context_length", 128000)
        provider_context = provider_info.get("max_context_support", 128000)
        context_length = min(model_context, provider_context)

        # Calculate reserves based on MCP server count
        system_reserve = cls.SYSTEM_PROMPT_RESERVE
        if mcp_servers_count > 0:
            # Add extra reserve for MCP tools (roughly 300 tokens per server for tool descriptions)
            system_reserve += cls.MCP_TOOLS_RESERVE + (mcp_servers_count * 300)

        # Dynamic response token allocation based on available context
        if context_length >= 100000:
            max_response_tokens = cls.DEFAULT_MAX_RESPONSE_TOKENS  # 16384
        elif context_length >= 50000:
            max_response_tokens = 12288
        elif context_length >= 20000:
            max_response_tokens = 8192
        else:
            max_response_tokens = cls.MIN_RESPONSE_TOKENS  # 4096

        # Calculate available context for history
        available_context = context_length - system_reserve - max_response_tokens

        # Calculate recommended history limit, assuming an average message is ~200 tokens
        avg_message_tokens = 200
        recommended_history = min(
            cls.MAX_HISTORY_MESSAGES,
            available_context // avg_message_tokens
        )

        return {
            "max_context": context_length,
            "available_context": available_context,
            "max_response_tokens": max_response_tokens,
            "system_reserve": system_reserve,
            "recommended_history_limit": max(10, recommended_history),  # At least 10 messages
            "context_utilization": f"{((system_reserve + max_response_tokens) / context_length * 100):.1f}% reserved"
        }
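
    # Worked example (numbers follow directly from the constants above): for
    # "openai/gpt-oss-120b" on "cerebras" with 2 MCP servers, context_length is
    # min(128000, 128000) = 128000, system_reserve is 3000 + 2000 + 2*300 = 5600,
    # max_response_tokens is 16384, so available_context = 128000 - 5600 - 16384
    # = 106016, recommended_history_limit = min(100, 106016 // 200) = 100, and
    # roughly 17.2% of the window is reported as reserved.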

    @classmethod
    def get_all_media_extensions(cls):
        """Get all supported media file extensions"""
        return (cls.SUPPORTED_IMAGE_EXTENSIONS +
                cls.SUPPORTED_AUDIO_EXTENSIONS +
                cls.SUPPORTED_VIDEO_EXTENSIONS)

    @classmethod
    def is_image_file(cls, file_path: str) -> bool:
        """Check if file is an image"""
        if not file_path:
            return False
        # Match on the file suffix rather than a substring anywhere in the path
        return any(file_path.lower().endswith(ext) for ext in cls.SUPPORTED_IMAGE_EXTENSIONS)

    @classmethod
    def is_audio_file(cls, file_path: str) -> bool:
        """Check if file is an audio file"""
        if not file_path:
            return False
        return any(file_path.lower().endswith(ext) for ext in cls.SUPPORTED_AUDIO_EXTENSIONS)

    @classmethod
    def is_video_file(cls, file_path: str) -> bool:
        """Check if file is a video file"""
        if not file_path:
            return False
        return any(file_path.lower().endswith(ext) for ext in cls.SUPPORTED_VIDEO_EXTENSIONS)

    @classmethod
    def is_media_file(cls, file_path: str) -> bool:
        """Check if file is any supported media type"""
        if not file_path:
            return False
        return any(file_path.lower().endswith(ext) for ext in cls.get_all_media_extensions())

    @classmethod
    def get_provider_recommendation(cls, use_case: str) -> List[str]:
        """Get recommended providers for specific use cases"""
        recommendations = {
            "production": ["cerebras", "fireworks-ai"],
            "development": ["together-ai", "fireworks-ai"],
            "experimentation": ["together-ai", "replicate"],
            "high-throughput": ["cerebras"],
            "cost-effective": ["together-ai", "replicate"],
            "maximum-context": ["cerebras", "fireworks-ai"]  # Providers with best context support
        }
        return recommendations.get(use_case, list(cls.INFERENCE_PROVIDERS.keys()))
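
    # Note: an unknown use case falls back to every configured provider, e.g.
    # get_provider_recommendation("something-else") returns all four provider ids.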


# Check for dependencies
try:
    import httpx
    HTTPX_AVAILABLE = True
except ImportError:
    HTTPX_AVAILABLE = False
    logger.warning("httpx not available - file upload functionality limited")

try:
    import huggingface_hub
    HF_HUB_AVAILABLE = True
except ImportError:
    HF_HUB_AVAILABLE = False
    logger.warning("huggingface_hub not available - login functionality disabled")

# Enhanced CSS Configuration with better media display
CUSTOM_CSS = """
/* Hide Gradio footer */
footer {
    display: none !important;
}

/* Make chatbot expand to fill available space */
.gradio-container {
    height: 100vh !important;
}

/* Ensure proper flex layout */
.main-content {
    display: flex;
    flex-direction: column;
    height: 100%;
}

/* Input area stays at bottom with minimal padding */
.input-area {
    margin-top: auto;
    padding-top: 0.25rem !important;
    padding-bottom: 0 !important;
    margin-bottom: 0 !important;
}

/* Reduce padding around chatbot */
.chatbot {
    margin-bottom: 0 !important;
    padding-bottom: 0 !important;
}

/* Provider and model selection styling */
.provider-model-selection {
    padding: 10px;
    border-radius: 8px;
    margin-bottom: 10px;
    border-left: 4px solid #007bff;
}

/* Login section styling */
.login-section {
    padding: 10px;
    border-radius: 8px;
    margin-bottom: 10px;
    border-left: 4px solid #4caf50;
}

/* Tool usage indicator */
.tool-usage {
    background: #fff3cd;
    border: 1px solid #ffeaa7;
    border-radius: 4px;
    padding: 8px;
    margin: 4px 0;
}

/* Media display improvements */
.media-container {
    max-width: 100%;
    border-radius: 8px;
    overflow: hidden;
    box-shadow: 0 2px 8px rgba(0,0,0,0.1);
}

/* Enhanced audio player styling */
audio {
    width: 100%;
    max-width: 500px;
    height: 54px;
    border-radius: 27px;
    outline: none;
    margin: 10px 0;
}

/* Enhanced video player styling */
video {
    width: 100%;
    max-width: 700px;
    height: auto;
    object-fit: contain;
    border-radius: 8px;
    margin: 10px 0;
    box-shadow: 0 4px 6px rgba(0,0,0,0.1);
}

/* Server status indicators */
.server-status {
    display: inline-block;
    padding: 2px 8px;
    border-radius: 12px;
    font-size: 12px;
    font-weight: bold;
}

.server-status.online {
    background: #d4edda;
    color: #155724;
}

.server-status.offline {
    background: #f8d7da;
    color: #721c24;
}

/* Message metadata styling */
.message-metadata {
    font-size: 0.85em;
    color: #666;
    margin-top: 4px;
    padding: 4px 8px;
    background: #f0f0f0;
    border-radius: 4px;
}
"""