# File size: 12,785 Bytes
# 7bab86d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
"""
Configuration module for Universal MCP Client - Enhanced for GPT-OSS models with full context support
"""
import os
from dataclasses import dataclass
from typing import Optional, Dict, List
import logging

# Set up logging for the application.
# NOTE: this runs at import time. basicConfig() configures the root logger
# (INFO level, "timestamp - logger name - level - message" format); per the
# standard-library documentation it is a no-op when the root logger already
# has handlers configured.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

@dataclass
class MCPServerConfig:
    """Configuration for an MCP server connection.

    The first three fields are required; ``space_id`` is optional and
    defaults to ``None``.
    """
    # Name identifying the server.
    name: str
    # URL of the server endpoint.
    url: str
    # Free-text description of the server.
    description: str
    # Optional identifier; presumably a Hugging Face Space id -- TODO confirm
    # against the code that constructs these objects.
    space_id: Optional[str] = None

class AppConfig:
    """Application configuration settings.

    All settings are class attributes and all helpers are classmethods, so
    the class is used directly (``AppConfig.MAX_TOKENS``) without being
    instantiated.
    """

    # HuggingFace Configuration (read once, when the class body is executed)
    HF_TOKEN = os.getenv("HF_TOKEN")

    # OpenAI GPT OSS Models with enhanced configurations
    AVAILABLE_MODELS = {
        "openai/gpt-oss-120b": {
            "name": "GPT OSS 120B",
            "description": "117B parameters, 5.1B active - Production use with reasoning",
            "size": "120B",
            "context_length": 128000,  # Full 128k context length
            "supports_reasoning": True,
            "supports_tool_calling": True,
            "active_params": "5.1B"
        },
        "openai/gpt-oss-20b": {
            "name": "GPT OSS 20B",
            "description": "21B parameters, 3.6B active - Lower latency with reasoning",
            "size": "20B",
            "context_length": 128000,  # Full 128k context length
            "supports_reasoning": True,
            "supports_tool_calling": True,
            "active_params": "3.6B"
        }
    }

    # Enhanced Inference Providers supporting GPT OSS models.
    # Each provider advertises per-size support through a "supports_<size>"
    # flag, where <size> is the lower-cased model "size" (e.g. "supports_120b").
    INFERENCE_PROVIDERS = {
        "cerebras": {
            "name": "Cerebras",
            "description": "World-record inference speeds (2-4k tokens/sec for GPT-OSS)",
            "supports_120b": True,
            "supports_20b": True,
            "endpoint_suffix": "cerebras",
            "speed": "Very Fast",
            "recommended_for": ["production", "high-throughput"],
            "max_context_support": 128000  # Full context support
        },
        "fireworks-ai": {
            "name": "Fireworks AI",
            "description": "Fast and reliable inference with excellent reliability",
            "supports_120b": True,
            "supports_20b": True,
            "endpoint_suffix": "fireworks-ai",
            "speed": "Fast",
            "recommended_for": ["production", "general-use"],
            "max_context_support": 128000  # Full context support
        },
        "together-ai": {
            "name": "Together AI",
            "description": "Collaborative AI inference with good performance",
            "supports_120b": True,
            "supports_20b": True,
            "endpoint_suffix": "together-ai",
            "speed": "Fast",
            "recommended_for": ["development", "experimentation"],
            "max_context_support": 128000  # Full context support
        },
        "replicate": {
            "name": "Replicate",
            "description": "Machine learning deployment platform",
            "supports_120b": True,
            "supports_20b": True,
            "endpoint_suffix": "replicate",
            "speed": "Medium",
            "recommended_for": ["prototyping", "low-volume"],
            "max_context_support": 128000  # Full context support
        }
    }

    # Enhanced Model Configuration for GPT-OSS - Utilizing full context
    MAX_TOKENS = 128000  # Full context length for GPT-OSS models

    # Response token allocation - increased for longer responses
    DEFAULT_MAX_RESPONSE_TOKENS = 16384  # Increased from 8192 for longer responses
    MIN_RESPONSE_TOKENS = 4096  # Minimum response size

    # Context management - optimized for full 128k usage
    SYSTEM_PROMPT_RESERVE = 3000  # Reserve for system prompt (includes MCP tool descriptions)
    MCP_TOOLS_RESERVE = 2000  # Additional reserve when MCP servers are enabled

    # History management - much larger with 128k context
    MAX_HISTORY_MESSAGES = 100  # Increased from 50 for better context retention
    DEFAULT_HISTORY_MESSAGES = 50  # Default for good performance

    # Reasoning configuration
    DEFAULT_REASONING_EFFORT = "medium"  # low, medium, high

    # UI Configuration
    GRADIO_THEME = "ocean"
    DEBUG_MODE = True

    # MCP Server recommendations
    OPTIMAL_MCP_SERVER_COUNT = 6  # Recommended maximum for good performance
    WARNING_MCP_SERVER_COUNT = 10  # Show warning if more than this

    # File Support
    SUPPORTED_IMAGE_EXTENSIONS = ['.png', '.jpg', '.jpeg', '.gif', '.webp', '.bmp', '.svg']
    SUPPORTED_AUDIO_EXTENSIONS = ['.mp3', '.wav', '.ogg', '.m4a', '.flac', '.aac', '.opus', '.wma']
    SUPPORTED_VIDEO_EXTENSIONS = ['.mp4', '.avi', '.mov', '.mkv', '.webm', '.m4v', '.wmv']
    SUPPORTED_DOCUMENT_EXTENSIONS = ['.pdf', '.txt', '.docx', '.md', '.rtf', '.odt']

    @classmethod
    def get_available_models_for_provider(cls, provider_id: str) -> List[str]:
        """Return the ids of the models the given provider can serve.

        Args:
            provider_id: Key into ``INFERENCE_PROVIDERS``.

        Returns:
            Model ids from ``AVAILABLE_MODELS`` (in declaration order) whose
            size is flagged as supported by the provider. An unknown
            provider yields an empty list.
        """
        provider = cls.INFERENCE_PROVIDERS.get(provider_id)
        if provider is None:
            return []

        # Derive the flag name from the model size ("120B" -> "supports_120b")
        # instead of hard-coding one branch per size; a provider that lacks
        # the flag is treated as not supporting that size.
        return [
            model_id
            for model_id, model_info in cls.AVAILABLE_MODELS.items()
            if provider.get(f"supports_{model_info['size'].lower()}", False)
        ]

    @classmethod
    def get_model_endpoint(cls, model_id: str, provider_id: str) -> str:
        """Build the ``"<model_id>:<provider suffix>"`` endpoint string.

        Args:
            model_id: Model identifier, used verbatim (it is not validated
                against ``AVAILABLE_MODELS``).
            provider_id: Key into ``INFERENCE_PROVIDERS``.

        Raises:
            ValueError: If ``provider_id`` is not a known provider.
        """
        if provider_id not in cls.INFERENCE_PROVIDERS:
            raise ValueError(f"Unknown provider: {provider_id}")

        provider = cls.INFERENCE_PROVIDERS[provider_id]
        return f"{model_id}:{provider['endpoint_suffix']}"

    @classmethod
    def get_optimal_context_settings(cls, model_id: str, provider_id: str, mcp_servers_count: int = 0) -> Dict[str, object]:
        """Compute a context budget for a model/provider combination.

        Args:
            model_id: Key into ``AVAILABLE_MODELS``; an unknown model falls
                back to a 128000-token context.
            provider_id: Key into ``INFERENCE_PROVIDERS``; an unknown
                provider falls back to a 128000-token context.
            mcp_servers_count: Number of enabled MCP servers. Any positive
                count adds ``MCP_TOOLS_RESERVE`` plus 300 tokens per server
                to the system reserve.

        Returns:
            A dict with integer entries ``max_context``,
            ``available_context`` (never negative), ``max_response_tokens``,
            ``system_reserve`` and ``recommended_history_limit`` (at least
            10), plus the string entry ``context_utilization``.
        """
        model_info = cls.AVAILABLE_MODELS.get(model_id, {})
        provider_info = cls.INFERENCE_PROVIDERS.get(provider_id, {})

        # The usable window is limited by whichever side supports less.
        model_context = model_info.get("context_length", 128000)
        provider_context = provider_info.get("max_context_support", 128000)
        context_length = min(model_context, provider_context)

        # Calculate reserves based on MCP server count
        system_reserve = cls.SYSTEM_PROMPT_RESERVE
        if mcp_servers_count > 0:
            # Add extra reserve for MCP tools (roughly 300 tokens per server for tool descriptions)
            system_reserve += cls.MCP_TOOLS_RESERVE + (mcp_servers_count * 300)

        # Dynamic response token allocation based on available context
        if context_length >= 100000:
            max_response_tokens = cls.DEFAULT_MAX_RESPONSE_TOKENS  # 16384
        elif context_length >= 50000:
            max_response_tokens = 12288
        elif context_length >= 20000:
            max_response_tokens = 8192
        else:
            max_response_tokens = cls.MIN_RESPONSE_TOKENS  # 4096

        # Context left for conversation history. Clamp at zero so that an
        # extreme server count cannot report a negative budget to callers.
        available_context = max(0, context_length - system_reserve - max_response_tokens)

        # Calculate recommended history limit
        # Assume average message is ~200 tokens
        avg_message_tokens = 200
        recommended_history = min(
            cls.MAX_HISTORY_MESSAGES,
            available_context // avg_message_tokens
        )

        return {
            "max_context": context_length,
            "available_context": available_context,
            "max_response_tokens": max_response_tokens,
            "system_reserve": system_reserve,
            "recommended_history_limit": max(10, recommended_history),  # At least 10 messages
            "context_utilization": f"{((system_reserve + max_response_tokens) / context_length * 100):.1f}% reserved"
        }

    @classmethod
    def get_all_media_extensions(cls) -> List[str]:
        """Return image, audio and video extensions as one new list.

        Document extensions are not included.
        """
        return (cls.SUPPORTED_IMAGE_EXTENSIONS +
                cls.SUPPORTED_AUDIO_EXTENSIONS +
                cls.SUPPORTED_VIDEO_EXTENSIONS)

    @classmethod
    def _has_extension(cls, file_path: str, extensions: List[str]) -> bool:
        """Return True when ``file_path`` ends with one of ``extensions``.

        Matching is case-insensitive and anchored to the end of the path, so
        an extension that merely appears somewhere inside the path (for
        example ``notes.png.txt``) does not count. For URLs (anything
        containing ``://``) the query string and fragment are ignored.
        Empty or ``None`` paths return False.
        """
        if not file_path:
            return False

        candidate = file_path.strip().lower()
        if "://" in candidate:
            # Only strip "?..." / "#..." for URLs: both characters are legal
            # inside local file names.
            candidate = candidate.split("?", 1)[0].split("#", 1)[0]
        return candidate.endswith(tuple(extensions))

    @classmethod
    def is_image_file(cls, file_path: str) -> bool:
        """Check if file is an image (by extension, case-insensitive)"""
        return cls._has_extension(file_path, cls.SUPPORTED_IMAGE_EXTENSIONS)

    @classmethod
    def is_audio_file(cls, file_path: str) -> bool:
        """Check if file is an audio file (by extension, case-insensitive)"""
        return cls._has_extension(file_path, cls.SUPPORTED_AUDIO_EXTENSIONS)

    @classmethod
    def is_video_file(cls, file_path: str) -> bool:
        """Check if file is a video file (by extension, case-insensitive)"""
        return cls._has_extension(file_path, cls.SUPPORTED_VIDEO_EXTENSIONS)

    @classmethod
    def is_media_file(cls, file_path: str) -> bool:
        """Check if file is any supported media type (image, audio or video)"""
        return cls._has_extension(file_path, cls.get_all_media_extensions())

    @classmethod
    def get_provider_recommendation(cls, use_case: str) -> List[str]:
        """Get recommended providers for specific use cases.

        Args:
            use_case: One of the keys of the table below.

        Returns:
            The provider ids recommended for ``use_case``; an unrecognised
            use case returns every configured provider id.
        """
        recommendations = {
            "production": ["cerebras", "fireworks-ai"],
            "development": ["together-ai", "fireworks-ai"],
            "experimentation": ["together-ai", "replicate"],
            "high-throughput": ["cerebras"],
            "cost-effective": ["together-ai", "replicate"],
            "maximum-context": ["cerebras", "fireworks-ai"]  # Providers with best context support
        }
        return recommendations.get(use_case, list(cls.INFERENCE_PROVIDERS.keys()))

# Check for optional dependencies.
# Each probe attempts the import at module load time and records the outcome
# in a module-level boolean flag; a missing package is logged as a warning
# rather than raised, so this module still imports without it.
try:
    import httpx
    HTTPX_AVAILABLE = True
except ImportError:
    # httpx is missing: flag it so callers can check before relying on it.
    HTTPX_AVAILABLE = False
    logger.warning("httpx not available - file upload functionality limited")

try:
    import huggingface_hub
    HF_HUB_AVAILABLE = True
except ImportError:
    # huggingface_hub is missing: flag it so callers can check before relying on it.
    HF_HUB_AVAILABLE = False
    logger.warning("huggingface_hub not available - login functionality disabled")

# Custom CSS for the UI, kept as one module-level string.
# Covers: hiding the footer, full-height flex layout, input/chatbot spacing,
# provider/login panels, tool-usage notices, media (audio/video) players,
# server status badges and message metadata.
# NOTE(review): presumably passed to the Gradio app as its custom stylesheet --
# confirm against the UI module that consumes this constant.
CUSTOM_CSS = """
/* Hide Gradio footer */
footer {
    display: none !important;
}
/* Make chatbot expand to fill available space */
.gradio-container {
    height: 100vh !important;
}
/* Ensure proper flex layout */
.main-content {
    display: flex;
    flex-direction: column;
    height: 100%;
}
/* Input area stays at bottom with minimal padding */
.input-area {
    margin-top: auto;
    padding-top: 0.25rem !important;
    padding-bottom: 0 !important;
    margin-bottom: 0 !important;
}
/* Reduce padding around chatbot */
.chatbot {
    margin-bottom: 0 !important;
    padding-bottom: 0 !important;
}
/* Provider and model selection styling */
.provider-model-selection {
    padding: 10px;
    border-radius: 8px;
    margin-bottom: 10px;
    border-left: 4px solid #007bff;
}
/* Login section styling */
.login-section {
    padding: 10px;
    border-radius: 8px;
    margin-bottom: 10px;
    border-left: 4px solid #4caf50;
}
/* Tool usage indicator */
.tool-usage {
    background: #fff3cd;
    border: 1px solid #ffeaa7;
    border-radius: 4px;
    padding: 8px;
    margin: 4px 0;
}
/* Media display improvements */
.media-container {
    max-width: 100%;
    border-radius: 8px;
    overflow: hidden;
    box-shadow: 0 2px 8px rgba(0,0,0,0.1);
}
/* Enhanced audio player styling */
audio {
    width: 100%;
    max-width: 500px;
    height: 54px;
    border-radius: 27px;
    outline: none;
    margin: 10px 0;
}
/* Enhanced video player styling */
video {
    width: 100%;
    max-width: 700px;
    height: auto;
    object-fit: contain;
    border-radius: 8px;
    margin: 10px 0;
    box-shadow: 0 4px 6px rgba(0,0,0,0.1);
}
/* Server status indicators */
.server-status {
    display: inline-block;
    padding: 2px 8px;
    border-radius: 12px;
    font-size: 12px;
    font-weight: bold;
}
.server-status.online {
    background: #d4edda;
    color: #155724;
}
.server-status.offline {
    background: #f8d7da;
    color: #721c24;
}
/* Message metadata styling */
.message-metadata {
    font-size: 0.85em;
    color: #666;
    margin-top: 4px;
    padding: 4px 8px;
    background: #f0f0f0;
    border-radius: 4px;
}
"""