ReallyFloppyPenguin committed (verified)
Commit a769262 · Parent(s): 3e3b2ba

Update app.py

Files changed (1):
  1. app.py +662 -244

app.py CHANGED
@@ -7,176 +7,575 @@ from typing import List, Dict, Optional
7
  import time
8
  from datetime import datetime
9
 
10
- class HuggingFaceModelExplorer:
11
  def __init__(self):
12
  self.hf_token = os.getenv("HF_TOKEN")
 
13
  if not self.hf_token:
14
- raise ValueError("HF_TOKEN environment variable is required")
15
 
16
  self.headers = {"Authorization": f"Bearer {self.hf_token}"}
17
- self.base_url = "https://huggingface.co/api"
18
 
19
- def get_inference_endpoints(self) -> List[Dict]:
20
- """Fetch all available inference endpoints"""
21
- try:
22
- # Get serverless inference API models
23
- url = f"{self.base_url}/models"
24
- params = {
25
- "pipeline_tag": None,
26
- "library": None,
27
- "sort": "downloads",
28
- "direction": -1,
29
- "limit": 100,
30
- "full": True,
31
- "config": True
32
- }
33
-
34
- response = requests.get(url, headers=self.headers, params=params)
35
- response.raise_for_status()
36
-
37
- models = response.json()
38
-
39
- # Filter models that support inference API
40
- inference_models = []
41
- for model in models:
42
- if self._supports_inference_api(model):
43
- inference_models.append({
44
- "id": model.get("id", "Unknown"),
45
- "pipeline_tag": model.get("pipeline_tag", "Unknown"),
46
- "library_name": model.get("library_name", "Unknown"),
47
- "downloads": model.get("downloads", 0),
48
- "likes": model.get("likes", 0),
49
- "created_at": model.get("createdAt", "Unknown"),
50
- "updated_at": model.get("lastModified", "Unknown"),
51
- "tags": model.get("tags", []),
52
- "inference_status": self._check_inference_status(model.get("id"))
53
- })
54
-
55
- return inference_models
56
 
57
- except Exception as e:
58
- print(f"Error fetching inference endpoints: {e}")
59
- return []
60
-
61
- def _supports_inference_api(self, model: Dict) -> bool:
62
- """Check if a model supports the inference API"""
63
- # Models with these pipeline tags typically support inference API
64
- supported_pipelines = {
65
- "text-generation", "text2text-generation", "fill-mask",
66
- "token-classification", "question-answering", "summarization",
67
- "translation", "text-classification", "conversational",
68
- "image-classification", "object-detection", "image-segmentation",
69
- "text-to-image", "image-to-text", "automatic-speech-recognition",
70
- "audio-classification", "voice-activity-detection",
71
- "depth-estimation", "feature-extraction"
72
- }
73
 
74
- pipeline_tag = model.get("pipeline_tag")
75
- return pipeline_tag in supported_pipelines
76
 
77
- def _check_inference_status(self, model_id: str) -> str:
78
- """Check if inference API is currently available for a model"""
79
  try:
80
- url = f"https://api-inference.huggingface.co/models/{model_id}"
81
- response = requests.get(url, headers=self.headers, timeout=5)
82
 
83
- if response.status_code == 200:
84
- return " Available"
85
- elif response.status_code == 503:
86
- return "🔄 Loading"
87
- else:
88
- return "❌ Unavailable"
89
- except:
90
- return "❓ Unknown"
91
-
92
- def get_dedicated_endpoints(self) -> List[Dict]:
93
- """Fetch dedicated inference endpoints (requires paid plan)"""
94
- try:
95
- url = f"{self.base_url}/inference-endpoints"
96
- response = requests.get(url, headers=self.headers)
97
 
98
- if response.status_code == 200:
99
- endpoints = response.json()
100
- return [{
101
- "name": ep.get("name", "Unknown"),
102
- "model_id": ep.get("model", {}).get("repository", "Unknown"),
103
- "status": ep.get("status", "Unknown"),
104
- "created_at": ep.get("created_at", "Unknown"),
105
- "updated_at": ep.get("updated_at", "Unknown"),
106
- "compute": ep.get("compute", {}),
107
- "url": ep.get("url", "")
108
- } for ep in endpoints]
109
- else:
110
- return []
111
- except Exception as e:
112
- print(f"Error fetching dedicated endpoints: {e}")
113
- return []
114
 
115
- def test_model_inference(self, model_id: str, input_text: str = "Hello, how are you?") -> Dict:
116
- """Test inference on a specific model"""
117
  try:
118
- url = f"https://api-inference.huggingface.co/models/{model_id}"
119
-
120
- # Determine appropriate payload based on model type
121
- payload = {"inputs": input_text}
122
-
123
- response = requests.post(url, headers=self.headers, json=payload, timeout=30)
124
 
125
- if response.status_code == 200:
126
- result = response.json()
127
- return {
128
- "status": "success",
129
- "result": result,
130
- "response_time": response.elapsed.total_seconds()
131
- }
132
  else:
133
  return {
134
  "status": "error",
135
- "error": f"HTTP {response.status_code}: {response.text}",
136
- "response_time": response.elapsed.total_seconds()
137
  }
138
 
139
  except Exception as e:
140
  return {
141
- "status": "error",
142
  "error": str(e),
143
- "response_time": None
144
  }
145
 
146
  def create_interface():
147
- explorer = HuggingFaceModelExplorer()
148
 
149
- def refresh_serverless_models():
150
- """Refresh the list of serverless inference models"""
151
- models = explorer.get_inference_endpoints()
152
  if not models:
153
- return "No models found or error occurred"
154
 
155
  df = pd.DataFrame(models)
156
  return df
157
 
158
- def refresh_dedicated_endpoints():
159
- """Refresh the list of dedicated inference endpoints"""
160
- endpoints = explorer.get_dedicated_endpoints()
161
- if not endpoints:
162
- return "No dedicated endpoints found (requires paid plan) or error occurred"
163
 
164
- df = pd.DataFrame(endpoints)
165
  return df
166
 
167
  def test_model(model_id: str, test_input: str):
168
  """Test inference on a selected model"""
169
- if not model_id.strip():
170
- return "Please enter a model ID"
171
 
172
  if not test_input.strip():
173
  test_input = "Hello, how are you today?"
174
 
175
  result = explorer.test_model_inference(model_id, test_input)
176
 
 
 
177
  if result["status"] == "success":
178
  return f"""
179
  **Model:** {model_id}
180
  **Status:** ✅ Success
181
  **Response Time:** {result['response_time']:.2f}s
182
 
@@ -184,101 +583,142 @@ def create_interface():
184
  ```json
185
  {json.dumps(result['result'], indent=2)}
186
  ```
187
  """
188
  else:
189
  return f"""
190
  **Model:** {model_id}
191
  **Status:** ❌ Error
192
- **Response Time:** {result['response_time'] if result['response_time'] else 'N/A'}s
193
 
194
  **Error:**
195
  {result['error']}
196
  """
197
 
198
- def search_models(query: str, pipeline_filter: str = "All"):
199
- """Search models by name or tags"""
200
- models = explorer.get_inference_endpoints()
201
-
202
- if query:
203
- models = [m for m in models if query.lower() in m['id'].lower() or
204
- any(query.lower() in tag.lower() for tag in m['tags'])]
205
-
206
- if pipeline_filter != "All":
207
- models = [m for m in models if m['pipeline_tag'] == pipeline_filter]
208
 
209
- if not models:
210
- return "No models found matching your criteria"
211
 
212
- df = pd.DataFrame(models)
213
- return df
214
 
215
  # Create Gradio interface
216
- with gr.Blocks(title="🤗 HuggingFace Inference API Explorer", theme=gr.themes.Soft()) as demo:
217
  gr.Markdown("""
218
- # 🤗 HuggingFace Inference API Explorer
219
 
220
- Explore all available models on HuggingFace Inference API providers!
221
 
222
- This space showcases:
223
- - **Serverless Inference API**: Free tier models available through HF's inference API
224
- - **Dedicated Inference Endpoints**: Private endpoints (requires paid plan)
225
- - **Model Testing**: Test any model directly from the interface
226
 
227
  ---
228
  """)
229
 
230
  with gr.Tabs():
231
- # Serverless Models Tab
232
- with gr.TabItem("🚀 Serverless Models"):
233
- gr.Markdown("### Available Serverless Inference API Models")
234
 
235
- with gr.Row():
236
- search_query = gr.Textbox(
237
- placeholder="Search models by name or tags...",
238
- label="Search Query"
239
- )
240
- pipeline_filter = gr.Dropdown(
241
- choices=["All", "text-generation", "text-classification", "question-answering",
242
- "summarization", "translation", "image-classification", "text-to-image"],
243
- value="All",
244
- label="Pipeline Filter"
245
- )
246
- search_btn = gr.Button("🔍 Search Models")
247
 
248
- refresh_serverless_btn = gr.Button("🔄 Refresh All Models", variant="primary")
249
- serverless_output = gr.Dataframe(
250
- headers=["Model ID", "Pipeline", "Library", "Downloads", "Likes", "Status"],
251
- label="Serverless Models"
252
  )
253
 
254
- search_btn.click(
255
- search_models,
256
- inputs=[search_query, pipeline_filter],
257
- outputs=serverless_output
258
  )
259
- refresh_serverless_btn.click(refresh_serverless_models, outputs=serverless_output)
260
 
261
- # Dedicated Endpoints Tab
262
- with gr.TabItem("🏢 Dedicated Endpoints"):
263
- gr.Markdown("### Dedicated Inference Endpoints (Requires Paid Plan)")
264
 
265
- refresh_dedicated_btn = gr.Button("🔄 Refresh Dedicated Endpoints", variant="primary")
266
- dedicated_output = gr.Dataframe(
267
- headers=["Name", "Model ID", "Status", "Created", "URL"],
268
- label="Dedicated Endpoints"
269
  )
270
 
271
- refresh_dedicated_btn.click(refresh_dedicated_endpoints, outputs=dedicated_output)
272
 
273
  # Model Testing Tab
274
  with gr.TabItem("🧪 Test Models"):
275
- gr.Markdown("### Test Model Inference")
276
 
277
  with gr.Row():
278
- model_id_input = gr.Textbox(
279
- placeholder="e.g., microsoft/DialoGPT-medium",
280
- label="Model ID",
281
- info="Enter the full model ID from HuggingFace"
282
  )
283
  test_input = gr.Textbox(
284
  placeholder="Hello, how are you today?",
@@ -287,77 +727,55 @@ def create_interface():
287
  )
288
 
289
  test_btn = gr.Button("🚀 Test Model", variant="primary")
290
- test_output = gr.Markdown(label="Test Results")
291
 
292
  test_btn.click(
293
  test_model,
294
- inputs=[model_id_input, test_input],
295
  outputs=test_output
296
  )
297
 
298
- # Statistics Tab
299
- with gr.TabItem("📊 Statistics"):
300
- gr.Markdown("### Inference API Statistics")
301
-
302
- stats_btn = gr.Button("📈 Generate Statistics", variant="primary")
303
 
304
- def generate_stats():
305
- models = explorer.get_inference_endpoints()
306
- if not models:
307
- return "No data available"
308
-
309
- total_models = len(models)
310
- pipelines = {}
311
- libraries = {}
312
- statuses = {}
313
-
314
- for model in models:
315
- # Count pipelines
316
- pipeline = model['pipeline_tag']
317
- pipelines[pipeline] = pipelines.get(pipeline, 0) + 1
318
-
319
- # Count libraries
320
- library = model['library_name']
321
- libraries[library] = libraries.get(library, 0) + 1
322
-
323
- # Count statuses
324
- status = model['inference_status']
325
- statuses[status] = statuses.get(status, 0) + 1
326
-
327
- # Sort by count
328
- top_pipelines = sorted(pipelines.items(), key=lambda x: x[1], reverse=True)[:10]
329
- top_libraries = sorted(libraries.items(), key=lambda x: x[1], reverse=True)[:10]
330
-
331
- stats_text = f"""
332
- ## 📊 HuggingFace Inference API Statistics
333
-
334
- **Total Models Available:** {total_models}
335
-
336
- ### Top Pipeline Tags:
337
- {chr(10).join([f"- **{pipeline}**: {count} models" for pipeline, count in top_pipelines])}
338
-
339
- ### Top Libraries:
340
- {chr(10).join([f"- **{library}**: {count} models" for library, count in top_libraries])}
341
-
342
- ### Inference Status Distribution:
343
- {chr(10).join([f"- **{status}**: {count} models" for status, count in statuses.items()])}
344
-
345
- *Last updated: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}*
346
- """
347
- return stats_text
348
 
349
- stats_output = gr.Markdown()
350
- stats_btn.click(generate_stats, outputs=stats_output)
351
 
352
  # Footer
353
- gr.Markdown("""
354
  ---
355
 
356
- **Note:** This space requires a HuggingFace token set as the `HF_TOKEN` environment variable.
357
 
358
- - 🌟 Star this space if you find it useful!
359
- - 🐛 Report issues on the Community tab
360
- - 📚 Learn more about [HuggingFace Inference API](https://huggingface.co/docs/api-inference/index)
361
  """)
362
 
363
  return demo
@@ -370,6 +788,6 @@ if __name__ == "__main__":
370
  server_port=7860,
371
  share=False
372
  )
373
- except ValueError as e:
374
- print(f"Error: {e}")
375
- print("Please set the HF_TOKEN environment variable with your HuggingFace token.")
 
7
  import time
8
  from datetime import datetime
9
 
10
+ # Updated dictionary of allowed models with current HF Inference Providers
11
+ ALLOWED_MODELS = {
12
+ # Text Generation Models - HF Inference API
13
+ "microsoft/DialoGPT-medium": {
14
+ "provider": "HF Inference",
15
+ "pipeline": "text-generation",
16
+ "description": "Conversational AI model for dialog generation",
17
+ "endpoint": "https://api-inference.huggingface.co/models/microsoft/DialoGPT-medium",
18
+ "api_format": "hf_inference"
19
+ },
20
+ "meta-llama/Llama-3.1-8B-Instruct": {
21
+ "provider": "HF Inference",
22
+ "pipeline": "text-generation",
23
+ "description": "Meta's Llama 3.1 8B Instruct model",
24
+ "endpoint": "https://api-inference.huggingface.co/models/meta-llama/Llama-3.1-8B-Instruct",
25
+ "api_format": "hf_inference"
26
+ },
27
+ "deepseek-ai/DeepSeek-V3-0324": {
28
+ "provider": "HF Inference",
29
+ "pipeline": "text-generation",
30
+ "description": "DeepSeek V3 state-of-the-art conversational model",
31
+ "endpoint": "https://router.huggingface.co/v1/chat/completions",
32
+ "api_format": "openai_compatible"
33
+ },
34
+
35
+ # Cerebras Models (Chat completion LLM only)
36
+ "meta-llama/Llama-3.3-70B-Instruct": {
37
+ "provider": "Cerebras",
38
+ "pipeline": "text-generation",
39
+ "description": "Meta's Llama 3.3 70B Instruct model via Cerebras ultra-fast LPUs",
40
+ "endpoint": "https://router.huggingface.co/v1/chat/completions",
41
+ "api_format": "openai_compatible"
42
+ },
43
+
44
+ # Cohere Models (Chat completion LLM + VLM)
45
+ "cohere/command-r-plus": {
46
+ "provider": "Cohere",
47
+ "pipeline": "text-generation",
48
+ "description": "Cohere's Command R+ enterprise-grade NLP model",
49
+ "endpoint": "https://router.huggingface.co/v1/chat/completions",
50
+ "api_format": "openai_compatible"
51
+ },
52
+
53
+ # Fal AI Models (Text-to-Image, Text-to-Video, Speech-to-Text)
54
+ "black-forest-labs/FLUX.1-schnell": {
55
+ "provider": "Fal AI",
56
+ "pipeline": "text-to-image",
57
+ "description": "FLUX.1 schnell model for fast image generation via Fal AI",
58
+ "endpoint": "https://router.huggingface.co/v1/text-to-image",
59
+ "api_format": "hf_router"
60
+ },
61
+
62
+ # Featherless AI Models (Chat completion LLM + VLM)
63
+ "meta-llama/Llama-3.1-70B-Instruct": {
64
+ "provider": "Featherless AI",
65
+ "pipeline": "text-generation",
66
+ "description": "Meta's Llama 3.1 70B Instruct via Featherless AI",
67
+ "endpoint": "https://router.huggingface.co/v1/chat/completions",
68
+ "api_format": "openai_compatible"
69
+ },
70
+
71
+ # Fireworks Models (Chat completion LLM + VLM)
72
+ "accounts/fireworks/models/llama-v3p1-8b-instruct": {
73
+ "provider": "Fireworks",
74
+ "pipeline": "text-generation",
75
+ "description": "Llama 3.1 8B Instruct via Fireworks AI production-ready serving",
76
+ "endpoint": "https://router.huggingface.co/v1/chat/completions",
77
+ "api_format": "openai_compatible"
78
+ },
79
+
80
+ # Groq Models (Chat completion LLM only)
81
+ "deepseek-ai/DeepSeek-R1": {
82
+ "provider": "Groq",
83
+ "pipeline": "text-generation",
84
+ "description": "DeepSeek R1 model via Groq hardware acceleration",
85
+ "endpoint": "https://router.huggingface.co/v1/chat/completions",
86
+ "api_format": "openai_compatible"
87
+ },
88
+
89
+ # Hyperbolic Models (Chat completion LLM + VLM)
90
+ "meta-llama/Meta-Llama-3-8B-Instruct": {
91
+ "provider": "Hyperbolic",
92
+ "pipeline": "text-generation",
93
+ "description": "Meta's Llama 3 8B Instruct via Hyperbolic",
94
+ "endpoint": "https://router.huggingface.co/v1/chat/completions",
95
+ "api_format": "openai_compatible"
96
+ },
97
+
98
+ # Nebius Models (Chat completion LLM + VLM, Feature Extraction, Text-to-Image)
99
+ "mistralai/Mixtral-8x7B-Instruct-v0.1": {
100
+ "provider": "Nebius",
101
+ "pipeline": "text-generation",
102
+ "description": "Mistral's Mixtral 8x7B Instruct via Nebius cloud platform",
103
+ "endpoint": "https://router.huggingface.co/v1/chat/completions",
104
+ "api_format": "openai_compatible"
105
+ },
106
+
107
+ # Novita Models (Chat completion LLM + VLM, Text-to-Video)
108
+ "Qwen/Qwen2.5-72B-Instruct": {
109
+ "provider": "Novita",
110
+ "pipeline": "text-generation",
111
+ "description": "Qwen 2.5 72B Instruct via Novita",
112
+ "endpoint": "https://router.huggingface.co/v1/chat/completions",
113
+ "api_format": "openai_compatible"
114
+ },
115
+
116
+ # Nscale Models (Chat completion LLM + VLM, Feature Extraction, Text-to-Image)
117
+ "microsoft/Phi-3-medium-4k-instruct": {
118
+ "provider": "Nscale",
119
+ "pipeline": "text-generation",
120
+ "description": "Microsoft Phi-3 Medium via Nscale",
121
+ "endpoint": "https://router.huggingface.co/v1/chat/completions",
122
+ "api_format": "openai_compatible"
123
+ },
124
+
125
+ # Replicate Models (Text-to-Image, Text-to-Video, Speech-to-Text)
126
+ "stabilityai/stable-diffusion-xl-base-1.0": {
127
+ "provider": "Replicate",
128
+ "pipeline": "text-to-image",
129
+ "description": "Stable Diffusion XL via Replicate cloud platform",
130
+ "endpoint": "https://router.huggingface.co/v1/text-to-image",
131
+ "api_format": "hf_router"
132
+ },
133
+
134
+ # SambaNova Models (Chat completion LLM, Feature Extraction)
135
+ "meta-llama/Meta-Llama-3.1-405B-Instruct": {
136
+ "provider": "SambaNova",
137
+ "pipeline": "text-generation",
138
+ "description": "Meta's Llama 3.1 405B Instruct via SambaNova",
139
+ "endpoint": "https://router.huggingface.co/v1/chat/completions",
140
+ "api_format": "openai_compatible"
141
+ },
142
+
143
+ # Together AI Models (Chat completion LLM + VLM, Text-to-Image)
144
+ "meta-llama/Meta-Llama-3-70B-Instruct": {
145
+ "provider": "Together",
146
+ "pipeline": "text-generation",
147
+ "description": "Meta's Llama 3 70B Instruct via Together AI high-performance inference",
148
+ "endpoint": "https://router.huggingface.co/v1/chat/completions",
149
+ "api_format": "openai_compatible"
150
+ },
151
+
152
+ # HF Inference - Additional Models for various tasks
153
+ "black-forest-labs/FLUX.1-dev": {
154
+ "provider": "HF Inference",
155
+ "pipeline": "text-to-image",
156
+ "description": "FLUX.1 development model for high-quality text-to-image generation",
157
+ "endpoint": "https://api-inference.huggingface.co/models/black-forest-labs/FLUX.1-dev",
158
+ "api_format": "hf_inference"
159
+ },
160
+ "openai/whisper-large-v3": {
161
+ "provider": "HF Inference",
162
+ "pipeline": "automatic-speech-recognition",
163
+ "description": "Whisper Large V3 for speech recognition",
164
+ "endpoint": "https://api-inference.huggingface.co/models/openai/whisper-large-v3",
165
+ "api_format": "hf_inference"
166
+ },
167
+ "sentence-transformers/all-MiniLM-L6-v2": {
168
+ "provider": "HF Inference",
169
+ "pipeline": "feature-extraction",
170
+ "description": "Sentence transformer for embeddings and semantic search",
171
+ "endpoint": "https://api-inference.huggingface.co/models/sentence-transformers/all-MiniLM-L6-v2",
172
+ "api_format": "hf_inference"
173
+ },
174
+ "cardiffnlp/twitter-roberta-base-sentiment-latest": {
175
+ "provider": "HF Inference",
176
+ "pipeline": "text-classification",
177
+ "description": "Sentiment analysis model trained on Twitter data",
178
+ "endpoint": "https://api-inference.huggingface.co/models/cardiffnlp/twitter-roberta-base-sentiment-latest",
179
+ "api_format": "hf_inference"
180
+ }
181
+ }
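For orientation, here is a minimal standalone sketch of how one of the `openai_compatible` entries above would be called; the endpoint, headers, and payload shape mirror the `_test_openai_compatible_model` helper later in this diff, and it assumes `HF_TOKEN` is set and has access to the routed provider (the helper name is illustrative, not part of the app):

```python
import os
import requests

def quick_chat_completion(model_id: str, prompt: str) -> dict:
    """Illustrative call to the OpenAI-compatible HF router endpoint."""
    headers = {"Authorization": f"Bearer {os.environ['HF_TOKEN']}"}
    payload = {
        "model": model_id,
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": 100,
        "temperature": 0.7,
    }
    resp = requests.post(
        "https://router.huggingface.co/v1/chat/completions",
        headers=headers,
        json=payload,
        timeout=30,
    )
    resp.raise_for_status()
    return resp.json()

# Example usage (the response layout follows the OpenAI chat-completions schema):
# reply = quick_chat_completion("meta-llama/Llama-3.3-70B-Instruct", "Hello!")
# print(reply["choices"][0]["message"]["content"])
```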
182
+
183
+ # Updated provider configuration for current HF Inference Providers ecosystem
184
+ PROVIDER_CONFIG = {
185
+ "HF Inference": {
186
+ "description": "HuggingFace's native serverless inference API",
187
+ "auth_header": "Authorization",
188
+ "auth_format": "Bearer {token}",
189
+ "env_var": "HF_TOKEN",
190
+ "base_url": "https://api-inference.huggingface.co",
191
+ "pricing": "Free tier + pay-per-use",
192
+ "docs_url": "https://huggingface.co/docs/inference-providers/providers/hf-inference",
193
+ "capabilities": ["Chat completion (LLM)", "Chat completion (VLM)", "Feature Extraction", "Text to Image", "Speech to text"]
194
+ },
195
+ "Cerebras": {
196
+ "description": "Ultra-fast inference with Language Processing Units (LPUs)",
197
+ "auth_header": "Authorization",
198
+ "auth_format": "Bearer {token}",
199
+ "env_var": "HF_TOKEN",
200
+ "base_url": "https://router.huggingface.co/v1",
201
+ "pricing": "Pay-per-token via HF routing",
202
+ "docs_url": "https://huggingface.co/docs/inference-providers/providers/cerebras",
203
+ "capabilities": ["Chat completion (LLM)"]
204
+ },
205
+ "Cohere": {
206
+ "description": "Enterprise-grade NLP models and APIs",
207
+ "auth_header": "Authorization",
208
+ "auth_format": "Bearer {token}",
209
+ "env_var": "HF_TOKEN",
210
+ "base_url": "https://router.huggingface.co/v1",
211
+ "pricing": "Pay-per-token via HF routing",
212
+ "docs_url": "https://huggingface.co/docs/inference-providers/providers/cohere",
213
+ "capabilities": ["Chat completion (LLM)", "Chat completion (VLM)"]
214
+ },
215
+ "Fal AI": {
216
+ "description": "Fast and reliable model inference platform",
217
+ "auth_header": "Authorization",
218
+ "auth_format": "Bearer {token}",
219
+ "env_var": "HF_TOKEN",
220
+ "base_url": "https://router.huggingface.co/v1",
221
+ "pricing": "Pay-per-token via HF routing",
222
+ "docs_url": "https://huggingface.co/docs/inference-providers/providers/fal-ai",
223
+ "capabilities": ["Text to Image", "Text to video", "Speech to text"]
224
+ },
225
+ "Featherless AI": {
226
+ "description": "Optimized inference for open-source models",
227
+ "auth_header": "Authorization",
228
+ "auth_format": "Bearer {token}",
229
+ "env_var": "HF_TOKEN",
230
+ "base_url": "https://router.huggingface.co/v1",
231
+ "pricing": "Pay-per-token via HF routing",
232
+ "docs_url": "https://huggingface.co/docs/inference-providers/providers/featherless-ai",
233
+ "capabilities": ["Chat completion (LLM)", "Chat completion (VLM)"]
234
+ },
235
+ "Fireworks": {
236
+ "description": "Production-ready inference with fast model serving",
237
+ "auth_header": "Authorization",
238
+ "auth_format": "Bearer {token}",
239
+ "env_var": "HF_TOKEN",
240
+ "base_url": "https://router.huggingface.co/v1",
241
+ "pricing": "Pay-per-token via HF routing",
242
+ "docs_url": "https://huggingface.co/docs/inference-providers/providers/fireworks-ai",
243
+ "capabilities": ["Chat completion (LLM)", "Chat completion (VLM)"]
244
+ },
245
+ "Groq": {
246
+ "description": "Fast inference with specialized hardware acceleration",
247
+ "auth_header": "Authorization",
248
+ "auth_format": "Bearer {token}",
249
+ "env_var": "HF_TOKEN",
250
+ "base_url": "https://router.huggingface.co/v1",
251
+ "pricing": "Pay-per-token via HF routing",
252
+ "docs_url": "https://huggingface.co/docs/inference-providers/providers/groq",
253
+ "capabilities": ["Chat completion (LLM)"]
254
+ },
255
+ "Hyperbolic": {
256
+ "description": "GPU-accelerated inference platform",
257
+ "auth_header": "Authorization",
258
+ "auth_format": "Bearer {token}",
259
+ "env_var": "HF_TOKEN",
260
+ "base_url": "https://router.huggingface.co/v1",
261
+ "pricing": "Pay-per-token via HF routing",
262
+ "docs_url": "https://huggingface.co/docs/inference-providers/providers/hyperbolic",
263
+ "capabilities": ["Chat completion (LLM)", "Chat completion (VLM)"]
264
+ },
265
+ "Nebius": {
266
+ "description": "Cloud-based AI infrastructure platform",
267
+ "auth_header": "Authorization",
268
+ "auth_format": "Bearer {token}",
269
+ "env_var": "HF_TOKEN",
270
+ "base_url": "https://router.huggingface.co/v1",
271
+ "pricing": "Pay-per-token via HF routing",
272
+ "docs_url": "https://huggingface.co/docs/inference-providers/providers/nebius",
273
+ "capabilities": ["Chat completion (LLM)", "Chat completion (VLM)", "Feature Extraction", "Text to Image"]
274
+ },
275
+ "Novita": {
276
+ "description": "AI inference platform with video generation",
277
+ "auth_header": "Authorization",
278
+ "auth_format": "Bearer {token}",
279
+ "env_var": "HF_TOKEN",
280
+ "base_url": "https://router.huggingface.co/v1",
281
+ "pricing": "Pay-per-token via HF routing",
282
+ "docs_url": "https://huggingface.co/docs/inference-providers/providers/novita",
283
+ "capabilities": ["Chat completion (LLM)", "Chat completion (VLM)", "Text to video"]
284
+ },
285
+ "Nscale": {
286
+ "description": "Scalable AI model deployment platform",
287
+ "auth_header": "Authorization",
288
+ "auth_format": "Bearer {token}",
289
+ "env_var": "HF_TOKEN",
290
+ "base_url": "https://router.huggingface.co/v1",
291
+ "pricing": "Pay-per-token via HF routing",
292
+ "docs_url": "https://huggingface.co/docs/inference-providers/providers/nscale",
293
+ "capabilities": ["Chat completion (LLM)", "Chat completion (VLM)", "Feature Extraction", "Text to Image"]
294
+ },
295
+ "Replicate": {
296
+ "description": "Run models in the cloud with simple API",
297
+ "auth_header": "Authorization",
298
+ "auth_format": "Bearer {token}",
299
+ "env_var": "HF_TOKEN",
300
+ "base_url": "https://router.huggingface.co/v1",
301
+ "pricing": "Pay-per-token via HF routing",
302
+ "docs_url": "https://huggingface.co/docs/inference-providers/providers/replicate",
303
+ "capabilities": ["Text to Image", "Text to video", "Speech to text"]
304
+ },
305
+ "SambaNova": {
306
+ "description": "Enterprise AI platform with DataFlow architecture",
307
+ "auth_header": "Authorization",
308
+ "auth_format": "Bearer {token}",
309
+ "env_var": "HF_TOKEN",
310
+ "base_url": "https://router.huggingface.co/v1",
311
+ "pricing": "Pay-per-token via HF routing",
312
+ "docs_url": "https://huggingface.co/docs/inference-providers/providers/sambanova",
313
+ "capabilities": ["Chat completion (LLM)", "Feature Extraction"]
314
+ },
315
+ "Together": {
316
+ "description": "High-performance inference for open-source models",
317
+ "auth_header": "Authorization",
318
+ "auth_format": "Bearer {token}",
319
+ "env_var": "HF_TOKEN",
320
+ "base_url": "https://router.huggingface.co/v1",
321
+ "pricing": "Pay-per-token via HF routing",
322
+ "docs_url": "https://huggingface.co/docs/inference-providers/providers/together",
323
+ "capabilities": ["Chat completion (LLM)", "Chat completion (VLM)", "Text to Image"]
324
+ }
325
+ }
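The `auth_header` and `auth_format` fields above are not consumed anywhere else in this file; as a hedged illustration of how they could drive generic header construction (the helper name is hypothetical, not part of the app):

```python
def build_auth_headers(provider_entry: dict, token: str) -> dict:
    """Illustrative: turn a PROVIDER_CONFIG entry into a requests-style header dict."""
    return {provider_entry["auth_header"]: provider_entry["auth_format"].format(token=token)}

# Example with the config defined above:
# headers = build_auth_headers(PROVIDER_CONFIG["Groq"], os.getenv("HF_TOKEN"))
# -> {"Authorization": "Bearer hf_..."}
```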
326
+
327
+ class ModernHFInferenceExplorer:
328
  def __init__(self):
329
+ self.allowed_models = ALLOWED_MODELS
330
+ self.provider_config = PROVIDER_CONFIG
331
  self.hf_token = os.getenv("HF_TOKEN")
332
+
333
  if not self.hf_token:
334
+ raise ValueError("HF_TOKEN environment variable is required for HuggingFace Inference Providers")
335
 
336
  self.headers = {"Authorization": f"Bearer {self.hf_token}"}
 
337
 
338
+ def get_available_models(self) -> List[Dict]:
339
+ """Get the predefined allowed models with provider info and live status"""
340
+ models = []
341
+ for model_id, model_info in self.allowed_models.items():
342
+ provider = model_info["provider"]
343
 
344
+ models.append({
345
+ "model_id": model_id,
346
+ "provider": provider,
347
+ "pipeline": model_info["pipeline"],
348
+ "description": model_info["description"],
349
+ "endpoint": model_info["endpoint"],
350
+ "api_format": model_info["api_format"],
351
+ "status": self._check_model_status(model_id, provider),
352
+ "pricing": self.provider_config[provider]["pricing"]
353
+ })
354
 
355
+ return models
 
356
 
357
+ def _check_model_status(self, model_id: str, provider: str) -> str:
358
+ """Check if a specific model is currently available via HF Inference Providers"""
359
  try:
360
+ # For models using the new HF Router API
361
+ if provider in ["Cerebras", "Groq", "Together", "Fireworks", "Replicate", "Cohere", "Fal AI"]:
362
+ # Use the models endpoint to check availability
363
+ url = "https://router.huggingface.co/v1/models"
364
+ response = requests.get(url, headers=self.headers, timeout=5)
365
+
366
+ if response.status_code == 200:
367
+ available_models = response.json()
368
+ if isinstance(available_models, dict) and "data" in available_models:
369
+ model_ids = [m["id"] for m in available_models["data"]]
370
+ return "✅ Available" if model_id in model_ids else "❓ Check Provider"
371
+ return "✅ Available"
372
+ else:
373
+ return "❓ Unknown"
374
 
375
+ # For traditional HF Inference API models
376
+ elif provider == "HF Inference":
377
+ url = f"https://api-inference.huggingface.co/models/{model_id}"
378
+ response = requests.get(url, headers=self.headers, timeout=5)
379
+
380
+ if response.status_code == 200:
381
+ return "✅ Available"
382
+ elif response.status_code == 503:
383
+ return "🔄 Loading"
384
+ else:
385
+ return " Unavailable"
386
 
387
+ return "❓ Unknown"
388
+
389
+ except Exception:
390
+ return " Connection Error"
391
 
392
+ def test_model_inference(self, model_id: str, input_text: str) -> Dict:
393
+ """Test inference on a specific allowed model using current HF Inference Providers API"""
394
+ if model_id not in self.allowed_models:
395
+ return {
396
+ "status": "error",
397
+ "error": f"Model '{model_id}' is not in the allowed models list",
398
+ "response_time": None
399
+ }
400
+
401
+ model_info = self.allowed_models[model_id]
402
+ api_format = model_info["api_format"]
403
+
404
  try:
405
+ start_time = time.time()
406
 
407
+ if api_format == "openai_compatible":
408
+ # Use the new OpenAI-compatible chat completions endpoint
409
+ result = self._test_openai_compatible_model(model_id, input_text)
410
+ elif api_format == "hf_inference":
411
+ # Use traditional HF Inference API
412
+ result = self._test_hf_inference_model(model_id, input_text, model_info)
413
+ elif api_format == "hf_router":
414
+ # Use HF Router for other tasks
415
+ result = self._test_hf_router_model(model_id, input_text, model_info)
416
  else:
417
  return {
418
  "status": "error",
419
+ "error": f"Unsupported API format: {api_format}",
420
+ "response_time": None
421
  }
422
+
423
+ result["response_time"] = time.time() - start_time
424
+ return result
425
 
426
  except Exception as e:
427
  return {
428
+ "status": "error",
429
  "error": str(e),
430
+ "response_time": time.time() - start_time if 'start_time' in locals() else None
431
+ }
432
+
433
+ def _test_openai_compatible_model(self, model_id: str, input_text: str) -> Dict:
434
+ """Test model using OpenAI-compatible chat completions API"""
435
+ url = "https://router.huggingface.co/v1/chat/completions"
436
+
437
+ payload = {
438
+ "model": model_id,
439
+ "messages": [
440
+ {"role": "user", "content": input_text}
441
+ ],
442
+ "max_tokens": 100,
443
+ "temperature": 0.7
444
+ }
445
+
446
+ response = requests.post(url, headers=self.headers, json=payload, timeout=30)
447
+
448
+ if response.status_code == 200:
449
+ return {
450
+ "status": "success",
451
+ "result": response.json()
452
+ }
453
+ else:
454
+ return {
455
+ "status": "error",
456
+ "error": f"HTTP {response.status_code}: {response.text}"
457
+ }
458
+
459
+ def _test_hf_inference_model(self, model_id: str, input_text: str, model_info: Dict) -> Dict:
460
+ """Test model using traditional HF Inference API"""
461
+ url = model_info["endpoint"]
462
+
463
+ # Adjust payload based on pipeline type
464
+ pipeline = model_info["pipeline"]
465
+ if pipeline in ["text-generation", "text2text-generation"]:
466
+ payload = {"inputs": input_text, "parameters": {"max_new_tokens": 100}}
467
+ elif pipeline == "text-to-image":
468
+ payload = {"inputs": input_text}
469
+ elif pipeline == "feature-extraction":
470
+ payload = {"inputs": input_text}
471
+ else:
472
+ payload = {"inputs": input_text}
473
+
474
+ response = requests.post(url, headers=self.headers, json=payload, timeout=30)
475
+
476
+ if response.status_code == 200:
477
+ return {
478
+ "status": "success",
479
+ "result": response.json()
480
  }
481
+ else:
482
+ return {
483
+ "status": "error",
484
+ "error": f"HTTP {response.status_code}: {response.text}"
485
+ }
486
+
487
+ def _test_hf_router_model(self, model_id: str, input_text: str, model_info: Dict) -> Dict:
488
+ """Test model using HF Router API for specialized tasks"""
489
+ pipeline = model_info["pipeline"]
490
+
491
+ if pipeline == "text-to-image":
492
+ # Use the text-to-image endpoint via HF Router
493
+ payload = {
494
+ "model": model_id,
495
+ "prompt": input_text,
496
+ "num_inference_steps": 20
497
+ }
498
+ # Note: This would need to be implemented based on actual HF Router text-to-image API
499
+ return {
500
+ "status": "info",
501
+ "result": "Text-to-image testing via HF Router not fully implemented in demo"
502
+ }
503
+
504
+ return {
505
+ "status": "error",
506
+ "error": f"HF Router testing not implemented for pipeline: {pipeline}"
507
+ }
508
 
509
  def create_interface():
510
+ try:
511
+ explorer = ModernHFInferenceExplorer()
512
+ except ValueError as e:
513
+ # Create a dummy interface that shows the error
514
+ with gr.Blocks(title="❌ Configuration Error") as demo:
515
+ gr.Markdown(f"""
516
+ # ❌ Configuration Error
517
+
518
+ **Error:** {str(e)}
519
+
520
+ Please set the `HF_TOKEN` environment variable with your HuggingFace token.
521
+
522
+ You can get a token from: https://huggingface.co/settings/tokens
523
+ """)
524
+ return demo
525
 
526
+ def get_models_by_provider(provider_filter: str = "All"):
527
+ """Get models filtered by provider"""
528
+ models = explorer.get_available_models()
529
+
530
+ if provider_filter != "All":
531
+ models = [m for m in models if m['provider'] == provider_filter]
532
+
533
  if not models:
534
+ return "No models found for the selected provider"
535
 
536
  df = pd.DataFrame(models)
537
  return df
538
 
539
+ def get_models_by_pipeline(pipeline_filter: str = "All"):
540
+ """Get models filtered by pipeline"""
541
+ models = explorer.get_available_models()
542
 
543
+ if pipeline_filter != "All":
544
+ models = [m for m in models if m['pipeline'] == pipeline_filter]
545
+
546
+ if not models:
547
+ return "No models found for the selected pipeline"
548
+
549
+ df = pd.DataFrame(models)
550
  return df
551
 
552
  def test_model(model_id: str, test_input: str):
553
  """Test inference on a selected model"""
554
+ if not model_id or model_id.strip() == "":
555
+ return "Please select a model ID from the dropdown"
556
+
557
+ if model_id not in explorer.allowed_models:
558
+ available_models = "\n".join([f"- {mid}" for mid in explorer.allowed_models.keys()])
559
+ return f"""
560
+ **Error:** Model '{model_id}' is not in the allowed models list.
561
+
562
+ **Available models:**
563
+ {available_models}
564
+ """
565
 
566
  if not test_input.strip():
567
  test_input = "Hello, how are you today?"
568
 
569
  result = explorer.test_model_inference(model_id, test_input)
570
 
571
+ model_info = explorer.allowed_models[model_id]
572
+
573
  if result["status"] == "success":
574
  return f"""
575
  **Model:** {model_id}
576
+ **Provider:** {model_info['provider']}
577
+ **Pipeline:** {model_info['pipeline']}
578
+ **API Format:** {model_info['api_format']}
579
  **Status:** ✅ Success
580
  **Response Time:** {result['response_time']:.2f}s
581
 
 
583
  ```json
584
  {json.dumps(result['result'], indent=2)}
585
  ```
586
+ """
587
+ elif result["status"] == "info":
588
+ return f"""
589
+ **Model:** {model_id}
590
+ **Provider:** {model_info['provider']}
591
+ **Pipeline:** {model_info['pipeline']}
592
+ **Status:** ℹ️ Info
593
+ **Response Time:** {f"{result['response_time']:.2f}s" if result['response_time'] else 'N/A'}
594
+
595
+ **Info:**
596
+ {result['result']}
597
  """
598
  else:
599
  return f"""
600
  **Model:** {model_id}
601
+ **Provider:** {model_info['provider']}
602
+ **Pipeline:** {model_info['pipeline']}
603
  **Status:** ❌ Error
604
+ **Response Time:** {f"{result['response_time']:.2f}s" if result['response_time'] else 'N/A'}
605
 
606
  **Error:**
607
  {result['error']}
608
  """
609
 
610
+ def get_provider_status():
611
+ """Get comprehensive status of all providers"""
612
+ status_info = []
613
 
614
+ for provider, config in explorer.provider_config.items():
615
+ model_count = len([m for m in explorer.allowed_models.values() if m["provider"] == provider])
616
+ capabilities_str = ", ".join(config.get("capabilities", ["N/A"]))
617
+
618
+ status_info.append({
619
+ "Provider": provider,
620
+ "Description": config["description"],
621
+ "Capabilities": capabilities_str,
622
+ "Models Available": model_count,
623
+ "Pricing": config["pricing"],
624
+ "Documentation": config["docs_url"]
625
+ })
626
 
627
+ return pd.DataFrame(status_info)
628
+
629
+ # Get unique providers and pipelines for filters
630
+ providers = ["All"] + list(set(model["provider"] for model in explorer.allowed_models.values()))
631
+ pipelines = ["All"] + list(set(model["pipeline"] for model in explorer.allowed_models.values()))
632
+ model_ids = list(explorer.allowed_models.keys())
633
 
634
  # Create Gradio interface
635
+ with gr.Blocks(title="🤗 HuggingFace Inference Providers Explorer", theme=gr.themes.Soft()) as demo:
636
  gr.Markdown("""
637
+ # 🤗 HuggingFace Inference Providers Explorer
638
 
639
+ **Modern Inference Ecosystem**: Explore models from HuggingFace's unified inference providers platform!
640
 
641
+ ## 🚀 Current Inference Providers:
642
+ - **HF Inference**: Native serverless inference API (free tier available)
643
+ - **Cerebras**: Ultra-fast wafer-scale inference
644
+ - **Groq**: Hardware-accelerated language processing
645
+ - **Together AI**: High-performance open-source models
646
+ - **Fireworks AI**: Production-ready model serving
647
+ - **Replicate**: Cloud-based model deployment
648
+ - **Cohere**: Enterprise NLP models
649
+ - **Fal AI**: Fast and reliable inference
650
+
651
+ All providers use **HuggingFace routing** with unified billing and authentication!
652
 
653
  ---
654
  """)
655
 
656
  with gr.Tabs():
657
+ # Provider Status Tab
658
+ with gr.TabItem("🏢 Provider Overview"):
659
+ gr.Markdown("### HuggingFace Inference Providers Status")
660
 
661
+ status_btn = gr.Button("📊 View Provider Details", variant="primary")
662
+ provider_status_output = gr.Dataframe(
663
+ headers=["Provider", "Description", "Capabilities", "Models", "Pricing", "Documentation"],
664
+ label="Provider Information"
665
+ )
666
+
667
+ status_btn.click(get_provider_status, outputs=provider_status_output)
668
+
669
+ # Models by Provider Tab
670
+ with gr.TabItem("🔍 Browse by Provider"):
671
+ gr.Markdown("### Models Available by Provider")
672
+
673
+ provider_filter = gr.Dropdown(
674
+ choices=providers,
675
+ value="All",
676
+ label="Select Provider"
677
+ )
678
 
679
+ provider_models_btn = gr.Button("📋 Show Models", variant="primary")
680
+ provider_models_output = gr.Dataframe(
681
+ headers=["Model ID", "Provider", "Pipeline", "Description", "API Format", "Status", "Pricing"],
682
+ label="Models by Provider"
683
  )
684
 
685
+ provider_models_btn.click(
686
+ get_models_by_provider,
687
+ inputs=provider_filter,
688
+ outputs=provider_models_output
689
  )
 
690
 
691
+ # Models by Pipeline Tab
692
+ with gr.TabItem("⚙️ Browse by Task"):
693
+ gr.Markdown("### Models Available by Task/Pipeline")
694
 
695
+ pipeline_filter = gr.Dropdown(
696
+ choices=pipelines,
697
+ value="All",
698
+ label="Select Task/Pipeline"
699
  )
700
 
701
+ pipeline_models_btn = gr.Button("📋 Show Models", variant="primary")
702
+ pipeline_models_output = gr.Dataframe(
703
+ headers=["Model ID", "Provider", "Pipeline", "Description", "API Format", "Status"],
704
+ label="Models by Task"
705
+ )
706
+
707
+ pipeline_models_btn.click(
708
+ get_models_by_pipeline,
709
+ inputs=pipeline_filter,
710
+ outputs=pipeline_models_output
711
+ )
712
 
713
  # Model Testing Tab
714
  with gr.TabItem("🧪 Test Models"):
715
+ gr.Markdown("### Test Live Model Inference")
716
 
717
  with gr.Row():
718
+ model_id_dropdown = gr.Dropdown(
719
+ choices=model_ids,
720
+ label="Select Model",
721
+ info="Choose from curated inference provider models"
722
  )
723
  test_input = gr.Textbox(
724
  placeholder="Hello, how are you today?",
 
727
  )
728
 
729
  test_btn = gr.Button("🚀 Test Model", variant="primary")
730
+ test_output = gr.Markdown(label="Inference Results")
731
 
732
  test_btn.click(
733
  test_model,
734
+ inputs=[model_id_dropdown, test_input],
735
  outputs=test_output
736
  )
737
 
738
+ # All Models Tab
739
+ with gr.TabItem("📊 All Available Models"):
740
+ gr.Markdown("### Complete Model Catalog")
741
 
742
+ all_models_btn = gr.Button("📋 Load All Models", variant="primary")
743
+ all_models_output = gr.Dataframe(
744
+ headers=["Model ID", "Provider", "Pipeline", "Description", "API Format", "Status", "Pricing"],
745
+ label="Complete Model Catalog"
746
+ )
747
 
748
+ all_models_btn.click(
749
+ lambda: get_models_by_provider("All"),
750
+ outputs=all_models_output
751
+ )
752
 
753
  # Footer
754
+ gr.Markdown(f"""
755
  ---
756
 
757
+ ## 🔧 Setup Instructions:
758
 
759
+ 1. **Get HuggingFace Token**: Visit [HF Settings](https://huggingface.co/settings/tokens)
760
+ 2. **Set Environment Variable**: `export HF_TOKEN=hf_your_token_here`
761
+ 3. **Start Testing**: All providers use unified HF authentication!
762
+
763
+ ## 📋 Current Statistics:
764
+
765
+ - **Total Models**: {len(explorer.allowed_models)}
766
+ - **Providers**: {len(explorer.provider_config)}
767
+ - **Pipelines**: {len(set(model['pipeline'] for model in explorer.allowed_models.values()))}
768
+
769
+ ## 🔗 Useful Links:
770
+
771
+ - 📚 [Inference Providers Docs](https://huggingface.co/docs/inference-providers/index)
772
+ - 💰 [Pricing Information](https://huggingface.co/docs/inference-providers/pricing-and-billing)
773
+ - 🔑 [Authentication Guide](https://huggingface.co/docs/inference-providers/get-started#authentication)
774
+ - 🌟 [Provider Comparison](https://huggingface.co/inference-providers/models)
775
+
776
+ ---
777
+
778
+ *Powered by HuggingFace Inference Providers - Unified access to the best AI models!*
779
  """)
780
 
781
  return demo
 
788
  server_port=7860,
789
  share=False
790
  )
791
+ except Exception as e:
792
+ print(f"Error starting application: {e}")
793
+ print("Please ensure HF_TOKEN environment variable is set.")