Spaces:

ReallyFloppyPenguin
/

AstonishingSuperIntelV2

Sleeping

App Files Files Community

ReallyFloppyPenguin committed on Jul 15

Commit

a769262

verified ·

1 Parent(s): 3e3b2ba

Update app.py

Browse files

Files changed (1) hide show

app.py +662 -244

app.py CHANGED Viewed

@@ -7,176 +7,575 @@ from typing import List, Dict, Optional
 import time
 from datetime import datetime
-class HuggingFaceModelExplorer:
     def __init__(self):
         self.hf_token = os.getenv("HF_TOKEN")
         if not self.hf_token:
-            raise ValueError("HF_TOKEN environment variable is required")
         self.headers = {"Authorization": f"Bearer {self.hf_token}"}
-        self.base_url = "https://huggingface.co/api"
-    def get_inference_endpoints(self) -> List[Dict]:
-        """Fetch all available inference endpoints"""
-        try:
-            # Get serverless inference API models
-            url = f"{self.base_url}/models"
-            params = {
-                "pipeline_tag": None,
-                "library": None,
-                "sort": "downloads",
-                "direction": -1,
-                "limit": 100,
-                "full": True,
-                "config": True
-            }
-            response = requests.get(url, headers=self.headers, params=params)
-            response.raise_for_status()
-            models = response.json()
-            # Filter models that support inference API
-            inference_models = []
-            for model in models:
-                if self._supports_inference_api(model):
-                    inference_models.append({
-                        "id": model.get("id", "Unknown"),
-                        "pipeline_tag": model.get("pipeline_tag", "Unknown"),
-                        "library_name": model.get("library_name", "Unknown"),
-                        "downloads": model.get("downloads", 0),
-                        "likes": model.get("likes", 0),
-                        "created_at": model.get("createdAt", "Unknown"),
-                        "updated_at": model.get("lastModified", "Unknown"),
-                        "tags": model.get("tags", []),
-                        "inference_status": self._check_inference_status(model.get("id"))
-                    })
-            return inference_models
-        except Exception as e:
-            print(f"Error fetching inference endpoints: {e}")
-            return []
-    def _supports_inference_api(self, model: Dict) -> bool:
-        """Check if a model supports the inference API"""
-        # Models with these pipeline tags typically support inference API
-        supported_pipelines = {
-            "text-generation", "text2text-generation", "fill-mask",
-            "token-classification", "question-answering", "summarization",
-            "translation", "text-classification", "conversational",
-            "image-classification", "object-detection", "image-segmentation",
-            "text-to-image", "image-to-text", "automatic-speech-recognition",
-            "audio-classification", "voice-activity-detection",
-            "depth-estimation", "feature-extraction"
-        }
-        pipeline_tag = model.get("pipeline_tag")
-        return pipeline_tag in supported_pipelines
-    def _check_inference_status(self, model_id: str) -> str:
-        """Check if inference API is currently available for a model"""
         try:
-            url = f"https://api-inference.huggingface.co/models/{model_id}"
-            response = requests.get(url, headers=self.headers, timeout=5)
-            if response.status_code == 200:
-                return "✅ Available"
-            elif response.status_code == 503:
-                return "🔄 Loading"
-            else:
-                return "❌ Unavailable"
-        except:
-            return "❓ Unknown"
-    def get_dedicated_endpoints(self) -> List[Dict]:
-        """Fetch dedicated inference endpoints (requires paid plan)"""
-        try:
-            url = f"{self.base_url}/inference-endpoints"
-            response = requests.get(url, headers=self.headers)
-            if response.status_code == 200:
-                endpoints = response.json()
-                return [{
-                    "name": ep.get("name", "Unknown"),
-                    "model_id": ep.get("model", {}).get("repository", "Unknown"),
-                    "status": ep.get("status", "Unknown"),
-                    "created_at": ep.get("created_at", "Unknown"),
-                    "updated_at": ep.get("updated_at", "Unknown"),
-                    "compute": ep.get("compute", {}),
-                    "url": ep.get("url", "")
-                } for ep in endpoints]
-            else:
-                return []
-        except Exception as e:
-            print(f"Error fetching dedicated endpoints: {e}")
-            return []
-    def test_model_inference(self, model_id: str, input_text: str = "Hello, how are you?") -> Dict:
-        """Test inference on a specific model"""
         try:
-            url = f"https://api-inference.huggingface.co/models/{model_id}"
-            # Determine appropriate payload based on model type
-            payload = {"inputs": input_text}
-            response = requests.post(url, headers=self.headers, json=payload, timeout=30)
-            if response.status_code == 200:
-                result = response.json()
-                return {
-                    "status": "success",
-                    "result": result,
-                    "response_time": response.elapsed.total_seconds()
-                }
             else:
                 return {
                     "status": "error",
-                    "error": f"HTTP {response.status_code}: {response.text}",
-                    "response_time": response.elapsed.total_seconds()
                 }
         except Exception as e:
             return {
-                "status": "error",
                 "error": str(e),
-                "response_time": None
             }
 def create_interface():
-    explorer = HuggingFaceModelExplorer()
-    def refresh_serverless_models():
-        """Refresh the list of serverless inference models"""
-        models = explorer.get_inference_endpoints()
         if not models:
-            return "No models found or error occurred"
         df = pd.DataFrame(models)
         return df
-    def refresh_dedicated_endpoints():
-        """Refresh the list of dedicated inference endpoints"""
-        endpoints = explorer.get_dedicated_endpoints()
-        if not endpoints:
-            return "No dedicated endpoints found (requires paid plan) or error occurred"
-        df = pd.DataFrame(endpoints)
         return df
     def test_model(model_id: str, test_input: str):
         """Test inference on a selected model"""
-        if not model_id.strip():
-            return "Please enter a model ID"
         if not test_input.strip():
             test_input = "Hello, how are you today?"
         result = explorer.test_model_inference(model_id, test_input)
         if result["status"] == "success":
             return f"""
 **Model:** {model_id}
 **Status:** ✅ Success
 **Response Time:** {result['response_time']:.2f}s
@@ -184,101 +583,142 @@ def create_interface():
 ```json
 {json.dumps(result['result'], indent=2)}
 ```
 """
         else:
             return f"""
 **Model:** {model_id}
 **Status:** ❌ Error
-**Response Time:** {result['response_time'] if result['response_time'] else 'N/A'}s
 **Error:**
 {result['error']}
 """
-    def search_models(query: str, pipeline_filter: str = "All"):
-        """Search models by name or tags"""
-        models = explorer.get_inference_endpoints()
-        if query:
-            models = [m for m in models if query.lower() in m['id'].lower() or
-                     any(query.lower() in tag.lower() for tag in m['tags'])]
-        if pipeline_filter != "All":
-            models = [m for m in models if m['pipeline_tag'] == pipeline_filter]
-        if not models:
-            return "No models found matching your criteria"
-        df = pd.DataFrame(models)
-        return df
     # Create Gradio interface
-    with gr.Blocks(title="🤗 HuggingFace Inference API Explorer", theme=gr.themes.Soft()) as demo:
         gr.Markdown("""
-        # 🤗 HuggingFace Inference API Explorer
-        Explore all available models on HuggingFace Inference API providers!
-        This space showcases:
-        - **Serverless Inference API**: Free tier models available through HF's inference API
-        - **Dedicated Inference Endpoints**: Private endpoints (requires paid plan)
-        - **Model Testing**: Test any model directly from the interface
         ---
         """)
         with gr.Tabs():
-            # Serverless Models Tab
-            with gr.TabItem("🚀 Serverless Models"):
-                gr.Markdown("### Available Serverless Inference API Models")
-                with gr.Row():
-                    search_query = gr.Textbox(
-                        placeholder="Search models by name or tags...",
-                        label="Search Query"
-                    )
-                    pipeline_filter = gr.Dropdown(
-                        choices=["All", "text-generation", "text-classification", "question-answering",
-                                "summarization", "translation", "image-classification", "text-to-image"],
-                        value="All",
-                        label="Pipeline Filter"
-                    )
-                    search_btn = gr.Button("🔍 Search Models")
-                refresh_serverless_btn = gr.Button("🔄 Refresh All Models", variant="primary")
-                serverless_output = gr.Dataframe(
-                    headers=["Model ID", "Pipeline", "Library", "Downloads", "Likes", "Status"],
-                    label="Serverless Models"
                 )
-                search_btn.click(
-                    search_models,
-                    inputs=[search_query, pipeline_filter],
-                    outputs=serverless_output
                 )
-                refresh_serverless_btn.click(refresh_serverless_models, outputs=serverless_output)
-            # Dedicated Endpoints Tab
-            with gr.TabItem("🏢 Dedicated Endpoints"):
-                gr.Markdown("### Dedicated Inference Endpoints (Requires Paid Plan)")
-                refresh_dedicated_btn = gr.Button("🔄 Refresh Dedicated Endpoints", variant="primary")
-                dedicated_output = gr.Dataframe(
-                    headers=["Name", "Model ID", "Status", "Created", "URL"],
-                    label="Dedicated Endpoints"
                 )
-                refresh_dedicated_btn.click(refresh_dedicated_endpoints, outputs=dedicated_output)
             # Model Testing Tab
             with gr.TabItem("🧪 Test Models"):
-                gr.Markdown("### Test Model Inference")
                 with gr.Row():
-                    model_id_input = gr.Textbox(
-                        placeholder="e.g., microsoft/DialoGPT-medium",
-                        label="Model ID",
-                        info="Enter the full model ID from HuggingFace"
                     )
                     test_input = gr.Textbox(
                         placeholder="Hello, how are you today?",
@@ -287,77 +727,55 @@ def create_interface():
                     )
                 test_btn = gr.Button("🚀 Test Model", variant="primary")
-                test_output = gr.Markdown(label="Test Results")
                 test_btn.click(
                     test_model,
-                    inputs=[model_id_input, test_input],
                     outputs=test_output
                 )
-            # Statistics Tab
-            with gr.TabItem("📊 Statistics"):
-                gr.Markdown("### Inference API Statistics")
-                stats_btn = gr.Button("📈 Generate Statistics", variant="primary")
-                def generate_stats():
-                    models = explorer.get_inference_endpoints()
-                    if not models:
-                        return "No data available"
-                    total_models = len(models)
-                    pipelines = {}
-                    libraries = {}
-                    statuses = {}
-                    for model in models:
-                        # Count pipelines
-                        pipeline = model['pipeline_tag']
-                        pipelines[pipeline] = pipelines.get(pipeline, 0) + 1
-                        # Count libraries
-                        library = model['library_name']
-                        libraries[library] = libraries.get(library, 0) + 1
-                        # Count statuses
-                        status = model['inference_status']
-                        statuses[status] = statuses.get(status, 0) + 1
-                    # Sort by count
-                    top_pipelines = sorted(pipelines.items(), key=lambda x: x[1], reverse=True)[:10]
-                    top_libraries = sorted(libraries.items(), key=lambda x: x[1], reverse=True)[:10]
-                    stats_text = f"""
-## 📊 HuggingFace Inference API Statistics
-**Total Models Available:** {total_models}
-### Top Pipeline Tags:
-{chr(10).join([f"- **{pipeline}**: {count} models" for pipeline, count in top_pipelines])}
-### Top Libraries:
-{chr(10).join([f"- **{library}**: {count} models" for library, count in top_libraries])}
-### Inference Status Distribution:
-{chr(10).join([f"- **{status}**: {count} models" for status, count in statuses.items()])}
-*Last updated: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}*
-"""
-                    return stats_text
-                stats_output = gr.Markdown()
-                stats_btn.click(generate_stats, outputs=stats_output)
         # Footer
-        gr.Markdown("""
         ---
-        **Note:** This space requires a HuggingFace token set as the `HF_TOKEN` environment variable.
-        - 🌟 Star this space if you find it useful!
-        - 🐛 Report issues on the Community tab
-        - 📚 Learn more about [HuggingFace Inference API](https://huggingface.co/docs/api-inference/index)
         """)
     return demo
@@ -370,6 +788,6 @@ if __name__ == "__main__":
             server_port=7860,
             share=False
         )
-    except ValueError as e:
-        print(f"Error: {e}")
-        print("Please set the HF_TOKEN environment variable with your HuggingFace token.")

 import time
 from datetime import datetime
+# Updated dictionary of allowed models with current HF Inference Providers
# Endpoint for each API format. Only the "hf_inference" template embeds the
# model id; the two router endpoints are shared by every model of that format.
_ENDPOINT_BY_API_FORMAT = {
    "hf_inference": "https://api-inference.huggingface.co/models/{model_id}",
    "openai_compatible": "https://router.huggingface.co/v1/chat/completions",
    "hf_router": "https://router.huggingface.co/v1/text-to-image",
}

# One row per allowed model:
#   (model id, provider, pipeline, api format, description)
# Row order is preserved as the insertion order of ALLOWED_MODELS.
_ALLOWED_MODEL_ROWS = (
    # Text Generation Models - HF Inference API
    (
        "microsoft/DialoGPT-medium", "HF Inference", "text-generation", "hf_inference",
        "Conversational AI model for dialog generation",
    ),
    (
        "meta-llama/Llama-3.1-8B-Instruct", "HF Inference", "text-generation", "hf_inference",
        "Meta's Llama 3.1 8B Instruct model",
    ),
    (
        "deepseek-ai/DeepSeek-V3-0324", "HF Inference", "text-generation", "openai_compatible",
        "DeepSeek V3 state-of-the-art conversational model",
    ),
    # Cerebras Models (Chat completion LLM only)
    (
        "meta-llama/Llama-3.3-70B-Instruct", "Cerebras", "text-generation", "openai_compatible",
        "Meta's Llama 3.3 70B Instruct model via Cerebras ultra-fast LPUs",
    ),
    # Cohere Models (Chat completion LLM + VLM)
    (
        "cohere/command-r-plus", "Cohere", "text-generation", "openai_compatible",
        "Cohere's Command R+ enterprise-grade NLP model",
    ),
    # Fal AI Models (Text-to-Image, Text-to-Video, Speech-to-Text)
    (
        "black-forest-labs/FLUX.1-schnell", "Fal AI", "text-to-image", "hf_router",
        "FLUX.1 schnell model for fast image generation via Fal AI",
    ),
    # Featherless AI Models (Chat completion LLM + VLM)
    (
        "meta-llama/Llama-3.1-70B-Instruct", "Featherless AI", "text-generation", "openai_compatible",
        "Meta's Llama 3.1 70B Instruct via Featherless AI",
    ),
    # Fireworks Models (Chat completion LLM + VLM)
    (
        "accounts/fireworks/models/llama-v3p1-8b-instruct", "Fireworks", "text-generation",
        "openai_compatible",
        "Llama 3.1 8B Instruct via Fireworks AI production-ready serving",
    ),
    # Groq Models (Chat completion LLM only)
    (
        "deepseek-ai/DeepSeek-R1", "Groq", "text-generation", "openai_compatible",
        "DeepSeek R1 model via Groq hardware acceleration",
    ),
    # Hyperbolic Models (Chat completion LLM + VLM)
    (
        "meta-llama/Meta-Llama-3-8B-Instruct", "Hyperbolic", "text-generation", "openai_compatible",
        "Meta's Llama 3 8B Instruct via Hyperbolic",
    ),
    # Nebius Models (Chat completion LLM + VLM, Feature Extraction, Text-to-Image)
    (
        "mistralai/Mixtral-8x7B-Instruct-v0.1", "Nebius", "text-generation", "openai_compatible",
        "Mistral's Mixtral 8x7B Instruct via Nebius cloud platform",
    ),
    # Novita Models (Chat completion LLM + VLM, Text-to-Video)
    (
        "Qwen/Qwen2.5-72B-Instruct", "Novita", "text-generation", "openai_compatible",
        "Qwen 2.5 72B Instruct via Novita",
    ),
    # Nscale Models (Chat completion LLM + VLM, Feature Extraction, Text-to-Image)
    (
        "microsoft/Phi-3-medium-4k-instruct", "Nscale", "text-generation", "openai_compatible",
        "Microsoft Phi-3 Medium via Nscale",
    ),
    # Replicate Models (Text-to-Image, Text-to-Video, Speech-to-Text)
    (
        "stabilityai/stable-diffusion-xl-base-1.0", "Replicate", "text-to-image", "hf_router",
        "Stable Diffusion XL via Replicate cloud platform",
    ),
    # SambaNova Models (Chat completion LLM, Feature Extraction)
    (
        "meta-llama/Meta-Llama-3.1-405B-Instruct", "SambaNova", "text-generation",
        "openai_compatible",
        "Meta's Llama 3.1 405B Instruct via SambaNova",
    ),
    # Together AI Models (Chat completion LLM + VLM, Text-to-Image)
    (
        "meta-llama/Meta-Llama-3-70B-Instruct", "Together", "text-generation", "openai_compatible",
        "Meta's Llama 3 70B Instruct via Together AI high-performance inference",
    ),
    # HF Inference - Additional Models for various tasks
    (
        "black-forest-labs/FLUX.1-dev", "HF Inference", "text-to-image", "hf_inference",
        "FLUX.1 development model for high-quality text-to-image generation",
    ),
    (
        "openai/whisper-large-v3", "HF Inference", "automatic-speech-recognition", "hf_inference",
        "Whisper Large V3 for speech recognition",
    ),
    (
        "sentence-transformers/all-MiniLM-L6-v2", "HF Inference", "feature-extraction",
        "hf_inference",
        "Sentence transformer for embeddings and semantic search",
    ),
    (
        "cardiffnlp/twitter-roberta-base-sentiment-latest", "HF Inference", "text-classification",
        "hf_inference",
        "Sentiment analysis model trained on Twitter data",
    ),
)

# Catalogue of allowed models, keyed by model id. Every value is a dict with the
# keys "provider", "pipeline", "description", "endpoint" and "api_format", in
# that order.
ALLOWED_MODELS = {
    model_id: {
        "provider": provider,
        "pipeline": pipeline,
        "description": description,
        # Templates without a placeholder pass through str.format unchanged.
        "endpoint": _ENDPOINT_BY_API_FORMAT[api_format].format(model_id=model_id),
        "api_format": api_format,
    }
    for model_id, provider, pipeline, api_format, description in _ALLOWED_MODEL_ROWS
}
+# Updated provider configuration for current HF Inference Providers ecosystem
def _provider_entry(
    description,
    docs_slug,
    capabilities,
    *,
    base_url="https://router.huggingface.co/v1",
    pricing="Pay-per-token via HF routing",
):
    """Build one PROVIDER_CONFIG record.

    Args:
        description: Human-readable summary of the provider.
        docs_slug: Last path segment of the provider's documentation URL.
        capabilities: Iterable of capability labels; copied into a new list so
            records never share a list object.
        base_url: API base URL; defaults to the HF router.
        pricing: Pricing note; defaults to the router wording.

    Returns:
        A dict with the keys ``description``, ``auth_header``, ``auth_format``,
        ``env_var``, ``base_url``, ``pricing``, ``docs_url`` and
        ``capabilities``, in that order.
    """
    return {
        "description": description,
        # Every provider in this table authenticates the same way.
        "auth_header": "Authorization",
        "auth_format": "Bearer {token}",
        "env_var": "HF_TOKEN",
        "base_url": base_url,
        "pricing": pricing,
        "docs_url": "https://huggingface.co/docs/inference-providers/providers/" + docs_slug,
        "capabilities": list(capabilities),
    }


# Per-provider settings, keyed by the provider name used in ALLOWED_MODELS.
PROVIDER_CONFIG = {
    # The only entry that overrides the default base URL and pricing note.
    "HF Inference": _provider_entry(
        "HuggingFace's native serverless inference API",
        "hf-inference",
        ["Chat completion (LLM)", "Chat completion (VLM)", "Feature Extraction", "Text to Image", "Speech to text"],
        base_url="https://api-inference.huggingface.co",
        pricing="Free tier + pay-per-use",
    ),
    "Cerebras": _provider_entry(
        "Ultra-fast inference with Language Processing Units (LPUs)",
        "cerebras",
        ["Chat completion (LLM)"],
    ),
    "Cohere": _provider_entry(
        "Enterprise-grade NLP models and APIs",
        "cohere",
        ["Chat completion (LLM)", "Chat completion (VLM)"],
    ),
    "Fal AI": _provider_entry(
        "Fast and reliable model inference platform",
        "fal-ai",
        ["Text to Image", "Text to video", "Speech to text"],
    ),
    "Featherless AI": _provider_entry(
        "Optimized inference for open-source models",
        "featherless-ai",
        ["Chat completion (LLM)", "Chat completion (VLM)"],
    ),
    "Fireworks": _provider_entry(
        "Production-ready inference with fast model serving",
        "fireworks-ai",
        ["Chat completion (LLM)", "Chat completion (VLM)"],
    ),
    "Groq": _provider_entry(
        "Fast inference with specialized hardware acceleration",
        "groq",
        ["Chat completion (LLM)"],
    ),
    "Hyperbolic": _provider_entry(
        "GPU-accelerated inference platform",
        "hyperbolic",
        ["Chat completion (LLM)", "Chat completion (VLM)"],
    ),
    "Nebius": _provider_entry(
        "Cloud-based AI infrastructure platform",
        "nebius",
        ["Chat completion (LLM)", "Chat completion (VLM)", "Feature Extraction", "Text to Image"],
    ),
    "Novita": _provider_entry(
        "AI inference platform with video generation",
        "novita",
        ["Chat completion (LLM)", "Chat completion (VLM)", "Text to video"],
    ),
    "Nscale": _provider_entry(
        "Scalable AI model deployment platform",
        "nscale",
        ["Chat completion (LLM)", "Chat completion (VLM)", "Feature Extraction", "Text to Image"],
    ),
    "Replicate": _provider_entry(
        "Run models in the cloud with simple API",
        "replicate",
        ["Text to Image", "Text to video", "Speech to text"],
    ),
    "SambaNova": _provider_entry(
        "Enterprise AI platform with DataFlow architecture",
        "sambanova",
        ["Chat completion (LLM)", "Feature Extraction"],
    ),
    "Together": _provider_entry(
        "High-performance inference for open-source models",
        "together",
        ["Chat completion (LLM)", "Chat completion (VLM)", "Text to Image"],
    ),
}
+class ModernHFInferenceExplorer:
     def __init__(self):
         """Bind the model/provider tables and read the API token.

         Raises:
             ValueError: If the ``HF_TOKEN`` environment variable is unset or
                 empty.
         """
         token = os.getenv("HF_TOKEN")
         self.allowed_models, self.provider_config = ALLOWED_MODELS, PROVIDER_CONFIG
         self.hf_token = token
         if not token:
             raise ValueError("HF_TOKEN environment variable is required for HuggingFace Inference Providers")
         # The same bearer header is sent with every request this class makes.
         self.headers = {"Authorization": f"Bearer {token}"}
+    def get_available_models(self) -> List[Dict]:
+        """Get the predefined allowed models with provider info and live status"""
+        models = []
+        for model_id, model_info in self.allowed_models.items():
+            provider = model_info["provider"]
+            models.append({
+                "model_id": model_id,
+                "provider": provider,
+                "pipeline": model_info["pipeline"],
+                "description": model_info["description"],
+                "endpoint": model_info["endpoint"],
+                "api_format": model_info["api_format"],
+                "status": self._check_model_status(model_id, provider),
+                "pricing": self.provider_config[provider]["pricing"]
+            })
+        return models
+    def _check_model_status(self, model_id: str, provider: str) -> str:
+        """Check if a specific model is currently available via HF Inference Providers"""
         try:
+            # For models using the new HF Router API
+            if provider in ["Cerebras", "Groq", "Together", "Fireworks", "Replicate", "Cohere", "Fal AI"]:
+                # Use the models endpoint to check availability
+                url = "https://router.huggingface.co/v1/models"
+                response = requests.get(url, headers=self.headers, timeout=5)
+                if response.status_code == 200:
+                    available_models = response.json()
+                    if isinstance(available_models, dict) and "data" in available_models:
+                        model_ids = [m["id"] for m in available_models["data"]]
+                        return "✅ Available" if model_id in model_ids else "❓ Check Provider"
+                    return "✅ Available"
+                else:
+                    return "❓ Unknown"
+            # For traditional HF Inference API models
+            elif provider == "HF Inference":
+                url = f"https://api-inference.huggingface.co/models/{model_id}"
+                response = requests.get(url, headers=self.headers, timeout=5)
+                if response.status_code == 200:
+                    return "✅ Available"
+                elif response.status_code == 503:
+                    return "🔄 Loading"
+                else:
+                    return "❌ Unavailable"
+            return "❓ Unknown"
+        except Exception:
+            return "❓ Connection Error"
+    def test_model_inference(self, model_id: str, input_text: str) -> Dict:
+        """Test inference on a specific allowed model using current HF Inference Providers API"""
+        if model_id not in self.allowed_models:
+            return {
+                "status": "error",
+                "error": f"Model '{model_id}' is not in the allowed models list",
+                "response_time": None
+            }
+        model_info = self.allowed_models[model_id]
+        api_format = model_info["api_format"]
         try:
+            start_time = time.time()
+            if api_format == "openai_compatible":
+                # Use the new OpenAI-compatible chat completions endpoint
+                result = self._test_openai_compatible_model(model_id, input_text)
+            elif api_format == "hf_inference":
+                # Use traditional HF Inference API
+                result = self._test_hf_inference_model(model_id, input_text, model_info)
+            elif api_format == "hf_router":
+                # Use HF Router for other tasks
+                result = self._test_hf_router_model(model_id, input_text, model_info)
             else:
                 return {
                     "status": "error",
+                    "error": f"Unsupported API format: {api_format}",
+                    "response_time": None
                 }
+            result["response_time"] = time.time() - start_time
+            return result
         except Exception as e:
             return {
+                "status": "error",
                 "error": str(e),
+                "response_time": time.time() - start_time if 'start_time' in locals() else None
+            }
+    def _test_openai_compatible_model(self, model_id: str, input_text: str) -> Dict:
+        """Test model using OpenAI-compatible chat completions API"""
+        url = "https://router.huggingface.co/v1/chat/completions"
+        payload = {
+            "model": model_id,
+            "messages": [
+                {"role": "user", "content": input_text}
+            ],
+            "max_tokens": 100,
+            "temperature": 0.7
+        }
+        response = requests.post(url, headers=self.headers, json=payload, timeout=30)
+        if response.status_code == 200:
+            return {
+                "status": "success",
+                "result": response.json()
+            }
+        else:
+            return {
+                "status": "error",
+                "error": f"HTTP {response.status_code}: {response.text}"
+            }
+    def _test_hf_inference_model(self, model_id: str, input_text: str, model_info: Dict) -> Dict:
+        """Test model using traditional HF Inference API"""
+        url = model_info["endpoint"]
+        # Adjust payload based on pipeline type
+        pipeline = model_info["pipeline"]
+        if pipeline in ["text-generation", "text2text-generation"]:
+            payload = {"inputs": input_text, "parameters": {"max_new_tokens": 100}}
+        elif pipeline == "text-to-image":
+            payload = {"inputs": input_text}
+        elif pipeline == "feature-extraction":
+            payload = {"inputs": input_text}
+        else:
+            payload = {"inputs": input_text}
+        response = requests.post(url, headers=self.headers, json=payload, timeout=30)
+        if response.status_code == 200:
+            return {
+                "status": "success",
+                "result": response.json()
             }
+        else:
+            return {
+                "status": "error",
+                "error": f"HTTP {response.status_code}: {response.text}"
+            }
+    def _test_hf_router_model(self, model_id: str, input_text: str, model_info: Dict) -> Dict:
+        """Test model using HF Router API for specialized tasks"""
+        pipeline = model_info["pipeline"]
+        if pipeline == "text-to-image":
+            # Use the text-to-image endpoint via HF Router
+            payload = {
+                "model": model_id,
+                "prompt": input_text,
+                "num_inference_steps": 20
+            }
+            # Note: This would need to be implemented based on actual HF Router text-to-image API
+            return {
+                "status": "info",
+                "result": "Text-to-image testing via HF Router not fully implemented in demo"
+            }
+        return {
+            "status": "error",
+            "error": f"HF Router testing not implemented for pipeline: {pipeline}"
+        }
 def create_interface():
+    try:
+        explorer = ModernHFInferenceExplorer()
+    except ValueError as e:
+        # Create a dummy interface that shows the error
+        with gr.Blocks(title="❌ Configuration Error") as demo:
+            gr.Markdown(f"""
+            # ❌ Configuration Error
+            **Error:** {str(e)}
+            Please set the `HF_TOKEN` environment variable with your HuggingFace token.
+            You can get a token from: https://huggingface.co/settings/tokens
+            """)
+        return demo
+    def get_models_by_provider(provider_filter: str = "All"):
+        """Get models filtered by provider"""
+        models = explorer.get_available_models()
+        if provider_filter != "All":
+            models = [m for m in models if m['provider'] == provider_filter]
         if not models:
+            return "No models found for the selected provider"
         df = pd.DataFrame(models)
         return df
+    def get_models_by_pipeline(pipeline_filter: str = "All"):
+        """Get models filtered by pipeline"""
+        models = explorer.get_available_models()
+        if pipeline_filter != "All":
+            models = [m for m in models if m['pipeline'] == pipeline_filter]
+        if not models:
+            return "No models found for the selected pipeline"
+        df = pd.DataFrame(models)
         return df
     def test_model(model_id: str, test_input: str):
         """Test inference on a selected model"""
+        if not model_id or model_id.strip() == "":
+            return "Please select a model ID from the dropdown"
+        if model_id not in explorer.allowed_models:
+            available_models = "\n".join([f"- {mid}" for mid in explorer.allowed_models.keys()])
+            return f"""
+**Error:** Model '{model_id}' is not in the allowed models list.
+**Available models:**
+{available_models}
+"""
         if not test_input.strip():
             test_input = "Hello, how are you today?"
         result = explorer.test_model_inference(model_id, test_input)
+        model_info = explorer.allowed_models[model_id]
         if result["status"] == "success":
             return f"""
 **Model:** {model_id}
+**Provider:** {model_info['provider']}
+**Pipeline:** {model_info['pipeline']}
+**API Format:** {model_info['api_format']}
 **Status:** ✅ Success
 **Response Time:** {result['response_time']:.2f}s
 ```json
 {json.dumps(result['result'], indent=2)}
 ```
+"""
+        elif result["status"] == "info":
+            return f"""
+**Model:** {model_id}
+**Provider:** {model_info['provider']}
+**Pipeline:** {model_info['pipeline']}
+**Status:** ℹ️ Info
+**Response Time:** {result['response_time']:.2f}s if result['response_time'] else 'N/A'
+**Info:**
+{result['result']}
 """
         else:
             return f"""
 **Model:** {model_id}
+**Provider:** {model_info['provider']}
+**Pipeline:** {model_info['pipeline']}
 **Status:** ❌ Error
+**Response Time:** {result['response_time']:.2f}s if result['response_time'] else 'N/A'
 **Error:**
 {result['error']}
 """
+    def get_provider_status():
+        """Get comprehensive status of all providers"""
+        status_info = []
+        for provider, config in explorer.provider_config.items():
+            model_count = len([m for m in explorer.allowed_models.values() if m["provider"] == provider])
+            capabilities_str = ", ".join(config.get("capabilities", ["N/A"]))
+            status_info.append({
+                "Provider": provider,
+                "Description": config["description"],
+                "Capabilities": capabilities_str,
+                "Models Available": model_count,
+                "Pricing": config["pricing"],
+                "Documentation": config["docs_url"]
+            })
+        return pd.DataFrame(status_info)
+    # Get unique providers and pipelines for filters
+    providers = ["All"] + list(set(model["provider"] for model in explorer.allowed_models.values()))
+    pipelines = ["All"] + list(set(model["pipeline"] for model in explorer.allowed_models.values()))
+    model_ids = list(explorer.allowed_models.keys())
     # Create Gradio interface
+    with gr.Blocks(title="🤗 HuggingFace Inference Providers Explorer", theme=gr.themes.Soft()) as demo:
         gr.Markdown("""
+        # 🤗 HuggingFace Inference Providers Explorer
+        **Modern Inference Ecosystem**: Explore models from HuggingFace's unified inference providers platform!
+        ## 🚀 Current Inference Providers:
+        - **HF Inference**: Native serverless inference API (free tier available)
+        - **Cerebras**: Ultra-fast LPU-powered inference
+        - **Groq**: Hardware-accelerated language processing
+        - **Together AI**: High-performance open-source models
+        - **Fireworks AI**: Production-ready model serving
+        - **Replicate**: Cloud-based model deployment
+        - **Cohere**: Enterprise NLP models
+        - **Fal AI**: Fast and reliable inference
+        All providers use **HuggingFace routing** with unified billing and authentication!
         ---
         """)
         with gr.Tabs():
+            # Provider Status Tab
+            with gr.TabItem("🏢 Provider Overview"):
+                gr.Markdown("### HuggingFace Inference Providers Status")
+                status_btn = gr.Button("📊 View Provider Details", variant="primary")
+                provider_status_output = gr.Dataframe(
+                    headers=["Provider", "Description", "Capabilities", "Models", "Pricing", "Documentation"],
+                    label="Provider Information"
+                )
+                status_btn.click(get_provider_status, outputs=provider_status_output)
+            # Models by Provider Tab
+            with gr.TabItem("🔍 Browse by Provider"):
+                gr.Markdown("### Models Available by Provider")
+                provider_filter = gr.Dropdown(
+                    choices=providers,
+                    value="All",
+                    label="Select Provider"
+                )
+                provider_models_btn = gr.Button("📋 Show Models", variant="primary")
+                provider_models_output = gr.Dataframe(
+                    headers=["Model ID", "Provider", "Pipeline", "Description", "API Format", "Status", "Pricing"],
+                    label="Models by Provider"
                 )
+                provider_models_btn.click(
+                    get_models_by_provider,
+                    inputs=provider_filter,
+                    outputs=provider_models_output
                 )
+            # Models by Pipeline Tab
+            with gr.TabItem("⚙️ Browse by Task"):
+                gr.Markdown("### Models Available by Task/Pipeline")
+                pipeline_filter = gr.Dropdown(
+                    choices=pipelines,
+                    value="All",
+                    label="Select Task/Pipeline"
                 )
+                pipeline_models_btn = gr.Button("📋 Show Models", variant="primary")
+                pipeline_models_output = gr.Dataframe(
+                    headers=["Model ID", "Provider", "Pipeline", "Description", "API Format", "Status"],
+                    label="Models by Task"
+                )
+                pipeline_models_btn.click(
+                    get_models_by_pipeline,
+                    inputs=pipeline_filter,
+                    outputs=pipeline_models_output
+                )
             # Model Testing Tab
             with gr.TabItem("🧪 Test Models"):
+                gr.Markdown("### Test Live Model Inference")
                 with gr.Row():
+                    model_id_dropdown = gr.Dropdown(
+                        choices=model_ids,
+                        label="Select Model",
+                        info="Choose from curated inference provider models"
                     )
                     test_input = gr.Textbox(
                         placeholder="Hello, how are you today?",
                     )
                 test_btn = gr.Button("🚀 Test Model", variant="primary")
+                test_output = gr.Markdown(label="Inference Results")
                 test_btn.click(
                     test_model,
+                    inputs=[model_id_dropdown, test_input],
                     outputs=test_output
                 )
+            # All Models Tab
+            with gr.TabItem("📊 All Available Models"):
+                gr.Markdown("### Complete Model Catalog")
+                all_models_btn = gr.Button("📋 Load All Models", variant="primary")
+                all_models_output = gr.Dataframe(
+                    headers=["Model ID", "Provider", "Pipeline", "Description", "API Format", "Status", "Pricing"],
+                    label="Complete Model Catalog"
+                )
+                all_models_btn.click(
+                    lambda: get_models_by_provider("All"),
+                    outputs=all_models_output
+                )
         # Footer
+        gr.Markdown(f"""
         ---
+        ## 🔧 Setup Instructions:
+        1. **Get HuggingFace Token**: Visit [HF Settings](https://huggingface.co/settings/tokens)
+        2. **Set Environment Variable**: `export HF_TOKEN=hf_your_token_here`
+        3. **Start Testing**: All providers use unified HF authentication!
+        ## 📋 Current Statistics:
+        - **Total Models**: {len(explorer.allowed_models)}
+        - **Providers**: {len(explorer.provider_config)}
+        - **Pipelines**: {len(set(model['pipeline'] for model in explorer.allowed_models.values()))}
+        ## 🔗 Useful Links:
+        - 📚 [Inference Providers Docs](https://huggingface.co/docs/inference-providers/index)
+        - 💰 [Pricing Information](https://huggingface.co/docs/inference-providers/pricing-and-billing)
+        - 🔑 [Authentication Guide](https://huggingface.co/docs/inference-providers/get-started#authentication)
+        - 🌟 [Provider Comparison](https://huggingface.co/inference-providers/models)
+        ---
+        *Powered by HuggingFace Inference Providers - Unified access to the best AI models!*
         """)
     return demo
             server_port=7860,
             share=False
         )
+    except Exception as e:
+        print(f"Error starting application: {e}")
+        print("Please ensure HF_TOKEN environment variable is set.")