ndc8 committed Β· Commit 3239c69 Β· 1 Parent(s): 97bafdb
AUTHENTICATION_FIX.md ADDED
@@ -0,0 +1,74 @@
+ # πŸ”§ SOLUTION: HuggingFace Authentication Issue
+
+ ## Problem Identified
+
+ Your AI backend is returning "I apologize, but I'm having trouble generating a response right now. Please try again." because **all HuggingFace Inference API calls now require authentication**.
+
+ ## Root Cause
+
+ - HuggingFace changed their API to require tokens for all models
+ - Your Space doesn't have a valid `HF_TOKEN` environment variable
+ - `InferenceClient.text_generation()` fails with `StopIteration` errors (see the sketch below)
+ - The backend falls back to the error message
+
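+ At the code level, the failing call and its fix look roughly like this. This is a minimal sketch, not the actual contents of backend_service.py; the model name is just the one your backend currently uses:
+
+ ```python
+ import os
+ from huggingface_hub import InferenceClient
+
+ # Pass the token explicitly. InferenceClient also picks up a saved login or
+ # the HF_TOKEN environment variable automatically, but being explicit makes
+ # the dependency obvious.
+ client = InferenceClient(
+     model="unsloth/DeepSeek-R1-0528-Qwen3-8B-GGUF",
+     token=os.environ.get("HF_TOKEN"),  # None means anonymous access, which now fails
+ )
+
+ response = client.text_generation(prompt="Hello!", max_new_tokens=50)
+ print(response)
+ ```
+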
+ ## Immediate Fix: Add a HuggingFace Token
+
+ ### Step 1: Get a Free HuggingFace Token
+
+ 1. Go to https://huggingface.co/settings/tokens
+ 2. Click "New token"
+ 3. Give it a name like "firstAI-space"
+ 4. Select "Read" permission (sufficient for inference)
+ 5. Copy the token (it starts with `hf_...`); you can sanity-check it with the snippet below
+
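+ To confirm the token is valid before wiring it into the Space, a quick check (a sketch that only assumes `huggingface_hub` is installed and that the placeholder token is the one you just copied):
+
+ ```python
+ from huggingface_hub import whoami
+
+ # whoami() raises for an invalid or expired token; otherwise it
+ # returns info about the account the token belongs to
+ print(whoami(token="hf_xxxxxxxxxxxx")["name"])
+ ```
+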
+ ### Step 2: Add the Token to Your HuggingFace Space
+
+ 1. Go to your Space: https://huggingface.co/spaces/cong182/firstAI
+ 2. Click the "Settings" tab
+ 3. Scroll to "Variables and secrets"
+ 4. Click "New secret"
+ 5. Name: `HF_TOKEN`
+ 6. Value: paste your token (`hf_xxxxxxxxxxxx`)
+ 7. Click "Save"
+
+ ### Step 3: Restart Your Space
+
+ Your Space will automatically restart and pick up the new token.
+
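+ Once restarted, the secret is exposed to your app as an ordinary environment variable, so a quick in-Space check (a sketch you could drop into the backend's startup logging) is:
+
+ ```python
+ import os
+
+ # Secrets configured under "Variables and secrets" appear in os.environ
+ token = os.environ.get("HF_TOKEN", "")
+ print("HF_TOKEN configured:", token.startswith("hf_"))
+ ```
+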
+ ## Test After Fix
+
+ After adding the token, test with:
+
+ ```bash
+ curl -X POST https://cong182-firstai.hf.space/v1/chat/completions \
+   -H "Content-Type: application/json" \
+   -d '{
+     "model": "unsloth/DeepSeek-R1-0528-Qwen3-8B-GGUF",
+     "messages": [{"role": "user", "content": "Hello! Tell me a joke."}],
+     "max_tokens": 100
+   }'
+ ```
+
+ You should get actual generated content instead of the fallback message.
+
+ ## Alternative Models (if DeepSeek still has issues)
+
+ If the DeepSeek model still doesn't work after authentication, try one of these reliable models.
+
+ ### Update backend_service.py to use a working model:
+
+ ```python
+ # Change this line in backend_service.py:
+ current_model = "microsoft/DialoGPT-medium"  # Reliable alternative
+ # or
+ current_model = "HuggingFaceH4/zephyr-7b-beta"  # Good chat model
+ ```
+
+ ## Why This Happened
+
+ - HuggingFace tightened its security and authentication requirements
+ - Free inference still works, but it requires an account and a token
+ - Your Space was missing the authentication token
+ - Local testing fails for the same reason
+
+ The fix is simple: just add `HF_TOKEN` to your Space settings! πŸš€
debug_inference.py ADDED
@@ -0,0 +1,127 @@
+ #!/usr/bin/env python3
+ """
+ Debug script to test the HuggingFace Inference API directly
+ """
+
+ import os
+ import traceback
+
+ from huggingface_hub import HfApi, InferenceClient
+
+ def test_model(model_name, prompt="Hello, how are you?"):
+     """Test a specific model with the HuggingFace Inference API"""
+     print(f"\nπŸ” Testing model: {model_name}")
+     print("=" * 50)
+
+     try:
+         # Initialize client (it picks up HF_TOKEN from the environment if set)
+         client = InferenceClient(model=model_name)
+         print("βœ… Client initialized successfully")
+
+         # Test text generation with progressively simpler parameter sets
+         print(f"πŸ“ Testing prompt: '{prompt}'")
+
+         # Method 1: Full parameters (same as backend_service.py)
+         try:
+             print("\nπŸ”¬ Method 1: Full parameters")
+             response = client.text_generation(
+                 prompt=prompt,
+                 max_new_tokens=50,
+                 temperature=0.7,
+                 top_p=0.95,
+                 return_full_text=False,
+                 stop=["Human:", "System:"]
+             )
+             print(f"βœ… Success: {response}")
+             return True
+         except Exception as e:
+             print(f"❌ Method 1 failed: {e}")
+             print(f"Error type: {type(e).__name__}")
+
+         # Method 2: Minimal parameters
+         try:
+             print("\nπŸ”¬ Method 2: Minimal parameters")
+             response = client.text_generation(
+                 prompt=prompt,
+                 max_new_tokens=50,
+                 temperature=0.7,
+                 return_full_text=False
+             )
+             print(f"βœ… Success: {response}")
+             return True
+         except Exception as e:
+             print(f"❌ Method 2 failed: {e}")
+             print(f"Error type: {type(e).__name__}")
+
+         # Method 3: Basic parameters only
+         try:
+             print("\nπŸ”¬ Method 3: Basic parameters")
+             response = client.text_generation(
+                 prompt=prompt,
+                 max_new_tokens=30
+             )
+             print(f"βœ… Success: {response}")
+             return True
+         except Exception as e:
+             print(f"❌ Method 3 failed: {e}")
+             print(f"Error type: {type(e).__name__}")
+             print("Full traceback:")
+             traceback.print_exc()
+
+         return False
+
+     except Exception as e:
+         print(f"❌ Failed to initialize client: {e}")
+         print(f"Error type: {type(e).__name__}")
+         traceback.print_exc()
+         return False
+
+ def test_model_info(model_name):
+     """Check that the model's metadata can be fetched from the Hub"""
+     try:
+         print(f"\nπŸ“Š Getting model info for: {model_name}")
+         # Actually fetch the repo metadata; this raises if the model
+         # does not exist or is gated for the current credentials
+         info = HfApi().model_info(model_name)
+         print(f"βœ… Model appears to be accessible (pipeline: {info.pipeline_tag})")
+         return True
+     except Exception as e:
+         print(f"❌ Model info failed: {e}")
+         return False
+
+ if __name__ == "__main__":
+     # Report whether a HuggingFace token is available
+     hf_token = os.environ.get("HF_TOKEN")
+     if hf_token:
+         print(f"πŸ”‘ Using HF_TOKEN: {hf_token[:10]}...")
+     else:
+         print("⚠️ No HF_TOKEN found, using anonymous access")
+
+     # Test models
+     models_to_test = [
+         "unsloth/DeepSeek-R1-0528-Qwen3-8B-GGUF",  # Current problematic model
+         "microsoft/DialoGPT-medium",               # Known working model
+         "meta-llama/Llama-2-7b-chat-hf",           # Popular model
+         "HuggingFaceH4/zephyr-7b-beta"             # Another good model
+     ]
+
+     results = {}
+
+     for model in models_to_test:
+         print(f"\n{'='*60}")
+         test_result = test_model(model)
+         results[model] = test_result
+
+         # Also check that the model's metadata is reachable
+         test_model_info(model)
+
+         print(f"\nResult for {model}: {'βœ… WORKING' if test_result else '❌ FAILED'}")
+
+     print(f"\n{'='*60}")
+     print("SUMMARY:")
+     print("=" * 60)
+     for model, result in results.items():
+         status = "βœ… WORKING" if result else "❌ FAILED"
+         print(f"{model}: {status}")
test_free_alternatives.py ADDED
@@ -0,0 +1,95 @@
+ #!/usr/bin/env python3
+ """
+ Test hardcoded models that may not require authentication
+ """
+
+ import requests
+
+ def test_free_inference_alternatives():
+     """Test free inference alternatives that may work without authentication"""
+
+     print("πŸ” Testing inference alternatives that work without auth")
+     print("=" * 60)
+
+     # Test 1: Try some small models that might work without auth
+     free_models = [
+         "gpt2",
+         "distilgpt2",
+         "microsoft/DialoGPT-small"
+     ]
+
+     for model in free_models:
+         print(f"\nπŸ€– Testing {model}")
+         url = f"https://api-inference.huggingface.co/models/{model}"
+
+         payload = {
+             "inputs": "Hello, how are you today?",
+             "parameters": {
+                 "max_length": 50,
+                 "temperature": 0.7
+             }
+         }
+
+         try:
+             response = requests.post(url, json=payload, timeout=30)
+             print(f"Status: {response.status_code}")
+
+             if response.status_code == 200:
+                 result = response.json()
+                 print(f"βœ… Success: {result}")
+                 # Return the first model that responds successfully
+                 return model
+             elif response.status_code == 503:
+                 print("⏳ Model loading, might work later")
+             else:
+                 print(f"❌ Error: {response.text}")
+         except Exception as e:
+             print(f"❌ Exception: {e}")
+
+     return None
+
+ def test_alternative_apis():
+     """List other free or freemium APIs worth considering"""
+
+     print("\n" + "=" * 60)
+     print("TESTING ALTERNATIVE FREE APIs")
+     print("=" * 60)
+
+     # Note: these are just pointers; most require their own API keys
+     alternatives = [
+         "OpenAI GPT (requires key)",
+         "Anthropic Claude (requires key)",
+         "Google Gemini (requires key)",
+         "Local Ollama (if installed)",
+         "Groq (free tier available)"
+     ]
+
+     for alt in alternatives:
+         print(f"πŸ“ {alt}")
+
+     print("\nπŸ’‘ Recommendation: Get a free HuggingFace token from https://huggingface.co/settings/tokens")
+
+ if __name__ == "__main__":
+     working_model = test_free_inference_alternatives()
+     test_alternative_apis()
+
+     print("\n" + "=" * 60)
+     print("SOLUTION RECOMMENDATIONS")
+     print("=" * 60)
+
+     if working_model:
+         print(f"βœ… Found working model: {working_model}")
+         print("πŸ”§ You can update your backend to use this model")
+     else:
+         print("❌ No models work without authentication")
+
+     print("\n🎯 IMMEDIATE SOLUTIONS:")
+     print("1. Get a free HuggingFace token: https://huggingface.co/settings/tokens")
+     print("2. Set the HF_TOKEN environment variable in your HuggingFace Space")
+     print("3. Your Space might already have proper auth - the issue is local testing")
+     print("4. Use the deployed Space API instead of local testing")
+
+     print("\nπŸ” DEBUGGING STEPS:")
+     print("1. Check if your deployed Space has HF_TOKEN in Settings > Variables")
+     print("2. Test the deployed API directly (it should work)")
+     print("3. For local development, get your own HF token")
test_local_api.py ADDED
@@ -0,0 +1,44 @@
+ #!/usr/bin/env python3
+ """
+ Test script for the local API endpoint
+ """
+ import requests
+ import json
+
+ # Local API endpoint
+ API_URL = "http://localhost:8000/v1/chat/completions"
+
+ # Test payload with the correct model name
+ payload = {
+     "model": "unsloth/DeepSeek-R1-0528-Qwen3-8B-GGUF",
+     "messages": [
+         {"role": "system", "content": "You are a helpful assistant."},
+         {"role": "user", "content": "Hello, what can you do?"}
+     ],
+     "max_tokens": 64,
+     "temperature": 0.7
+ }
+
+ print("πŸ§ͺ Testing Local API...")
+ print(f"πŸ“‘ URL: {API_URL}")
+ print(f"πŸ“¦ Payload: {json.dumps(payload, indent=2)}")
+ print("-" * 50)
+
+ try:
+     response = requests.post(API_URL, json=payload, timeout=30)
+     print(f"βœ… Status: {response.status_code}")
+
+     if response.status_code == 200:
+         result = response.json()
+         print(f"πŸ€– Response: {json.dumps(result, indent=2)}")
+         # Extract the assistant message from the OpenAI-style response shape
+         if 'choices' in result and len(result['choices']) > 0:
+             print(f"πŸ’¬ AI Message: {result['choices'][0]['message']['content']}")
+     else:
+         print(f"❌ Error: {response.text}")
+
+ except requests.exceptions.ConnectionError:
+     print("❌ Connection failed - make sure the server is running locally")
+ except requests.exceptions.Timeout:
+     print("⏰ Request timed out")
+ except Exception as e:
+     print(f"❌ Error: {e}")
test_working_models.py ADDED
@@ -0,0 +1,122 @@
+ #!/usr/bin/env python3
+ """
+ Test different HuggingFace approaches to find a working method
+ """
+
+ import os
+ import requests
+
+ # HuggingFace token (read from the environment)
+ HF_TOKEN = os.environ.get("HF_TOKEN", "")
+
+ def test_inference_api_direct(model_name, prompt="Hello, how are you?"):
+     """Test using direct HTTP requests to the HuggingFace API"""
+     print(f"\n🌐 Testing direct HTTP API for: {model_name}")
+
+     # Only send an Authorization header when a token is actually set;
+     # an empty "Bearer" value would just trigger an auth error
+     headers = {"Content-Type": "application/json"}
+     if HF_TOKEN:
+         headers["Authorization"] = f"Bearer {HF_TOKEN}"
+
+     url = f"https://api-inference.huggingface.co/models/{model_name}"
+
+     payload = {
+         "inputs": prompt,
+         "parameters": {
+             "max_new_tokens": 50,
+             "temperature": 0.7,
+             "top_p": 0.95,
+             "do_sample": True
+         }
+     }
+
+     try:
+         response = requests.post(url, headers=headers, json=payload, timeout=30)
+         print(f"Status: {response.status_code}")
+
+         if response.status_code == 200:
+             result = response.json()
+             print(f"βœ… Success: {result}")
+             return True
+         else:
+             print(f"❌ Error: {response.text}")
+             return False
+     except Exception as e:
+         print(f"❌ Exception: {e}")
+         return False
+
+ def test_serverless_models():
+     """Test known working models that support serverless inference"""
+
+     # Models that typically work well with serverless inference
+     working_models = [
+         "microsoft/DialoGPT-medium",
+         "google/flan-t5-base",
+         "distilbert-base-uncased-finetuned-sst-2-english",
+         "gpt2",
+         "microsoft/DialoGPT-small",
+         "facebook/blenderbot-400M-distill"
+     ]
+
+     results = {}
+
+     for model in working_models:
+         results[model] = test_inference_api_direct(model)
+
+     return results
+
+ def test_chat_completion_models():
+     """Test models specifically for chat completion"""
+
+     chat_models = [
+         "microsoft/DialoGPT-medium",
+         "facebook/blenderbot-400M-distill",
+         "microsoft/DialoGPT-small"
+     ]
+
+     for model in chat_models:
+         print(f"\nπŸ’¬ Testing chat model: {model}")
+         test_inference_api_direct(model, "Human: Hello! How are you?\nAssistant:")
+
+ if __name__ == "__main__":
+     print("πŸ” HuggingFace Inference API Debug")
+     print("=" * 50)
+
+     if HF_TOKEN:
+         print(f"πŸ”‘ Using HF_TOKEN: {HF_TOKEN[:10]}...")
+     else:
+         print("⚠️ No HF_TOKEN - trying anonymous access")
+
+     # Test serverless models
+     print("\n" + "=" * 60)
+     print("TESTING SERVERLESS MODELS")
+     print("=" * 60)
+
+     results = test_serverless_models()
+
+     # Test chat completion models
+     print("\n" + "=" * 60)
+     print("TESTING CHAT MODELS")
+     print("=" * 60)
+
+     test_chat_completion_models()
+
+     # Summary
+     print("\n" + "=" * 60)
+     print("SUMMARY")
+     print("=" * 60)
+
+     working = [model for model, result in results.items() if result]
+
+     if working:
+         print("βœ… Working models:")
+         for model in working:
+             print(f" - {model}")
+         print(f"\n🎯 Recommended model to switch to: {working[0]}")
+     else:
+         print("❌ No models working - the API might be down, or it's an authentication issue")