ndc8 committed Β· Commit 3239c69 Β· 1 Parent(s): 97bafdb
AUTHENTICATION_FIX.md ADDED
@@ -0,0 +1,74 @@
+ # πŸ”§ SOLUTION: HuggingFace Authentication Issue
+
+ ## Problem Identified
+
+ Your AI backend is returning "I apologize, but I'm having trouble generating a response right now. Please try again." because **all HuggingFace Inference API calls now require authentication**.
+
+ ## Root Cause
+
+ - HuggingFace changed their API to require tokens for all models
+ - Your Space doesn't have a valid `HF_TOKEN` environment variable
+ - `InferenceClient.text_generation()` fails with `StopIteration` errors (see the sketch below)
+ - The backend falls back to the error message
+
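+ At the code level, the failing call and its fix look roughly like this. This is a minimal sketch, not the actual contents of backend_service.py; the model name is just the one your backend currently uses:
+
+ ```python
+ import os
+ from huggingface_hub import InferenceClient
+
+ # Pass the token explicitly. InferenceClient also picks up a saved login or
+ # the HF_TOKEN environment variable automatically, but being explicit makes
+ # the dependency obvious.
+ client = InferenceClient(
+     model="unsloth/DeepSeek-R1-0528-Qwen3-8B-GGUF",
+     token=os.environ.get("HF_TOKEN"),  # None means anonymous access, which now fails
+ )
+
+ response = client.text_generation(prompt="Hello!", max_new_tokens=50)
+ print(response)
+ ```
+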
+ ## Immediate Fix: Add a HuggingFace Token
+
+ ### Step 1: Get a Free HuggingFace Token
+
+ 1. Go to https://huggingface.co/settings/tokens
+ 2. Click "New token"
+ 3. Give it a name like "firstAI-space"
+ 4. Select "Read" permission (sufficient for inference)
+ 5. Copy the token (it starts with `hf_...`); you can sanity-check it with the snippet below
+
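+ To confirm the token is valid before wiring it into the Space, a quick check (a sketch that only assumes `huggingface_hub` is installed and that the placeholder token is the one you just copied):
+
+ ```python
+ from huggingface_hub import whoami
+
+ # whoami() raises for an invalid or expired token; otherwise it
+ # returns info about the account the token belongs to
+ print(whoami(token="hf_xxxxxxxxxxxx")["name"])
+ ```
+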
+ ### Step 2: Add the Token to Your HuggingFace Space
+
+ 1. Go to your Space: https://huggingface.co/spaces/cong182/firstAI
+ 2. Click the "Settings" tab
+ 3. Scroll to "Variables and secrets"
+ 4. Click "New secret"
+ 5. Name: `HF_TOKEN`
+ 6. Value: paste your token (`hf_xxxxxxxxxxxx`)
+ 7. Click "Save"
+
+ ### Step 3: Restart Your Space
+
+ Your Space will automatically restart and pick up the new token.
+
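+ Once restarted, the secret is exposed to your app as an ordinary environment variable, so a quick in-Space check (a sketch you could drop into the backend's startup logging) is:
+
+ ```python
+ import os
+
+ # Secrets configured under "Variables and secrets" appear in os.environ
+ token = os.environ.get("HF_TOKEN", "")
+ print("HF_TOKEN configured:", token.startswith("hf_"))
+ ```
+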
+ ## Test After Fix
+
+ After adding the token, test with:
+
+ ```bash
+ curl -X POST https://cong182-firstai.hf.space/v1/chat/completions \
+   -H "Content-Type: application/json" \
+   -d '{
+     "model": "unsloth/DeepSeek-R1-0528-Qwen3-8B-GGUF",
+     "messages": [{"role": "user", "content": "Hello! Tell me a joke."}],
+     "max_tokens": 100
+   }'
+ ```
+
+ You should get actual generated content instead of the fallback message.
+
+ ## Alternative Models (if DeepSeek still has issues)
+
+ If the DeepSeek model still doesn't work after authentication, try one of these reliable models.
+
+ ### Update backend_service.py to use a working model:
+
+ ```python
+ # Change this line in backend_service.py:
+ current_model = "microsoft/DialoGPT-medium"  # Reliable alternative
+ # or
+ current_model = "HuggingFaceH4/zephyr-7b-beta"  # Good chat model
+ ```
+
+ ## Why This Happened
+
+ - HuggingFace tightened its security and authentication requirements
+ - Free inference still works, but it requires an account and a token
+ - Your Space was missing the authentication token
+ - Local testing fails for the same reason
+
+ The fix is simple: just add `HF_TOKEN` to your Space settings! πŸš€
debug_inference.py ADDED
@@ -0,0 +1,127 @@
+ #!/usr/bin/env python3
+ """
+ Debug script to test the HuggingFace Inference API directly
+ """
+
+ import os
+ import traceback
+
+ from huggingface_hub import HfApi, InferenceClient
+
+ def test_model(model_name, prompt="Hello, how are you?"):
+     """Test a specific model with the HuggingFace Inference API"""
+     print(f"\nπŸ” Testing model: {model_name}")
+     print("=" * 50)
+
+     try:
+         # Initialize client (it picks up HF_TOKEN from the environment if set)
+         client = InferenceClient(model=model_name)
+         print("βœ… Client initialized successfully")
+
+         # Test text generation with progressively simpler parameter sets
+         print(f"πŸ“ Testing prompt: '{prompt}'")
+
+         # Method 1: Full parameters (same as backend_service.py)
+         try:
+             print("\nπŸ”¬ Method 1: Full parameters")
+             response = client.text_generation(
+                 prompt=prompt,
+                 max_new_tokens=50,
+                 temperature=0.7,
+                 top_p=0.95,
+                 return_full_text=False,
+                 stop=["Human:", "System:"]
+             )
+             print(f"βœ… Success: {response}")
+             return True
+         except Exception as e:
+             print(f"❌ Method 1 failed: {e}")
+             print(f"Error type: {type(e).__name__}")
+
+         # Method 2: Minimal parameters
+         try:
+             print("\nπŸ”¬ Method 2: Minimal parameters")
+             response = client.text_generation(
+                 prompt=prompt,
+                 max_new_tokens=50,
+                 temperature=0.7,
+                 return_full_text=False
+             )
+             print(f"βœ… Success: {response}")
+             return True
+         except Exception as e:
+             print(f"❌ Method 2 failed: {e}")
+             print(f"Error type: {type(e).__name__}")
+
+         # Method 3: Basic parameters only
+         try:
+             print("\nπŸ”¬ Method 3: Basic parameters")
+             response = client.text_generation(
+                 prompt=prompt,
+                 max_new_tokens=30
+             )
+             print(f"βœ… Success: {response}")
+             return True
+         except Exception as e:
+             print(f"❌ Method 3 failed: {e}")
+             print(f"Error type: {type(e).__name__}")
+             print("Full traceback:")
+             traceback.print_exc()
+
+         return False
+
+     except Exception as e:
+         print(f"❌ Failed to initialize client: {e}")
+         print(f"Error type: {type(e).__name__}")
+         traceback.print_exc()
+         return False
+
+ def test_model_info(model_name):
+     """Check that the model's metadata can be fetched from the Hub"""
+     try:
+         print(f"\nπŸ“Š Getting model info for: {model_name}")
+         # Actually fetch the repo metadata; this raises if the model
+         # does not exist or is gated for the current credentials
+         info = HfApi().model_info(model_name)
+         print(f"βœ… Model appears to be accessible (pipeline: {info.pipeline_tag})")
+         return True
+     except Exception as e:
+         print(f"❌ Model info failed: {e}")
+         return False
+
+ if __name__ == "__main__":
+     # Report whether a HuggingFace token is available
+     hf_token = os.environ.get("HF_TOKEN")
+     if hf_token:
+         print(f"πŸ”‘ Using HF_TOKEN: {hf_token[:10]}...")
+     else:
+         print("⚠️ No HF_TOKEN found, using anonymous access")
+
+     # Test models
+     models_to_test = [
+         "unsloth/DeepSeek-R1-0528-Qwen3-8B-GGUF",  # Current problematic model
+         "microsoft/DialoGPT-medium",               # Known working model
+         "meta-llama/Llama-2-7b-chat-hf",           # Popular model
+         "HuggingFaceH4/zephyr-7b-beta"             # Another good model
+     ]
+
+     results = {}
+
+     for model in models_to_test:
+         print(f"\n{'='*60}")
+         test_result = test_model(model)
+         results[model] = test_result
+
+         # Also check that the model's metadata is reachable
+         test_model_info(model)
+
+         print(f"\nResult for {model}: {'βœ… WORKING' if test_result else '❌ FAILED'}")
+
+     print(f"\n{'='*60}")
+     print("SUMMARY:")
+     print("=" * 60)
+     for model, result in results.items():
+         status = "βœ… WORKING" if result else "❌ FAILED"
+         print(f"{model}: {status}")
test_free_alternatives.py ADDED
@@ -0,0 +1,95 @@
+ #!/usr/bin/env python3
+ """
+ Test hardcoded models that may not require authentication
+ """
+
+ import requests
+
+ def test_free_inference_alternatives():
+     """Test free inference alternatives that may work without authentication"""
+
+     print("πŸ” Testing inference alternatives that work without auth")
+     print("=" * 60)
+
+     # Test 1: Try some small models that might work without auth
+     free_models = [
+         "gpt2",
+         "distilgpt2",
+         "microsoft/DialoGPT-small"
+     ]
+
+     for model in free_models:
+         print(f"\nπŸ€– Testing {model}")
+         url = f"https://api-inference.huggingface.co/models/{model}"
+
+         payload = {
+             "inputs": "Hello, how are you today?",
+             "parameters": {
+                 "max_length": 50,
+                 "temperature": 0.7
+             }
+         }
+
+         try:
+             response = requests.post(url, json=payload, timeout=30)
+             print(f"Status: {response.status_code}")
+
+             if response.status_code == 200:
+                 result = response.json()
+                 print(f"βœ… Success: {result}")
+                 # Return the first model that responds successfully
+                 return model
+             elif response.status_code == 503:
+                 print("⏳ Model loading, might work later")
+             else:
+                 print(f"❌ Error: {response.text}")
+         except Exception as e:
+             print(f"❌ Exception: {e}")
+
+     return None
+
+ def test_alternative_apis():
+     """List other free or freemium APIs worth considering"""
+
+     print("\n" + "=" * 60)
+     print("TESTING ALTERNATIVE FREE APIs")
+     print("=" * 60)
+
+     # Note: these are just pointers; most require their own API keys
+     alternatives = [
+         "OpenAI GPT (requires key)",
+         "Anthropic Claude (requires key)",
+         "Google Gemini (requires key)",
+         "Local Ollama (if installed)",
+         "Groq (free tier available)"
+     ]
+
+     for alt in alternatives:
+         print(f"πŸ“ {alt}")
+
+     print("\nπŸ’‘ Recommendation: Get a free HuggingFace token from https://huggingface.co/settings/tokens")
+
+ if __name__ == "__main__":
+     working_model = test_free_inference_alternatives()
+     test_alternative_apis()
+
+     print("\n" + "=" * 60)
+     print("SOLUTION RECOMMENDATIONS")
+     print("=" * 60)
+
+     if working_model:
+         print(f"βœ… Found working model: {working_model}")
+         print("πŸ”§ You can update your backend to use this model")
+     else:
+         print("❌ No models work without authentication")
+
+     print("\n🎯 IMMEDIATE SOLUTIONS:")
+     print("1. Get a free HuggingFace token: https://huggingface.co/settings/tokens")
+     print("2. Set the HF_TOKEN environment variable in your HuggingFace Space")
+     print("3. Your Space might already have proper auth - the issue is local testing")
+     print("4. Use the deployed Space API instead of local testing")
+
+     print("\nπŸ” DEBUGGING STEPS:")
+     print("1. Check if your deployed Space has HF_TOKEN in Settings > Variables")
+     print("2. Test the deployed API directly (it should work)")
+     print("3. For local development, get your own HF token")
test_local_api.py ADDED
@@ -0,0 +1,44 @@
+ #!/usr/bin/env python3
+ """
+ Test script for the local API endpoint
+ """
+ import requests
+ import json
+
+ # Local API endpoint
+ API_URL = "http://localhost:8000/v1/chat/completions"
+
+ # Test payload with the correct model name
+ payload = {
+     "model": "unsloth/DeepSeek-R1-0528-Qwen3-8B-GGUF",
+     "messages": [
+         {"role": "system", "content": "You are a helpful assistant."},
+         {"role": "user", "content": "Hello, what can you do?"}
+     ],
+     "max_tokens": 64,
+     "temperature": 0.7
+ }
+
+ print("πŸ§ͺ Testing Local API...")
+ print(f"πŸ“‘ URL: {API_URL}")
+ print(f"πŸ“¦ Payload: {json.dumps(payload, indent=2)}")
+ print("-" * 50)
+
+ try:
+     response = requests.post(API_URL, json=payload, timeout=30)
+     print(f"βœ… Status: {response.status_code}")
+
+     if response.status_code == 200:
+         result = response.json()
+         print(f"πŸ€– Response: {json.dumps(result, indent=2)}")
+         # Extract the assistant message from the OpenAI-style response shape
+         if 'choices' in result and len(result['choices']) > 0:
+             print(f"πŸ’¬ AI Message: {result['choices'][0]['message']['content']}")
+     else:
+         print(f"❌ Error: {response.text}")
+
+ except requests.exceptions.ConnectionError:
+     print("❌ Connection failed - make sure the server is running locally")
+ except requests.exceptions.Timeout:
+     print("⏰ Request timed out")
+ except Exception as e:
+     print(f"❌ Error: {e}")
test_working_models.py ADDED
@@ -0,0 +1,122 @@
+ #!/usr/bin/env python3
+ """
+ Test different HuggingFace approaches to find a working method
+ """
+
+ import os
+ import requests
+
+ # HuggingFace token (read from the environment)
+ HF_TOKEN = os.environ.get("HF_TOKEN", "")
+
+ def test_inference_api_direct(model_name, prompt="Hello, how are you?"):
+     """Test using direct HTTP requests to the HuggingFace API"""
+     print(f"\n🌐 Testing direct HTTP API for: {model_name}")
+
+     # Only send an Authorization header when a token is actually set;
+     # an empty "Bearer" value would just trigger an auth error
+     headers = {"Content-Type": "application/json"}
+     if HF_TOKEN:
+         headers["Authorization"] = f"Bearer {HF_TOKEN}"
+
+     url = f"https://api-inference.huggingface.co/models/{model_name}"
+
+     payload = {
+         "inputs": prompt,
+         "parameters": {
+             "max_new_tokens": 50,
+             "temperature": 0.7,
+             "top_p": 0.95,
+             "do_sample": True
+         }
+     }
+
+     try:
+         response = requests.post(url, headers=headers, json=payload, timeout=30)
+         print(f"Status: {response.status_code}")
+
+         if response.status_code == 200:
+             result = response.json()
+             print(f"βœ… Success: {result}")
+             return True
+         else:
+             print(f"❌ Error: {response.text}")
+             return False
+     except Exception as e:
+         print(f"❌ Exception: {e}")
+         return False
+
+ def test_serverless_models():
+     """Test known working models that support serverless inference"""
+
+     # Models that typically work well with serverless inference
+     working_models = [
+         "microsoft/DialoGPT-medium",
+         "google/flan-t5-base",
+         "distilbert-base-uncased-finetuned-sst-2-english",
+         "gpt2",
+         "microsoft/DialoGPT-small",
+         "facebook/blenderbot-400M-distill"
+     ]
+
+     results = {}
+
+     for model in working_models:
+         results[model] = test_inference_api_direct(model)
+
+     return results
+
+ def test_chat_completion_models():
+     """Test models specifically for chat completion"""
+
+     chat_models = [
+         "microsoft/DialoGPT-medium",
+         "facebook/blenderbot-400M-distill",
+         "microsoft/DialoGPT-small"
+     ]
+
+     for model in chat_models:
+         print(f"\nπŸ’¬ Testing chat model: {model}")
+         test_inference_api_direct(model, "Human: Hello! How are you?\nAssistant:")
+
+ if __name__ == "__main__":
+     print("πŸ” HuggingFace Inference API Debug")
+     print("=" * 50)
+
+     if HF_TOKEN:
+         print(f"πŸ”‘ Using HF_TOKEN: {HF_TOKEN[:10]}...")
+     else:
+         print("⚠️ No HF_TOKEN - trying anonymous access")
+
+     # Test serverless models
+     print("\n" + "=" * 60)
+     print("TESTING SERVERLESS MODELS")
+     print("=" * 60)
+
+     results = test_serverless_models()
+
+     # Test chat completion models
+     print("\n" + "=" * 60)
+     print("TESTING CHAT MODELS")
+     print("=" * 60)
+
+     test_chat_completion_models()
+
+     # Summary
+     print("\n" + "=" * 60)
+     print("SUMMARY")
+     print("=" * 60)
+
+     working = [model for model, result in results.items() if result]
+
+     if working:
+         print("βœ… Working models:")
+         for model in working:
+             print(f" - {model}")
+         print(f"\n🎯 Recommended model to switch to: {working[0]}")
+     else:
+         print("❌ No models working - the API might be down, or it's an authentication issue")