ndc8 committed on
Commit
04d695c
·
1 Parent(s): 2cd680b

Fix: Update to valid HuggingFace model and fix deprecation warnings

Browse files

- Changed model from 'gemma-3n-E4B-it-GGUF' to 'microsoft/DialoGPT-medium'
- Fixed deprecated 'use_auth_token' parameter to 'token'
- Updated test file to use the correct model name

Files changed (2) hide show
  1. backend_service.py +3 -3
  2. test_hf_api.py +23 -0
backend_service.py CHANGED
@@ -75,7 +75,7 @@ class ChatMessage(BaseModel):
75
  return v
76
 
77
  class ChatCompletionRequest(BaseModel):
78
- model: str = Field(default="gemma-3n-E4B-it-GGUF", description="The model to use for completion")
79
  messages: List[ChatMessage] = Field(..., description="List of messages in the conversation")
80
  max_tokens: Optional[int] = Field(default=512, ge=1, le=2048, description="Maximum tokens to generate")
81
  temperature: Optional[float] = Field(default=0.7, ge=0.0, le=2.0, description="Sampling temperature")
@@ -124,7 +124,7 @@ class CompletionRequest(BaseModel):
124
  # Global variables for model management
125
  inference_client: Optional[InferenceClient] = None
126
  image_text_pipeline = None # type: ignore
127
- current_model = "gemma-3n-E4B-it-GGUF"
128
  vision_model = "Salesforce/blip-image-captioning-base" # Working model for image captioning
129
  tokenizer = None
130
 
@@ -198,7 +198,7 @@ async def lifespan(app: FastAPI):
198
  if hf_token:
199
  tokenizer = AutoTokenizer.from_pretrained(
200
  current_model,
201
- use_auth_token=hf_token
202
  ) # type: ignore
203
  else:
204
  tokenizer = AutoTokenizer.from_pretrained(
 
75
  return v
76
 
77
  class ChatCompletionRequest(BaseModel):
78
+ model: str = Field(default="microsoft/DialoGPT-medium", description="The model to use for completion")
79
  messages: List[ChatMessage] = Field(..., description="List of messages in the conversation")
80
  max_tokens: Optional[int] = Field(default=512, ge=1, le=2048, description="Maximum tokens to generate")
81
  temperature: Optional[float] = Field(default=0.7, ge=0.0, le=2.0, description="Sampling temperature")
 
124
  # Global variables for model management
125
  inference_client: Optional[InferenceClient] = None
126
  image_text_pipeline = None # type: ignore
127
+ current_model = "microsoft/DialoGPT-medium" # Valid HuggingFace model for chat
128
  vision_model = "Salesforce/blip-image-captioning-base" # Working model for image captioning
129
  tokenizer = None
130
 
 
198
  if hf_token:
199
  tokenizer = AutoTokenizer.from_pretrained(
200
  current_model,
201
+ token=hf_token
202
  ) # type: ignore
203
  else:
204
  tokenizer = AutoTokenizer.from_pretrained(
test_hf_api.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Smoke test for the Hugging Face Space OpenAI-compatible chat endpoint.

Sends one chat-completion request to the Space and prints either the HTTP
status plus JSON body, or the error that prevented a successful response.
"""
import requests

# Hugging Face Space API endpoint (OpenAI-compatible chat completions route).
API_URL = "https://cong182-firstai.hf.space/v1/chat/completions"

# Example payload for OpenAI-compatible chat completion.
payload = {
    "model": "microsoft/DialoGPT-medium",
    "messages": [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello, who won the world cup in 2018?"},
    ],
    "max_tokens": 64,
    "temperature": 0.7,
}


def main() -> None:
    """Perform a single request against API_URL and report the outcome."""
    try:
        response = requests.post(API_URL, json=payload, timeout=30)
        # Turn 4xx/5xx responses into exceptions so they reach the handler.
        response.raise_for_status()
        print("Status:", response.status_code)
        print("Response:", response.json())
    except Exception as e:
        # Best-effort smoke test: report any failure (connection error,
        # timeout, bad status, invalid JSON) instead of crashing.
        print("Error during API call:", e)


# Guard the network call so importing this module has no side effects.
if __name__ == "__main__":
    main()