ndc8 committed on
Commit
c6f7b75
·
1 Parent(s): 88c3c6c

Update model to unsloth/DeepSeek-R1-0528-Qwen3-8B-GGUF

Browse files

- Changed current_model in backend_service.py
- Updated ChatCompletionRequest default model
- Updated all test files to use the new model
- Updated requirements.txt comment
- DeepSeek R1-0528 is a more modern and capable model than the previous Gemma model

backend_service.py CHANGED
@@ -75,7 +75,7 @@ class ChatMessage(BaseModel):
75
  return v
76
 
77
  class ChatCompletionRequest(BaseModel):
78
- model: str = Field(default="gemma-3n-E4B-it-GGUF", description="The model to use for completion")
79
  messages: List[ChatMessage] = Field(..., description="List of messages in the conversation")
80
  max_tokens: Optional[int] = Field(default=512, ge=1, le=2048, description="Maximum tokens to generate")
81
  temperature: Optional[float] = Field(default=0.7, ge=0.0, le=2.0, description="Sampling temperature")
@@ -124,7 +124,7 @@ class CompletionRequest(BaseModel):
124
  # Global variables for model management
125
  inference_client: Optional[InferenceClient] = None
126
  image_text_pipeline = None # type: ignore
127
- current_model = "gemma-3n-E4B-it-GGUF"
128
  vision_model = "Salesforce/blip-image-captioning-base" # Working model for image captioning
129
  tokenizer = None
130
 
 
75
  return v
76
 
77
  class ChatCompletionRequest(BaseModel):
78
+ model: str = Field(default="unsloth/DeepSeek-R1-0528-Qwen3-8B-GGUF", description="The model to use for completion")
79
  messages: List[ChatMessage] = Field(..., description="List of messages in the conversation")
80
  max_tokens: Optional[int] = Field(default=512, ge=1, le=2048, description="Maximum tokens to generate")
81
  temperature: Optional[float] = Field(default=0.7, ge=0.0, le=2.0, description="Sampling temperature")
 
124
  # Global variables for model management
125
  inference_client: Optional[InferenceClient] = None
126
  image_text_pipeline = None # type: ignore
127
+ current_model = "unsloth/DeepSeek-R1-0528-Qwen3-8B-GGUF"
128
  vision_model = "Salesforce/blip-image-captioning-base" # Working model for image captioning
129
  tokenizer = None
130
 
requirements.txt CHANGED
@@ -5,7 +5,7 @@ torch>=2.0.0
5
  Pillow>=10.0.0
6
  accelerate>=0.24.0
7
  requests>=2.31.0
8
- # NOTE: GGUF models like 'gemma-3n-E4B-it-GGUF' must be downloaded manually or referenced from HuggingFace, not pip-installed.
9
  fastapi>=0.100.0
10
  uvicorn[standard]>=0.23.0
11
  pydantic>=2.0.0
 
5
  Pillow>=10.0.0
6
  accelerate>=0.24.0
7
  requests>=2.31.0
8
+ # NOTE: GGUF models like 'unsloth/DeepSeek-R1-0528-Qwen3-8B-GGUF' must be downloaded manually or referenced from HuggingFace, not pip-installed.
9
  fastapi>=0.100.0
10
  uvicorn[standard]>=0.23.0
11
  pydantic>=2.0.0
test_hf_api.py CHANGED
@@ -5,7 +5,7 @@ API_URL = "https://cong182-firstai.hf.space/v1/chat/completions"
5
 
6
  # Example payload for OpenAI-compatible chat completion
7
  payload = {
8
- "model": "gemma-3n-E4B-it-GGUF",
9
  "messages": [
10
  {"role": "system", "content": "You are a helpful assistant."},
11
  {"role": "user", "content": "Hello, who won the world cup in 2018?"}
 
5
 
6
  # Example payload for OpenAI-compatible chat completion
7
  payload = {
8
+ "model": "unsloth/DeepSeek-R1-0528-Qwen3-8B-GGUF",
9
  "messages": [
10
  {"role": "system", "content": "You are a helpful assistant."},
11
  {"role": "user", "content": "Hello, who won the world cup in 2018?"}
test_multimodal.py CHANGED
@@ -42,7 +42,7 @@ def test_multimodal():
42
  print("🖼️ Testing multimodal chat completion...")
43
 
44
  payload = {
45
- "model": "unsloth/gemma-3n-E4B-it-GGUF",
46
  "messages": [
47
  {
48
  "role": "user",
 
42
  print("🖼️ Testing multimodal chat completion...")
43
 
44
  payload = {
45
+ "model": "unsloth/DeepSeek-R1-0528-Qwen3-8B-GGUF",
46
  "messages": [
47
  {
48
  "role": "user",
test_pipeline.py CHANGED
@@ -19,7 +19,7 @@ def test_pipeline_availability():
19
  models_to_try = [
20
  "Salesforce/blip-image-captioning-base", # More common model
21
  "microsoft/git-base-textcaps", # Alternative model
22
- "unsloth/gemma-3n-E4B-it-GGUF" # Original model
23
  ]
24
 
25
  for model_name in models_to_try:
 
19
  models_to_try = [
20
  "Salesforce/blip-image-captioning-base", # More common model
21
  "microsoft/git-base-textcaps", # Alternative model
22
+ "unsloth/DeepSeek-R1-0528-Qwen3-8B-GGUF" # Updated model
23
  ]
24
 
25
  for model_name in models_to_try: