ndc8 committed
Commit 8d962fd · Parent: 1f4eabe

Set gemma-3n-E4B-it-GGUF as main model for all text generation endpoints

Files changed (2):
  1. backend_service.py  +2 -2
  2. requirements.txt  +2 -1
backend_service.py CHANGED

@@ -70,7 +70,7 @@ class ChatMessage(BaseModel):
         return v
 
 class ChatCompletionRequest(BaseModel):
-    model: str = Field(default="zephyr-7b-beta", description="The model to use for completion")
+    model: str = Field(default="gemma-3n-E4B-it-GGUF", description="The model to use for completion")
     messages: List[ChatMessage] = Field(..., description="List of messages in the conversation")
     max_tokens: Optional[int] = Field(default=512, ge=1, le=2048, description="Maximum tokens to generate")
     temperature: Optional[float] = Field(default=0.7, ge=0.0, le=2.0, description="Sampling temperature")
@@ -119,7 +119,7 @@ class CompletionRequest(BaseModel):
 # Global variables for model management
 inference_client: Optional[InferenceClient] = None
 image_text_pipeline = None  # type: ignore
-current_model = "microsoft/DialoGPT-medium"
+current_model = "gemma-3n-E4B-it-GGUF"
 vision_model = "Salesforce/blip-image-captioning-base"  # Working model for image captioning
 tokenizer = None
 
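For context, a minimal, self-contained sketch of the behavior this hunk changes. It reuses the Field definitions shown in the diff above but elides the validator and the rest of backend_service.py; the point is that a request which omits "model" now resolves to the GGUF model:

# Sketch only -- mirrors the fields from the hunk above, not the full backend_service.py.
from typing import List, Optional
from pydantic import BaseModel, Field

class ChatMessage(BaseModel):
    role: str
    content: str

class ChatCompletionRequest(BaseModel):
    model: str = Field(default="gemma-3n-E4B-it-GGUF", description="The model to use for completion")
    messages: List[ChatMessage] = Field(..., description="List of messages in the conversation")
    max_tokens: Optional[int] = Field(default=512, ge=1, le=2048, description="Maximum tokens to generate")
    temperature: Optional[float] = Field(default=0.7, ge=0.0, le=2.0, description="Sampling temperature")

req = ChatCompletionRequest(messages=[ChatMessage(role="user", content="Hello")])
print(req.model)  # -> "gemma-3n-E4B-it-GGUF"

The same default change applies to the module-level current_model, so endpoints that never receive an explicit model name also fall through to the GGUF model.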
requirements.txt CHANGED

@@ -3,8 +3,9 @@ huggingface_hub>=0.34.0
 transformers>=4.36.0
 torch>=2.0.0
 Pillow>=10.0.0
-requests>=2.31.0
 accelerate>=0.24.0
+requests>=2.31.0
+# NOTE: GGUF models like 'gemma-3n-E4B-it-GGUF' must be downloaded manually or referenced from HuggingFace, not pip-installed.
 fastapi>=0.100.0
 uvicorn[standard]>=0.23.0
 pydantic>=2.0.0
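As the new comment notes, the GGUF weights are not a pip dependency. One way to fetch them at startup is hf_hub_download from the already-pinned huggingface_hub; this is a hypothetical sketch, since the commit does not say which Hub repository hosts the file, so repo_id and filename below are placeholders:

# Hypothetical sketch: download the GGUF weights with huggingface_hub (>=0.34.0,
# already in requirements.txt). repo_id and filename are placeholders, not taken
# from this commit -- substitute the actual Hub repo and quantization you use.
from huggingface_hub import hf_hub_download

gguf_path = hf_hub_download(
    repo_id="your-org/gemma-3n-E4B-it-GGUF",  # placeholder repo id
    filename="gemma-3n-E4B-it-Q4_K_M.gguf",   # placeholder quantized file
)
print(gguf_path)  # local cache path, ready to hand to a GGUF runtime such as llama.cpp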