ndc8 committed
Commit 994c0b4 · 1 Parent(s): 4b4e9ed
Files changed (3)
  1. backend_service.py +12 -2
  2. requirements.txt +3 -0
  3. verify_config.py +40 -0
backend_service.py CHANGED

@@ -90,7 +90,7 @@ class ChatMessage(BaseModel):
         return v
 
 class ChatCompletionRequest(BaseModel):
-    model: str = Field(default_factory=lambda: os.environ.get("AI_MODEL", "google/gemma-3n-E4B-it"), description="The model to use for completion")
+    model: str = Field(default_factory=lambda: "google/gemma-3n-E4B-it", description="The model to use for completion")
     messages: List[ChatMessage] = Field(..., description="List of messages in the conversation")
     max_tokens: Optional[int] = Field(default=512, ge=1, le=2048, description="Maximum tokens to generate")
     temperature: Optional[float] = Field(default=0.7, ge=0.0, le=2.0, description="Sampling temperature")

@@ -139,7 +139,14 @@ class CompletionRequest(BaseModel):
 
 
 # Model can be configured via environment variable - defaults to Gemma 3n (transformers format)
-current_model = os.environ.get("AI_MODEL", "google/gemma-3n-E4B-it")
+# Force the correct model for Hugging Face Spaces deployment
+ai_model_env = os.environ.get("AI_MODEL", "google/gemma-3n-E4B-it")
+# Override GGUF models to use transformers-compatible version
+if "GGUF" in ai_model_env:
+    current_model = "google/gemma-3n-E4B-it"
+    print(f"πŸ”„ Overriding GGUF model {ai_model_env} with transformers-compatible model: {current_model}")
+else:
+    current_model = ai_model_env
 vision_model = os.environ.get("VISION_MODEL", "Salesforce/blip-image-captioning-base")
 
 # Transformers model support

@@ -194,11 +201,13 @@ async def lifespan(app: FastAPI):
     """Application lifespan manager for startup and shutdown events"""
     global processor, model, image_text_pipeline, current_model
     logger.info("πŸš€ Starting AI Backend Service (Hugging Face Spaces mode)...")
+    logger.info(f"πŸ”§ Using model: {current_model}")
     try:
         logger.info(f"πŸ“₯ Loading model with transformers: {current_model}")
 
         # For Gemma 3n models, use the specific classes
         if "gemma-3n" in current_model.lower():
+            logger.info("πŸ” Detected Gemma 3n model - using specialized classes")
             processor = AutoProcessor.from_pretrained(current_model)
             model = Gemma3nForConditionalGeneration.from_pretrained(
                 current_model,

@@ -208,6 +217,7 @@ async def lifespan(app: FastAPI):
             ).eval()
         else:
             # Fallback for other models
+            logger.info("πŸ” Using standard transformers classes")
             processor = AutoTokenizer.from_pretrained(current_model)
             model = AutoModelForCausalLM.from_pretrained(
                 current_model,
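Note: the override above keys on the literal substring "GGUF" in AI_MODEL, so llama.cpp-style GGUF repos are silently redirected to the transformers-format checkpoint. A minimal sketch of that behaviour, pulled out as a standalone helper for illustration (the name resolve_model and the example repo id are hypothetical, not part of this commit):

import os

def resolve_model(env_value: str, fallback: str = "google/gemma-3n-E4B-it") -> str:
    # Mirror the backend_service.py logic: GGUF repos cannot be loaded with
    # transformers, so swap them for the transformers-format checkpoint.
    return fallback if "GGUF" in env_value else env_value

# An illustrative GGUF repo id gets redirected; a plain transformers id passes through.
assert resolve_model("some-org/gemma-3n-E4B-it-GGUF") == "google/gemma-3n-E4B-it"
assert resolve_model("google/gemma-3n-E4B-it") == "google/gemma-3n-E4B-it"
print(resolve_model(os.environ.get("AI_MODEL", "google/gemma-3n-E4B-it")))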
requirements.txt CHANGED

@@ -17,5 +17,8 @@ sentencepiece>=0.2.0
 tokenizers
 regex
 
+# Required for Gemma 3n vision components
+timm
+
 # Optional: gradio for demo UI
 # gradio
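As the commit comment notes, timm backs the Gemma 3n vision components in transformers; without it, loading the multimodal checkpoint typically fails with an import error deep inside from_pretrained. A small sketch of a fail-fast startup check (an assumption about where such a check might live, not part of the commit):

import importlib.util

# Fail fast with a clear message instead of a late ImportError during model load.
if importlib.util.find_spec("timm") is None:
    raise RuntimeError("timm is not installed; install it via requirements.txt before loading Gemma 3n")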
verify_config.py ADDED

@@ -0,0 +1,40 @@
+#!/usr/bin/env python3
+"""
+Verification script to show current model configuration
+"""
+import os
+
+def show_model_config():
+    """Show what model will be used"""
+    print("πŸ” Model Configuration Analysis")
+    print("=" * 50)
+
+    # Check environment variable
+    ai_model_env = os.environ.get("AI_MODEL", "google/gemma-3n-E4B-it")
+    print(f"πŸ“ Environment variable AI_MODEL: {ai_model_env}")
+
+    # Apply override logic
+    if "GGUF" in ai_model_env:
+        current_model = "google/gemma-3n-E4B-it"
+        print(f"πŸ”„ OVERRIDE: GGUF model detected, using: {current_model}")
+        print(f"   Original: {ai_model_env}")
+        print(f"   Fixed to: {current_model}")
+    else:
+        current_model = ai_model_env
+        print(f"βœ… Using: {current_model}")
+
+    print(f"\n🎯 Final model that will be loaded: {current_model}")
+
+    # Check if it's Gemma 3n
+    is_gemma_3n = "gemma-3n" in current_model.lower()
+    print(f"πŸ€– Is Gemma 3n model: {is_gemma_3n}")
+
+    if is_gemma_3n:
+        print("πŸ“š Will use: AutoProcessor + Gemma3nForConditionalGeneration")
+    else:
+        print("πŸ“š Will use: AutoTokenizer + AutoModelForCausalLM")
+
+    return current_model
+
+if __name__ == "__main__":
+    show_model_config()
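verify_config.py only reads AI_MODEL and prints the resolution, so the override can be exercised without starting the service. A usage sketch (the GGUF repo id is illustrative and hypothetical; assumes verify_config.py is on the import path):

import os

os.environ["AI_MODEL"] = "some-org/gemma-3n-E4B-it-GGUF"  # hypothetical GGUF repo id
from verify_config import show_model_config

resolved = show_model_config()
print(resolved)  # google/gemma-3n-E4B-it, since the GGUF id is overridden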