ndc8 committed
Commit e6598e6 · Parent(s): 09c9042
Files changed (1):
  1. backend_service.py +40 -3
backend_service.py CHANGED
@@ -2,6 +2,15 @@
 FastAPI Backend AI Service converted from Gradio app
 Provides OpenAI-compatible chat completion endpoints
 """
+# Configure Hugging Face cache directory and authentication before any HF imports
+import os
+_CACHE_DIR = os.path.join(os.getcwd(), ".hf_cache")
+os.makedirs(_CACHE_DIR, exist_ok=True)
+# Set environment variables early to avoid default ~/.cache permission issues
+os.environ.setdefault("HF_HOME", _CACHE_DIR)
+os.environ.setdefault("TRANSFORMERS_CACHE", _CACHE_DIR)
+# Authentication token for private models
+hf_token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_TOKEN")
 
 import asyncio
 import logging
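
Note on this hunk: huggingface_hub and transformers resolve their cache paths when they are first imported, so the environment configuration has to run before any Hugging Face import. A minimal standalone sketch of the pattern (names here are illustrative, not the service's own):

# Cache configuration must precede the first Hugging Face import.
import os

cache_dir = os.path.join(os.getcwd(), ".hf_cache")
os.makedirs(cache_dir, exist_ok=True)
# setdefault keeps any value the operator already exported, so a deployment
# can still override the local default with e.g. HF_HOME=/data/hf
os.environ.setdefault("HF_HOME", cache_dir)
os.environ.setdefault("TRANSFORMERS_CACHE", cache_dir)

from transformers import AutoTokenizer  # import only after the env is configured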
@@ -32,6 +41,9 @@ except ImportError:
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
+# Authentication token for private models
+hf_token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_TOKEN")
+
 # Pydantic models for multimodal content
 class TextContent(BaseModel):
     type: str = Field(default="text", description="Content type")
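
Note: this is the second assignment of `hf_token`; the first hunk already adds an identical lookup at the top of the module. The duplication is harmless (both read the same environment variables), but one of the two could be dropped.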
@@ -164,21 +176,35 @@ async def lifespan(app: FastAPI):
     """Application lifespan manager for startup and shutdown events"""
     global inference_client, tokenizer, image_text_pipeline
 
+    logger.info(f"Using Hugging Face cache directory: {_CACHE_DIR}")
+    if not hf_token:
+        logger.info("No Hugging Face auth token provided; private models may fail to load.")
+
     # Startup
     logger.info("🚀 Starting AI Backend Service...")
     try:
         # Initialize HuggingFace Inference Client for text generation
-        inference_client = InferenceClient(model=current_model)
+        inference_client = InferenceClient(model=current_model, token=hf_token)
         logger.info(f"✅ Initialized inference client with model: {current_model}")
 
         # Initialize image-text-to-text pipeline
         if transformers_available and pipeline:
             try:
                 logger.info(f"🖼️ Initializing image captioning pipeline with model: {vision_model}")
-                image_text_pipeline = pipeline("image-to-text", model=vision_model)  # Use image-to-text task
+                image_text_pipeline = pipeline(
+                    "image-to-text",
+                    model=vision_model,
+                    use_auth_token=hf_token,
+                    cache_dir=_CACHE_DIR
+                )
                 logger.info("✅ Image captioning pipeline loaded successfully")
+            except PermissionError as pe:
+                logger.warning(f"⚠️ Permission error while loading image captioning pipeline: {pe}. Check cache directory permissions: {_CACHE_DIR}")
+                image_text_pipeline = None
             except Exception as e:
                 logger.warning(f"⚠️ Could not load image captioning pipeline: {e}")
+                if "not a local folder and is not a valid model identifier" in str(e):
+                    logger.warning("Model identifier invalid; ensure model name is correct or you have access.")
                 image_text_pipeline = None
         else:
             logger.warning("⚠️ Transformers not available, image processing disabled")
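
Caveat worth verifying against the pinned transformers version: recent releases deprecate `use_auth_token` in favor of `token` (this also applies to the `AutoTokenizer.from_pretrained` call in the next hunk), and `pipeline()` may not accept `cache_dir` as a direct keyword, in which case it is normally forwarded through `model_kwargs`. Under those assumptions, a sketch of the equivalent call (reusing the module's `vision_model`, `hf_token`, and `_CACHE_DIR`):

# Sketch for newer transformers versions, not the committed code:
image_text_pipeline = pipeline(
    "image-to-text",
    model=vision_model,
    token=hf_token,                          # replaces deprecated use_auth_token
    model_kwargs={"cache_dir": _CACHE_DIR},  # forwarded to from_pretrained
)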
@@ -187,13 +213,24 @@ async def lifespan(app: FastAPI):
         # Initialize tokenizer for better text handling
         if transformers_available and AutoTokenizer:
             try:
-                tokenizer = AutoTokenizer.from_pretrained(current_model)  # type: ignore
+                tokenizer = AutoTokenizer.from_pretrained(
+                    current_model,
+                    use_auth_token=hf_token,
+                    cache_dir=_CACHE_DIR
+                )  # type: ignore
                 logger.info("✅ Tokenizer loaded successfully")
+            except PermissionError as pe:
+                logger.warning(f"⚠️ Permission error while loading tokenizer: {pe}. Check cache directory permissions: {_CACHE_DIR}")
+                tokenizer = None
+            except ValueError as ve:
+                logger.warning(f"⚠️ Could not load tokenizer: {ve}. If this is a private model, set HF_TOKEN env var to access it.")
+                tokenizer = None
             except Exception as e:
                 logger.warning(f"⚠️ Could not load tokenizer: {e}")
                 tokenizer = None
         else:
             logger.info("⚠️ Tokenizer initialization skipped")
+            tokenizer = None
 
     except Exception as e:
         logger.error(f"❌ Failed to initialize inference client: {e}")
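
To exercise the new startup path end to end, a small launcher sketch (it assumes `backend_service` exports the FastAPI instance as `app` and that uvicorn is installed; the token value is a placeholder):

# Run the service with a token and a writable cache, then check the startup
# log for "Using Hugging Face cache directory" and the ✅/⚠️ lines.
import os

os.environ["HF_TOKEN"] = "hf_..."  # placeholder; needed only for private models

import uvicorn
from backend_service import app  # import after the env is set

uvicorn.run(app, host="0.0.0.0", port=8000)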
 