ndc8 committed
Commit e6598e6 · Parent: 09c9042
fix
backend_service.py +40 -3
backend_service.py
CHANGED
@@ -2,6 +2,15 @@
 FastAPI Backend AI Service converted from Gradio app
 Provides OpenAI-compatible chat completion endpoints
 """
+# Configure Hugging Face cache directory and authentication before any HF imports
+import os
+_CACHE_DIR = os.path.join(os.getcwd(), ".hf_cache")
+os.makedirs(_CACHE_DIR, exist_ok=True)
+# Set environment variables early to avoid default ~/.cache permission issues
+os.environ.setdefault("HF_HOME", _CACHE_DIR)
+os.environ.setdefault("TRANSFORMERS_CACHE", _CACHE_DIR)
+# Authentication token for private models
+hf_token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_TOKEN")
 
 import asyncio
 import logging
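The ordering here matters: huggingface_hub and transformers resolve their cache paths when they are first imported, so the override has to land before any HF import. A minimal sketch, not part of the commit, that checks the override took effect:

# Sketch, not from the commit: HF_HOME must be set before the first
# huggingface_hub/transformers import, since cache paths are resolved at
# import time. Newer transformers releases also warn that TRANSFORMERS_CACHE
# is deprecated and derive the cache from HF_HOME instead.
import os

os.environ.setdefault("HF_HOME", os.path.join(os.getcwd(), ".hf_cache"))

from huggingface_hub import constants  # imported only after HF_HOME is set

print(constants.HF_HOME)  # prints the .hf_cache override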
@@ -32,6 +41,9 @@ except ImportError:
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
+# Authentication token for private models
+hf_token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_TOKEN")
+
 # Pydantic models for multimodal content
 class TextContent(BaseModel):
     type: str = Field(default="text", description="Content type")
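Note that this hunk repeats the exact two-variable lookup already added at the top of the module. A small helper could keep the resolution in one place; a sketch, not part of the commit:

import os

def resolve_hf_token() -> str | None:
    # First variable that is set and non-empty wins; HUGGINGFACE_TOKEN is
    # the legacy fallback name used in the diff above.
    for var in ("HF_TOKEN", "HUGGINGFACE_TOKEN"):
        value = os.getenv(var)
        if value:
            return value
    return None

hf_token = resolve_hf_token()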
@@ -164,21 +176,35 @@ async def lifespan(app: FastAPI):
     """Application lifespan manager for startup and shutdown events"""
     global inference_client, tokenizer, image_text_pipeline
 
+    logger.info(f"Using Hugging Face cache directory: {_CACHE_DIR}")
+    if not hf_token:
+        logger.info("No Hugging Face auth token provided; private models may fail to load.")
+
     # Startup
     logger.info("🚀 Starting AI Backend Service...")
     try:
         # Initialize HuggingFace Inference Client for text generation
-        inference_client = InferenceClient(model=current_model)
+        inference_client = InferenceClient(model=current_model, token=hf_token)
         logger.info(f"✅ Initialized inference client with model: {current_model}")
 
         # Initialize image-text-to-text pipeline
         if transformers_available and pipeline:
             try:
                 logger.info(f"🖼️ Initializing image captioning pipeline with model: {vision_model}")
-                image_text_pipeline = pipeline(
+                image_text_pipeline = pipeline(
+                    "image-to-text",
+                    model=vision_model,
+                    use_auth_token=hf_token,
+                    cache_dir=_CACHE_DIR
+                )
                 logger.info("✅ Image captioning pipeline loaded successfully")
+            except PermissionError as pe:
+                logger.warning(f"⚠️ Permission error while loading image captioning pipeline: {pe}. Check cache directory permissions: {_CACHE_DIR}")
+                image_text_pipeline = None
             except Exception as e:
                 logger.warning(f"⚠️ Could not load image captioning pipeline: {e}")
+                if "not a local folder and is not a valid model identifier" in str(e):
+                    logger.warning("Model identifier invalid; ensure model name is correct or you have access.")
                 image_text_pipeline = None
         else:
             logger.warning("⚠️ Transformers not available, image processing disabled")
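The committed call passes use_auth_token= and cache_dir= straight to pipeline(). In recent transformers releases use_auth_token is deprecated in favor of token, and cache_dir is normally routed through model_kwargs; an equivalent call under the current argument names might look like this (a sketch, with placeholder values standing in for the diff's names):

# Sketch, not the committed code: the same load under current transformers
# argument names.
from transformers import pipeline

vision_model = "Salesforce/blip-image-captioning-base"  # placeholder model id
hf_token = None          # real token only needed for gated/private models
_CACHE_DIR = ".hf_cache"

captioner = pipeline(
    "image-to-text",
    model=vision_model,
    token=hf_token,                           # replaces deprecated use_auth_token
    model_kwargs={"cache_dir": _CACHE_DIR},   # forwarded to from_pretrained
)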
@@ -187,13 +213,24 @@ async def lifespan(app: FastAPI):
         # Initialize tokenizer for better text handling
         if transformers_available and AutoTokenizer:
             try:
-                tokenizer = AutoTokenizer.from_pretrained(
+                tokenizer = AutoTokenizer.from_pretrained(
+                    current_model,
+                    use_auth_token=hf_token,
+                    cache_dir=_CACHE_DIR
+                )  # type: ignore
                 logger.info("✅ Tokenizer loaded successfully")
+            except PermissionError as pe:
+                logger.warning(f"⚠️ Permission error while loading tokenizer: {pe}. Check cache directory permissions: {_CACHE_DIR}")
+                tokenizer = None
+            except ValueError as ve:
+                logger.warning(f"⚠️ Could not load tokenizer: {ve}. If this is a private model, set HF_TOKEN env var to access it.")
+                tokenizer = None
             except Exception as e:
                 logger.warning(f"⚠️ Could not load tokenizer: {e}")
                 tokenizer = None
         else:
             logger.info("⚠️ Tokenizer initialization skipped")
+            tokenizer = None
 
     except Exception as e:
         logger.error(f"❌ Failed to initialize inference client: {e}")
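Every failure path now leaves tokenizer as None, so downstream code must tolerate a missing tokenizer. A hypothetical helper (count_tokens does not appear in the commit) illustrating that degradation:

# Hypothetical helper, not in the commit: degrade to a rough whitespace
# estimate when the tokenizer failed to load and is None.
def count_tokens(text: str, tokenizer=None) -> int:
    if tokenizer is not None:
        return len(tokenizer.encode(text))
    return len(text.split())  # crude approximation without a tokenizer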