Spaces:
Sleeping
Sleeping
yonnel
committed on
Commit
·
b8ca8ae
1
Parent(s):
14e32e0
Enhance environment configuration; implement lazy initialization for vector updater and improve error handling in imports
Browse files- .env.example +1 -1
- app/main.py +2 -2
- app/routers/admin.py +20 -5
- app/services/vector_storage.py +6 -2
- app/services/vector_updater.py +14 -3
- app/settings.py +33 -1
- requirements.txt +1 -0
.env.example
CHANGED
|
@@ -14,7 +14,7 @@ ENV=dev
|
|
| 14 |
LOG_LEVEL=INFO
|
| 15 |
|
| 16 |
# Remove adult content from TMDB results
|
| 17 |
-
FILTER_ADULT_CONTENT=true
|
| 18 |
|
| 19 |
# Hugging Face configuration
|
| 20 |
HF_TOKEN=your_hf_token_here
|
|
|
|
| 14 |
LOG_LEVEL=INFO
|
| 15 |
|
| 16 |
# Remove adult content from TMDB results
|
| 17 |
+
FILTER_ADULT_CONTENT=true
|
| 18 |
|
| 19 |
# Hugging Face configuration
|
| 20 |
HF_TOKEN=your_hf_token_here
|
app/main.py
CHANGED
|
@@ -61,8 +61,6 @@ movie_metadata = None
|
|
| 61 |
|
| 62 |
def load_data():
|
| 63 |
"""Load FAISS index, vectors, and metadata on startup"""
|
| 64 |
-
global vectors, id_map, faiss_index, movie_metadata
|
| 65 |
-
|
| 66 |
try:
|
| 67 |
# Load vectors
|
| 68 |
vectors = np.load("app/data/movies.npy")
|
|
@@ -82,6 +80,8 @@ def load_data():
|
|
| 82 |
movie_metadata = json.load(f)
|
| 83 |
logger.info(f"Loaded metadata for {len(movie_metadata)} movies")
|
| 84 |
|
|
|
|
|
|
|
| 85 |
except Exception as e:
|
| 86 |
logger.error(f"Failed to load data: {e}")
|
| 87 |
raise
|
|
|
|
| 61 |
|
| 62 |
def load_data():
|
| 63 |
"""Load FAISS index, vectors, and metadata on startup"""
|
|
|
|
|
|
|
| 64 |
try:
|
| 65 |
# Load vectors
|
| 66 |
vectors = np.load("app/data/movies.npy")
|
|
|
|
| 80 |
movie_metadata = json.load(f)
|
| 81 |
logger.info(f"Loaded metadata for {len(movie_metadata)} movies")
|
| 82 |
|
| 83 |
+
return vectors, id_map, faiss_index, movie_metadata
|
| 84 |
+
|
| 85 |
except Exception as e:
|
| 86 |
logger.error(f"Failed to load data: {e}")
|
| 87 |
raise
|
app/routers/admin.py
CHANGED
|
@@ -10,14 +10,27 @@ from typing import Optional
|
|
| 10 |
try:
|
| 11 |
from ..services.vector_updater import VectorUpdater
|
| 12 |
except ImportError:
|
| 13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
|
| 15 |
logger = logging.getLogger(__name__)
|
| 16 |
router = APIRouter(prefix="/admin", tags=["admin"])
|
| 17 |
security = HTTPBearer()
|
| 18 |
|
| 19 |
-
# Instance globale du updater
|
| 20 |
-
vector_updater =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
|
| 22 |
def verify_admin_token(credentials: HTTPAuthorizationCredentials = Depends(security)):
|
| 23 |
"""Vérification du token admin"""
|
|
@@ -285,11 +298,12 @@ async def admin_dashboard():
|
|
| 285 |
@router.get("/status")
|
| 286 |
async def get_status(token: str = Depends(verify_admin_token)):
|
| 287 |
"""Obtenir le statut du système"""
|
| 288 |
-
return
|
| 289 |
|
| 290 |
@router.post("/update-vectors")
|
| 291 |
async def update_vectors(background_tasks: BackgroundTasks, token: str = Depends(verify_admin_token)):
|
| 292 |
"""Déclencher une mise à jour si nécessaire"""
|
|
|
|
| 293 |
if vector_updater.is_updating:
|
| 294 |
return {"success": False, "message": "Une mise à jour est déjà en cours"}
|
| 295 |
|
|
@@ -301,6 +315,7 @@ async def update_vectors(background_tasks: BackgroundTasks, token: str = Depends
|
|
| 301 |
@router.post("/force-update-vectors")
|
| 302 |
async def force_update_vectors(background_tasks: BackgroundTasks, token: str = Depends(verify_admin_token)):
|
| 303 |
"""Forcer la mise à jour des vecteurs"""
|
|
|
|
| 304 |
if vector_updater.is_updating:
|
| 305 |
return {"success": False, "message": "Une mise à jour est déjà en cours"}
|
| 306 |
|
|
@@ -313,7 +328,7 @@ async def force_update_vectors(background_tasks: BackgroundTasks, token: str = D
|
|
| 313 |
async def get_logs(token: str = Depends(verify_admin_token)):
|
| 314 |
"""Obtenir les logs de mise à jour"""
|
| 315 |
try:
|
| 316 |
-
logs =
|
| 317 |
return {"logs": logs}
|
| 318 |
except Exception as e:
|
| 319 |
return {"logs": [f"Erreur de lecture des logs: {e}"]}
|
|
|
|
| 10 |
try:
|
| 11 |
from ..services.vector_updater import VectorUpdater
|
| 12 |
except ImportError:
|
| 13 |
+
try:
|
| 14 |
+
from app.services.vector_updater import VectorUpdater
|
| 15 |
+
except ImportError:
|
| 16 |
+
# Import direct pour quand l'application est lancée depuis le répertoire racine
|
| 17 |
+
import sys
|
| 18 |
+
sys.path.append(os.path.dirname(os.path.dirname(__file__)))
|
| 19 |
+
from services.vector_updater import VectorUpdater
|
| 20 |
|
| 21 |
logger = logging.getLogger(__name__)
|
| 22 |
router = APIRouter(prefix="/admin", tags=["admin"])
|
| 23 |
security = HTTPBearer()
|
| 24 |
|
| 25 |
+
# Instance globale du updater - créée de manière paresseuse pour éviter les erreurs d'import
|
| 26 |
+
vector_updater = None
|
| 27 |
+
|
| 28 |
+
def get_vector_updater():
|
| 29 |
+
"""Get vector updater instance (lazy initialization)"""
|
| 30 |
+
global vector_updater
|
| 31 |
+
if vector_updater is None:
|
| 32 |
+
vector_updater = VectorUpdater()
|
| 33 |
+
return vector_updater
|
| 34 |
|
| 35 |
def verify_admin_token(credentials: HTTPAuthorizationCredentials = Depends(security)):
|
| 36 |
"""Vérification du token admin"""
|
|
|
|
| 298 |
@router.get("/status")
|
| 299 |
async def get_status(token: str = Depends(verify_admin_token)):
|
| 300 |
"""Obtenir le statut du système"""
|
| 301 |
+
return get_vector_updater().get_update_status()
|
| 302 |
|
| 303 |
@router.post("/update-vectors")
|
| 304 |
async def update_vectors(background_tasks: BackgroundTasks, token: str = Depends(verify_admin_token)):
|
| 305 |
"""Déclencher une mise à jour si nécessaire"""
|
| 306 |
+
vector_updater = get_vector_updater()
|
| 307 |
if vector_updater.is_updating:
|
| 308 |
return {"success": False, "message": "Une mise à jour est déjà en cours"}
|
| 309 |
|
|
|
|
| 315 |
@router.post("/force-update-vectors")
|
| 316 |
async def force_update_vectors(background_tasks: BackgroundTasks, token: str = Depends(verify_admin_token)):
|
| 317 |
"""Forcer la mise à jour des vecteurs"""
|
| 318 |
+
vector_updater = get_vector_updater()
|
| 319 |
if vector_updater.is_updating:
|
| 320 |
return {"success": False, "message": "Une mise à jour est déjà en cours"}
|
| 321 |
|
|
|
|
| 328 |
async def get_logs(token: str = Depends(verify_admin_token)):
|
| 329 |
"""Obtenir les logs de mise à jour"""
|
| 330 |
try:
|
| 331 |
+
logs = get_vector_updater().get_logs()
|
| 332 |
return {"logs": logs}
|
| 333 |
except Exception as e:
|
| 334 |
return {"logs": [f"Erreur de lecture des logs: {e}"]}
|
app/services/vector_storage.py
CHANGED
|
@@ -13,10 +13,11 @@ class HFVectorStorage:
|
|
| 13 |
def __init__(self):
|
| 14 |
self.hf_token = os.getenv('HF_TOKEN')
|
| 15 |
self.repo_name = os.getenv('HF_DATASET_REPO')
|
| 16 |
-
self.api = HfApi(token=self.hf_token)
|
| 17 |
|
| 18 |
-
# Créer le repo s'il n'existe pas
|
| 19 |
if self.hf_token and self.repo_name:
|
|
|
|
|
|
|
|
|
|
| 20 |
try:
|
| 21 |
create_repo(
|
| 22 |
repo_id=self.repo_name,
|
|
@@ -27,6 +28,9 @@ class HFVectorStorage:
|
|
| 27 |
)
|
| 28 |
except Exception as e:
|
| 29 |
logger.warning(f"Repo creation warning: {e}")
|
|
|
|
|
|
|
|
|
|
| 30 |
|
| 31 |
def save_vectors(self, embeddings: np.ndarray, movies_data: List[Dict],
|
| 32 |
id_map: Dict, metadata: Dict) -> bool:
|
|
|
|
| 13 |
def __init__(self):
|
| 14 |
self.hf_token = os.getenv('HF_TOKEN')
|
| 15 |
self.repo_name = os.getenv('HF_DATASET_REPO')
|
|
|
|
| 16 |
|
|
|
|
| 17 |
if self.hf_token and self.repo_name:
|
| 18 |
+
self.api = HfApi(token=self.hf_token)
|
| 19 |
+
|
| 20 |
+
# Créer le repo s'il n'existe pas
|
| 21 |
try:
|
| 22 |
create_repo(
|
| 23 |
repo_id=self.repo_name,
|
|
|
|
| 28 |
)
|
| 29 |
except Exception as e:
|
| 30 |
logger.warning(f"Repo creation warning: {e}")
|
| 31 |
+
else:
|
| 32 |
+
self.api = None
|
| 33 |
+
logger.warning("HF_TOKEN or HF_DATASET_REPO not configured")
|
| 34 |
|
| 35 |
def save_vectors(self, embeddings: np.ndarray, movies_data: List[Dict],
|
| 36 |
id_map: Dict, metadata: Dict) -> bool:
|
app/services/vector_updater.py
CHANGED
|
@@ -5,9 +5,20 @@ from typing import Optional, List
|
|
| 5 |
import os
|
| 6 |
import numpy as np
|
| 7 |
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
from .
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
logger = logging.getLogger(__name__)
|
| 13 |
|
|
|
|
| 5 |
import os
|
| 6 |
import numpy as np
|
| 7 |
|
| 8 |
+
# Import avec gestion d'erreurs pour compatibilité
|
| 9 |
+
try:
|
| 10 |
+
from .vector_storage import HFVectorStorage
|
| 11 |
+
from .tmdb_service import TMDBService
|
| 12 |
+
from .embedding_service import EmbeddingService
|
| 13 |
+
except ImportError:
|
| 14 |
+
try:
|
| 15 |
+
from app.services.vector_storage import HFVectorStorage
|
| 16 |
+
from app.services.tmdb_service import TMDBService
|
| 17 |
+
from app.services.embedding_service import EmbeddingService
|
| 18 |
+
except ImportError:
|
| 19 |
+
from services.vector_storage import HFVectorStorage
|
| 20 |
+
from services.tmdb_service import TMDBService
|
| 21 |
+
from services.embedding_service import EmbeddingService
|
| 22 |
|
| 23 |
logger = logging.getLogger(__name__)
|
| 24 |
|
app/settings.py
CHANGED
|
@@ -4,6 +4,7 @@ Settings and environment configuration
|
|
| 4 |
import os
|
| 5 |
from functools import lru_cache
|
| 6 |
from pydantic_settings import BaseSettings
|
|
|
|
| 7 |
|
| 8 |
|
| 9 |
class Settings(BaseSettings):
|
|
@@ -25,11 +26,42 @@ class Settings(BaseSettings):
|
|
| 25 |
log_level: str = "INFO"
|
| 26 |
|
| 27 |
# Filter adult content (True = exclude adult films, False = include all)
|
| 28 |
-
filter_adult_content:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
|
| 30 |
class Config:
|
| 31 |
env_file = ".env"
|
| 32 |
env_file_encoding = "utf-8"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
|
| 34 |
|
| 35 |
@lru_cache()
|
|
|
|
| 4 |
import os
|
| 5 |
from functools import lru_cache
|
| 6 |
from pydantic_settings import BaseSettings
|
| 7 |
+
from typing import Optional
|
| 8 |
|
| 9 |
|
| 10 |
class Settings(BaseSettings):
|
|
|
|
| 26 |
log_level: str = "INFO"
|
| 27 |
|
| 28 |
# Filter adult content (True = exclude adult films, False = include all)
|
| 29 |
+
filter_adult_content: Optional[str] = "true"
|
| 30 |
+
|
| 31 |
+
# Hugging Face configuration
|
| 32 |
+
hf_token: str = ""
|
| 33 |
+
hf_dataset_repo: str = ""
|
| 34 |
+
|
| 35 |
+
# Vector update configuration
|
| 36 |
+
auto_update_vectors: Optional[str] = "true"
|
| 37 |
+
update_interval_hours: int = 24
|
| 38 |
+
batch_size: int = 100
|
| 39 |
+
max_movies_limit: int = 10000
|
| 40 |
+
|
| 41 |
+
# Admin configuration
|
| 42 |
+
admin_token: str = ""
|
| 43 |
|
| 44 |
class Config:
|
| 45 |
env_file = ".env"
|
| 46 |
env_file_encoding = "utf-8"
|
| 47 |
+
|
| 48 |
+
@property
|
| 49 |
+
def filter_adult_content_bool(self) -> bool:
|
| 50 |
+
"""Parse filter_adult_content as boolean"""
|
| 51 |
+
if isinstance(self.filter_adult_content, str):
|
| 52 |
+
# Remove any comments and strip whitespace
|
| 53 |
+
value = self.filter_adult_content.split('#')[0].strip().lower()
|
| 54 |
+
return value in ('true', '1', 'yes', 'on')
|
| 55 |
+
return bool(self.filter_adult_content)
|
| 56 |
+
|
| 57 |
+
@property
|
| 58 |
+
def auto_update_vectors_bool(self) -> bool:
|
| 59 |
+
"""Parse auto_update_vectors as boolean"""
|
| 60 |
+
if isinstance(self.auto_update_vectors, str):
|
| 61 |
+
# Remove any comments and strip whitespace
|
| 62 |
+
value = self.auto_update_vectors.split('#')[0].strip().lower()
|
| 63 |
+
return value in ('true', '1', 'yes', 'on')
|
| 64 |
+
return bool(self.auto_update_vectors)
|
| 65 |
|
| 66 |
|
| 67 |
@lru_cache()
|
requirements.txt
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
fastapi==0.104.1
|
| 2 |
uvicorn==0.24.0
|
| 3 |
pydantic==2.5.0
|
|
|
|
| 4 |
numpy==1.24.3
|
| 5 |
faiss-cpu==1.7.4
|
| 6 |
openai==1.3.5
|
|
|
|
| 1 |
fastapi==0.104.1
|
| 2 |
uvicorn==0.24.0
|
| 3 |
pydantic==2.5.0
|
| 4 |
+
pydantic-settings==2.1.0
|
| 5 |
numpy==1.24.3
|
| 6 |
faiss-cpu==1.7.4
|
| 7 |
openai==1.3.5
|