Spaces:
Sleeping
Sleeping
yonnel
committed on
Commit
·
b8ca8ae
1
Parent(s):
14e32e0
Enhance environment configuration; implement lazy initialization for vector updater and improve error handling in imports
Browse files
- .env.example +1 -1
- app/main.py +2 -2
- app/routers/admin.py +20 -5
- app/services/vector_storage.py +6 -2
- app/services/vector_updater.py +14 -3
- app/settings.py +33 -1
- requirements.txt +1 -0
.env.example
CHANGED
@@ -14,7 +14,7 @@ ENV=dev
|
|
14 |
LOG_LEVEL=INFO
|
15 |
|
16 |
# Remove adult content from TMDB results
|
17 |
-
FILTER_ADULT_CONTENT=true
|
18 |
|
19 |
# Hugging Face configuration
|
20 |
HF_TOKEN=your_hf_token_here
|
|
|
14 |
LOG_LEVEL=INFO
|
15 |
|
16 |
# Remove adult content from TMDB results
|
17 |
+
FILTER_ADULT_CONTENT=true
|
18 |
|
19 |
# Hugging Face configuration
|
20 |
HF_TOKEN=your_hf_token_here
|
app/main.py
CHANGED
@@ -61,8 +61,6 @@ movie_metadata = None
|
|
61 |
|
62 |
def load_data():
|
63 |
"""Load FAISS index, vectors, and metadata on startup"""
|
64 |
-
global vectors, id_map, faiss_index, movie_metadata
|
65 |
-
|
66 |
try:
|
67 |
# Load vectors
|
68 |
vectors = np.load("app/data/movies.npy")
|
@@ -82,6 +80,8 @@ def load_data():
|
|
82 |
movie_metadata = json.load(f)
|
83 |
logger.info(f"Loaded metadata for {len(movie_metadata)} movies")
|
84 |
|
|
|
|
|
85 |
except Exception as e:
|
86 |
logger.error(f"Failed to load data: {e}")
|
87 |
raise
|
|
|
61 |
|
62 |
def load_data():
|
63 |
"""Load FAISS index, vectors, and metadata on startup"""
|
|
|
|
|
64 |
try:
|
65 |
# Load vectors
|
66 |
vectors = np.load("app/data/movies.npy")
|
|
|
80 |
movie_metadata = json.load(f)
|
81 |
logger.info(f"Loaded metadata for {len(movie_metadata)} movies")
|
82 |
|
83 |
+
return vectors, id_map, faiss_index, movie_metadata
|
84 |
+
|
85 |
except Exception as e:
|
86 |
logger.error(f"Failed to load data: {e}")
|
87 |
raise
|
app/routers/admin.py
CHANGED
@@ -10,14 +10,27 @@ from typing import Optional
|
|
10 |
try:
|
11 |
from ..services.vector_updater import VectorUpdater
|
12 |
except ImportError:
|
13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
|
15 |
logger = logging.getLogger(__name__)
|
16 |
router = APIRouter(prefix="/admin", tags=["admin"])
|
17 |
security = HTTPBearer()
|
18 |
|
19 |
-
# Instance globale du updater
|
20 |
-
vector_updater =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
|
22 |
def verify_admin_token(credentials: HTTPAuthorizationCredentials = Depends(security)):
|
23 |
"""Vérification du token admin"""
|
@@ -285,11 +298,12 @@ async def admin_dashboard():
|
|
285 |
@router.get("/status")
|
286 |
async def get_status(token: str = Depends(verify_admin_token)):
|
287 |
"""Obtenir le statut du système"""
|
288 |
-
return
|
289 |
|
290 |
@router.post("/update-vectors")
|
291 |
async def update_vectors(background_tasks: BackgroundTasks, token: str = Depends(verify_admin_token)):
|
292 |
"""Déclencher une mise à jour si nécessaire"""
|
|
|
293 |
if vector_updater.is_updating:
|
294 |
return {"success": False, "message": "Une mise à jour est déjà en cours"}
|
295 |
|
@@ -301,6 +315,7 @@ async def update_vectors(background_tasks: BackgroundTasks, token: str = Depends
|
|
301 |
@router.post("/force-update-vectors")
|
302 |
async def force_update_vectors(background_tasks: BackgroundTasks, token: str = Depends(verify_admin_token)):
|
303 |
"""Forcer la mise à jour des vecteurs"""
|
|
|
304 |
if vector_updater.is_updating:
|
305 |
return {"success": False, "message": "Une mise à jour est déjà en cours"}
|
306 |
|
@@ -313,7 +328,7 @@ async def force_update_vectors(background_tasks: BackgroundTasks, token: str = D
|
|
313 |
async def get_logs(token: str = Depends(verify_admin_token)):
|
314 |
"""Obtenir les logs de mise à jour"""
|
315 |
try:
|
316 |
-
logs =
|
317 |
return {"logs": logs}
|
318 |
except Exception as e:
|
319 |
return {"logs": [f"Erreur de lecture des logs: {e}"]}
|
|
|
10 |
try:
|
11 |
from ..services.vector_updater import VectorUpdater
|
12 |
except ImportError:
|
13 |
+
try:
|
14 |
+
from app.services.vector_updater import VectorUpdater
|
15 |
+
except ImportError:
|
16 |
+
# Import direct pour quand l'application est lancée depuis le répertoire racine
|
17 |
+
import sys
|
18 |
+
sys.path.append(os.path.dirname(os.path.dirname(__file__)))
|
19 |
+
from services.vector_updater import VectorUpdater
|
20 |
|
21 |
logger = logging.getLogger(__name__)
|
22 |
router = APIRouter(prefix="/admin", tags=["admin"])
|
23 |
security = HTTPBearer()
|
24 |
|
25 |
+
# Instance globale du updater - créée de manière paresseuse pour éviter les erreurs d'import
|
26 |
+
vector_updater = None
|
27 |
+
|
28 |
+
def get_vector_updater():
    """Return the shared ``VectorUpdater``, constructing it on first use.

    The instance is cached in the module-level ``vector_updater`` variable,
    so every later call returns the same object.
    """
    global vector_updater
    if vector_updater is not None:
        # Already built by an earlier call: reuse it.
        return vector_updater
    vector_updater = VectorUpdater()
    return vector_updater
|
34 |
|
35 |
def verify_admin_token(credentials: HTTPAuthorizationCredentials = Depends(security)):
|
36 |
"""Vérification du token admin"""
|
|
|
298 |
@router.get("/status")
async def get_status(token: str = Depends(verify_admin_token)):
    """Return the update status reported by the shared vector updater.

    Access is gated by the ``verify_admin_token`` dependency.
    """
    updater = get_vector_updater()
    return updater.get_update_status()
|
302 |
|
303 |
@router.post("/update-vectors")
|
304 |
async def update_vectors(background_tasks: BackgroundTasks, token: str = Depends(verify_admin_token)):
|
305 |
"""Déclencher une mise à jour si nécessaire"""
|
306 |
+
vector_updater = get_vector_updater()
|
307 |
if vector_updater.is_updating:
|
308 |
return {"success": False, "message": "Une mise à jour est déjà en cours"}
|
309 |
|
|
|
315 |
@router.post("/force-update-vectors")
|
316 |
async def force_update_vectors(background_tasks: BackgroundTasks, token: str = Depends(verify_admin_token)):
|
317 |
"""Forcer la mise à jour des vecteurs"""
|
318 |
+
vector_updater = get_vector_updater()
|
319 |
if vector_updater.is_updating:
|
320 |
return {"success": False, "message": "Une mise à jour est déjà en cours"}
|
321 |
|
|
|
328 |
async def get_logs(token: str = Depends(verify_admin_token)):
|
329 |
"""Obtenir les logs de mise à jour"""
|
330 |
try:
|
331 |
+
logs = get_vector_updater().get_logs()
|
332 |
return {"logs": logs}
|
333 |
except Exception as e:
|
334 |
return {"logs": [f"Erreur de lecture des logs: {e}"]}
|
app/services/vector_storage.py
CHANGED
@@ -13,10 +13,11 @@ class HFVectorStorage:
|
|
13 |
def __init__(self):
|
14 |
self.hf_token = os.getenv('HF_TOKEN')
|
15 |
self.repo_name = os.getenv('HF_DATASET_REPO')
|
16 |
-
self.api = HfApi(token=self.hf_token)
|
17 |
|
18 |
-
# Créer le repo s'il n'existe pas
|
19 |
if self.hf_token and self.repo_name:
|
|
|
|
|
|
|
20 |
try:
|
21 |
create_repo(
|
22 |
repo_id=self.repo_name,
|
@@ -27,6 +28,9 @@ class HFVectorStorage:
|
|
27 |
)
|
28 |
except Exception as e:
|
29 |
logger.warning(f"Repo creation warning: {e}")
|
|
|
|
|
|
|
30 |
|
31 |
def save_vectors(self, embeddings: np.ndarray, movies_data: List[Dict],
|
32 |
id_map: Dict, metadata: Dict) -> bool:
|
|
|
13 |
def __init__(self):
|
14 |
self.hf_token = os.getenv('HF_TOKEN')
|
15 |
self.repo_name = os.getenv('HF_DATASET_REPO')
|
|
|
16 |
|
|
|
17 |
if self.hf_token and self.repo_name:
|
18 |
+
self.api = HfApi(token=self.hf_token)
|
19 |
+
|
20 |
+
# Créer le repo s'il n'existe pas
|
21 |
try:
|
22 |
create_repo(
|
23 |
repo_id=self.repo_name,
|
|
|
28 |
)
|
29 |
except Exception as e:
|
30 |
logger.warning(f"Repo creation warning: {e}")
|
31 |
+
else:
|
32 |
+
self.api = None
|
33 |
+
logger.warning("HF_TOKEN or HF_DATASET_REPO not configured")
|
34 |
|
35 |
def save_vectors(self, embeddings: np.ndarray, movies_data: List[Dict],
|
36 |
id_map: Dict, metadata: Dict) -> bool:
|
app/services/vector_updater.py
CHANGED
@@ -5,9 +5,20 @@ from typing import Optional, List
|
|
5 |
import os
|
6 |
import numpy as np
|
7 |
|
8 |
-
|
9 |
-
|
10 |
-
from .
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
|
12 |
logger = logging.getLogger(__name__)
|
13 |
|
|
|
5 |
import os
|
6 |
import numpy as np
|
7 |
|
8 |
+
# Import avec gestion d'erreurs pour compatibilité
|
9 |
+
try:
|
10 |
+
from .vector_storage import HFVectorStorage
|
11 |
+
from .tmdb_service import TMDBService
|
12 |
+
from .embedding_service import EmbeddingService
|
13 |
+
except ImportError:
|
14 |
+
try:
|
15 |
+
from app.services.vector_storage import HFVectorStorage
|
16 |
+
from app.services.tmdb_service import TMDBService
|
17 |
+
from app.services.embedding_service import EmbeddingService
|
18 |
+
except ImportError:
|
19 |
+
from services.vector_storage import HFVectorStorage
|
20 |
+
from services.tmdb_service import TMDBService
|
21 |
+
from services.embedding_service import EmbeddingService
|
22 |
|
23 |
logger = logging.getLogger(__name__)
|
24 |
|
app/settings.py
CHANGED
@@ -4,6 +4,7 @@ Settings and environment configuration
|
|
4 |
import os
|
5 |
from functools import lru_cache
|
6 |
from pydantic_settings import BaseSettings
|
|
|
7 |
|
8 |
|
9 |
class Settings(BaseSettings):
|
@@ -25,11 +26,42 @@ class Settings(BaseSettings):
|
|
25 |
log_level: str = "INFO"
|
26 |
|
27 |
# Filter adult content (True = exclude adult films, False = include all)
|
28 |
-
filter_adult_content:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
|
30 |
class Config:
|
31 |
env_file = ".env"
|
32 |
env_file_encoding = "utf-8"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
|
34 |
|
35 |
@lru_cache()
|
|
|
4 |
import os
|
5 |
from functools import lru_cache
|
6 |
from pydantic_settings import BaseSettings
|
7 |
+
from typing import Optional
|
8 |
|
9 |
|
10 |
class Settings(BaseSettings):
|
|
|
26 |
log_level: str = "INFO"
|
27 |
|
28 |
# Filter adult content (True = exclude adult films, False = include all)
|
29 |
+
filter_adult_content: Optional[str] = "true"
|
30 |
+
|
31 |
+
# Hugging Face configuration
|
32 |
+
hf_token: str = ""
|
33 |
+
hf_dataset_repo: str = ""
|
34 |
+
|
35 |
+
# Vector update configuration
|
36 |
+
auto_update_vectors: Optional[str] = "true"
|
37 |
+
update_interval_hours: int = 24
|
38 |
+
batch_size: int = 100
|
39 |
+
max_movies_limit: int = 10000
|
40 |
+
|
41 |
+
# Admin configuration
|
42 |
+
admin_token: str = ""
|
43 |
|
44 |
class Config:
|
45 |
env_file = ".env"
|
46 |
env_file_encoding = "utf-8"
|
47 |
+
|
48 |
+
@property
def filter_adult_content_bool(self) -> bool:
    """Interpret ``filter_adult_content`` as a boolean flag.

    A string value is cut at the first ``#`` (dropping any trailing inline
    comment), stripped and lower-cased; ``true``, ``1``, ``yes`` and ``on``
    give ``True`` and every other string gives ``False``. A non-string
    value is converted with ``bool()``.
    """
    raw = self.filter_adult_content
    if not isinstance(raw, str):
        return bool(raw)
    # Keep only the text before an inline "# comment", if there is one.
    token, _, _ = raw.partition('#')
    return token.strip().lower() in ('true', '1', 'yes', 'on')
|
56 |
+
|
57 |
+
@property
def auto_update_vectors_bool(self) -> bool:
    """Interpret ``auto_update_vectors`` as a boolean flag.

    A string value is cut at the first ``#`` (dropping any trailing inline
    comment), stripped and lower-cased; ``true``, ``1``, ``yes`` and ``on``
    give ``True`` and every other string gives ``False``. A non-string
    value is converted with ``bool()``.
    """
    raw = self.auto_update_vectors
    if not isinstance(raw, str):
        return bool(raw)
    # Keep only the text before an inline "# comment", if there is one.
    token, _, _ = raw.partition('#')
    return token.strip().lower() in ('true', '1', 'yes', 'on')
|
65 |
|
66 |
|
67 |
@lru_cache()
|
requirements.txt
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
fastapi==0.104.1
|
2 |
uvicorn==0.24.0
|
3 |
pydantic==2.5.0
|
|
|
4 |
numpy==1.24.3
|
5 |
faiss-cpu==1.7.4
|
6 |
openai==1.3.5
|
|
|
1 |
fastapi==0.104.1
|
2 |
uvicorn==0.24.0
|
3 |
pydantic==2.5.0
|
4 |
+
pydantic-settings==2.1.0
|
5 |
numpy==1.24.3
|
6 |
faiss-cpu==1.7.4
|
7 |
openai==1.3.5
|