Spaces:
Paused
Paused
correction NLTK
Browse files
- Dockerfile +9 -14
- app.py +4 -2
- patches/llama_patch.py +20 -0
Dockerfile
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
-
# Image de base
|
2 |
FROM python:3.10-slim
|
3 |
|
4 |
-
# Dépendances système
|
5 |
RUN apt-get update && apt-get install -y \
|
6 |
build-essential \
|
7 |
cmake \
|
@@ -11,26 +11,21 @@ RUN apt-get update && apt-get install -y \
|
|
11 |
curl \
|
12 |
&& rm -rf /var/lib/apt/lists/*
|
13 |
|
14 |
-
#
|
15 |
WORKDIR /code
|
16 |
|
17 |
-
# Copier les
|
18 |
COPY requirements.txt .
|
19 |
-
|
20 |
-
# Installer les dépendances Python
|
21 |
RUN pip install --no-cache-dir -r requirements.txt
|
22 |
|
23 |
-
#
|
24 |
-
RUN mkdir -p /tmp/nltk_data
|
25 |
-
|
26 |
-
# ✅ Télécharger punkt AVANT le lancement de l'app
|
27 |
-
RUN python -m nltk.downloader -d /tmp/nltk_data punkt
|
28 |
|
29 |
-
# Copier le
|
30 |
COPY . .
|
31 |
|
32 |
-
# Exposer le port
|
33 |
EXPOSE 7860
|
34 |
|
35 |
-
#
|
36 |
CMD ["streamlit", "run", "app.py", "--server.port=7860", "--server.address=0.0.0.0"]
|
|
|
1 |
+
# 🐍 Image de base
|
2 |
FROM python:3.10-slim
|
3 |
|
4 |
+
# 🧱 Dépendances système
|
5 |
RUN apt-get update && apt-get install -y \
|
6 |
build-essential \
|
7 |
cmake \
|
|
|
11 |
curl \
|
12 |
&& rm -rf /var/lib/apt/lists/*
|
13 |
|
14 |
+
# 📁 Dossier de travail
|
15 |
WORKDIR /code
|
16 |
|
17 |
+
# 📝 Copier les requirements et installer les dépendances
|
18 |
COPY requirements.txt .
|
|
|
|
|
19 |
RUN pip install --no-cache-dir -r requirements.txt
|
20 |
|
21 |
+
# 📦 Préparer le cache NLTK
|
22 |
+
RUN mkdir -p /tmp/nltk_data && python -m nltk.downloader -d /tmp/nltk_data punkt
|
|
|
|
|
|
|
23 |
|
24 |
+
# 📁 Copier tout le code
|
25 |
COPY . .
|
26 |
|
27 |
+
# 📤 Exposer le port Streamlit
|
28 |
EXPOSE 7860
|
29 |
|
30 |
+
# 🚀 Lancer l'application (le patch doit être dans app.py AVANT l'import llama_index)
|
31 |
CMD ["streamlit", "run", "app.py", "--server.port=7860", "--server.address=0.0.0.0"]
|
app.py
CHANGED
@@ -7,10 +7,12 @@ from huggingface_hub import hf_hub_download
|
|
7 |
import time
|
8 |
|
9 |
|
10 |
-
|
11 |
os.environ["NLTK_DATA"] = "/tmp/nltk_data"
|
12 |
|
13 |
-
|
|
|
|
|
14 |
|
15 |
logger = logging.getLogger("Streamlit")
|
16 |
logger.setLevel(logging.INFO)
|
|
|
7 |
import time
|
8 |
|
9 |
|
10 |
+
import os
|
11 |
os.environ["NLTK_DATA"] = "/tmp/nltk_data"
|
12 |
|
13 |
+
# Appliquer le patch avant tout import de llama_index
|
14 |
+
from patches.llama_patch import patch_llamaindex_nltk
|
15 |
+
patch_llamaindex_nltk()
|
16 |
|
17 |
logger = logging.getLogger("Streamlit")
|
18 |
logger.setLevel(logging.INFO)
|
patches/llama_patch.py
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import nltk
|
3 |
+
|
4 |
+
# Forcer le cache dans /tmp
|
5 |
+
NLTK_CACHE_DIR = "/tmp/nltk_data"
|
6 |
+
|
7 |
+
def patch_llamaindex_nltk():
    """Make llama_index keep its NLTK data under ``NLTK_CACHE_DIR``.

    Defines a ``GlobalsHelper`` subclass whose initializer points the NLTK
    data directory at ``NLTK_CACHE_DIR`` and downloads the ``punkt``
    tokenizer only when it cannot be found, then installs that subclass as
    ``llama_index.core.utils.GlobalsHelper``.

    The patch is best-effort: any failure (llama_index not installed, a
    different module layout, ...) is reported on stdout and swallowed so the
    application can still start.

    Returns:
        None.
    """
    try:
        import llama_index.core.utils as llama_utils

        class PatchedGlobalsHelper(llama_utils.GlobalsHelper):
            """``GlobalsHelper`` variant that stores NLTK data in ``NLTK_CACHE_DIR``."""

            def __init__(self):
                # The parent initializer is deliberately not called, so the
                # data directory is always NLTK_CACHE_DIR.
                # NOTE(review): presumably the stock initializer picks a
                # directory that is not writable in this deployment - confirm
                # against the installed llama_index version.
                self._nltk_data_dir = NLTK_CACHE_DIR

                # Make sure NLTK searches the directory we download into;
                # otherwise a successful download could still be invisible
                # to later lookups.
                if self._nltk_data_dir not in nltk.data.path:
                    nltk.data.path.append(self._nltk_data_dir)

                # Download punkt only if it is not already available.
                try:
                    nltk.data.find("tokenizers/punkt")
                except LookupError:
                    nltk.download("punkt", download_dir=self._nltk_data_dir)

        # Install the subclass. Previously it was defined and then discarded
        # when the function returned, so the patch had no effect.
        # NOTE(review): objects instantiated from the original class before
        # this call are not replaced - confirm whether any need rebinding.
        llama_utils.GlobalsHelper = PatchedGlobalsHelper
    except Exception as e:
        # Top-level best-effort boundary: report and continue.
        print("[patch_llamaindex_nltk] Failed to patch:", e)
|