Spaces:
Runtime error
Runtime error
Update app/main.py
Browse files- app/main.py +8 -2
app/main.py
CHANGED
@@ -287,8 +287,14 @@ from huggingface_hub import hf_hub_download
|
|
287 |
from contextlib import asynccontextmanager
|
288 |
|
289 |
# === CONFIGURATION === #
|
|
|
|
|
|
|
|
|
|
|
290 |
MODEL_NAME = "FreedomIntelligence/Apollo-2B"
|
291 |
-
MODEL_FILE = "Apollo-
|
|
|
292 |
EMBEDDING_MODEL = "Omartificial-Intelligence-Space/GATE-AraBert-v1"
|
293 |
COLLECTION_NAME = "arabic_rag_collection"
|
294 |
QDRANT_URL = os.getenv("QDRANT_URL", "https://12efeef2-9f10-4402-9deb-f070977ddfc8.eu-central-1-0.aws.cloud.qdrant.io:6333")
|
@@ -328,7 +334,7 @@ async def lifespan(app: FastAPI):
|
|
328 |
|
329 |
# Load LLM model
|
330 |
model_path = hf_hub_download(
|
331 |
-
repo_id=
|
332 |
filename=MODEL_FILE,
|
333 |
local_dir="./models",
|
334 |
local_dir_use_symlinks=False
|
|
|
287 |
from contextlib import asynccontextmanager
|
288 |
|
289 |
# === CONFIGURATION === #
|
290 |
+
from llama_cpp import Llama
|
291 |
+
|
292 |
+
# REPO_ID = "FreedomIntelligence/Apollo-7B-GGUF"
|
293 |
+
REPO_ID = "RichardErkhov/FreedomIntelligence_-_Apollo-2B-gguf"
|
294 |
+
# MODEL_NAME = "FreedomIntelligence/Apollo-7B"
|
295 |
MODEL_NAME = "FreedomIntelligence/Apollo-2B"
|
296 |
+
# MODEL_FILE = "Apollo-7B.Q4_K_S.gguf"
|
297 |
+
MODEL_FILE = "Apollo-2B.IQ4_XS.gguf"
|
298 |
EMBEDDING_MODEL = "Omartificial-Intelligence-Space/GATE-AraBert-v1"
|
299 |
COLLECTION_NAME = "arabic_rag_collection"
|
300 |
QDRANT_URL = os.getenv("QDRANT_URL", "https://12efeef2-9f10-4402-9deb-f070977ddfc8.eu-central-1-0.aws.cloud.qdrant.io:6333")
|
|
|
334 |
|
335 |
# Load LLM model
|
336 |
model_path = hf_hub_download(
|
337 |
+
repo_id=REPO_ID,
|
338 |
filename=MODEL_FILE,
|
339 |
local_dir="./models",
|
340 |
local_dir_use_symlinks=False
|