khalednabawi11 committed on
Commit
ae828fb
·
verified ·
1 Parent(s): 73b0971

Update app/main.py

Browse files
Files changed (1) hide show
  1. app/main.py +8 -2
app/main.py CHANGED
@@ -287,8 +287,14 @@ from huggingface_hub import hf_hub_download
287
  from contextlib import asynccontextmanager
288
 
289
  # === CONFIGURATION === #
 
 
 
 
 
290
  MODEL_NAME = "FreedomIntelligence/Apollo-2B"
291
- MODEL_FILE = "Apollo-2B.Q4_K_S.gguf"
 
292
  EMBEDDING_MODEL = "Omartificial-Intelligence-Space/GATE-AraBert-v1"
293
  COLLECTION_NAME = "arabic_rag_collection"
294
  QDRANT_URL = os.getenv("QDRANT_URL", "https://12efeef2-9f10-4402-9deb-f070977ddfc8.eu-central-1-0.aws.cloud.qdrant.io:6333")
@@ -328,7 +334,7 @@ async def lifespan(app: FastAPI):
328
 
329
  # Load LLM model
330
  model_path = hf_hub_download(
331
- repo_id="FreedomIntelligence/Apollo-7B-GGUF",
332
  filename=MODEL_FILE,
333
  local_dir="./models",
334
  local_dir_use_symlinks=False
 
287
  from contextlib import asynccontextmanager
288
 
289
  # === CONFIGURATION === #
290
+ from llama_cpp import Llama
291
+
292
+ # REPO_ID = "FreedomIntelligence/Apollo-7B-GGUF"
293
+ REPO_ID = "RichardErkhov/FreedomIntelligence_-_Apollo-2B-gguf"
294
+ # MODEL_NAME = "FreedomIntelligence/Apollo-7B"
295
  MODEL_NAME = "FreedomIntelligence/Apollo-2B"
296
+ # MODEL_FILE = "Apollo-7B.Q4_K_S.gguf"
297
+ MODEL_FILE = "Apollo-2B.IQ4_XS.gguf"
298
  EMBEDDING_MODEL = "Omartificial-Intelligence-Space/GATE-AraBert-v1"
299
  COLLECTION_NAME = "arabic_rag_collection"
300
  QDRANT_URL = os.getenv("QDRANT_URL", "https://12efeef2-9f10-4402-9deb-f070977ddfc8.eu-central-1-0.aws.cloud.qdrant.io:6333")
 
334
 
335
  # Load LLM model
336
  model_path = hf_hub_download(
337
+ repo_id=REPO_ID,
338
  filename=MODEL_FILE,
339
  local_dir="./models",
340
  local_dir_use_symlinks=False