omaryasserhassan committed on
Commit
fc85eed
·
verified ·
1 Parent(s): e8ed38e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -10
app.py CHANGED
@@ -4,24 +4,43 @@ from pydantic import BaseModel
4
  from huggingface_hub import snapshot_download
5
  from llama_cpp import Llama
6
 
7
- # -------- Writable cache/weights dirs (HF Docker Spaces) --------
8
- HOME = os.path.expanduser("~") or "/home/user"
9
- BASE_DIR = os.getenv("SPACE_CACHE_DIR", os.path.join(HOME, ".cache"))
10
- HF_HOME = os.path.join(BASE_DIR, "huggingface")
11
- MODELS_DIR = os.path.join(BASE_DIR, "models")
 
 
 
 
 
 
 
 
 
 
12
 
13
- os.environ["HF_HOME"] = HF_HOME
14
- os.environ["HF_HUB_CACHE"] = os.path.join(HF_HOME, "hub")
 
 
 
 
 
 
15
  os.makedirs(HF_HOME, exist_ok=True)
16
  os.makedirs(MODELS_DIR, exist_ok=True)
17
 
 
 
 
18
  # ---- Model selection (override in Settings → Variables if needed) ----
19
  MODEL_REPO = os.getenv("MODEL_REPO", "Qwen/Qwen2.5-3B-Instruct-GGUF")
20
  MODEL_FILE = os.getenv("MODEL_FILE", "qwen2.5-3b-instruct-q4_k_m.gguf")
21
 
22
  # Inference knobs
23
- N_CTX = int(os.getenv("N_CTX", 2048))
24
- N_BATCH = int(os.getenv("N_BATCH", 64))
25
  N_THREADS = os.cpu_count() or 2
26
 
27
  app = FastAPI(title="Qwen Planner API (CPU)")
@@ -51,7 +70,7 @@ def ensure_model():
51
 
52
  @app.get("/healthz")
53
  def healthz():
54
- return {"status": "ok", "loaded": model_loaded, "model_file": MODEL_FILE}
55
 
56
  SYSTEM_PROMPT = "You are a concise assistant. Reply briefly in plain text."
57
 
 
4
  from huggingface_hub import snapshot_download
5
  from llama_cpp import Llama
6
 
7
# ---------- pick a writable cache dir ----------
def first_writable(paths):
    """Return the first entry of *paths* that exists (or can be created) and
    accepts file writes.

    Falsy entries (None / "") are skipped, so unset env vars can be passed
    directly. Writability is probed by creating and deleting a small marker
    file inside the directory.

    Raises:
        RuntimeError: if no candidate directory is writable.
    """
    for candidate in paths:
        if not candidate:
            continue  # unset env var or empty string — nothing to probe
        try:
            os.makedirs(candidate, exist_ok=True)
            probe = os.path.join(candidate, ".write_test")
            with open(probe, "w") as handle:
                handle.write("ok")
            os.remove(probe)
        except Exception:
            # Directory cannot be created or written (read-only FS,
            # permissions, …) — move on to the next candidate.
            continue
        return candidate
    raise RuntimeError("No writable cache dir found")
22
 
23
+ CACHE_BASE = first_writable([
24
+ os.getenv("SPACE_CACHE_DIR"), # optional env override
25
+ "/app/.cache", # WORKDIR is usually writable on Spaces
26
+ "/tmp/app_cache", # always writable fallback
27
+ ])
28
+
29
+ HF_HOME = os.path.join(CACHE_BASE, "huggingface")
30
+ MODELS_DIR = os.path.join(CACHE_BASE, "models")
31
  os.makedirs(HF_HOME, exist_ok=True)
32
  os.makedirs(MODELS_DIR, exist_ok=True)
33
 
34
+ os.environ["HF_HOME"] = HF_HOME
35
+ os.environ["HF_HUB_CACHE"] = os.path.join(HF_HOME, "hub")
36
+
37
  # ---- Model selection (override in Settings → Variables if needed) ----
38
  MODEL_REPO = os.getenv("MODEL_REPO", "Qwen/Qwen2.5-3B-Instruct-GGUF")
39
  MODEL_FILE = os.getenv("MODEL_FILE", "qwen2.5-3b-instruct-q4_k_m.gguf")
40
 
41
  # Inference knobs
42
+ N_CTX = int(os.getenv("N_CTX", 2048))
43
+ N_BATCH = int(os.getenv("N_BATCH", 64))
44
  N_THREADS = os.cpu_count() or 2
45
 
46
  app = FastAPI(title="Qwen Planner API (CPU)")
 
70
 
71
  @app.get("/healthz")
72
  def healthz():
73
+ return {"status": "ok", "loaded": model_loaded, "cache_base": CACHE_BASE, "model_file": MODEL_FILE}
74
 
75
  SYSTEM_PROMPT = "You are a concise assistant. Reply briefly in plain text."
76