Update app.py
app.py
CHANGED
@@ -5,21 +5,14 @@ from typing import List, Optional
 from huggingface_hub import hf_hub_download
 from ctransformers import AutoModelForCausalLM
 
-# ------------------------
-# Model configuration
-# ------------------------
 REPO_ID = "bartowski/Llama-3.2-3B-Instruct-GGUF"
 FILENAME = "Llama-3.2-3B-Instruct-Q4_K_L.gguf"
 MODEL_TYPE = "llama"
 
-# ------------------------
-# Persistent cache (Docker Spaces -> /data)
-# ------------------------
 CACHE_DIR = os.environ.get("HUGGINGFACE_HUB_CACHE", "/data/hf_cache")
 os.makedirs(CACHE_DIR, exist_ok=True)
 
 app = FastAPI(title="Llama 3.2 3B Instruct (ctransformers)")
-
 _model = None
 
 def get_model():
@@ -27,7 +20,6 @@ def get_model():
     if _model is not None:
         return _model
 
-    # Download exact GGUF file to persistent cache
     local_path = hf_hub_download(
         repo_id=REPO_ID,
         filename=FILENAME,
@@ -35,12 +27,11 @@ def get_model():
         local_dir_use_symlinks=False,
     )
 
-    # Load with ctransformers (CPU by default)
     _model = AutoModelForCausalLM.from_pretrained(
         model_path_or_repo_id=os.path.dirname(local_path),
         model_file=os.path.basename(local_path),
         model_type=MODEL_TYPE,
-        gpu_layers=int(os.environ.get("GPU_LAYERS", "0")),
+        gpu_layers=int(os.environ.get("GPU_LAYERS", "0")),
         context_length=int(os.environ.get("CTX_LEN", "4096")),
     )
     return _model
@@ -59,12 +50,7 @@ class GenerateOut(BaseModel):
 
 @app.get("/")
 def health():
-    return {
-        "status": "ok",
-        "model": {"repo_id": REPO_ID, "filename": FILENAME, "type": MODEL_TYPE},
-        "cache_dir": CACHE_DIR,
-        "endpoints": {"POST /generate": "Generate a completion"},
-    }
+    return {"status": "ok", "cache_dir": CACHE_DIR}
 
 @app.post("/generate", response_model=GenerateOut)
 def generate(body: GenerateIn):
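Note: the hunks above only name the GenerateIn request model, the GenerateOut response model, and the generate() handler; their bodies are outside this diff. The sketch below shows how these pieces typically fit together with a ctransformers model. The field names (prompt, max_new_tokens, temperature, stop) and defaults are illustrative assumptions, not the Space's actual code; it reuses the app instance and get_model() defined above.

# Illustrative sketch only; the real GenerateIn/GenerateOut fields and the
# real generate() body are not shown in this diff.
from typing import List, Optional
from pydantic import BaseModel

class GenerateIn(BaseModel):
    prompt: str
    max_new_tokens: int = 256         # assumed field
    temperature: float = 0.7          # assumed field
    stop: Optional[List[str]] = None  # assumed field

class GenerateOut(BaseModel):
    text: str                         # assumed field

@app.post("/generate", response_model=GenerateOut)
def generate(body: GenerateIn):
    # get_model() downloads the GGUF file into the persistent cache on the
    # first request and reuses the loaded model afterwards.
    model = get_model()
    # ctransformers model objects are callable; these keyword arguments are
    # standard ctransformers generation parameters.
    text = model(
        body.prompt,
        max_new_tokens=body.max_new_tokens,
        temperature=body.temperature,
        stop=body.stop,
    )
    return GenerateOut(text=text)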
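Once the Space is running, both endpoints can be exercised from any HTTP client. A quick smoke test with requests might look like the following; the Space URL is a placeholder and the JSON fields follow the assumed GenerateIn schema from the sketch above.

import requests

BASE_URL = "https://<owner>-<space-name>.hf.space"  # placeholder; use the real Space URL

# Health check: after this commit it returns only {"status": "ok", "cache_dir": ...}
print(requests.get(f"{BASE_URL}/", timeout=30).json())

# Generation request; field names follow the assumed GenerateIn schema above.
resp = requests.post(
    f"{BASE_URL}/generate",
    json={"prompt": "Write a haiku about model caching.", "max_new_tokens": 64},
    timeout=600,  # the first call can be slow while the GGUF file downloads
)
print(resp.json())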