Rsnarsna committed on
Commit
7def0b3
·
verified ·
1 Parent(s): 5657fb8

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -0
app.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException
2
+ from llama_cpp import Llama
3
+ import os
4
+ import requests
5
+
6
+ app = FastAPI()
7
+
8
+ MODEL_URL = "https://huggingface.co/microsoft/phi-4-gguf/resolve/main/phi-4-q4.gguf"
9
+ MODEL_PATH = "model/phi-4-q4.gguf"
10
+
11
+ # Function to download the GGUF model
12
+ def download_model(url, path):
13
+ if not os.path.exists(path):
14
+ os.makedirs(os.path.dirname(path), exist_ok=True)
15
+ print(f"Downloading model from {url}...")
16
+ response = requests.get(url, stream=True)
17
+ if response.status_code == 200:
18
+ with open(path, "wb") as f:
19
+ for chunk in response.iter_content(chunk_size=1024):
20
+ if chunk:
21
+ f.write(chunk)
22
+ print("Download complete.")
23
+ else:
24
+ raise HTTPException(status_code=500, detail="Failed to download model.")
25
+ else:
26
+ print("Model already downloaded.")
27
+
28
+ # Download the model before loading
29
+ download_model(MODEL_URL, MODEL_PATH)
30
+
31
+ # Load the model
32
+ try:
33
+ model = Llama(model_path=MODEL_PATH)
34
+ print("Model Loaded Successfully")
35
+ except Exception as e:
36
+ raise HTTPException(status_code=500, detail=f"Model loading failed: {str(e)}")
37
+
38
+
39
+ @app.get("/")
40
+ def root():
41
+ return {"message": "Phi-4 GGUF Model Inference API"}
42
+
43
+ @app.post("/generate/")
44
+ def generate(prompt: str):
45
+ try:
46
+ output = model(prompt, max_tokens=200)
47
+ return {"response": output["choices"][0]["text"]}
48
+ except Exception as e:
49
+ raise HTTPException(status_code=500, detail=f"Inference failed: {str(e)}")