Rsnarsna committed on
Commit
7def0b3
·
verified ·
1 Parent(s): 5657fb8

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -0
app.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException
2
+ from llama_cpp import Llama
3
+ import os
4
+ import requests
5
+
6
+ app = FastAPI()
7
+
8
+ MODEL_URL = "https://huggingface.co/microsoft/phi-4-gguf/resolve/main/phi-4-q4.gguf"
9
+ MODEL_PATH = "model/phi-4-q4.gguf"
10
+
11
+ # Function to download the GGUF model
12
+ def download_model(url, path):
13
+ if not os.path.exists(path):
14
+ os.makedirs(os.path.dirname(path), exist_ok=True)
15
+ print(f"Downloading model from {url}...")
16
+ response = requests.get(url, stream=True)
17
+ if response.status_code == 200:
18
+ with open(path, "wb") as f:
19
+ for chunk in response.iter_content(chunk_size=1024):
20
+ if chunk:
21
+ f.write(chunk)
22
+ print("Download complete.")
23
+ else:
24
+ raise HTTPException(status_code=500, detail="Failed to download model.")
25
+ else:
26
+ print("Model already downloaded.")
27
+
28
+ # Download the model before loading
29
+ download_model(MODEL_URL, MODEL_PATH)
30
+
31
+ # Load the model
32
+ try:
33
+ model = Llama(model_path=MODEL_PATH)
34
+ print("Model Loaded Successfully")
35
+ except Exception as e:
36
+ raise HTTPException(status_code=500, detail=f"Model loading failed: {str(e)}")
37
+
38
+
39
+ @app.get("/")
40
+ def root():
41
+ return {"message": "Phi-4 GGUF Model Inference API"}
42
+
43
+ @app.post("/generate/")
44
+ def generate(prompt: str):
45
+ try:
46
+ output = model(prompt, max_tokens=200)
47
+ return {"response": output["choices"][0]["text"]}
48
+ except Exception as e:
49
+ raise HTTPException(status_code=500, detail=f"Inference failed: {str(e)}")