Update main.py
main.py
CHANGED
@@ -1,13 +1,15 @@
-from fastapi import FastAPI
-from llama_cpp import Llama
+from fastapi import FastAPI
 from pydantic import BaseModel
 from typing import List
-import uvicorn
+from llama_cpp import Llama
 
 app = FastAPI()
 
-
-
+llm = Llama(
+    model_path="phi-2.Q4_K_M.gguf",
+    n_ctx=2048,
+    n_threads=2
+)
 
 class Message(BaseModel):
     role: str
@@ -18,7 +20,6 @@ class ChatRequest(BaseModel):
     messages: List[Message]
     temperature: float = 0.7
     max_tokens: int = 256
-    stream: bool = False
 
 @app.post("/v1/chat/completions")
 async def chat_completions(req: ChatRequest):
@@ -40,6 +41,3 @@ async def chat_completions(req: ChatRequest):
         }],
         "model": req.model
     }
-
-if __name__ == "__main__":
-    uvicorn.run(app, host="0.0.0.0", port=8000)
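
The diff view collapses the unchanged handler body (new lines 26-40). As a rough sketch only, the visible pieces suggest the handler feeds the request into the module-level llm along the lines below; create_chat_completion is llama-cpp-python's chat API, but the hidden lines may do this differently, and Message's content field is itself an assumption (only role: str is visible in the diff).

# Hypothetical sketch of the collapsed handler (lines 26-40 are hidden
# by the diff view); everything beyond the visible signature, response
# keys, and the module-level llm is an assumption.
@app.post("/v1/chat/completions")
async def chat_completions(req: ChatRequest):
    out = llm.create_chat_completion(
        messages=[{"role": m.role, "content": m.content} for m in req.messages],
        temperature=req.temperature,
        max_tokens=req.max_tokens,
    )
    return {
        "choices": [{
            "message": out["choices"][0]["message"]  # assumed pass-through
        }],
        "model": req.model
    }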
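For completeness, a hedged client example against the new endpoint. The model field is inferred from the "model": req.model echo in the response, and the base URL is an assumption: the removed uvicorn.run block listened on port 8000, but a Space normally supplies its own launch command and port.

import requests

# Assumed base URL; replace with the Space's actual host and port.
BASE_URL = "http://localhost:8000"

resp = requests.post(
    f"{BASE_URL}/v1/chat/completions",
    json={
        "model": "phi-2",  # echoed back as req.model
        "messages": [{"role": "user", "content": "Say hello in one line."}],
        "temperature": 0.7,
        "max_tokens": 64,
    },
    timeout=120,
)
resp.raise_for_status()
print(resp.json()["choices"][0])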