Diamanta committed (verified)
Commit a028adc · Parent: c2c02ae

Update main.py

Files changed (1): main.py (+7 -9)
main.py CHANGED
@@ -1,13 +1,15 @@
-from fastapi import FastAPI, Request
-from llama_cpp import Llama
+from fastapi import FastAPI
 from pydantic import BaseModel
 from typing import List
-import uvicorn
+from llama_cpp import Llama
 
 app = FastAPI()
 
-# Load small model (e.g., Phi-2 or DeepSeek)
-llm = Llama(model_path="phi-2.Q4_K_M.gguf", n_ctx=2048, n_threads=2)
+llm = Llama(
+    model_path="phi-2.Q4_K_M.gguf",
+    n_ctx=2048,
+    n_threads=2
+)
 
 class Message(BaseModel):
     role: str
@@ -18,7 +20,6 @@ class ChatRequest(BaseModel):
     messages: List[Message]
     temperature: float = 0.7
     max_tokens: int = 256
-    stream: bool = False
 
 @app.post("/v1/chat/completions")
 async def chat_completions(req: ChatRequest):
@@ -40,6 +41,3 @@ async def chat_completions(req: ChatRequest):
         }],
         "model": req.model
     }
-
-if __name__ == "__main__":
-    uvicorn.run(app, host="0.0.0.0", port=8000)
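
With the __main__ block removed, the app is presumably launched externally, e.g. `uvicorn main:app --host 0.0.0.0 --port 8000` (matching the host and port the deleted block used). Below is a minimal client sketch against the /v1/chat/completions route in this diff; the base URL, the "phi-2" model name, and the `model` field on ChatRequest (only visible here as `req.model` in the response) are assumptions, not shown in the visible hunks:

import requests

# Hypothetical smoke test for the /v1/chat/completions route above.
# Assumes the server was started with: uvicorn main:app --host 0.0.0.0 --port 8000
resp = requests.post(
    "http://localhost:8000/v1/chat/completions",
    json={
        "model": "phi-2",  # assumed field; the handler echoes it back as req.model
        "messages": [{"role": "user", "content": "Say hello in one sentence."}],
        "temperature": 0.7,  # default declared on ChatRequest
        "max_tokens": 64,    # overrides the 256 default
    },
    timeout=120,
)
resp.raise_for_status()
print(resp.json())  # body carries "model" plus the list the handler builds with "}],"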