File size: 3,394 Bytes
7c9f89e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 |
from fastapi import FastAPI, Request
from fastapi.responses import StreamingResponse, JSONResponse
from pydantic import BaseModel
import requests
import time
import json
from typing import List, Optional
from models import AVAILABLE_MODELS
# ASGI application object; the route decorators below register handlers on it.
app = FastAPI()
def unix_id():
    """Return the current Unix time in whole milliseconds as a decimal string.

    Used as the unique-ish suffix of generated completion ids.
    """
    millis = int(time.time() * 1000)
    return str(millis)
class Message(BaseModel):
    """A single chat message as sent by the client."""

    # Speaker of the message; forwarded to the upstream API unchanged.
    role: str
    # Text of the message; forwarded to the upstream API unchanged.
    content: str
class ChatRequest(BaseModel):
    """Request body accepted by the chat completion endpoint."""

    # Conversation so far, in order.
    messages: List[Message]
    # Model identifier, passed through to the upstream API and echoed back.
    model: str
    # When true the reply is delivered as a server-sent event stream.
    stream: Optional[bool] = False
@app.get("/v1/models")
async def list_models():
    """Return the models declared in ``AVAILABLE_MODELS`` in a list envelope."""
    return {"object": "list", "data": AVAILABLE_MODELS}
# Upstream endpoint and timeout shared by the streaming and non-streaming paths.
_UPSTREAM_CHAT_URL = "https://www.chatwithmono.xyz/api/chat"
_UPSTREAM_TIMEOUT_SECONDS = 120


def _iter_upstream_lines(headers, payload):
    """Yield the non-empty text lines of the upstream streaming response.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            4xx/5xx upstream status.
    """
    with requests.post(
        _UPSTREAM_CHAT_URL,
        headers=headers,
        json=payload,
        stream=True,
        timeout=_UPSTREAM_TIMEOUT_SECONDS,
    ) as response:
        response.raise_for_status()
        # Without an explicit encoding, iter_lines(decode_unicode=True) yields
        # bytes (no charset known) or Latin-1 text (text/* without charset).
        # The line payloads are JSON, so decode as UTF-8.
        response.encoding = "utf-8"
        for line in response.iter_lines(decode_unicode=True):
            if line:
                yield line


def _parse_text_part(line):
    """Return the text carried by a ``0:`` line, or None if it is malformed."""
    try:
        piece = json.loads(line[2:])
    except ValueError:
        return None
    return piece if isinstance(piece, str) else None


def _parse_usage(line):
    """Return the ``usage`` dict of an ``e:``/``d:`` line, or None if absent."""
    try:
        data = json.loads(line[2:])
    except ValueError:
        return None
    if not isinstance(data, dict):
        return None
    usage = data.get("usage")
    return usage if isinstance(usage, dict) else None


def _token_count(usage, key):
    """Return ``usage[key]`` when it is an integer, otherwise 0."""
    value = usage.get(key)
    if isinstance(value, bool) or not isinstance(value, int):
        return 0
    return value


def _collect_completion(headers, payload):
    """Read the whole upstream reply and return ``(text, usage_dict)``."""
    parts = []
    usage = {}
    for line in _iter_upstream_lines(headers, payload):
        if line.startswith("0:"):
            piece = _parse_text_part(line)
            if piece is not None:
                parts.append(piece)
        elif line.startswith(("e:", "d:")):
            found = _parse_usage(line)
            if found is not None:
                usage = found
    return "".join(parts), usage


def _stream_chunks(headers, payload, model):
    """Yield server-sent events translating the upstream reply chunk by chunk.

    Text parts are JSON-decoded before being forwarded, a final chunk with
    ``finish_reason`` "stop" is sent when the upstream ends normally, and
    ``[DONE]`` is emitted exactly once.
    """
    completion_id = f"chatcmpl-{unix_id()}"
    created = int(time.time())

    def sse_chunk(delta, finish_reason):
        body = {
            "id": completion_id,
            "object": "chat.completion.chunk",
            "created": created,
            "model": model,
            "choices": [
                {"index": 0, "delta": delta, "finish_reason": finish_reason}
            ],
        }
        return f"data: {json.dumps(body)}\n\n"

    try:
        for line in _iter_upstream_lines(headers, payload):
            if line.startswith("0:"):
                piece = _parse_text_part(line)
                if piece is not None:
                    yield sse_chunk({"content": piece}, None)
    except requests.RequestException:
        # Response headers are already sent, so report the failure in-band.
        error = {
            "error": {
                "message": "Upstream chat request failed",
                "type": "upstream_error",
            }
        }
        yield f"data: {json.dumps(error)}\n\n"
    else:
        yield sse_chunk({}, "stop")
    yield "data: [DONE]\n\n"


@app.post("/v1/chat/completions")
async def chat_completion(request: ChatRequest):
    """Proxy a chat completion request to the upstream chat API.

    Args:
        request: Messages, model name and the optional ``stream`` flag.

    Returns:
        A ``StreamingResponse`` of ``text/event-stream`` chunks when
        ``request.stream`` is true; otherwise a ``JSONResponse`` holding a
        ``chat.completion`` object, or a 502 error object when the upstream
        request fails.
    """
    # Local import keeps this block self-contained; fastapi is already a
    # dependency of this module.
    from fastapi.concurrency import run_in_threadpool

    headers = {
        'accept': 'text/event-stream',
        'content-type': 'application/json',
        'origin': 'https://www.chatwithmono.xyz',
        'referer': 'https://www.chatwithmono.xyz/',
        'user-agent': 'Mozilla/5.0',
    }
    payload = {
        "messages": [
            {"role": msg.role, "content": msg.content}
            for msg in request.messages
        ],
        "model": request.model,
    }

    if request.stream:
        # A plain (sync) generator is iterated by the framework in a worker
        # thread, so the blocking upstream read does not stall the event loop.
        return StreamingResponse(
            _stream_chunks(headers, payload, request.model),
            media_type="text/event-stream",
        )

    try:
        # requests is blocking; run it off the event loop.
        assistant_response, usage_info = await run_in_threadpool(
            _collect_completion, headers, payload
        )
    except requests.RequestException:
        return JSONResponse(
            status_code=502,
            content={
                "error": {
                    "message": "Upstream chat request failed",
                    "type": "upstream_error",
                }
            },
        )

    prompt_tokens = _token_count(usage_info, "promptTokens")
    completion_tokens = _token_count(usage_info, "completionTokens")
    return JSONResponse(content={
        "id": f"chatcmpl-{unix_id()}",
        "object": "chat.completion",
        "created": int(time.time()),
        "model": request.model,
        "choices": [{
            "index": 0,
            "message": {
                "role": "assistant",
                "content": assistant_response,
            },
            "finish_reason": "stop",
        }],
        "usage": {
            "prompt_tokens": prompt_tokens,
            "completion_tokens": completion_tokens,
            "total_tokens": prompt_tokens + completion_tokens,
        },
    })
|