import json
import time
from typing import List, Optional

import httpx
from fastapi import FastAPI
from fastapi.responses import StreamingResponse, JSONResponse
from pydantic import BaseModel

from models import AVAILABLE_MODELS, MODEL_ALIASES

app = FastAPI()


def unix_id():
    # Millisecond Unix timestamp, used to build unique completion IDs.
    return str(int(time.time() * 1000))


class Message(BaseModel):
    role: str
    content: str


class ChatRequest(BaseModel):
    messages: List[Message]
    model: str
    stream: Optional[bool] = False


@app.get("/v1/models")
async def list_models():
    return {"object": "list", "data": AVAILABLE_MODELS}


@app.post("/v1/chat/completions")
async def chat_completion(request: ChatRequest):
    # Resolve aliases defined in MODEL_ALIASES to upstream model IDs,
    # falling back to the name the client supplied.
    model_id = MODEL_ALIASES.get(request.model, request.model)

    # Browser-like headers for the upstream chat endpoint.
    headers = {
        'accept': 'text/event-stream',
        'content-type': 'application/json',
        'origin': 'https://www.chatwithmono.xyz',
        'referer': 'https://www.chatwithmono.xyz/',
        'user-agent': 'Mozilla/5.0',
    }

    payload = {
        "messages": [{"role": msg.role, "content": msg.content} for msg in request.messages],
        "model": model_id
    }

    if request.stream:
        async def event_stream():
            chat_id = f"chatcmpl-{unix_id()}"
            created = int(time.time())
            sent_done = False

            async with httpx.AsyncClient(timeout=120) as client:
                async with client.stream("POST", "https://www.chatwithmono.xyz/api/chat", headers=headers, json=payload) as response:
                    async for line in response.aiter_lines():
                        # Lines prefixed with "0:" carry JSON-encoded text deltas from upstream.
                        if line.startswith("0:"):
                            try:
                                content_piece = json.loads(line[2:])
                                # Re-wrap the delta as an OpenAI-style chat.completion.chunk event.
                                chunk_data = {
                                    "id": chat_id,
                                    "object": "chat.completion.chunk",
                                    "created": created,
                                    "model": model_id,
                                    "choices": [{
                                        "delta": {"content": content_piece},
                                        "index": 0,
                                        "finish_reason": None
                                    }]
                                }
                                yield f"data: {json.dumps(chunk_data)}\n\n"
                            except json.JSONDecodeError:
                                continue
                        # "e:" / "d:" lines mark the end of the upstream stream.
                        elif line.startswith(("e:", "d:")) and not sent_done:
                            sent_done = True
                            done_chunk = {
                                "id": chat_id,
                                "object": "chat.completion.chunk",
                                "created": created,
                                "model": model_id,
                                "choices": [{
                                    "delta": {},
                                    "index": 0,
                                    "finish_reason": "stop"
                                }]
                            }
                            yield f"data: {json.dumps(done_chunk)}\n\ndata: [DONE]\n\n"

        return StreamingResponse(event_stream(), media_type="text/event-stream")

    else:
        assistant_response = ""
        usage_info = {}

        async with httpx.AsyncClient(timeout=120) as client:
            async with client.stream("POST", "https://www.chatwithmono.xyz/api/chat", headers=headers, json=payload) as response:
                async for chunk in response.aiter_lines():
                    if chunk.startswith("0:"):
                        # Accumulate text deltas into the full assistant reply.
                        try:
                            piece = json.loads(chunk[2:])
                            assistant_response += piece
                        except json.JSONDecodeError:
                            continue
                    elif chunk.startswith(("e:", "d:")):
                        # Terminal lines may carry token usage metadata.
                        try:
                            data = json.loads(chunk[2:])
                            usage_info = data.get("usage", {})
                        except (json.JSONDecodeError, AttributeError):
                            continue

        return JSONResponse(content={
            "id": f"chatcmpl-{unix_id()}",
            "object": "chat.completion",
            "created": int(time.time()),
            "model": model_id,
            "choices": [{
                "index": 0,
                "message": {
                    "role": "assistant",
                    "content": assistant_response
                },
                "finish_reason": "stop"
            }],
            "usage": {
                "prompt_tokens": usage_info.get("promptTokens", 0),
                "completion_tokens": usage_info.get("completionTokens", 0),
                "total_tokens": usage_info.get("promptTokens", 0) + usage_info.get("completionTokens", 0),
            }
        })
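
# Usage sketch (not part of the proxy itself). Assuming this module is saved
# as main.py and uvicorn is installed, the server can be started with:
#
#   uvicorn main:app --host 0.0.0.0 --port 8000
#
# The endpoints mirror the OpenAI REST shape, so a non-streaming request can
# be issued with curl against any model ID returned by GET /v1/models:
#
#   curl http://localhost:8000/v1/chat/completions \
#     -H "Content-Type: application/json" \
#     -d '{"model": "<model-id>", "messages": [{"role": "user", "content": "Hello"}]}'
#
# Setting "stream": true in the request body switches the response to
# Server-Sent Events terminated by "data: [DONE]".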