"""OpenAI-style chat-completions facade that proxies to chatwithmono.xyz."""

import json
import math
import time
from typing import Any, AsyncIterator, Dict, List, Optional

import httpx
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse, StreamingResponse
from pydantic import BaseModel

from models import AVAILABLE_MODELS, MODEL_ALIASES

app = FastAPI()

_UPSTREAM_URL = "https://www.chatwithmono.xyz/api/chat"
_UPSTREAM_TIMEOUT = 120
_UPSTREAM_HEADERS = {
    'accept': 'text/event-stream',
    'content-type': 'application/json',
    'origin': 'https://www.chatwithmono.xyz',
    'referer': 'https://www.chatwithmono.xyz/',
    'user-agent': 'Mozilla/5.0',
}
# Upstream line prefixes: "0:" lines carry a JSON-encoded text fragment;
# "e:" / "d:" lines are treated as end-of-answer markers that may carry a
# JSON object with a "usage" member.
_TEXT_PREFIX = "0:"
_FINISH_PREFIXES = ("e:", "d:")
_SSE_DONE = "data: [DONE]\n\n"


def unix_id():
    """Return the current Unix time in milliseconds as a decimal string."""
    return str(int(time.time() * 1000))


class Message(BaseModel):
    """A single chat message in the incoming request."""

    role: str
    content: str


class ChatRequest(BaseModel):
    """Request body accepted by ``POST /v1/chat/completions``."""

    messages: List[Message]
    model: str
    stream: Optional[bool] = False


def _sse(obj: Dict[str, Any]) -> str:
    """Serialize *obj* as one server-sent-event ``data:`` frame."""
    return f"data: {json.dumps(obj)}\n\n"


def _error_body(message: str) -> Dict[str, Any]:
    """Build the JSON error object reported when the upstream call fails."""
    return {"error": {"message": message, "type": "upstream_error"}}


def _text_piece(line: str) -> Optional[str]:
    """Return the text fragment carried by a ``0:`` line.

    Returns ``None`` when the payload is not valid JSON or does not decode to
    a string, so callers can skip the line instead of failing the request.
    """
    try:
        piece = json.loads(line[2:])
    except ValueError:
        return None
    return piece if isinstance(piece, str) else None


def _usage_from(line: str) -> Optional[Dict[str, Any]]:
    """Return the ``usage`` object carried by an ``e:``/``d:`` line, if any."""
    try:
        data = json.loads(line[2:])
    except ValueError:
        return None
    if not isinstance(data, dict):
        return None
    usage = data.get("usage")
    return usage if isinstance(usage, dict) else None


def _token_count(usage: Dict[str, Any], key: str) -> int:
    """Return ``usage[key]`` as an int, or 0 when missing or not a finite number."""
    value = usage.get(key)
    if isinstance(value, bool):
        return 0
    if isinstance(value, int):
        return value
    # json.loads accepts NaN/Infinity; those cannot be re-serialized safely.
    if isinstance(value, float) and math.isfinite(value):
        return int(value)
    return 0


async def _stream_chunks(payload: Dict[str, Any], model_id: str) -> AsyncIterator[str]:
    """Yield ``chat.completion.chunk`` SSE frames translated from the upstream stream.

    The stream is always terminated with ``data: [DONE]``: after the stop
    chunk on the first ``e:``/``d:`` line, after a stop chunk if the upstream
    ends without such a line, or after an error frame if the upstream call
    fails before the stop chunk was sent.
    """
    chat_id = f"chatcmpl-{unix_id()}"
    created = int(time.time())

    def frame(delta: Dict[str, Any], finish_reason: Optional[str]) -> str:
        return _sse({
            "id": chat_id,
            "object": "chat.completion.chunk",
            "created": created,
            "model": model_id,
            "choices": [{
                "delta": delta,
                "index": 0,
                "finish_reason": finish_reason,
            }],
        })

    finished = False
    failure = None
    # Only httpx errors are caught here, and no handler wraps a ``yield`` with
    # a catch-all: GeneratorExit / cancellation raised at a yield must
    # propagate so the generator can be closed when the client disconnects.
    try:
        async with httpx.AsyncClient(timeout=_UPSTREAM_TIMEOUT) as client:
            async with client.stream(
                "POST", _UPSTREAM_URL, headers=_UPSTREAM_HEADERS, json=payload
            ) as response:
                if response.status_code >= 400:
                    failure = f"Upstream returned HTTP {response.status_code}"
                else:
                    async for line in response.aiter_lines():
                        if line.startswith(_TEXT_PREFIX):
                            piece = _text_piece(line)
                            if piece is None:
                                continue
                            yield frame({"content": piece}, None)
                        elif line.startswith(_FINISH_PREFIXES) and not finished:
                            finished = True
                            yield frame({}, "stop") + _SSE_DONE
    except httpx.HTTPError as exc:
        failure = f"Upstream request failed: {type(exc).__name__}"

    if finished:
        return
    if failure is not None:
        yield _sse(_error_body(failure)) + _SSE_DONE
    else:
        # Upstream closed without an end marker; still terminate the stream.
        yield frame({}, "stop") + _SSE_DONE


async def _collect_completion(payload: Dict[str, Any], model_id: str) -> JSONResponse:
    """Consume the whole upstream stream and return one ``chat.completion`` body.

    Returns a 502 response with an error object when the upstream answers
    with an HTTP error status or the request itself fails.
    """
    pieces: List[str] = []
    usage: Dict[str, Any] = {}
    try:
        async with httpx.AsyncClient(timeout=_UPSTREAM_TIMEOUT) as client:
            async with client.stream(
                "POST", _UPSTREAM_URL, headers=_UPSTREAM_HEADERS, json=payload
            ) as response:
                if response.status_code >= 400:
                    return JSONResponse(
                        status_code=502,
                        content=_error_body(
                            f"Upstream returned HTTP {response.status_code}"
                        ),
                    )
                async for line in response.aiter_lines():
                    if line.startswith(_TEXT_PREFIX):
                        piece = _text_piece(line)
                        if piece is not None:
                            pieces.append(piece)
                    elif line.startswith(_FINISH_PREFIXES):
                        found = _usage_from(line)
                        # Keep earlier usage if a later marker carries none.
                        if found is not None:
                            usage = found
    except httpx.HTTPError as exc:
        return JSONResponse(
            status_code=502,
            content=_error_body(f"Upstream request failed: {type(exc).__name__}"),
        )

    prompt_tokens = _token_count(usage, "promptTokens")
    completion_tokens = _token_count(usage, "completionTokens")
    return JSONResponse(content={
        "id": f"chatcmpl-{unix_id()}",
        "object": "chat.completion",
        "created": int(time.time()),
        "model": model_id,
        "choices": [{
            "index": 0,
            "message": {
                "role": "assistant",
                "content": "".join(pieces),
            },
            "finish_reason": "stop",
        }],
        "usage": {
            "prompt_tokens": prompt_tokens,
            "completion_tokens": completion_tokens,
            "total_tokens": prompt_tokens + completion_tokens,
        },
    })


@app.get("/v1/models")
async def list_models():
    """Return the configured model catalogue as an ``object: list`` envelope."""
    return {"object": "list", "data": AVAILABLE_MODELS}


@app.post("/v1/chat/completions")
async def chat_completion(request: ChatRequest):
    """Forward a chat request upstream and answer in chat-completions format.

    The requested model name is resolved through ``MODEL_ALIASES`` (falling
    back to the name as given). When ``request.stream`` is truthy the answer
    is a ``text/event-stream`` of chunk frames; otherwise a single JSON body.
    """
    model_id = MODEL_ALIASES.get(request.model, request.model)
    payload = {
        "messages": [
            {"role": msg.role, "content": msg.content} for msg in request.messages
        ],
        "model": model_id,
    }

    if request.stream:
        return StreamingResponse(
            _stream_chunks(payload, model_id), media_type="text/event-stream"
        )
    return await _collect_completion(payload, model_id)