import os
import time
import json
import asyncio
from typing import List, Optional, Dict, Any, Literal

from pydantic import BaseModel, Field
from fastapi import FastAPI, HTTPException, Request
from fastapi.middleware.cors import CORSMiddleware
from sse_starlette.sse import EventSourceResponse

from duckai import DuckAI

# Models accepted by DuckAI and exposed through this OpenAI-compatible adapter.
SUPPORTED_MODELS = [
    "gpt-4o-mini",
    "llama-3.3-70b",
    "claude-3-haiku",
    "o3-mini",
    "mistral-small-3",
]

app = FastAPI(title="DuckAI OpenAI Adapter API")

# Allow cross-origin requests so browser-based clients can reach the adapter.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

class Message(BaseModel):
    role: Literal["system", "user", "assistant"]
    content: str


class ChatCompletionRequest(BaseModel):
    model: str
    messages: List[Message]
    temperature: Optional[float] = 1.0
    max_tokens: Optional[int] = None
    stream: Optional[bool] = False

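# Illustrative request body matching ChatCompletionRequest above; the model name
# and messages are arbitrary examples, not defaults:
#
#     {
#         "model": "gpt-4o-mini",
#         "messages": [
#             {"role": "system", "content": "You are a helpful assistant."},
#             {"role": "user", "content": "Hello!"}
#         ],
#         "stream": false
#     }
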
class ChatCompletionResponse(BaseModel):
    id: str = Field(default_factory=lambda: f"chatcmpl-{os.urandom(12).hex()}")
    object: str = "chat.completion"
    created: int = Field(default_factory=lambda: int(time.time()))
    model: str
    choices: List[Dict[str, Any]]
    usage: Dict[str, int]

def format_chat_history(messages: List[Message]) -> str:
    """
    Convert the OpenAI-style message list into the single string format that
    DuckAI expects: one line per turn, prefixed with "user: " or "assistant: ".
    """
    formatted_history = ""

    for message in messages:
        if message.role == "system":
            # There is no separate system turn in the flattened history, so
            # system prompts are folded into a user line tagged with [SYSTEM].
            formatted_history += f"user: [SYSTEM] {message.content}\n"
        else:
            formatted_history += f"{message.role}: {message.content}\n"

    return formatted_history.strip()
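# Illustrative example (hypothetical input): the message list
#
#     [Message(role="system", content="Be brief."),
#      Message(role="user", content="Hi")]
#
# is flattened to:
#
#     user: [SYSTEM] Be brief.
#     user: Hi
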
async def stream_response_character_by_character(
    content: str, request: Request, response_id: str, model: str
):
    """
    Generator that streams the response in small chunks (roughly character by
    character) so clients see OpenAI-style "chat.completion.chunk" events.
    """
    if await request.is_disconnected():
        return

    def make_chunk(delta: Dict[str, Any], finish_reason: Optional[str] = None) -> str:
        # Build one OpenAI-compatible streaming chunk as a JSON string.
        return json.dumps({
            "id": response_id,
            "object": "chat.completion.chunk",
            "created": int(time.time()),
            "model": model,
            "choices": [
                {
                    "index": 0,
                    "delta": delta,
                    "finish_reason": finish_reason,
                }
            ],
        })

    # First chunk carries only the assistant role, mirroring OpenAI's stream format.
    yield make_chunk({"role": "assistant"})
    await asyncio.sleep(0.001)

    # Accumulate characters and flush the buffer every few characters or at
    # natural break points (spaces, newlines, punctuation).
    buffer = ""
    for char in content:
        if await request.is_disconnected():
            break

        buffer += char

        if len(buffer) >= 3 or char in [' ', '\n', '.', '!', '?', ',']:
            yield make_chunk({"content": buffer})
            buffer = ""

            # Slightly longer pause at line breaks for a more natural pace.
            delay = 0.05 if char == '\n' else 0.01
            await asyncio.sleep(delay)

    # Flush anything still left in the buffer.
    if buffer:
        yield make_chunk({"content": buffer})

    # Final chunk with an empty delta and finish_reason "stop", then the
    # OpenAI-style stream terminator.
    yield make_chunk({}, finish_reason="stop")
    yield "[DONE]"
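# On the wire, EventSourceResponse wraps each yielded string as a server-sent
# event, so a client sees lines like the following (illustrative):
#
#     data: {"id": "chatcmpl-...", "object": "chat.completion.chunk", ...}
#     data: [DONE]
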
@app.post("/v1/chat/completions")
async def create_chat_completion(request: Request):
    """
    Create a chat completion, optionally streamed as server-sent events.
    """
    try:
        body = await request.json()
        completion_request = ChatCompletionRequest(**body)

        if completion_request.model not in SUPPORTED_MODELS:
            supported_models_str = ", ".join(SUPPORTED_MODELS)
            raise HTTPException(
                status_code=400,
                detail=f"Model '{completion_request.model}' is not supported. Supported models: {supported_models_str}"
            )

        # Flatten the OpenAI-style message list into DuckAI's chat-history string.
        chat_history = format_chat_history(completion_request.messages)

        response_id = f"chatcmpl-{os.urandom(12).hex()}"

        # DuckAI returns the full completion in one call; the streaming path
        # below only replays that text chunk by chunk.
        duck_response = DuckAI().chat(chat_history, model=completion_request.model)
        duck_response = duck_response.strip()

        if completion_request.stream:
            return EventSourceResponse(
                stream_response_character_by_character(
                    duck_response,
                    request,
                    response_id,
                    completion_request.model
                ),
                media_type="text/event-stream"
            )

        response_data = {
            "id": response_id,
            "object": "chat.completion",
            "created": int(time.time()),
            "model": completion_request.model,
            "choices": [
                {
                    "index": 0,
                    "message": {
                        "role": "assistant",
                        "content": duck_response
                    },
                    "finish_reason": "stop"
                }
            ],
            # Token counts are rough estimates (about four characters per token).
            "usage": {
                "prompt_tokens": len(chat_history) // 4,
                "completion_tokens": len(duck_response) // 4,
                "total_tokens": (len(chat_history) + len(duck_response)) // 4
            }
        }

        return response_data

    except HTTPException:
        # Re-raise HTTP errors (e.g. the 400 above) instead of masking them as 500s.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error: {str(e)}")
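# Example call (illustrative; assumes the server is running locally on port 7860):
#
#     curl http://localhost:7860/v1/chat/completions \
#       -H "Content-Type: application/json" \
#       -d '{"model": "gpt-4o-mini", "messages": [{"role": "user", "content": "Hello!"}], "stream": true}'
#
# With "stream": true the reply arrives as "data: ..." chunks; without it, a
# single JSON chat.completion object is returned.
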
@app.get("/v1/models")
async def list_models():
    """Endpoint that lists the supported models."""
    models_data = []

    for model_id in SUPPORTED_MODELS:
        models_data.append({
            "id": model_id,
            "object": "model",
            "created": int(time.time()),
            "owned_by": "duckai"
        })

    return {
        "object": "list",
        "data": models_data
    }

@app.get("/")
async def root():
    return {
        "message": "DuckAI OpenAI Adapter API is running. Send requests to /v1/chat/completions",
        "supported_models": SUPPORTED_MODELS
    }
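# Illustrative client usage with the OpenAI Python SDK (assumes the server is
# running locally on port 7860; the api_key value is unused by this adapter):
#
#     from openai import OpenAI
#
#     client = OpenAI(base_url="http://localhost:7860/v1", api_key="unused")
#     reply = client.chat.completions.create(
#         model="gpt-4o-mini",
#         messages=[{"role": "user", "content": "Hello!"}],
#     )
#     print(reply.choices[0].message.content)
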
if __name__ == "__main__":
    import uvicorn

    uvicorn.run("main:app", host="0.0.0.0", port=7860, reload=True)