File size: 2,403 Bytes
7a4b76e
14cd2f6
9edc2f1
7a4b76e
 
08606b0
5bf1b3e
 
7a4b76e
14cd2f6
7a4b76e
14cd2f6
7a4b76e
14cd2f6
 
 
 
 
 
7a4b76e
 
 
 
 
14cd2f6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7a4b76e
 
 
 
 
 
14cd2f6
9edc2f1
 
14cd2f6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
745d3f1
7a4b76e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import time
from fastapi import FastAPI, Request, HTTPException
from pydantic import BaseModel
from duckai import DuckAI
import uvicorn

app = FastAPI()

API_PREFIX = "/"

# Middleware for logging request time
@app.middleware("http")
async def log_process_time(request: Request, call_next):
    start_time = time.time()
    response = await call_next(request)
    process_time = time.time() - start_time
    print(f"{request.method} {response.status_code} {request.url.path} {process_time*1000:.2f} ms")
    return response

# Request body model
class ChatCompletionRequest(BaseModel):
    model: str
    messages: list[dict]

@app.get("/")
async def root():
    return {"message": "API server running"}

@app.get("/ping")
async def ping():
    return {"message": "pong"}

@app.get(f"{API_PREFIX}v1/models")
async def get_models():
    return {
        "object": "list",
        "data": [
            {"id": "gpt-4o-mini", "object": "model", "owned_by": "ddg"},
            {"id": "claude-3-haiku", "object": "model", "owned_by": "ddg"},
            {"id": "llama-3.1-70b", "object": "model", "owned_by": "ddg"},
            {"id": "mixtral-8x7b", "object": "model", "owned_by": "ddg"},
            {"id": "o3-mini", "object": "model", "owned_by": "ddg"},
        ],
    }

@app.post(f"{API_PREFIX}v1/chat/completions")
async def chat_completions(request: ChatCompletionRequest):
    try:
        # Only using DuckAI directly
        content = " ".join([msg.get("content", "") for msg in request.messages])
        duck = DuckAI()
        results = duck.chat(content, model=request.model)
        response = create_complete_response(results, request.model)
        return response

    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

def create_complete_response(text: str, model: str) -> dict:
    """Create a complete non-streaming response"""
    return {
        "id": "chatcmpl-123",
        "object": "chat.completion",
        "created": int(time.time()),
        "model": model,
        "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
        "choices": [
            {
                "message": {"content": text, "role": "assistant"},
                "index": 0,
                "finish_reason": "stop",
            },
        ],
    }

if __name__ == "__main__":
    uvicorn.run("app:app", host="0.0.0.0", port=7860, reload=True)