Create main.py
main.py (new file)
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from typing import List, Optional, Literal
import time
import uvicorn
from duckai import DuckAI

app = FastAPI(title="DuckAI OpenAI Compatible API")

# Add CORS middleware
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Models for the OpenAI-compatible API
class Message(BaseModel):
    role: Literal["system", "user", "assistant"]
    content: str

class ChatCompletionRequest(BaseModel):
    model: str
    messages: List[Message]
    # Accepted for OpenAI compatibility; DuckAI does not expose these
    # knobs, so they are currently ignored by the endpoint below.
    temperature: Optional[float] = 1.0
    max_tokens: Optional[int] = None
    stream: Optional[bool] = False

class ChatCompletionChoice(BaseModel):
    index: int = 0
    message: Message
    finish_reason: str = "stop"

class ChatCompletionUsage(BaseModel):
    prompt_tokens: int
    completion_tokens: int
    total_tokens: int

class ChatCompletionResponse(BaseModel):
    id: str
    object: str = "chat.completion"
    created: int
    model: str
    choices: List[ChatCompletionChoice]
    usage: ChatCompletionUsage

# DuckAI parser
class DuckAIParser:
    @staticmethod
    def parse_conversation_history(messages: List[Message]) -> str:
        """
        Convert the OpenAI message format to DuckAI's expected format,
        with "user:" and "assistant:" prefixes.
        """
        # Skip system messages, as they aren't part of the core conversation
        conversation = []
        for msg in messages:
            if msg.role != "system":
                conversation.append(f"{msg.role}: {msg.content}")
        return "\n".join(conversation)

    @staticmethod
    def estimate_tokens(text: str) -> int:
        """
        Estimate the token count of a text - a very rough approximation
        of about 4 characters per token on average.
        """
        return len(text) // 4

@app.post("/v1/chat/completions", response_model=ChatCompletionResponse)
async def create_chat_completion(request: ChatCompletionRequest):
    try:
        # Parse the conversation history into DuckAI's format
        conversation_text = DuckAIParser.parse_conversation_history(request.messages)

        # Call DuckAI with the formatted conversation
        duck_ai = DuckAI()
        result = duck_ai.chat(conversation_text, model=request.model)

        # Extract the assistant's response
        assistant_response = result.strip()

        # Estimate token usage
        prompt_tokens = DuckAIParser.estimate_tokens(conversation_text)
        completion_tokens = DuckAIParser.estimate_tokens(assistant_response)

        # Build the OpenAI-compatible response
        response = ChatCompletionResponse(
            id=f"chatcmpl-duck-{hash(conversation_text) % 10000}",
            created=int(time.time()),
            model=request.model,
            choices=[
                ChatCompletionChoice(
                    message=Message(
                        role="assistant",
                        content=assistant_response,
                    )
                )
            ],
            usage=ChatCompletionUsage(
                prompt_tokens=prompt_tokens,
                completion_tokens=completion_tokens,
                total_tokens=prompt_tokens + completion_tokens,
            ),
        )

        return response

    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

@app.get("/v1/models")
async def list_models():
    """Return the list of available models."""
    current_time = int(time.time())
    return {
        "object": "list",
        "data": [
            {
                "id": "gpt-4o-mini",
                "object": "model",
                "created": current_time,
                "owned_by": "DuckAI",
            },
            {
                "id": "llama-3.3-70b",
                "object": "model",
                "created": current_time,
                "owned_by": "DuckAI",
            },
            {
                "id": "claude-3-haiku",
                "object": "model",
                "created": current_time,
                "owned_by": "DuckAI",
            },
            {
                "id": "o3-mini",
                "object": "model",
                "created": current_time,
                "owned_by": "DuckAI",
            },
            {
                "id": "mistral-small-3",
                "object": "model",
                "created": current_time,
                "owned_by": "DuckAI",
            },
        ],
    }

# Simple root endpoint for health checks
@app.get("/")
async def root():
    return {"status": "ok", "message": "DuckAI OpenAI Compatible API is running"}

if __name__ == "__main__":
    uvicorn.run("main:app", host="0.0.0.0", port=7860, reload=True)
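
Because the endpoint mirrors the OpenAI chat-completions schema, any OpenAI-style client should be able to talk to it. Below is a minimal sketch, assuming main.py is running locally on port 7860 and the openai Python package (v1.x) is installed; the api_key value is a placeholder, since this server performs no authentication.

# example_client.py - sketch of calling the server above with the
# official openai client (v1.x). "not-needed" is a dummy key, as the
# server does not check credentials.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:7860/v1", api_key="not-needed")

response = client.chat.completions.create(
    model="gpt-4o-mini",  # any id returned by /v1/models
    messages=[{"role": "user", "content": "Say hello in one short sentence."}],
)
print(response.choices[0].message.content)

Note that stream=True is accepted by the request model but ignored: the endpoint always returns a single, non-streamed response.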