Update main.py
main.py
CHANGED
@@ -1,8 +1,12 @@
-from fastapi import FastAPI, HTTPException, Request
+from fastapi import FastAPI, HTTPException, Request, Response
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel, Field
-from typing import List, Optional, Literal, Dict, Any, Union
+from typing import List, Optional, Literal, Dict, Any, Union, Generator
 import uvicorn
+import time
+import asyncio
+import json
+import re
 from duckai import DuckAI
 
 app = FastAPI(title="DuckAI OpenAI Compatible API")
@@ -69,10 +73,64 @@ class DuckAIParser:
         """
         # Very simple estimation - about 4 characters per token on average
         return len(text) // 4
+
+    @staticmethod
+    def stream_response(response_text: str, request_id: str, model: str) -> Generator[str, None, None]:
+        """
+        Stream the response with simulated typing effect
+        """
+        # Split by words to simulate streaming
+        words = re.findall(r'\S+\s*', response_text)
+        accumulated_text = ""
+        chunk_id = 0
+
+        for word in words:
+            accumulated_text += word
+
+            # Create the delta message structure (OpenAI compatible)
+            delta_data = {
+                "id": request_id,
+                "object": "chat.completion.chunk",
+                "created": int(time.time()),
+                "model": model,
+                "choices": [
+                    {
+                        "index": 0,
+                        "delta": {"content": word},
+                        "finish_reason": None
+                    }
+                ]
+            }
+
+            yield f"data: {json.dumps(delta_data)}\n\n"
+
+            # Sleep for 0.06 seconds between words
+            time.sleep(0.06)
+
+        # Send the final message with finish_reason
+        end_data = {
+            "id": request_id,
+            "object": "chat.completion.chunk",
+            "created": int(time.time()),
+            "model": model,
+            "choices": [
+                {
+                    "index": 0,
+                    "delta": {},
+                    "finish_reason": "stop"
+                }
+            ]
+        }
+
+        yield f"data: {json.dumps(end_data)}\n\n"
+        yield "data: [DONE]\n\n"
 
-@app.post("/v1/chat/completions"
-async def create_chat_completion(request: ChatCompletionRequest):
+@app.post("/v1/chat/completions")
+async def create_chat_completion(request: ChatCompletionRequest, response: Response):
     try:
+        # Generate a request ID
+        request_id = f"chatcmpl-duck-{hash(str(request.messages)) % 10000}"
+
         # Parse the conversation history into DuckAI format
         conversation_text = DuckAIParser.parse_conversation_history(request.messages)
 
@@ -80,17 +138,26 @@ async def create_chat_completion(request: ChatCompletionRequest):
         duck_ai = DuckAI()
         result = duck_ai.chat(conversation_text, model=request.model)
 
-        #
+        # Clean up the response
        assistant_response = result.strip()
 
         # Estimate token usage
         prompt_tokens = DuckAIParser.estimate_tokens(conversation_text)
         completion_tokens = DuckAIParser.estimate_tokens(assistant_response)
 
-        #
-
-
-
+        # Handle streaming if requested
+        if request.stream:
+            response.headers["Content-Type"] = "text/event-stream"
+            return DuckAIParser.stream_response(
+                assistant_response,
+                request_id,
+                request.model
+            )
+
+        # Regular response (non-streaming)
+        return ChatCompletionResponse(
+            id=request_id,
+            created=int(time.time()),
             model=request.model,
             choices=[
                 ChatCompletionChoice(
@@ -106,8 +173,6 @@ async def create_chat_completion(request: ChatCompletionRequest):
                 total_tokens=prompt_tokens + completion_tokens
             )
         )
-
-        return response
 
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
@@ -115,7 +180,7 @@ async def create_chat_completion(request: ChatCompletionRequest):
 @app.get("/v1/models")
 async def list_models():
     """Return a list of available models"""
-    current_time = int(
+    current_time = int(time.time())
     return {
         "object": "list",
         "data": [
@@ -152,10 +217,10 @@ async def list_models():
         ]
     }
 
-#
-@app.get("/")
-async def
-    return {"status": "ok"
+# Health check endpoint
+@app.get("/health")
+async def health_check():
+    return {"status": "ok"}
 
 if __name__ == "__main__":
     uvicorn.run("main:app", host="0.0.0.0", port=7860, reload=True)
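One caveat on the new streaming branch: create_chat_completion returns the generator object itself, and FastAPI will try to JSON-serialize a plain generator rather than stream it over the wire. The usual FastAPI pattern for server-sent events wraps the generator in a StreamingResponse. A minimal self-contained sketch of that pattern (the endpoint and generator names here are illustrative, not part of this commit):

from fastapi import FastAPI
from fastapi.responses import StreamingResponse

app = FastAPI()

def event_stream():
    # Same SSE framing as stream_response: one "data: <json>\n\n" per chunk.
    yield 'data: {"delta": "hello "}\n\n'
    yield 'data: {"delta": "world"}\n\n'
    yield "data: [DONE]\n\n"

@app.get("/sse-demo")
async def sse_demo():
    # StreamingResponse sets the Content-Type itself via media_type.
    return StreamingResponse(event_stream(), media_type="text/event-stream")

Starlette iterates a synchronous generator in a threadpool, so the time.sleep(0.06) pacing inside stream_response would not block the event loop under this pattern.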
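On the client side, any OpenAI-compatible SDK can consume the endpoint once it streams. A minimal sketch with the official openai Python package, assuming the server is running locally on the port bound above (7860); the model name is a placeholder, use one reported by GET /v1/models:

from openai import OpenAI

# api_key is unused by this server, but the client requires one.
client = OpenAI(base_url="http://localhost:7860/v1", api_key="unused")

stream = client.chat.completions.create(
    model="gpt-4o-mini",  # placeholder; pick a name from GET /v1/models
    messages=[{"role": "user", "content": "Say hello in five words."}],
    stream=True,
)

# Each chunk is a chat.completion.chunk object as emitted by
# DuckAIParser.stream_response; the final chunk has finish_reason="stop".
for chunk in stream:
    delta = chunk.choices[0].delta.content
    if delta:
        print(delta, end="", flush=True)
print()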