wynai committed
Commit 64ed020 · verified · 1 Parent(s): ff58f93

Update main.py

Files changed (1)
  1. main.py +159 -163
main.py CHANGED
@@ -1,17 +1,26 @@
-from fastapi import FastAPI, HTTPException, Request, Response
-from fastapi.middleware.cors import CORSMiddleware
-from pydantic import BaseModel, Field
-from typing import List, Optional, Literal, Dict, Any, Union, Generator
-import uvicorn
+import os
 import time
-import asyncio
 import json
-import re
+import asyncio
+from typing import List, Optional, Dict, Any, Union, Literal
+from pydantic import BaseModel, Field
+from fastapi import FastAPI, HTTPException, Request, Response
+from fastapi.middleware.cors import CORSMiddleware
+from sse_starlette.sse import EventSourceResponse
 from duckai import DuckAI
 
-app = FastAPI(title="DuckAI OpenAI Compatible API")
+# List of supported models
+SUPPORTED_MODELS = [
+    "gpt-4o-mini",
+    "llama-3.3-70b",
+    "claude-3-haiku",
+    "o3-mini",
+    "mistral-small-3"
+]
+
+app = FastAPI(title="DuckAI OpenAI Adapter API")
 
-# Add CORS middleware
+# Add the CORS middleware
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
@@ -20,7 +29,7 @@ app.add_middleware(
     allow_headers=["*"],
 )
 
-# Models for the OpenAI-compatible API
+# Models for the OpenAI API format
 class Message(BaseModel):
     role: Literal["system", "user", "assistant"]
     content: str
@@ -31,196 +40,183 @@ class ChatCompletionRequest(BaseModel):
     temperature: Optional[float] = 1.0
     max_tokens: Optional[int] = None
     stream: Optional[bool] = False
-
-class ChatCompletionChoice(BaseModel):
-    index: int = 0
-    message: Message
-    finish_reason: str = "stop"
-
-class ChatCompletionUsage(BaseModel):
-    prompt_tokens: int
-    completion_tokens: int
-    total_tokens: int
-
+
 class ChatCompletionResponse(BaseModel):
-    id: str
+    id: str = Field(default_factory=lambda: f"chatcmpl-{os.urandom(12).hex()}")
     object: str = "chat.completion"
-    created: int
+    created: int = Field(default_factory=lambda: int(time.time()))
     model: str
-    choices: List[ChatCompletionChoice]
-    usage: ChatCompletionUsage
+    choices: List[Dict[str, Any]]
+    usage: Dict[str, int]
 
-# DuckAI Parser
-class DuckAIParser:
-    @staticmethod
-    def parse_conversation_history(messages: List[Message]) -> str:
-        """
-        Convert OpenAI message format to DuckAI's expected format with user: and assistant: prefixes
-        """
-        # Skip system messages as they aren't part of the core conversation
-        conversation = []
-
-        for msg in messages:
-            if msg.role != "system":
-                conversation.append(f"{msg.role}: {msg.content}")
-
-        return "\n".join(conversation)
+def format_chat_history(messages: List[Message]) -> str:
+    """
+    Convert the OpenAI-format message list into a format DuckAI can
+    process (a string of "user: " and "assistant: " lines).
+    """
+    formatted_history = ""
 
-    @staticmethod
-    def estimate_tokens(text: str) -> int:
-        """
-        Estimate token count in text - rough approximation
-        """
-        # Very simple estimation - about 4 characters per token on average
-        return len(text) // 4
+    for message in messages:
+        if message.role == "system":
+            # Treat system messages as special user messages
+            formatted_history += f"user: [SYSTEM] {message.content}\n"
+        else:
+            formatted_history += f"{message.role}: {message.content}\n"
 
-    @staticmethod
-    def stream_response(response_text: str, request_id: str, model: str) -> Generator[str, None, None]:
-        """
-        Stream the response with simulated typing effect
-        """
-        # Split by words to simulate streaming
-        words = re.findall(r'\S+\s*', response_text)
-        accumulated_text = ""
-        chunk_id = 0
-
-        for word in words:
-            accumulated_text += word
-
-            # Create the delta message structure (OpenAI compatible)
-            delta_data = {
-                "id": request_id,
-                "object": "chat.completion.chunk",
-                "created": int(time.time()),
-                "model": model,
-                "choices": [
-                    {
-                        "index": 0,
-                        "delta": {"content": word},
-                        "finish_reason": None
-                    }
-                ]
-            }
-
-            yield f"data: {json.dumps(delta_data)}\n\n"
+    return formatted_history.strip()
+
+def generate_streaming_chunks(text: str, words_per_chunk: int = 2, delay: float = 0.06):
+    """
+    Simulate streaming by splitting the text into chunks.
+
+    Args:
+        text: the text to split
+        words_per_chunk: number of words per chunk
+        delay: delay between chunks (in seconds)
+    """
+    words = text.split()
+    chunks = []
+
+    for i in range(0, len(words), words_per_chunk):
+        chunk = ' '.join(words[i:i+words_per_chunk])
+        chunks.append(chunk)
+
+    return chunks, delay
+
+async def stream_response(content: str, request: Request, response_id: str, model: str):
+    """
+    Generator that streams the response in small chunks.
+    """
+    chunks, delay = generate_streaming_chunks(content)
+
+    for i, chunk in enumerate(chunks):
+        if await request.is_disconnected():
+            break
 
-            # Sleep for 0.06 seconds between words
-            time.sleep(0.06)
+        # Wait briefly to create the streaming effect
+        await asyncio.sleep(delay)
 
-        # Send the final message with finish_reason
-        end_data = {
-            "id": request_id,
+        # Format the data according to OpenAI's SSE convention
+        data = {
+            "id": response_id,
             "object": "chat.completion.chunk",
             "created": int(time.time()),
             "model": model,
             "choices": [
                 {
                     "index": 0,
-                    "delta": {},
-                    "finish_reason": "stop"
+                    "delta": {
+                        "content": chunk + (" " if i < len(chunks) - 1 else "")
+                    },
+                    "finish_reason": None if i < len(chunks) - 1 else "stop"
                 }
             ]
         }
 
-        yield f"data: {json.dumps(end_data)}\n\n"
-        yield "data: [DONE]\n\n"
+        yield json.dumps(data)
+
+    # Send [DONE] to mark the end of the stream (per the OpenAI convention)
+    yield "[DONE]"
 
 @app.post("/v1/chat/completions")
-async def create_chat_completion(request: ChatCompletionRequest, response: Response):
+async def create_chat_completion(request: Request, response: Response):
+    """
+    Chat completion endpoint with streaming support.
+    """
     try:
-        # Generate a request ID
-        request_id = f"chatcmpl-duck-{hash(str(request.messages)) % 10000}"
+        # Read the request body
+        body = await request.json()
 
-        # Parse the conversation history into DuckAI format
-        conversation_text = DuckAIParser.parse_conversation_history(request.messages)
+        # Build the request object from the body
+        completion_request = ChatCompletionRequest(**body)
 
-        # Call DuckAI with the formatted conversation
-        duck_ai = DuckAI()
-        result = duck_ai.chat(conversation_text, model=request.model)
+        # Validate the model
+        if completion_request.model not in SUPPORTED_MODELS:
+            supported_models_str = ", ".join(SUPPORTED_MODELS)
+            raise HTTPException(
+                status_code=400,
+                detail=f"Model '{completion_request.model}' is not supported. Supported models: {supported_models_str}"
+            )
+
+        # Convert the message list to the DuckAI format
+        chat_history = format_chat_history(completion_request.messages)
 
-        # Clean up the response
-        assistant_response = result.strip()
+        # Generate a response ID
+        response_id = f"chatcmpl-{os.urandom(12).hex()}"
 
-        # Estimate token usage
-        prompt_tokens = DuckAIParser.estimate_tokens(conversation_text)
-        completion_tokens = DuckAIParser.estimate_tokens(assistant_response)
+        # Call the DuckAI API (it does not support streaming, so we fetch the full response)
+        duck_response = DuckAI().chat(chat_history, model=completion_request.model)
+        duck_response = duck_response.strip()
 
-        # Handle streaming if requested
-        if request.stream:
-            response.headers["Content-Type"] = "text/event-stream"
-            return DuckAIParser.stream_response(
-                assistant_response,
-                request_id,
-                request.model
+        # If the request asked for streaming
+        if completion_request.stream:
+            return EventSourceResponse(
+                stream_response(
+                    duck_response,
+                    request,
+                    response_id,
+                    completion_request.model
+                ),
+                media_type="text/event-stream"
             )
 
-        # Regular response (non-streaming)
-        return ChatCompletionResponse(
-            id=request_id,
-            created=int(time.time()),
-            model=request.model,
-            choices=[
-                ChatCompletionChoice(
-                    message=Message(
-                        role="assistant",
-                        content=assistant_response
-                    )
-                )
+        # Return a regular (non-streaming) response
+        response_data = {
+            "id": response_id,
+            "object": "chat.completion",
+            "created": int(time.time()),
+            "model": completion_request.model,
+            "choices": [
+                {
+                    "index": 0,
+                    "message": {
+                        "role": "assistant",
+                        "content": duck_response
+                    },
+                    "finish_reason": "stop"
+                }
            ],
-            usage=ChatCompletionUsage(
-                prompt_tokens=prompt_tokens,
-                completion_tokens=completion_tokens,
-                total_tokens=prompt_tokens + completion_tokens
-            )
-        )
-
+            "usage": {
+                "prompt_tokens": len(chat_history) // 4,  # rough estimate
+                "completion_tokens": len(duck_response) // 4,
+                "total_tokens": (len(chat_history) + len(duck_response)) // 4
+            }
+        }
+
+        return response_data
+
+    except HTTPException:
+        # Let explicit HTTP errors (e.g. the 400 above) pass through unchanged
+        raise
     except Exception as e:
-        raise HTTPException(status_code=500, detail=str(e))
 
 @app.get("/v1/models")
 async def list_models():
-    """Return a list of available models"""
-    current_time = int(time.time())
+    """List the supported models"""
+    models_data = []
+
+    for model_id in SUPPORTED_MODELS:
+        models_data.append({
+            "id": model_id,
+            "object": "model",
+            "created": int(time.time()),
+            "owned_by": "duckai"
+        })
+
    return {
         "object": "list",
-        "data": [
-            {
-                "id": "gpt-4o-mini",
-                "object": "model",
-                "created": current_time,
-                "owned_by": "DuckAI"
-            },
-            {
-                "id": "llama-3.3-70b",
-                "object": "model",
-                "created": current_time,
-                "owned_by": "DuckAI"
-            },
-            {
-                "id": "claude-3-haiku",
-                "object": "model",
-                "created": current_time,
-                "owned_by": "DuckAI"
-            },
-            {
-                "id": "o3-mini",
-                "object": "model",
-                "created": current_time,
-                "owned_by": "DuckAI"
-            },
-            {
-                "id": "mistral-small-3",
-                "object": "model",
-                "created": current_time,
-                "owned_by": "DuckAI"
-            }
-        ]
+        "data": models_data
     }
 
-# Health check endpoint
-@app.get("/health")
-async def health_check():
-    return {"status": "ok"}
+@app.get("/")
+async def root():
+    return {
+        "message": "DuckAI OpenAI Adapter API is running. Send requests to /v1/chat/completions",
+        "supported_models": SUPPORTED_MODELS
+    }
 
 if __name__ == "__main__":
+    import uvicorn
+    # Host and port configuration for Hugging Face Spaces
     uvicorn.run("main:app", host="0.0.0.0", port=7860, reload=True)
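Since the adapter exposes the OpenAI wire format, any OpenAI-compatible client can talk to it. The sketch below is illustrative rather than part of the commit: it assumes the server above is running locally on port 7860 and that the openai v1 Python package is installed; any HTTP client that speaks the chat-completions protocol would work the same way.

# client_example.py — hypothetical usage sketch, not part of commit 64ed020
from openai import OpenAI

# The adapter does not check API keys, so any placeholder value works
client = OpenAI(base_url="http://localhost:7860/v1", api_key="unused")

# Non-streaming request: returns one chat.completion JSON body
completion = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": "Hello!"}],
)
print(completion.choices[0].message.content)

# Streaming request: SSE chunks arrive until the [DONE] sentinel
stream = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": "Hello!"}],
    stream=True,
)
for chunk in stream:
    if chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="", flush=True)

Note that the stream is simulated: the endpoint fetches the whole DuckAI reply first, then replays it in two-word chunks every 0.06 s, so time-to-first-token equals the full DuckAI latency.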