wynai committed
Commit 64ed020 · verified · 1 Parent(s): ff58f93

Update main.py

Files changed (1)
  1. main.py +159 -163
main.py CHANGED
@@ -1,17 +1,26 @@
-from fastapi import FastAPI, HTTPException, Request, Response
-from fastapi.middleware.cors import CORSMiddleware
-from pydantic import BaseModel, Field
-from typing import List, Optional, Literal, Dict, Any, Union, Generator
-import uvicorn
+import os
 import time
-import asyncio
 import json
-import re
+import asyncio
+from typing import List, Optional, Dict, Any, Union, Literal
+from pydantic import BaseModel, Field
+from fastapi import FastAPI, HTTPException, Request, Response
+from fastapi.middleware.cors import CORSMiddleware
+from sse_starlette.sse import EventSourceResponse
 from duckai import DuckAI
 
-app = FastAPI(title="DuckAI OpenAI Compatible API")
+# List of supported models
+SUPPORTED_MODELS = [
+    "gpt-4o-mini",
+    "llama-3.3-70b",
+    "claude-3-haiku",
+    "o3-mini",
+    "mistral-small-3"
+]
+
+app = FastAPI(title="DuckAI OpenAI Adapter API")
 
-# Add CORS middleware
+# Add the CORS middleware
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
@@ -20,7 +29,7 @@ app.add_middleware(
     allow_headers=["*"],
 )
 
-# Models for the OpenAI-compatible API
+# Models for the OpenAI API format
 class Message(BaseModel):
     role: Literal["system", "user", "assistant"]
     content: str
@@ -31,196 +40,183 @@ class ChatCompletionRequest(BaseModel):
     temperature: Optional[float] = 1.0
     max_tokens: Optional[int] = None
     stream: Optional[bool] = False
-
-class ChatCompletionChoice(BaseModel):
-    index: int = 0
-    message: Message
-    finish_reason: str = "stop"
-
-class ChatCompletionUsage(BaseModel):
-    prompt_tokens: int
-    completion_tokens: int
-    total_tokens: int
-
+
 class ChatCompletionResponse(BaseModel):
-    id: str
+    id: str = Field(default_factory=lambda: f"chatcmpl-{os.urandom(12).hex()}")
     object: str = "chat.completion"
-    created: int
+    created: int = Field(default_factory=lambda: int(time.time()))
     model: str
-    choices: List[ChatCompletionChoice]
-    usage: ChatCompletionUsage
+    choices: List[Dict[str, Any]]
+    usage: Dict[str, int]
 
-# DuckAI Parser
-class DuckAIParser:
-    @staticmethod
-    def parse_conversation_history(messages: List[Message]) -> str:
-        """
-        Convert OpenAI message format to DuckAI's expected format with user: and assistant: prefixes
-        """
-        # Skip system messages as they aren't part of the core conversation
-        conversation = []
-
-        for msg in messages:
-            if msg.role != "system":
-                conversation.append(f"{msg.role}: {msg.content}")
-
-        return "\n".join(conversation)
+def format_chat_history(messages: List[Message]) -> str:
+    """
+    Convert the OpenAI-format message list into a format DuckAI can
+    process (a string of "user: " and "assistant: " lines).
+    """
+    formatted_history = ""
 
-    @staticmethod
-    def estimate_tokens(text: str) -> int:
-        """
-        Estimate token count in text - rough approximation
-        """
-        # Very simple estimation - about 4 characters per token on average
-        return len(text) // 4
+    for message in messages:
+        if message.role == "system":
+            # Treat system messages as special user messages
+            formatted_history += f"user: [SYSTEM] {message.content}\n"
+        else:
+            formatted_history += f"{message.role}: {message.content}\n"
 
-    @staticmethod
-    def stream_response(response_text: str, request_id: str, model: str) -> Generator[str, None, None]:
-        """
-        Stream the response with simulated typing effect
-        """
-        # Split by words to simulate streaming
-        words = re.findall(r'\S+\s*', response_text)
-        accumulated_text = ""
-        chunk_id = 0
-
-        for word in words:
-            accumulated_text += word
-
-            # Create the delta message structure (OpenAI compatible)
-            delta_data = {
-                "id": request_id,
-                "object": "chat.completion.chunk",
-                "created": int(time.time()),
-                "model": model,
-                "choices": [
-                    {
-                        "index": 0,
-                        "delta": {"content": word},
-                        "finish_reason": None
-                    }
-                ]
-            }
-
-            yield f"data: {json.dumps(delta_data)}\n\n"
+    return formatted_history.strip()
+
+def generate_streaming_chunks(text: str, words_per_chunk: int = 2, delay: float = 0.06):
+    """
+    Simulate streaming by splitting the text into chunks.
+
+    Args:
+        text: the text to split
+        words_per_chunk: number of words per chunk
+        delay: delay between chunks (in seconds)
+    """
+    words = text.split()
+    chunks = []
+
+    for i in range(0, len(words), words_per_chunk):
+        chunk = ' '.join(words[i:i+words_per_chunk])
+        chunks.append(chunk)
+
+    return chunks, delay
+
+async def stream_response(content: str, request: Request, response_id: str, model: str):
+    """
+    Generator that streams the response in small chunks.
+    """
+    chunks, delay = generate_streaming_chunks(content)
+
+    for i, chunk in enumerate(chunks):
+        if await request.is_disconnected():
+            break
 
-            # Sleep for 0.06 seconds between words
-            time.sleep(0.06)
+        # Wait briefly to create the streaming effect
+        await asyncio.sleep(delay)
 
-        # Send the final message with finish_reason
-        end_data = {
-            "id": request_id,
+        # Format the data according to OpenAI's SSE convention
+        data = {
+            "id": response_id,
             "object": "chat.completion.chunk",
             "created": int(time.time()),
             "model": model,
             "choices": [
                 {
                     "index": 0,
-                    "delta": {},
-                    "finish_reason": "stop"
+                    "delta": {
+                        "content": chunk + (" " if i < len(chunks) - 1 else "")
+                    },
+                    "finish_reason": None if i < len(chunks) - 1 else "stop"
                 }
             ]
         }
 
-        yield f"data: {json.dumps(end_data)}\n\n"
-        yield "data: [DONE]\n\n"
+        yield json.dumps(data)
+
+    # Send [DONE] to mark the end of the stream (per the OpenAI convention)
+    yield "[DONE]"
 
 @app.post("/v1/chat/completions")
-async def create_chat_completion(request: ChatCompletionRequest, response: Response):
+async def create_chat_completion(request: Request, response: Response):
+    """
+    Chat completion endpoint with streaming support.
+    """
     try:
-        # Generate a request ID
-        request_id = f"chatcmpl-duck-{hash(str(request.messages)) % 10000}"
+        # Read the request body
+        body = await request.json()
 
-        # Parse the conversation history into DuckAI format
-        conversation_text = DuckAIParser.parse_conversation_history(request.messages)
+        # Build the request object from the body
+        completion_request = ChatCompletionRequest(**body)
 
-        # Call DuckAI with the formatted conversation
-        duck_ai = DuckAI()
-        result = duck_ai.chat(conversation_text, model=request.model)
+        # Validate the model
+        if completion_request.model not in SUPPORTED_MODELS:
+            supported_models_str = ", ".join(SUPPORTED_MODELS)
+            raise HTTPException(
+                status_code=400,
+                detail=f"Model '{completion_request.model}' is not supported. Supported models: {supported_models_str}"
+            )
+
+        # Convert the message list to the DuckAI format
+        chat_history = format_chat_history(completion_request.messages)
 
-        # Clean up the response
-        assistant_response = result.strip()
+        # Generate a response ID
+        response_id = f"chatcmpl-{os.urandom(12).hex()}"
 
-        # Estimate token usage
-        prompt_tokens = DuckAIParser.estimate_tokens(conversation_text)
-        completion_tokens = DuckAIParser.estimate_tokens(assistant_response)
+        # Call the DuckAI API (it does not support streaming, so we fetch the full response)
+        duck_response = DuckAI().chat(chat_history, model=completion_request.model)
+        duck_response = duck_response.strip()
 
-        # Handle streaming if requested
-        if request.stream:
-            response.headers["Content-Type"] = "text/event-stream"
-            return DuckAIParser.stream_response(
-                assistant_response,
-                request_id,
-                request.model
+        # If the request asked for streaming
+        if completion_request.stream:
+            return EventSourceResponse(
+                stream_response(
+                    duck_response,
+                    request,
+                    response_id,
+                    completion_request.model
+                ),
+                media_type="text/event-stream"
             )
 
-        # Regular response (non-streaming)
-        return ChatCompletionResponse(
-            id=request_id,
-            created=int(time.time()),
-            model=request.model,
-            choices=[
-                ChatCompletionChoice(
-                    message=Message(
-                        role="assistant",
-                        content=assistant_response
-                    )
-                )
+        # Return a regular (non-streaming) response
+        response_data = {
+            "id": response_id,
+            "object": "chat.completion",
+            "created": int(time.time()),
+            "model": completion_request.model,
+            "choices": [
+                {
+                    "index": 0,
+                    "message": {
+                        "role": "assistant",
+                        "content": duck_response
+                    },
+                    "finish_reason": "stop"
+                }
            ],
-            usage=ChatCompletionUsage(
-                prompt_tokens=prompt_tokens,
-                completion_tokens=completion_tokens,
-                total_tokens=prompt_tokens + completion_tokens
-            )
-        )
-
+            "usage": {
+                "prompt_tokens": len(chat_history) // 4,  # rough estimate
+                "completion_tokens": len(duck_response) // 4,
+                "total_tokens": (len(chat_history) + len(duck_response)) // 4
+            }
+        }
+
+        return response_data
+
+    except HTTPException:
+        # Let explicit HTTP errors (e.g. the 400 above) pass through unchanged
+        raise
     except Exception as e:
-        raise HTTPException(status_code=500, detail=str(e))
 
 @app.get("/v1/models")
 async def list_models():
-    """Return a list of available models"""
-    current_time = int(time.time())
+    """List the supported models"""
+    models_data = []
+
+    for model_id in SUPPORTED_MODELS:
+        models_data.append({
+            "id": model_id,
+            "object": "model",
+            "created": int(time.time()),
+            "owned_by": "duckai"
+        })
+
    return {
         "object": "list",
-        "data": [
-            {
-                "id": "gpt-4o-mini",
-                "object": "model",
-                "created": current_time,
-                "owned_by": "DuckAI"
-            },
-            {
-                "id": "llama-3.3-70b",
-                "object": "model",
-                "created": current_time,
-                "owned_by": "DuckAI"
-            },
-            {
-                "id": "claude-3-haiku",
-                "object": "model",
-                "created": current_time,
-                "owned_by": "DuckAI"
-            },
-            {
-                "id": "o3-mini",
-                "object": "model",
-                "created": current_time,
-                "owned_by": "DuckAI"
-            },
-            {
-                "id": "mistral-small-3",
-                "object": "model",
-                "created": current_time,
-                "owned_by": "DuckAI"
-            }
-        ]
+        "data": models_data
     }
 
-# Health check endpoint
-@app.get("/health")
-async def health_check():
-    return {"status": "ok"}
+@app.get("/")
+async def root():
+    return {
+        "message": "DuckAI OpenAI Adapter API is running. Send requests to /v1/chat/completions",
+        "supported_models": SUPPORTED_MODELS
+    }
 
 if __name__ == "__main__":
+    import uvicorn
+    # Host and port configuration for Hugging Face Spaces
     uvicorn.run("main:app", host="0.0.0.0", port=7860, reload=True)
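Since the adapter exposes the OpenAI wire format, any OpenAI-compatible client can talk to it. The sketch below is illustrative rather than part of the commit: it assumes the server above is running locally on port 7860 and that the openai v1 Python package is installed; any HTTP client that speaks the chat-completions protocol would work the same way.

# client_example.py — hypothetical usage sketch, not part of commit 64ed020
from openai import OpenAI

# The adapter does not check API keys, so any placeholder value works
client = OpenAI(base_url="http://localhost:7860/v1", api_key="unused")

# Non-streaming request: returns one chat.completion JSON body
completion = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": "Hello!"}],
)
print(completion.choices[0].message.content)

# Streaming request: SSE chunks arrive until the [DONE] sentinel
stream = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": "Hello!"}],
    stream=True,
)
for chunk in stream:
    if chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="", flush=True)

Note that the stream is simulated: the endpoint fetches the whole DuckAI reply first, then replays it in two-word chunks every 0.06 s, so time-to-first-token equals the full DuckAI latency.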