wynai committed
Commit ff58f93 · verified · 1 Parent(s): 66c4662

Update main.py

Files changed (1)
  1. main.py +81 -16
main.py CHANGED
@@ -1,8 +1,12 @@
-from fastapi import FastAPI, HTTPException, Request
+from fastapi import FastAPI, HTTPException, Request, Response
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel, Field
-from typing import List, Optional, Literal, Dict, Any, Union
+from typing import List, Optional, Literal, Dict, Any, Union, Generator
 import uvicorn
+import time
+import asyncio
+import json
+import re
 from duckai import DuckAI
 
 app = FastAPI(title="DuckAI OpenAI Compatible API")
@@ -69,10 +73,64 @@ class DuckAIParser:
         """
         # Very simple estimation - about 4 characters per token on average
         return len(text) // 4
+
+    @staticmethod
+    def stream_response(response_text: str, request_id: str, model: str) -> Generator[str, None, None]:
+        """
+        Stream the response with simulated typing effect
+        """
+        # Split by words to simulate streaming
+        words = re.findall(r'\S+\s*', response_text)
+        accumulated_text = ""
+        chunk_id = 0
+
+        for word in words:
+            accumulated_text += word
+
+            # Create the delta message structure (OpenAI compatible)
+            delta_data = {
+                "id": request_id,
+                "object": "chat.completion.chunk",
+                "created": int(time.time()),
+                "model": model,
+                "choices": [
+                    {
+                        "index": 0,
+                        "delta": {"content": word},
+                        "finish_reason": None
+                    }
+                ]
+            }
+
+            yield f"data: {json.dumps(delta_data)}\n\n"
+
+            # Sleep for 0.06 seconds between words
+            time.sleep(0.06)
+
+        # Send the final message with finish_reason
+        end_data = {
+            "id": request_id,
+            "object": "chat.completion.chunk",
+            "created": int(time.time()),
+            "model": model,
+            "choices": [
+                {
+                    "index": 0,
+                    "delta": {},
+                    "finish_reason": "stop"
+                }
+            ]
+        }
+
+        yield f"data: {json.dumps(end_data)}\n\n"
+        yield "data: [DONE]\n\n"
 
-@app.post("/v1/chat/completions", response_model=ChatCompletionResponse)
-async def create_chat_completion(request: ChatCompletionRequest):
+@app.post("/v1/chat/completions")
+async def create_chat_completion(request: ChatCompletionRequest, response: Response):
     try:
+        # Generate a request ID
+        request_id = f"chatcmpl-duck-{hash(str(request.messages)) % 10000}"
+
         # Parse the conversation history into DuckAI format
         conversation_text = DuckAIParser.parse_conversation_history(request.messages)
 
@@ -80,17 +138,26 @@ async def create_chat_completion(request: ChatCompletionRequest):
         duck_ai = DuckAI()
         result = duck_ai.chat(conversation_text, model=request.model)
 
-        # Extract the assistant's response
+        # Clean up the response
         assistant_response = result.strip()
 
         # Estimate token usage
         prompt_tokens = DuckAIParser.estimate_tokens(conversation_text)
         completion_tokens = DuckAIParser.estimate_tokens(assistant_response)
 
-        # Create OpenAI-compatible response
-        response = ChatCompletionResponse(
-            id=f"chatcmpl-duck-{hash(conversation_text) % 10000}",
-            created=int(__import__('time').time()),
+        # Handle streaming if requested
+        if request.stream:
+            response.headers["Content-Type"] = "text/event-stream"
+            return DuckAIParser.stream_response(
+                assistant_response,
+                request_id,
+                request.model
+            )
+
+        # Regular response (non-streaming)
+        return ChatCompletionResponse(
+            id=request_id,
+            created=int(time.time()),
             model=request.model,
             choices=[
                 ChatCompletionChoice(
@@ -106,8 +173,6 @@ async def create_chat_completion(request: ChatCompletionRequest):
                     total_tokens=prompt_tokens + completion_tokens
                 )
             )
-
-        return response
 
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
@@ -115,7 +180,7 @@ async def create_chat_completion(request: ChatCompletionRequest):
 @app.get("/v1/models")
 async def list_models():
     """Return a list of available models"""
-    current_time = int(__import__('time').time())
+    current_time = int(time.time())
     return {
         "object": "list",
         "data": [
@@ -152,10 +217,10 @@ async def list_models():
         ]
     }
 
-# Adding a simple root endpoint for health check
-@app.get("/")
-async def root():
-    return {"status": "ok", "message": "DuckAI OpenAI Compatible API is running"}
+# Health check endpoint
+@app.get("/health")
+async def health_check():
+    return {"status": "ok"}
 
 if __name__ == "__main__":
     uvicorn.run("main:app", host="0.0.0.0", port=7860, reload=True)
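
With this change, `POST /v1/chat/completions` honors the `stream` flag in the request body: the endpoint emits OpenAI-style `chat.completion.chunk` objects as server-sent `data:` lines and closes the stream with a `data: [DONE]` sentinel. A minimal client sketch for the streaming path, assuming the server is running locally on port 7860 (as configured in `uvicorn.run`) and that the third-party `requests` library is installed; `"example-model"` is a placeholder, not a name shipped by this commit:

```python
# Hypothetical client for the streaming endpoint added in this commit.
# Assumes a local server on port 7860; "example-model" is a placeholder --
# query GET /v1/models for the names the server actually advertises.
import json
import requests

payload = {
    "model": "example-model",
    "messages": [{"role": "user", "content": "Hello!"}],
    "stream": True,
}

with requests.post(
    "http://localhost:7860/v1/chat/completions",
    json=payload,
    stream=True,
) as resp:
    for line in resp.iter_lines(decode_unicode=True):
        # Each SSE event arrives as a "data: {...}" line; blank lines separate events.
        if not line or not line.startswith("data: "):
            continue
        data = line[len("data: "):]
        if data == "[DONE]":  # end-of-stream sentinel emitted by stream_response
            break
        chunk = json.loads(data)
        delta = chunk["choices"][0]["delta"]
        print(delta.get("content", ""), end="", flush=True)
print()
```

Note that the server builds the complete DuckAI reply first and then replays it word by word with a 0.06 s delay, so the stream is a simulated typing effect rather than true incremental generation.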