duckai

Running

App Files Community

wynai committed on May 13

Commit

27f2594

verified ·

1 Parent(s): 04e4a62

Update main.py

Browse files

Files changed (1) hide show

main.py +75 -30

main.py CHANGED Viewed

@@ -65,38 +65,68 @@ def format_chat_history(messages: List[Message]) -> str:
     return formatted_history.strip()
-def generate_streaming_chunks(text: str, words_per_chunk: int = 2, delay: float = 0.06):
     """
-    Mô phỏng streaming bằng cách chia văn bản thành các chunk
-    Args:
-        text: Văn bản cần chia
-        words_per_chunk: Số từ trong mỗi chunk
-        delay: Thời gian trễ giữa các chunk (giây)
     """
-    words = text.split()
-    chunks = []
-    for i in range(0, len(words), words_per_chunk):
-        chunk = ' '.join(words[i:i+words_per_chunk])
-        chunks.append(chunk)
-    return chunks, delay
-async def stream_response(content: str, request: Request, response_id: str, model: str):
-    """
-    Generator để stream phản hồi theo từng chunk nhỏ
-    """
-    chunks, delay = generate_streaming_chunks(content)
-    for i, chunk in enumerate(chunks):
         if await request.is_disconnected():
             break
-        # Đợi một khoảng thời gian để tạo hiệu ứng streaming
-        await asyncio.sleep(delay)
-        # Format dữ liệu theo chuẩn SSE của OpenAI
         data = {
             "id": response_id,
             "object": "chat.completion.chunk",
@@ -106,15 +136,30 @@ async def stream_response(content: str, request: Request, response_id: str, mode
                 {
                     "index": 0,
                     "delta": {
-                        "content": chunk + (" " if i < len(chunks) - 1 else "")
                     },
-                    "finish_reason": None if i < len(chunks) - 1 else "stop"
                 }
             ]
         }
         yield json.dumps(data)
     # Thêm [DONE] để đánh dấu kết thúc stream (theo chuẩn OpenAI)
     yield "[DONE]"
@@ -151,7 +196,7 @@ async def create_chat_completion(request: Request, response: Response):
         # Nếu request yêu cầu streaming
         if completion_request.stream:
             return EventSourceResponse(
-                stream_response(
                     duck_response,
                     request,
                     response_id,

     return formatted_history.strip()
+async def stream_response_character_by_character(content: str, request: Request, response_id: str, model: str):
     """
+    Generator để stream phản hồi theo từng ký tự với tốc độ phù hợp
     """
+    if await request.is_disconnected():
+        return
+    # Gửi tin nhắn đầu tiên với role và content rỗng
+    data = {
+        "id": response_id,
+        "object": "chat.completion.chunk",
+        "created": int(time.time()),
+        "model": model,
+        "choices": [
+            {
+                "index": 0,
+                "delta": {
+                    "role": "assistant",
+                },
+                "finish_reason": None
+            }
+        ]
+    }
+    yield json.dumps(data)
+    await asyncio.sleep(0.01)  # Đợi một chút trước khi bắt đầu nội dung
+    # Stream nội dung theo từng ký tự
+    buffer = ""
+    for char in content:
         if await request.is_disconnected():
             break
+        buffer += char
+        # Tích lũy ký tự trong buffer và gửi theo các đơn vị có ý nghĩa
+        # (giúp tránh gửi quá nhiều sự kiện nhỏ và đảm bảo hiển thị tốt hơn)
+        if len(buffer) >= 3 or char in [' ', '\n', '.', '!', '?', ',']:
+            data = {
+                "id": response_id,
+                "object": "chat.completion.chunk",
+                "created": int(time.time()),
+                "model": model,
+                "choices": [
+                    {
+                        "index": 0,
+                        "delta": {
+                            "content": buffer
+                        },
+                        "finish_reason": None
+                    }
+                ]
+            }
+            yield json.dumps(data)
+            buffer = ""
+            # Điều chỉnh thời gian delay để có tốc độ stream hợp lý
+            # Ký tự xuống dòng sẽ có thời gian delay lâu hơn một chút
+            delay = 0.05 if char == '\n' else 0.01
+            await asyncio.sleep(delay)
+    # Gửi nốt buffer nếu còn
+    if buffer:
         data = {
             "id": response_id,
             "object": "chat.completion.chunk",
                 {
                     "index": 0,
                     "delta": {
+                        "content": buffer
                     },
+                    "finish_reason": None
                 }
             ]
         }
         yield json.dumps(data)
+    # Gửi message cuối cùng để đánh dấu kết thúc
+    data = {
+        "id": response_id,
+        "object": "chat.completion.chunk",
+        "created": int(time.time()),
+        "model": model,
+        "choices": [
+            {
+                "index": 0,
+                "delta": {},
+                "finish_reason": "stop"
+            }
+        ]
+    }
+    yield json.dumps(data)
     # Thêm [DONE] để đánh dấu kết thúc stream (theo chuẩn OpenAI)
     yield "[DONE]"
         # Nếu request yêu cầu streaming
         if completion_request.stream:
             return EventSourceResponse(
+                stream_response_character_by_character(
                     duck_response,
                     request,
                     response_id,