ds

Sleeping

App Files Files Community

yangtb24 committed on Jan 20

Commit

0f85a86

verified ·

1 Parent(s): 8d0c64e

Update app.py

Browse files

Files changed (1) hide show

app.py +19 -4

app.py CHANGED Viewed

@@ -463,6 +463,7 @@ def handsome_chat_completions():
             def generate():
                 first_chunk_time = None
                 full_response_content = ""
                 for chunk in response.iter_content(chunk_size=1024):
                     if chunk:
                         if first_chunk_time is None:
@@ -496,14 +497,20 @@ def handsome_chat_completions():
                                     "usage"
                                 ]["completion_tokens"]
-                            # Special handling for deepseek-reasoner in streaming mode
                             if model_name == "deepseek-reasoner" and "choices" in response_json and len(response_json["choices"]) > 0:
                                 delta = response_json["choices"][0].get("delta", {})
                                 if "reasoning_content" in delta and delta["reasoning_content"]:
-                                    reasoning_lines = delta["reasoning_content"].splitlines()
-                                    formatted_reasoning = "\n".join(f"> {line}" for line in reasoning_lines)
-                                    response_content += formatted_reasoning + "\n"
                                 if "content" in delta and delta["content"]:
                                     response_content += delta["content"]
                             elif "choices" in response_json and len(response_json["choices"]) > 0:
                                 delta = response_json["choices"][0].get("delta", {})
@@ -527,6 +534,13 @@ def handsome_chat_completions():
                                 f"解析流式响应单行 JSON 失败: {e}, "
                                 f"行内容: {line}"
                             )
                 user_content = ""
                 messages = data.get("messages", [])
@@ -574,6 +588,7 @@ def handsome_chat_completions():
                 content_type=response.headers['Content-Type']
             )
         else:
             response.raise_for_status()
             end_time = time.time()
             response_json = response.json()

             def generate():
                 first_chunk_time = None
                 full_response_content = ""
+                pending_reasoning_lines = []  # Store incomplete reasoning lines
                 for chunk in response.iter_content(chunk_size=1024):
                     if chunk:
                         if first_chunk_time is None:
                                     "usage"
                                 ]["completion_tokens"]
+                            # Improved special handling for deepseek-reasoner in streaming mode
                             if model_name == "deepseek-reasoner" and "choices" in response_json and len(response_json["choices"]) > 0:
                                 delta = response_json["choices"][0].get("delta", {})
                                 if "reasoning_content" in delta and delta["reasoning_content"]:
+                                    pending_reasoning_lines.extend(delta["reasoning_content"].splitlines(keepends=True))
                                 if "content" in delta and delta["content"]:
+                                    # Process any pending reasoning lines before the content
+                                    if pending_reasoning_lines:
+                                        for reasoning_line in pending_reasoning_lines:
+                                            if reasoning_line.endswith("\n"):
+                                                response_content += f"> {reasoning_line}"
+                                            else:
+                                                response_content += f"> {reasoning_line}\n"
+                                        pending_reasoning_lines = []  # Clear pending lines
                                     response_content += delta["content"]
                             elif "choices" in response_json and len(response_json["choices"]) > 0:
                                 delta = response_json["choices"][0].get("delta", {})
                                 f"解析流式响应单行 JSON 失败: {e}, "
                                 f"行内容: {line}"
                             )
+                # Process any remaining reasoning lines after all chunks are received
+                if pending_reasoning_lines:
+                    for reasoning_line in pending_reasoning_lines:
+                        response_content += f"> {reasoning_line}"
+                    if not response_content.endswith("\n"):
+                        response_content += "\n"
                 user_content = ""
                 messages = data.get("messages", [])
                 content_type=response.headers['Content-Type']
             )
         else:
+            # ... (rest of the code for non-streaming mode remains the same)
             response.raise_for_status()
             end_time = time.time()
             response_json = response.json()