ds

Running

App Files Files Community

yangtb24 committed on Jan 20

Commit

f5fbdd1

verified ·

1 Parent(s): 26000f8

Update app.py

Browse files

Files changed (1) hide show

app.py +53 -27

app.py CHANGED Viewed

@@ -431,13 +431,61 @@ def handsome_chat_completions():
             def generate():
                 first_chunk_time = None
                 full_response_content = ""
-                accumulated_reasoning = ""  # Accumulate reasoning content
                 for chunk in response.iter_content(chunk_size=1024):
                     if chunk:
                         if first_chunk_time is None:
                             first_chunk_time = time.time()
                         full_response_content += chunk.decode("utf-8")
-                        yield chunk
                 end_time = time.time()
                 first_token_time = (
@@ -448,7 +496,7 @@ def handsome_chat_completions():
                 prompt_tokens = 0
                 completion_tokens = 0
-                response_content = ""
                 for line in full_response_content.splitlines():
                     if line.startswith("data:"):
                         line = line[5:].strip()
@@ -456,7 +504,6 @@ def handsome_chat_completions():
                             continue
                         try:
                             response_json = json.loads(line)
                             if (
                                 "usage" in response_json and
                                 "completion_tokens" in response_json["usage"]
@@ -464,25 +511,6 @@ def handsome_chat_completions():
                                 completion_tokens = response_json[
                                     "usage"
                                 ]["completion_tokens"]
-                            # Improved handling for deepseek-reasoner in streaming mode
-                            if model_name == "deepseek-reasoner" and "choices" in response_json and len(response_json["choices"]) > 0:
-                                delta = response_json["choices"][0].get("delta", {})
-                                if "reasoning_content" in delta:
-                                    accumulated_reasoning += delta["reasoning_content"]
-                                if "content" in delta and delta["content"]:
-                                    # Prepend accumulated reasoning before content
-                                    if accumulated_reasoning:
-                                        reasoning_lines = accumulated_reasoning.splitlines()
-                                        formatted_reasoning = "\n".join(f"> {line}" for line in reasoning_lines)
-                                        response_content += formatted_reasoning + "\n"
-                                        accumulated_reasoning = ""  # Reset
-                                    response_content += delta["content"]
-                            elif "choices" in response_json and len(response_json["choices"]) > 0:
-                                delta = response_json["choices"][0].get("delta", {})
-                                if "content" in delta and delta["content"]:
-                                    response_content += delta["content"]
                             if (
                                 "usage" in response_json and
                                 "prompt_tokens" in response_json["usage"]
@@ -490,7 +518,6 @@ def handsome_chat_completions():
                                 prompt_tokens = response_json[
                                     "usage"
                                 ]["prompt_tokens"]
                         except (
                             KeyError,
                             ValueError,
@@ -500,7 +527,7 @@ def handsome_chat_completions():
                                 f"解析流式响应单行 JSON 失败: {e}, "
                                 f"行内容: {line}"
                             )
                 user_content = ""
                 messages = data.get("messages", [])
                 for message in messages:
@@ -523,7 +550,7 @@ def handsome_chat_completions():
                 user_content_replaced = user_content.replace(
                     '\n', '\\n'
                 ).replace('\r', '\\n')
-                response_content_replaced = response_content.replace(
                     '\n', '\\n'
                 ).replace('\r', '\\n')
@@ -547,7 +574,6 @@ def handsome_chat_completions():
                 content_type=response.headers['Content-Type']
             )
         else:
-            # ... (rest of the code for non-streaming mode remains the same)
             response.raise_for_status()
             end_time = time.time()
             response_json = response.json()

             def generate():
                 first_chunk_time = None
                 full_response_content = ""
+                reasoning_content_buffer = ""
+                content_buffer = ""
                 for chunk in response.iter_content(chunk_size=1024):
                     if chunk:
                         if first_chunk_time is None:
                             first_chunk_time = time.time()
                         full_response_content += chunk.decode("utf-8")
+                        for line in chunk.decode("utf-8").splitlines():
+                            if line.startswith("data:"):
+                                line = line[5:].strip()
+                                if line == "[DONE]":
+                                    continue
+                                try:
+                                    response_json = json.loads(line)
+                                    if (
+                                        "usage" in response_json and
+                                        "completion_tokens" in response_json["usage"]
+                                    ):
+                                        completion_tokens = response_json[
+                                            "usage"
+                                        ]["completion_tokens"]
+                                    if "choices" in response_json and len(response_json["choices"]) > 0:
+                                        delta = response_json["choices"][0].get("delta", {})
+                                        if "reasoning_content" in delta and delta["reasoning_content"] is not None:
+                                            reasoning_content_buffer += delta["reasoning_content"]
+                                        if "content" in delta and delta["content"] is not None:
+                                            content_buffer += delta["content"]
+                                    if (
+                                        "usage" in response_json and
+                                        "prompt_tokens" in response_json["usage"]
+                                    ):
+                                        prompt_tokens = response_json[
+                                            "usage"
+                                        ]["prompt_tokens"]
+                                except (
+                                    KeyError,
+                                    ValueError,
+                                    IndexError
+                                ) as e:
+                                    logging.error(
+                                        f"解析流式响应单行 JSON 失败: {e}, "
+                                        f"行内容: {line}"
+                                    )
+                        # Format and yield the accumulated content
+                        formatted_reasoning = "\n".join(f"> {line}" for line in reasoning_content_buffer.splitlines())
+                        combined_content = formatted_reasoning + "\n" + content_buffer
+                        yield combined_content.encode("utf-8")
+                        reasoning_content_buffer = ""
+                        content_buffer = ""
                 end_time = time.time()
                 first_token_time = (
                 prompt_tokens = 0
                 completion_tokens = 0
                 for line in full_response_content.splitlines():
                     if line.startswith("data:"):
                         line = line[5:].strip()
                             continue
                         try:
                             response_json = json.loads(line)
                             if (
                                 "usage" in response_json and
                                 "completion_tokens" in response_json["usage"]
                                 completion_tokens = response_json[
                                     "usage"
                                 ]["completion_tokens"]
                             if (
                                 "usage" in response_json and
                                 "prompt_tokens" in response_json["usage"]
                                 prompt_tokens = response_json[
                                     "usage"
                                 ]["prompt_tokens"]
                         except (
                             KeyError,
                             ValueError,
                                 f"解析流式响应单行 JSON 失败: {e}, "
                                 f"行内容: {line}"
                             )
                 user_content = ""
                 messages = data.get("messages", [])
                 for message in messages:
                 user_content_replaced = user_content.replace(
                     '\n', '\\n'
                 ).replace('\r', '\\n')
+                response_content_replaced = (formatted_reasoning + "\n" + content_buffer).replace(
                     '\n', '\\n'
                 ).replace('\r', '\\n')
                 content_type=response.headers['Content-Type']
             )
         else:
             response.raise_for_status()
             end_time = time.time()
             response_json = response.json()