ds

Sleeping

App Files Community

yangtb24 committed on Jan 20

Commit

b0f9287

verified ·

1 Parent(s): f5fbdd1

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -54

app.py CHANGED Viewed

@@ -431,61 +431,15 @@ def handsome_chat_completions():
             def generate():
                 first_chunk_time = None
                 full_response_content = ""
-                reasoning_content_buffer = ""
-                content_buffer = ""
                 for chunk in response.iter_content(chunk_size=1024):
                     if chunk:
                         if first_chunk_time is None:
                             first_chunk_time = time.time()
                         full_response_content += chunk.decode("utf-8")
-                        for line in chunk.decode("utf-8").splitlines():
-                            if line.startswith("data:"):
-                                line = line[5:].strip()
-                                if line == "[DONE]":
-                                    continue
-                                try:
-                                    response_json = json.loads(line)
-                                    if (
-                                        "usage" in response_json and
-                                        "completion_tokens" in response_json["usage"]
-                                    ):
-                                        completion_tokens = response_json[
-                                            "usage"
-                                        ]["completion_tokens"]
-                                    if "choices" in response_json and len(response_json["choices"]) > 0:
-                                        delta = response_json["choices"][0].get("delta", {})
-                                        if "reasoning_content" in delta and delta["reasoning_content"] is not None:
-                                            reasoning_content_buffer += delta["reasoning_content"]
-                                        if "content" in delta and delta["content"] is not None:
-                                            content_buffer += delta["content"]
-                                    if (
-                                        "usage" in response_json and
-                                        "prompt_tokens" in response_json["usage"]
-                                    ):
-                                        prompt_tokens = response_json[
-                                            "usage"
-                                        ]["prompt_tokens"]
-                                except (
-                                    KeyError,
-                                    ValueError,
-                                    IndexError
-                                ) as e:
-                                    logging.error(
-                                        f"解析流式响应单行 JSON 失败: {e}, "
-                                        f"行内容: {line}"
-                                    )
-                        # Format and yield the accumulated content
-                        formatted_reasoning = "\n".join(f"> {line}" for line in reasoning_content_buffer.splitlines())
-                        combined_content = formatted_reasoning + "\n" + content_buffer
-                        yield combined_content.encode("utf-8")
-                        reasoning_content_buffer = ""
-                        content_buffer = ""
                 end_time = time.time()
                 first_token_time = (
@@ -496,7 +450,6 @@ def handsome_chat_completions():
                 prompt_tokens = 0
                 completion_tokens = 0
                 for line in full_response_content.splitlines():
                     if line.startswith("data:"):
                         line = line[5:].strip()
@@ -504,13 +457,28 @@ def handsome_chat_completions():
                             continue
                         try:
                             response_json = json.loads(line)
                             if (
                                 "usage" in response_json and
                                 "completion_tokens" in response_json["usage"]
                             ):
-                                completion_tokens = response_json[
                                     "usage"
                                 ]["completion_tokens"]
                             if (
                                 "usage" in response_json and
                                 "prompt_tokens" in response_json["usage"]
@@ -518,6 +486,7 @@ def handsome_chat_completions():
                                 prompt_tokens = response_json[
                                     "usage"
                                 ]["prompt_tokens"]
                         except (
                             KeyError,
                             ValueError,
@@ -527,7 +496,15 @@ def handsome_chat_completions():
                                 f"解析流式响应单行 JSON 失败: {e}, "
                                 f"行内容: {line}"
                             )
                 user_content = ""
                 messages = data.get("messages", [])
                 for message in messages:
@@ -550,7 +527,7 @@ def handsome_chat_completions():
                 user_content_replaced = user_content.replace(
                     '\n', '\\n'
                 ).replace('\r', '\\n')
-                response_content_replaced = (formatted_reasoning + "\n" + content_buffer).replace(
                     '\n', '\\n'
                 ).replace('\r', '\\n')
@@ -574,6 +551,7 @@ def handsome_chat_completions():
                 content_type=response.headers['Content-Type']
             )
         else:
             response.raise_for_status()
             end_time = time.time()
             response_json = response.json()
@@ -675,6 +653,7 @@ def handsome_chat_completions():
         logging.error(f"请求转发异常: {e}")
         return jsonify({"error": str(e)}), 500
 if __name__ == '__main__':
     logging.info(f"环境变量：{os.environ}")

             def generate():
                 first_chunk_time = None
                 full_response_content = ""
+                reasoning_content_accumulated = ""  # Accumulate reasoning content
+                content_accumulated = ""  # Accumulate regular content
                 for chunk in response.iter_content(chunk_size=1024):
                     if chunk:
                         if first_chunk_time is None:
                             first_chunk_time = time.time()
                         full_response_content += chunk.decode("utf-8")
+                        yield chunk
                 end_time = time.time()
                 first_token_time = (
                 prompt_tokens = 0
                 completion_tokens = 0
                 for line in full_response_content.splitlines():
                     if line.startswith("data:"):
                         line = line[5:].strip()
                             continue
                         try:
                             response_json = json.loads(line)
                             if (
                                 "usage" in response_json and
                                 "completion_tokens" in response_json["usage"]
                             ):
+                                completion_tokens += response_json[
                                     "usage"
                                 ]["completion_tokens"]
+                            # Special handling for deepseek-reasoner in streaming mode
+                            if model_name == "deepseek-reasoner" and "choices" in response_json and len(response_json["choices"]) > 0:
+                                delta = response_json["choices"][0].get("delta", {})
+                                if "reasoning_content" in delta:
+                                    reasoning_content_accumulated += delta["reasoning_content"]
+                                if "content" in delta:
+                                    content_accumulated += delta["content"]
+                            elif "choices" in response_json and len(response_json["choices"]) > 0:
+                                # Handle other models normally
+                                delta = response_json["choices"][0].get("delta", {})
+                                if "content" in delta:
+                                    content_accumulated += delta["content"]
                             if (
                                 "usage" in response_json and
                                 "prompt_tokens" in response_json["usage"]
                                 prompt_tokens = response_json[
                                     "usage"
                                 ]["prompt_tokens"]
                         except (
                             KeyError,
                             ValueError,
                                 f"解析流式响应单行 JSON 失败: {e}, "
                                 f"行内容: {line}"
                             )
+                # Format the accumulated reasoning content after processing all chunks
+                if model_name == "deepseek-reasoner":
+                    reasoning_lines = reasoning_content_accumulated.splitlines()
+                    formatted_reasoning = "\n".join(f"> {line}" for line in reasoning_lines)
+                    response_content = formatted_reasoning + "\n" + content_accumulated
+                else:
+                    response_content = content_accumulated
                 user_content = ""
                 messages = data.get("messages", [])
                 for message in messages:
                 user_content_replaced = user_content.replace(
                     '\n', '\\n'
                 ).replace('\r', '\\n')
+                response_content_replaced = response_content.replace(
                     '\n', '\\n'
                 ).replace('\r', '\\n')
                 content_type=response.headers['Content-Type']
             )
         else:
+            # ... (Non-streaming part remains the same as in the previous response)
             response.raise_for_status()
             end_time = time.time()
             response_json = response.json()
         logging.error(f"请求转发异常: {e}")
         return jsonify({"error": str(e)}), 500
 if __name__ == '__main__':
     logging.info(f"环境变量：{os.environ}")