ds

Sleeping

App Files Community

yangtb24 committed on Jan 20

Commit

95146e7

verified ·

1 Parent(s): b0f9287

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -31

app.py CHANGED Viewed

@@ -431,15 +431,51 @@ def handsome_chat_completions():
             def generate():
                 first_chunk_time = None
                 full_response_content = ""
-                reasoning_content_accumulated = ""  # Accumulate reasoning content
-                content_accumulated = ""  # Accumulate regular content
                 for chunk in response.iter_content(chunk_size=1024):
                     if chunk:
                         if first_chunk_time is None:
                             first_chunk_time = time.time()
                         full_response_content += chunk.decode("utf-8")
-                        yield chunk
                 end_time = time.time()
                 first_token_time = (
@@ -466,19 +502,6 @@ def handsome_chat_completions():
                                     "usage"
                                 ]["completion_tokens"]
-                            # Special handling for deepseek-reasoner in streaming mode
-                            if model_name == "deepseek-reasoner" and "choices" in response_json and len(response_json["choices"]) > 0:
-                                delta = response_json["choices"][0].get("delta", {})
-                                if "reasoning_content" in delta:
-                                    reasoning_content_accumulated += delta["reasoning_content"]
-                                if "content" in delta:
-                                    content_accumulated += delta["content"]
-                            elif "choices" in response_json and len(response_json["choices"]) > 0:
-                                # Handle other models normally
-                                delta = response_json["choices"][0].get("delta", {})
-                                if "content" in delta:
-                                    content_accumulated += delta["content"]
                             if (
                                 "usage" in response_json and
                                 "prompt_tokens" in response_json["usage"]
@@ -497,14 +520,6 @@ def handsome_chat_completions():
                                 f"行内容: {line}"
                             )
-                # Format the accumulated reasoning content after processing all chunks
-                if model_name == "deepseek-reasoner":
-                    reasoning_lines = reasoning_content_accumulated.splitlines()
-                    formatted_reasoning = "\n".join(f"> {line}" for line in reasoning_lines)
-                    response_content = formatted_reasoning + "\n" + content_accumulated
-                else:
-                    response_content = content_accumulated
                 user_content = ""
                 messages = data.get("messages", [])
                 for message in messages:
@@ -527,9 +542,6 @@ def handsome_chat_completions():
                 user_content_replaced = user_content.replace(
                     '\n', '\\n'
                 ).replace('\r', '\\n')
-                response_content_replaced = response_content.replace(
-                    '\n', '\\n'
-                ).replace('\r', '\\n')
                 logging.info(
                     f"使用的key: {api_key}, "
@@ -538,8 +550,7 @@ def handsome_chat_completions():
                     f"首字用时: {first_token_time:.4f}秒, "
                     f"总共用时: {total_time:.4f}秒, "
                     f"使用的模型: {model_name}, "
-                    f"用户的内容: {user_content_replaced}, "
-                    f"输出的内容: {response_content_replaced}"
                 )
                 with data_lock:
@@ -551,7 +562,6 @@ def handsome_chat_completions():
                 content_type=response.headers['Content-Type']
             )
         else:
-            # ... (Non-streaming part remains the same as in the previous response)
             response.raise_for_status()
             end_time = time.time()
             response_json = response.json()
@@ -653,7 +663,6 @@ def handsome_chat_completions():
         logging.error(f"请求转发异常: {e}")
         return jsonify({"error": str(e)}), 500
 if __name__ == '__main__':
     logging.info(f"环境变量：{os.environ}")

             def generate():
                 first_chunk_time = None
                 full_response_content = ""
                 for chunk in response.iter_content(chunk_size=1024):
                     if chunk:
                         if first_chunk_time is None:
                             first_chunk_time = time.time()
                         full_response_content += chunk.decode("utf-8")
+                        chunk_data_list = chunk.decode("utf-8").split("\n\n")
+                        for chunk_data in chunk_data_list:
+                            if not chunk_data:
+                                continue
+                            if chunk_data.startswith("data:"):
+                                chunk_data = chunk_data[5:].strip()
+                                if chunk_data == "[DONE]":
+                                    continue
+                                try:
+                                    response_json = json.loads(chunk_data)
+                                    if (
+                                        model_name == "deepseek-reasoner" and
+                                        "choices" in response_json and
+                                        len(response_json["choices"]) > 0
+                                    ):
+                                        delta = response_json["choices"][0].get("delta", {})
+                                        new_content = ""
+                                        if "reasoning_content" in delta:
+                                            new_content += "> " + delta["reasoning_content"]
+                                        if "content" in delta:
+                                            new_content += delta["content"]
+                                        if new_content:
+                                            response_json["choices"][0]["delta"] = {"content": new_content}
+                                            yield f"data: {json.dumps(response_json)}\n\n".encode("utf-8")
+                                    else:
+                                        yield f"data: {chunk_data}\n\n".encode("utf-8")
+                                except (
+                                    KeyError,
+                                    ValueError,
+                                    IndexError
+                                ) as e:
+                                    logging.error(
+                                        f"解析流式响应单行 JSON 失败: {e}, "
+                                        f"行内容: {chunk_data}"
+                                    )
                 end_time = time.time()
                 first_token_time = (
                                     "usage"
                                 ]["completion_tokens"]
                             if (
                                 "usage" in response_json and
                                 "prompt_tokens" in response_json["usage"]
                                 f"行内容: {line}"
                             )
                 user_content = ""
                 messages = data.get("messages", [])
                 for message in messages:
                 user_content_replaced = user_content.replace(
                     '\n', '\\n'
                 ).replace('\r', '\\n')
                 logging.info(
                     f"使用的key: {api_key}, "
                     f"首字用时: {first_token_time:.4f}秒, "
                     f"总共用时: {total_time:.4f}秒, "
                     f"使用的模型: {model_name}, "
+                    f"用户的内容: {user_content_replaced}"
                 )
                 with data_lock:
                 content_type=response.headers['Content-Type']
             )
         else:
             response.raise_for_status()
             end_time = time.time()
             response_json = response.json()
         logging.error(f"请求转发异常: {e}")
         return jsonify({"error": str(e)}), 500
 if __name__ == '__main__':
     logging.info(f"环境变量：{os.environ}")