Update app.py
Browse files
app.py
CHANGED
@@ -431,15 +431,51 @@ def handsome_chat_completions():
|
|
431 |
def generate():
|
432 |
first_chunk_time = None
|
433 |
full_response_content = ""
|
434 |
-
reasoning_content_accumulated = "" # Accumulate reasoning content
|
435 |
-
content_accumulated = "" # Accumulate regular content
|
436 |
|
437 |
for chunk in response.iter_content(chunk_size=1024):
|
438 |
if chunk:
|
439 |
if first_chunk_time is None:
|
440 |
first_chunk_time = time.time()
|
441 |
full_response_content += chunk.decode("utf-8")
|
442 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
443 |
|
444 |
end_time = time.time()
|
445 |
first_token_time = (
|
@@ -466,19 +502,6 @@ def handsome_chat_completions():
|
|
466 |
"usage"
|
467 |
]["completion_tokens"]
|
468 |
|
469 |
-
# Special handling for deepseek-reasoner in streaming mode
|
470 |
-
if model_name == "deepseek-reasoner" and "choices" in response_json and len(response_json["choices"]) > 0:
|
471 |
-
delta = response_json["choices"][0].get("delta", {})
|
472 |
-
if "reasoning_content" in delta:
|
473 |
-
reasoning_content_accumulated += delta["reasoning_content"]
|
474 |
-
if "content" in delta:
|
475 |
-
content_accumulated += delta["content"]
|
476 |
-
elif "choices" in response_json and len(response_json["choices"]) > 0:
|
477 |
-
# Handle other models normally
|
478 |
-
delta = response_json["choices"][0].get("delta", {})
|
479 |
-
if "content" in delta:
|
480 |
-
content_accumulated += delta["content"]
|
481 |
-
|
482 |
if (
|
483 |
"usage" in response_json and
|
484 |
"prompt_tokens" in response_json["usage"]
|
@@ -497,14 +520,6 @@ def handsome_chat_completions():
|
|
497 |
f"行内容: {line}"
|
498 |
)
|
499 |
|
500 |
-
# Format the accumulated reasoning content after processing all chunks
|
501 |
-
if model_name == "deepseek-reasoner":
|
502 |
-
reasoning_lines = reasoning_content_accumulated.splitlines()
|
503 |
-
formatted_reasoning = "\n".join(f"> {line}" for line in reasoning_lines)
|
504 |
-
response_content = formatted_reasoning + "\n" + content_accumulated
|
505 |
-
else:
|
506 |
-
response_content = content_accumulated
|
507 |
-
|
508 |
user_content = ""
|
509 |
messages = data.get("messages", [])
|
510 |
for message in messages:
|
@@ -527,9 +542,6 @@ def handsome_chat_completions():
|
|
527 |
user_content_replaced = user_content.replace(
|
528 |
'\n', '\\n'
|
529 |
).replace('\r', '\\n')
|
530 |
-
response_content_replaced = response_content.replace(
|
531 |
-
'\n', '\\n'
|
532 |
-
).replace('\r', '\\n')
|
533 |
|
534 |
logging.info(
|
535 |
f"使用的key: {api_key}, "
|
@@ -538,8 +550,7 @@ def handsome_chat_completions():
|
|
538 |
f"首字用时: {first_token_time:.4f}秒, "
|
539 |
f"总共用时: {total_time:.4f}秒, "
|
540 |
f"使用的模型: {model_name}, "
|
541 |
-
f"用户的内容: {user_content_replaced}
|
542 |
-
f"输出的内容: {response_content_replaced}"
|
543 |
)
|
544 |
|
545 |
with data_lock:
|
@@ -551,7 +562,6 @@ def handsome_chat_completions():
|
|
551 |
content_type=response.headers['Content-Type']
|
552 |
)
|
553 |
else:
|
554 |
-
# ... (Non-streaming part remains the same as in the previous response)
|
555 |
response.raise_for_status()
|
556 |
end_time = time.time()
|
557 |
response_json = response.json()
|
@@ -653,7 +663,6 @@ def handsome_chat_completions():
|
|
653 |
logging.error(f"请求转发异常: {e}")
|
654 |
return jsonify({"error": str(e)}), 500
|
655 |
|
656 |
-
|
657 |
if __name__ == '__main__':
|
658 |
logging.info(f"环境变量:{os.environ}")
|
659 |
|
|
|
431 |
def generate():
|
432 |
first_chunk_time = None
|
433 |
full_response_content = ""
|
|
|
|
|
434 |
|
435 |
for chunk in response.iter_content(chunk_size=1024):
|
436 |
if chunk:
|
437 |
if first_chunk_time is None:
|
438 |
first_chunk_time = time.time()
|
439 |
full_response_content += chunk.decode("utf-8")
|
440 |
+
|
441 |
+
chunk_data_list = chunk.decode("utf-8").split("\n\n")
|
442 |
+
|
443 |
+
for chunk_data in chunk_data_list:
|
444 |
+
if not chunk_data:
|
445 |
+
continue
|
446 |
+
if chunk_data.startswith("data:"):
|
447 |
+
chunk_data = chunk_data[5:].strip()
|
448 |
+
if chunk_data == "[DONE]":
|
449 |
+
continue
|
450 |
+
try:
|
451 |
+
response_json = json.loads(chunk_data)
|
452 |
+
if (
|
453 |
+
model_name == "deepseek-reasoner" and
|
454 |
+
"choices" in response_json and
|
455 |
+
len(response_json["choices"]) > 0
|
456 |
+
):
|
457 |
+
delta = response_json["choices"][0].get("delta", {})
|
458 |
+
new_content = ""
|
459 |
+
if "reasoning_content" in delta:
|
460 |
+
new_content += "> " + delta["reasoning_content"]
|
461 |
+
if "content" in delta:
|
462 |
+
new_content += delta["content"]
|
463 |
+
|
464 |
+
if new_content:
|
465 |
+
response_json["choices"][0]["delta"] = {"content": new_content}
|
466 |
+
yield f"data: {json.dumps(response_json)}\n\n".encode("utf-8")
|
467 |
+
else:
|
468 |
+
yield f"data: {chunk_data}\n\n".encode("utf-8")
|
469 |
+
|
470 |
+
except (
|
471 |
+
KeyError,
|
472 |
+
ValueError,
|
473 |
+
IndexError
|
474 |
+
) as e:
|
475 |
+
logging.error(
|
476 |
+
f"解析流式响应单行 JSON 失败: {e}, "
|
477 |
+
f"行内容: {chunk_data}"
|
478 |
+
)
|
479 |
|
480 |
end_time = time.time()
|
481 |
first_token_time = (
|
|
|
502 |
"usage"
|
503 |
]["completion_tokens"]
|
504 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
505 |
if (
|
506 |
"usage" in response_json and
|
507 |
"prompt_tokens" in response_json["usage"]
|
|
|
520 |
f"行内容: {line}"
|
521 |
)
|
522 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
523 |
user_content = ""
|
524 |
messages = data.get("messages", [])
|
525 |
for message in messages:
|
|
|
542 |
user_content_replaced = user_content.replace(
|
543 |
'\n', '\\n'
|
544 |
).replace('\r', '\\n')
|
|
|
|
|
|
|
545 |
|
546 |
logging.info(
|
547 |
f"使用的key: {api_key}, "
|
|
|
550 |
f"首字用时: {first_token_time:.4f}秒, "
|
551 |
f"总共用时: {total_time:.4f}秒, "
|
552 |
f"使用的模型: {model_name}, "
|
553 |
+
f"用户的内容: {user_content_replaced}"
|
|
|
554 |
)
|
555 |
|
556 |
with data_lock:
|
|
|
562 |
content_type=response.headers['Content-Type']
|
563 |
)
|
564 |
else:
|
|
|
565 |
response.raise_for_status()
|
566 |
end_time = time.time()
|
567 |
response_json = response.json()
|
|
|
663 |
logging.error(f"请求转发异常: {e}")
|
664 |
return jsonify({"error": str(e)}), 500
|
665 |
|
|
|
666 |
if __name__ == '__main__':
|
667 |
logging.info(f"环境变量:{os.environ}")
|
668 |
|