Update app.py
Browse files
app.py
CHANGED
|
@@ -421,7 +421,7 @@ def handsome_chat_completions():
|
|
| 421 |
return jsonify({"error": "Invalid request data"}), 400
|
| 422 |
|
| 423 |
model_name = data['model']
|
| 424 |
-
|
| 425 |
api_key = select_key(model_name)
|
| 426 |
|
| 427 |
if not api_key:
|
|
@@ -434,7 +434,8 @@ def handsome_chat_completions():
|
|
| 434 |
)
|
| 435 |
}
|
| 436 |
), 429
|
| 437 |
-
|
|
|
|
| 438 |
if model_name == "deepseek-reasoner":
|
| 439 |
for param in ["temperature", "top_p", "presence_penalty", "frequency_penalty", "logprobs", "top_logprobs"]:
|
| 440 |
if param in data:
|
|
@@ -444,7 +445,7 @@ def handsome_chat_completions():
|
|
| 444 |
"Authorization": f"Bearer {api_key}",
|
| 445 |
"Content-Type": "application/json"
|
| 446 |
}
|
| 447 |
-
|
| 448 |
try:
|
| 449 |
start_time = time.time()
|
| 450 |
response = requests.post(
|
|
@@ -495,12 +496,17 @@ def handsome_chat_completions():
|
|
| 495 |
"usage"
|
| 496 |
]["completion_tokens"]
|
| 497 |
|
| 498 |
-
|
|
|
|
| 499 |
delta = response_json["choices"][0].get("delta", {})
|
| 500 |
if "reasoning_content" in delta and delta["reasoning_content"]:
|
| 501 |
reasoning_lines = delta["reasoning_content"].splitlines()
|
| 502 |
formatted_reasoning = "\n".join(f"> {line}" for line in reasoning_lines)
|
| 503 |
-
response_content += formatted_reasoning + "\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 504 |
if "content" in delta and delta["content"]:
|
| 505 |
response_content += delta["content"]
|
| 506 |
|
|
@@ -561,7 +567,7 @@ def handsome_chat_completions():
|
|
| 561 |
|
| 562 |
with data_lock:
|
| 563 |
request_timestamps.append(time.time())
|
| 564 |
-
token_counts.append(prompt_tokens+completion_tokens)
|
| 565 |
|
| 566 |
return Response(
|
| 567 |
stream_with_context(generate()),
|
|
@@ -575,18 +581,22 @@ def handsome_chat_completions():
|
|
| 575 |
|
| 576 |
try:
|
| 577 |
prompt_tokens = response_json["usage"]["prompt_tokens"]
|
| 578 |
-
completion_tokens = response_json[
|
| 579 |
-
"usage"
|
| 580 |
-
]["completion_tokens"]
|
| 581 |
response_content = ""
|
| 582 |
-
|
|
|
|
|
|
|
| 583 |
choice = response_json["choices"][0]
|
| 584 |
-
if "
|
| 585 |
-
|
| 586 |
-
|
| 587 |
-
|
| 588 |
-
|
| 589 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 590 |
except (KeyError, ValueError, IndexError) as e:
|
| 591 |
logging.error(
|
| 592 |
f"解析非流式响应 JSON 失败: {e}, "
|
|
@@ -609,7 +619,8 @@ def handsome_chat_completions():
|
|
| 609 |
item.get("type") == "text"
|
| 610 |
):
|
| 611 |
user_content += (
|
| 612 |
-
item.get("text", "") +
|
|
|
|
| 613 |
)
|
| 614 |
|
| 615 |
user_content = user_content.strip()
|
|
@@ -633,11 +644,8 @@ def handsome_chat_completions():
|
|
| 633 |
)
|
| 634 |
with data_lock:
|
| 635 |
request_timestamps.append(time.time())
|
| 636 |
-
|
| 637 |
-
|
| 638 |
-
else:
|
| 639 |
-
token_counts.append(0)
|
| 640 |
-
|
| 641 |
# Reformat the response to standard OpenAI format for non-streaming responses
|
| 642 |
formatted_response = {
|
| 643 |
"id": response_json.get("id", ""),
|
|
|
|
| 421 |
return jsonify({"error": "Invalid request data"}), 400
|
| 422 |
|
| 423 |
model_name = data['model']
|
| 424 |
+
|
| 425 |
api_key = select_key(model_name)
|
| 426 |
|
| 427 |
if not api_key:
|
|
|
|
| 434 |
)
|
| 435 |
}
|
| 436 |
), 429
|
| 437 |
+
|
| 438 |
+
# Special handling for deepseek-reasoner
|
| 439 |
if model_name == "deepseek-reasoner":
|
| 440 |
for param in ["temperature", "top_p", "presence_penalty", "frequency_penalty", "logprobs", "top_logprobs"]:
|
| 441 |
if param in data:
|
|
|
|
| 445 |
"Authorization": f"Bearer {api_key}",
|
| 446 |
"Content-Type": "application/json"
|
| 447 |
}
|
| 448 |
+
|
| 449 |
try:
|
| 450 |
start_time = time.time()
|
| 451 |
response = requests.post(
|
|
|
|
| 496 |
"usage"
|
| 497 |
]["completion_tokens"]
|
| 498 |
|
| 499 |
+
# Special handling for deepseek-reasoner in streaming mode
|
| 500 |
+
if model_name == "deepseek-reasoner" and "choices" in response_json and len(response_json["choices"]) > 0:
|
| 501 |
delta = response_json["choices"][0].get("delta", {})
|
| 502 |
if "reasoning_content" in delta and delta["reasoning_content"]:
|
| 503 |
reasoning_lines = delta["reasoning_content"].splitlines()
|
| 504 |
formatted_reasoning = "\n".join(f"> {line}" for line in reasoning_lines)
|
| 505 |
+
response_content += formatted_reasoning + "\n"
|
| 506 |
+
if "content" in delta and delta["content"]:
|
| 507 |
+
response_content += delta["content"]
|
| 508 |
+
elif "choices" in response_json and len(response_json["choices"]) > 0:
|
| 509 |
+
delta = response_json["choices"][0].get("delta", {})
|
| 510 |
if "content" in delta and delta["content"]:
|
| 511 |
response_content += delta["content"]
|
| 512 |
|
|
|
|
| 567 |
|
| 568 |
with data_lock:
|
| 569 |
request_timestamps.append(time.time())
|
| 570 |
+
token_counts.append(prompt_tokens + completion_tokens)
|
| 571 |
|
| 572 |
return Response(
|
| 573 |
stream_with_context(generate()),
|
|
|
|
| 581 |
|
| 582 |
try:
|
| 583 |
prompt_tokens = response_json["usage"]["prompt_tokens"]
|
| 584 |
+
completion_tokens = response_json["usage"]["completion_tokens"]
|
|
|
|
|
|
|
| 585 |
response_content = ""
|
| 586 |
+
|
| 587 |
+
# Special handling for deepseek-reasoner in non-streaming mode
|
| 588 |
+
if model_name == "deepseek-reasoner" and "choices" in response_json and len(response_json["choices"]) > 0:
|
| 589 |
choice = response_json["choices"][0]
|
| 590 |
+
if "message" in choice:
|
| 591 |
+
if "reasoning_content" in choice["message"]:
|
| 592 |
+
reasoning_lines = choice["message"]["reasoning_content"].splitlines()
|
| 593 |
+
formatted_reasoning = "\n".join(f"> {line}" for line in reasoning_lines)
|
| 594 |
+
response_content += formatted_reasoning + "\n"
|
| 595 |
+
if "content" in choice["message"]:
|
| 596 |
+
response_content += choice["message"]["content"]
|
| 597 |
+
elif "choices" in response_json and len(response_json["choices"]) > 0:
|
| 598 |
+
response_content = response_json["choices"][0]["message"]["content"]
|
| 599 |
+
|
| 600 |
except (KeyError, ValueError, IndexError) as e:
|
| 601 |
logging.error(
|
| 602 |
f"解析非流式响应 JSON 失败: {e}, "
|
|
|
|
| 619 |
item.get("type") == "text"
|
| 620 |
):
|
| 621 |
user_content += (
|
| 622 |
+
item.get("text", "") +
|
| 623 |
+
" "
|
| 624 |
)
|
| 625 |
|
| 626 |
user_content = user_content.strip()
|
|
|
|
| 644 |
)
|
| 645 |
with data_lock:
|
| 646 |
request_timestamps.append(time.time())
|
| 647 |
+
token_counts.append(prompt_tokens + completion_tokens)
|
| 648 |
+
|
|
|
|
|
|
|
|
|
|
| 649 |
# Reformat the response to standard OpenAI format for non-streaming responses
|
| 650 |
formatted_response = {
|
| 651 |
"id": response_json.get("id", ""),
|