Update app.py
Browse files
app.py
CHANGED
@@ -421,7 +421,7 @@ def handsome_chat_completions():
|
|
421 |
return jsonify({"error": "Invalid request data"}), 400
|
422 |
|
423 |
model_name = data['model']
|
424 |
-
|
425 |
api_key = select_key(model_name)
|
426 |
|
427 |
if not api_key:
|
@@ -434,7 +434,8 @@ def handsome_chat_completions():
|
|
434 |
)
|
435 |
}
|
436 |
), 429
|
437 |
-
|
|
|
438 |
if model_name == "deepseek-reasoner":
|
439 |
for param in ["temperature", "top_p", "presence_penalty", "frequency_penalty", "logprobs", "top_logprobs"]:
|
440 |
if param in data:
|
@@ -444,7 +445,7 @@ def handsome_chat_completions():
|
|
444 |
"Authorization": f"Bearer {api_key}",
|
445 |
"Content-Type": "application/json"
|
446 |
}
|
447 |
-
|
448 |
try:
|
449 |
start_time = time.time()
|
450 |
response = requests.post(
|
@@ -495,12 +496,17 @@ def handsome_chat_completions():
|
|
495 |
"usage"
|
496 |
]["completion_tokens"]
|
497 |
|
498 |
-
|
|
|
499 |
delta = response_json["choices"][0].get("delta", {})
|
500 |
if "reasoning_content" in delta and delta["reasoning_content"]:
|
501 |
reasoning_lines = delta["reasoning_content"].splitlines()
|
502 |
formatted_reasoning = "\n".join(f"> {line}" for line in reasoning_lines)
|
503 |
-
response_content += formatted_reasoning + "\n"
|
|
|
|
|
|
|
|
|
504 |
if "content" in delta and delta["content"]:
|
505 |
response_content += delta["content"]
|
506 |
|
@@ -561,7 +567,7 @@ def handsome_chat_completions():
|
|
561 |
|
562 |
with data_lock:
|
563 |
request_timestamps.append(time.time())
|
564 |
-
token_counts.append(prompt_tokens+completion_tokens)
|
565 |
|
566 |
return Response(
|
567 |
stream_with_context(generate()),
|
@@ -575,18 +581,22 @@ def handsome_chat_completions():
|
|
575 |
|
576 |
try:
|
577 |
prompt_tokens = response_json["usage"]["prompt_tokens"]
|
578 |
-
completion_tokens = response_json[
|
579 |
-
"usage"
|
580 |
-
]["completion_tokens"]
|
581 |
response_content = ""
|
582 |
-
|
|
|
|
|
583 |
choice = response_json["choices"][0]
|
584 |
-
if "
|
585 |
-
|
586 |
-
|
587 |
-
|
588 |
-
|
589 |
-
|
|
|
|
|
|
|
|
|
590 |
except (KeyError, ValueError, IndexError) as e:
|
591 |
logging.error(
|
592 |
f"解析非流式响应 JSON 失败: {e}, "
|
@@ -609,7 +619,8 @@ def handsome_chat_completions():
|
|
609 |
item.get("type") == "text"
|
610 |
):
|
611 |
user_content += (
|
612 |
-
item.get("text", "") +
|
|
|
613 |
)
|
614 |
|
615 |
user_content = user_content.strip()
|
@@ -633,11 +644,8 @@ def handsome_chat_completions():
|
|
633 |
)
|
634 |
with data_lock:
|
635 |
request_timestamps.append(time.time())
|
636 |
-
|
637 |
-
|
638 |
-
else:
|
639 |
-
token_counts.append(0)
|
640 |
-
|
641 |
# Reformat the response to standard OpenAI format for non-streaming responses
|
642 |
formatted_response = {
|
643 |
"id": response_json.get("id", ""),
|
|
|
421 |
return jsonify({"error": "Invalid request data"}), 400
|
422 |
|
423 |
model_name = data['model']
|
424 |
+
|
425 |
api_key = select_key(model_name)
|
426 |
|
427 |
if not api_key:
|
|
|
434 |
)
|
435 |
}
|
436 |
), 429
|
437 |
+
|
438 |
+
# Special handling for deepseek-reasoner
|
439 |
if model_name == "deepseek-reasoner":
|
440 |
for param in ["temperature", "top_p", "presence_penalty", "frequency_penalty", "logprobs", "top_logprobs"]:
|
441 |
if param in data:
|
|
|
445 |
"Authorization": f"Bearer {api_key}",
|
446 |
"Content-Type": "application/json"
|
447 |
}
|
448 |
+
|
449 |
try:
|
450 |
start_time = time.time()
|
451 |
response = requests.post(
|
|
|
496 |
"usage"
|
497 |
]["completion_tokens"]
|
498 |
|
499 |
+
# Special handling for deepseek-reasoner in streaming mode
|
500 |
+
if model_name == "deepseek-reasoner" and "choices" in response_json and len(response_json["choices"]) > 0:
|
501 |
delta = response_json["choices"][0].get("delta", {})
|
502 |
if "reasoning_content" in delta and delta["reasoning_content"]:
|
503 |
reasoning_lines = delta["reasoning_content"].splitlines()
|
504 |
formatted_reasoning = "\n".join(f"> {line}" for line in reasoning_lines)
|
505 |
+
response_content += formatted_reasoning + "\n"
|
506 |
+
if "content" in delta and delta["content"]:
|
507 |
+
response_content += delta["content"]
|
508 |
+
elif "choices" in response_json and len(response_json["choices"]) > 0:
|
509 |
+
delta = response_json["choices"][0].get("delta", {})
|
510 |
if "content" in delta and delta["content"]:
|
511 |
response_content += delta["content"]
|
512 |
|
|
|
567 |
|
568 |
with data_lock:
|
569 |
request_timestamps.append(time.time())
|
570 |
+
token_counts.append(prompt_tokens + completion_tokens)
|
571 |
|
572 |
return Response(
|
573 |
stream_with_context(generate()),
|
|
|
581 |
|
582 |
try:
|
583 |
prompt_tokens = response_json["usage"]["prompt_tokens"]
|
584 |
+
completion_tokens = response_json["usage"]["completion_tokens"]
|
|
|
|
|
585 |
response_content = ""
|
586 |
+
|
587 |
+
# Special handling for deepseek-reasoner in non-streaming mode
|
588 |
+
if model_name == "deepseek-reasoner" and "choices" in response_json and len(response_json["choices"]) > 0:
|
589 |
choice = response_json["choices"][0]
|
590 |
+
if "message" in choice:
|
591 |
+
if "reasoning_content" in choice["message"]:
|
592 |
+
reasoning_lines = choice["message"]["reasoning_content"].splitlines()
|
593 |
+
formatted_reasoning = "\n".join(f"> {line}" for line in reasoning_lines)
|
594 |
+
response_content += formatted_reasoning + "\n"
|
595 |
+
if "content" in choice["message"]:
|
596 |
+
response_content += choice["message"]["content"]
|
597 |
+
elif "choices" in response_json and len(response_json["choices"]) > 0:
|
598 |
+
response_content = response_json["choices"][0]["message"]["content"]
|
599 |
+
|
600 |
except (KeyError, ValueError, IndexError) as e:
|
601 |
logging.error(
|
602 |
f"解析非流式响应 JSON 失败: {e}, "
|
|
|
619 |
item.get("type") == "text"
|
620 |
):
|
621 |
user_content += (
|
622 |
+
item.get("text", "") +
|
623 |
+
" "
|
624 |
)
|
625 |
|
626 |
user_content = user_content.strip()
|
|
|
644 |
)
|
645 |
with data_lock:
|
646 |
request_timestamps.append(time.time())
|
647 |
+
token_counts.append(prompt_tokens + completion_tokens)
|
648 |
+
|
|
|
|
|
|
|
649 |
# Reformat the response to standard OpenAI format for non-streaming responses
|
650 |
formatted_response = {
|
651 |
"id": response_json.get("id", ""),
|