Update app.py
Browse files
app.py
CHANGED
@@ -433,13 +433,30 @@ def handsome_chat_completions():
|
|
433 |
full_response_content = ""
|
434 |
reasoning_content_accumulated = "" # Accumulate reasoning content
|
435 |
content_accumulated = "" # Accumulate regular content
|
436 |
-
|
437 |
for chunk in response.iter_content(chunk_size=1024):
|
438 |
if chunk:
|
439 |
if first_chunk_time is None:
|
440 |
first_chunk_time = time.time()
|
441 |
full_response_content += chunk.decode("utf-8")
|
442 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
443 |
|
444 |
end_time = time.time()
|
445 |
first_token_time = (
|
@@ -465,20 +482,6 @@ def handsome_chat_completions():
|
|
465 |
completion_tokens += response_json[
|
466 |
"usage"
|
467 |
]["completion_tokens"]
|
468 |
-
|
469 |
-
# Special handling for deepseek-reasoner in streaming mode
|
470 |
-
if model_name == "deepseek-reasoner" and "choices" in response_json and len(response_json["choices"]) > 0:
|
471 |
-
delta = response_json["choices"][0].get("delta", {})
|
472 |
-
if "reasoning_content" in delta:
|
473 |
-
reasoning_content_accumulated += delta["reasoning_content"]
|
474 |
-
if "content" in delta:
|
475 |
-
content_accumulated += delta["content"]
|
476 |
-
elif "choices" in response_json and len(response_json["choices"]) > 0:
|
477 |
-
# Handle other models normally
|
478 |
-
delta = response_json["choices"][0].get("delta", {})
|
479 |
-
if "content" in delta:
|
480 |
-
content_accumulated += delta["content"]
|
481 |
-
|
482 |
if (
|
483 |
"usage" in response_json and
|
484 |
"prompt_tokens" in response_json["usage"]
|
@@ -497,13 +500,6 @@ def handsome_chat_completions():
|
|
497 |
f"行内容: {line}"
|
498 |
)
|
499 |
|
500 |
-
# Format the accumulated reasoning content after processing all chunks
|
501 |
-
if model_name == "deepseek-reasoner":
|
502 |
-
formatted_reasoning = f"```Thinking\n{reasoning_content_accumulated}\n```"
|
503 |
-
response_content = formatted_reasoning + "\n" + content_accumulated
|
504 |
-
else:
|
505 |
-
response_content = content_accumulated
|
506 |
-
|
507 |
user_content = ""
|
508 |
messages = data.get("messages", [])
|
509 |
for message in messages:
|
@@ -526,7 +522,8 @@ def handsome_chat_completions():
|
|
526 |
user_content_replaced = user_content.replace(
|
527 |
'\n', '\\n'
|
528 |
).replace('\r', '\\n')
|
529 |
-
response_content_replaced =
|
|
|
530 |
'\n', '\\n'
|
531 |
).replace('\r', '\\n')
|
532 |
|
@@ -544,10 +541,8 @@ def handsome_chat_completions():
|
|
544 |
with data_lock:
|
545 |
request_timestamps.append(time.time())
|
546 |
token_counts.append(prompt_tokens + completion_tokens)
|
547 |
-
|
548 |
-
yield f"data: {json.dumps({'choices': [{'delta': {'content': response_content}, 'index': 0, 'finish_reason': None}]})}\n\n"
|
549 |
-
yield "data: [DONE]\n\n"
|
550 |
|
|
|
551 |
|
552 |
return Response(
|
553 |
stream_with_context(generate()),
|
|
|
433 |
full_response_content = ""
|
434 |
reasoning_content_accumulated = "" # Accumulate reasoning content
|
435 |
content_accumulated = "" # Accumulate regular content
|
436 |
+
|
437 |
for chunk in response.iter_content(chunk_size=1024):
|
438 |
if chunk:
|
439 |
if first_chunk_time is None:
|
440 |
first_chunk_time = time.time()
|
441 |
full_response_content += chunk.decode("utf-8")
|
442 |
+
|
443 |
+
try:
|
444 |
+
chunk_json = json.loads(chunk.decode("utf-8").lstrip("data: ").strip())
|
445 |
+
if "choices" in chunk_json and len(chunk_json["choices"]) > 0:
|
446 |
+
delta = chunk_json["choices"][0].get("delta", {})
|
447 |
+
if "reasoning_content" in delta:
|
448 |
+
reasoning_content_accumulated += delta["reasoning_content"]
|
449 |
+
formatted_reasoning = f"```Thinking\n{reasoning_content_accumulated}\n```"
|
450 |
+
yield f"data: {json.dumps({'choices': [{'delta': {'content': formatted_reasoning}, 'index': 0, 'finish_reason': None}]})}\n\n"
|
451 |
+
reasoning_content_accumulated = ""
|
452 |
+
if "content" in delta:
|
453 |
+
content_accumulated += delta["content"]
|
454 |
+
yield f"data: {json.dumps({'choices': [{'delta': {'content': content_accumulated}, 'index': 0, 'finish_reason': None}]})}\n\n"
|
455 |
+
content_accumulated = ""
|
456 |
+
|
457 |
+
except (KeyError, ValueError, json.JSONDecodeError) as e:
|
458 |
+
logging.error(f"解析流式响应单行 JSON 失败: {e}, 行内容: {chunk.decode('utf-8')}")
|
459 |
+
continue
|
460 |
|
461 |
end_time = time.time()
|
462 |
first_token_time = (
|
|
|
482 |
completion_tokens += response_json[
|
483 |
"usage"
|
484 |
]["completion_tokens"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
485 |
if (
|
486 |
"usage" in response_json and
|
487 |
"prompt_tokens" in response_json["usage"]
|
|
|
500 |
f"行内容: {line}"
|
501 |
)
|
502 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
503 |
user_content = ""
|
504 |
messages = data.get("messages", [])
|
505 |
for message in messages:
|
|
|
522 |
user_content_replaced = user_content.replace(
|
523 |
'\n', '\\n'
|
524 |
).replace('\r', '\\n')
|
525 |
+
response_content_replaced = (f"```Thinking\n{reasoning_content_accumulated}\n```\n" if reasoning_content_accumulated else "") + content_accumulated
|
526 |
+
response_content_replaced = response_content_replaced.replace(
|
527 |
'\n', '\\n'
|
528 |
).replace('\r', '\\n')
|
529 |
|
|
|
541 |
with data_lock:
|
542 |
request_timestamps.append(time.time())
|
543 |
token_counts.append(prompt_tokens + completion_tokens)
|
|
|
|
|
|
|
544 |
|
545 |
+
yield "data: [DONE]\n\n"
|
546 |
|
547 |
return Response(
|
548 |
stream_with_context(generate()),
|