yangtb24 committed on
Commit
c861b0f
·
verified ·
1 Parent(s): 868e37f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -27
app.py CHANGED
@@ -433,13 +433,30 @@ def handsome_chat_completions():
433
  full_response_content = ""
434
  reasoning_content_accumulated = "" # Accumulate reasoning content
435
  content_accumulated = "" # Accumulate regular content
436
-
437
  for chunk in response.iter_content(chunk_size=1024):
438
  if chunk:
439
  if first_chunk_time is None:
440
  first_chunk_time = time.time()
441
  full_response_content += chunk.decode("utf-8")
442
- yield chunk
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
443
 
444
  end_time = time.time()
445
  first_token_time = (
@@ -465,20 +482,6 @@ def handsome_chat_completions():
465
  completion_tokens += response_json[
466
  "usage"
467
  ]["completion_tokens"]
468
-
469
- # Special handling for deepseek-reasoner in streaming mode
470
- if model_name == "deepseek-reasoner" and "choices" in response_json and len(response_json["choices"]) > 0:
471
- delta = response_json["choices"][0].get("delta", {})
472
- if "reasoning_content" in delta:
473
- reasoning_content_accumulated += delta["reasoning_content"]
474
- if "content" in delta:
475
- content_accumulated += delta["content"]
476
- elif "choices" in response_json and len(response_json["choices"]) > 0:
477
- # Handle other models normally
478
- delta = response_json["choices"][0].get("delta", {})
479
- if "content" in delta:
480
- content_accumulated += delta["content"]
481
-
482
  if (
483
  "usage" in response_json and
484
  "prompt_tokens" in response_json["usage"]
@@ -497,13 +500,6 @@ def handsome_chat_completions():
497
  f"行内容: {line}"
498
  )
499
 
500
- # Format the accumulated reasoning content after processing all chunks
501
- if model_name == "deepseek-reasoner":
502
- formatted_reasoning = f"```Thinking\n{reasoning_content_accumulated}\n```"
503
- response_content = formatted_reasoning + "\n" + content_accumulated
504
- else:
505
- response_content = content_accumulated
506
-
507
  user_content = ""
508
  messages = data.get("messages", [])
509
  for message in messages:
@@ -526,7 +522,8 @@ def handsome_chat_completions():
526
  user_content_replaced = user_content.replace(
527
  '\n', '\\n'
528
  ).replace('\r', '\\n')
529
- response_content_replaced = response_content.replace(
 
530
  '\n', '\\n'
531
  ).replace('\r', '\\n')
532
 
@@ -544,10 +541,8 @@ def handsome_chat_completions():
544
  with data_lock:
545
  request_timestamps.append(time.time())
546
  token_counts.append(prompt_tokens + completion_tokens)
547
-
548
- yield f"data: {json.dumps({'choices': [{'delta': {'content': response_content}, 'index': 0, 'finish_reason': None}]})}\n\n"
549
- yield "data: [DONE]\n\n"
550
 
 
551
 
552
  return Response(
553
  stream_with_context(generate()),
 
433
  full_response_content = ""
434
  reasoning_content_accumulated = "" # Accumulate reasoning content
435
  content_accumulated = "" # Accumulate regular content
436
+
437
  for chunk in response.iter_content(chunk_size=1024):
438
  if chunk:
439
  if first_chunk_time is None:
440
  first_chunk_time = time.time()
441
  full_response_content += chunk.decode("utf-8")
442
+
443
+ try:
444
+ chunk_json = json.loads(chunk.decode("utf-8").lstrip("data: ").strip())
445
+ if "choices" in chunk_json and len(chunk_json["choices"]) > 0:
446
+ delta = chunk_json["choices"][0].get("delta", {})
447
+ if "reasoning_content" in delta:
448
+ reasoning_content_accumulated += delta["reasoning_content"]
449
+ formatted_reasoning = f"```Thinking\n{reasoning_content_accumulated}\n```"
450
+ yield f"data: {json.dumps({'choices': [{'delta': {'content': formatted_reasoning}, 'index': 0, 'finish_reason': None}]})}\n\n"
451
+ reasoning_content_accumulated = ""
452
+ if "content" in delta:
453
+ content_accumulated += delta["content"]
454
+ yield f"data: {json.dumps({'choices': [{'delta': {'content': content_accumulated}, 'index': 0, 'finish_reason': None}]})}\n\n"
455
+ content_accumulated = ""
456
+
457
+ except (KeyError, ValueError, json.JSONDecodeError) as e:
458
+ logging.error(f"解析流式响应单行 JSON 失败: {e}, 行内容: {chunk.decode('utf-8')}")
459
+ continue
460
 
461
  end_time = time.time()
462
  first_token_time = (
 
482
  completion_tokens += response_json[
483
  "usage"
484
  ]["completion_tokens"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
485
  if (
486
  "usage" in response_json and
487
  "prompt_tokens" in response_json["usage"]
 
500
  f"行内容: {line}"
501
  )
502
 
 
 
 
 
 
 
 
503
  user_content = ""
504
  messages = data.get("messages", [])
505
  for message in messages:
 
522
  user_content_replaced = user_content.replace(
523
  '\n', '\\n'
524
  ).replace('\r', '\\n')
525
+ response_content_replaced = (f"```Thinking\n{reasoning_content_accumulated}\n```\n" if reasoning_content_accumulated else "") + content_accumulated
526
+ response_content_replaced = response_content_replaced.replace(
527
  '\n', '\\n'
528
  ).replace('\r', '\\n')
529
 
 
541
  with data_lock:
542
  request_timestamps.append(time.time())
543
  token_counts.append(prompt_tokens + completion_tokens)
 
 
 
544
 
545
+ yield "data: [DONE]\n\n"
546
 
547
  return Response(
548
  stream_with_context(generate()),