yangtb24 committed on
Commit
95146e7
·
verified ·
1 Parent(s): b0f9287

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -31
app.py CHANGED
@@ -431,15 +431,51 @@ def handsome_chat_completions():
431
  def generate():
432
  first_chunk_time = None
433
  full_response_content = ""
434
- reasoning_content_accumulated = "" # Accumulate reasoning content
435
- content_accumulated = "" # Accumulate regular content
436
 
437
  for chunk in response.iter_content(chunk_size=1024):
438
  if chunk:
439
  if first_chunk_time is None:
440
  first_chunk_time = time.time()
441
  full_response_content += chunk.decode("utf-8")
442
- yield chunk
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
443
 
444
  end_time = time.time()
445
  first_token_time = (
@@ -466,19 +502,6 @@ def handsome_chat_completions():
466
  "usage"
467
  ]["completion_tokens"]
468
 
469
- # Special handling for deepseek-reasoner in streaming mode
470
- if model_name == "deepseek-reasoner" and "choices" in response_json and len(response_json["choices"]) > 0:
471
- delta = response_json["choices"][0].get("delta", {})
472
- if "reasoning_content" in delta:
473
- reasoning_content_accumulated += delta["reasoning_content"]
474
- if "content" in delta:
475
- content_accumulated += delta["content"]
476
- elif "choices" in response_json and len(response_json["choices"]) > 0:
477
- # Handle other models normally
478
- delta = response_json["choices"][0].get("delta", {})
479
- if "content" in delta:
480
- content_accumulated += delta["content"]
481
-
482
  if (
483
  "usage" in response_json and
484
  "prompt_tokens" in response_json["usage"]
@@ -497,14 +520,6 @@ def handsome_chat_completions():
497
  f"行内容: {line}"
498
  )
499
 
500
- # Format the accumulated reasoning content after processing all chunks
501
- if model_name == "deepseek-reasoner":
502
- reasoning_lines = reasoning_content_accumulated.splitlines()
503
- formatted_reasoning = "\n".join(f"> {line}" for line in reasoning_lines)
504
- response_content = formatted_reasoning + "\n" + content_accumulated
505
- else:
506
- response_content = content_accumulated
507
-
508
  user_content = ""
509
  messages = data.get("messages", [])
510
  for message in messages:
@@ -527,9 +542,6 @@ def handsome_chat_completions():
527
  user_content_replaced = user_content.replace(
528
  '\n', '\\n'
529
  ).replace('\r', '\\n')
530
- response_content_replaced = response_content.replace(
531
- '\n', '\\n'
532
- ).replace('\r', '\\n')
533
 
534
  logging.info(
535
  f"使用的key: {api_key}, "
@@ -538,8 +550,7 @@ def handsome_chat_completions():
538
  f"首字用时: {first_token_time:.4f}秒, "
539
  f"总共用时: {total_time:.4f}秒, "
540
  f"使用的模型: {model_name}, "
541
- f"用户的内容: {user_content_replaced}, "
542
- f"输出的内容: {response_content_replaced}"
543
  )
544
 
545
  with data_lock:
@@ -551,7 +562,6 @@ def handsome_chat_completions():
551
  content_type=response.headers['Content-Type']
552
  )
553
  else:
554
- # ... (Non-streaming part remains the same as in the previous response)
555
  response.raise_for_status()
556
  end_time = time.time()
557
  response_json = response.json()
@@ -653,7 +663,6 @@ def handsome_chat_completions():
653
  logging.error(f"请求转发异常: {e}")
654
  return jsonify({"error": str(e)}), 500
655
 
656
-
657
  if __name__ == '__main__':
658
  logging.info(f"环境变量:{os.environ}")
659
 
 
431
  def generate():
432
  first_chunk_time = None
433
  full_response_content = ""
 
 
434
 
435
  for chunk in response.iter_content(chunk_size=1024):
436
  if chunk:
437
  if first_chunk_time is None:
438
  first_chunk_time = time.time()
439
  full_response_content += chunk.decode("utf-8")
440
+
441
+ chunk_data_list = chunk.decode("utf-8").split("\n\n")
442
+
443
+ for chunk_data in chunk_data_list:
444
+ if not chunk_data:
445
+ continue
446
+ if chunk_data.startswith("data:"):
447
+ chunk_data = chunk_data[5:].strip()
448
+ if chunk_data == "[DONE]":
449
+ continue
450
+ try:
451
+ response_json = json.loads(chunk_data)
452
+ if (
453
+ model_name == "deepseek-reasoner" and
454
+ "choices" in response_json and
455
+ len(response_json["choices"]) > 0
456
+ ):
457
+ delta = response_json["choices"][0].get("delta", {})
458
+ new_content = ""
459
+ if "reasoning_content" in delta:
460
+ new_content += "> " + delta["reasoning_content"]
461
+ if "content" in delta:
462
+ new_content += delta["content"]
463
+
464
+ if new_content:
465
+ response_json["choices"][0]["delta"] = {"content": new_content}
466
+ yield f"data: {json.dumps(response_json)}\n\n".encode("utf-8")
467
+ else:
468
+ yield f"data: {chunk_data}\n\n".encode("utf-8")
469
+
470
+ except (
471
+ KeyError,
472
+ ValueError,
473
+ IndexError
474
+ ) as e:
475
+ logging.error(
476
+ f"解析流式响应单行 JSON 失败: {e}, "
477
+ f"行内容: {chunk_data}"
478
+ )
479
 
480
  end_time = time.time()
481
  first_token_time = (
 
502
  "usage"
503
  ]["completion_tokens"]
504
 
 
 
 
 
 
 
 
 
 
 
 
 
 
505
  if (
506
  "usage" in response_json and
507
  "prompt_tokens" in response_json["usage"]
 
520
  f"行内容: {line}"
521
  )
522
 
 
 
 
 
 
 
 
 
523
  user_content = ""
524
  messages = data.get("messages", [])
525
  for message in messages:
 
542
  user_content_replaced = user_content.replace(
543
  '\n', '\\n'
544
  ).replace('\r', '\\n')
 
 
 
545
 
546
  logging.info(
547
  f"使用的key: {api_key}, "
 
550
  f"首字用时: {first_token_time:.4f}秒, "
551
  f"总共用时: {total_time:.4f}秒, "
552
  f"使用的模型: {model_name}, "
553
+ f"用户的内容: {user_content_replaced}"
 
554
  )
555
 
556
  with data_lock:
 
562
  content_type=response.headers['Content-Type']
563
  )
564
  else:
 
565
  response.raise_for_status()
566
  end_time = time.time()
567
  response_json = response.json()
 
663
  logging.error(f"请求转发异常: {e}")
664
  return jsonify({"error": str(e)}), 500
665
 
 
666
  if __name__ == '__main__':
667
  logging.info(f"环境变量:{os.environ}")
668