yangtb24 committed on
Commit b0f9287 · verified · 1 parent: f5fbdd1

Update app.py

Files changed (1): app.py (+33 -54)
app.py CHANGED
@@ -431,61 +431,15 @@ def handsome_chat_completions():
             def generate():
                 first_chunk_time = None
                 full_response_content = ""
-                reasoning_content_buffer = ""
-                content_buffer = ""
+                reasoning_content_accumulated = ""  # Accumulate reasoning content
+                content_accumulated = ""  # Accumulate regular content
+
                 for chunk in response.iter_content(chunk_size=1024):
                     if chunk:
                         if first_chunk_time is None:
                             first_chunk_time = time.time()
                         full_response_content += chunk.decode("utf-8")
-
-                        for line in chunk.decode("utf-8").splitlines():
-                            if line.startswith("data:"):
-                                line = line[5:].strip()
-                                if line == "[DONE]":
-                                    continue
-                                try:
-                                    response_json = json.loads(line)
-
-                                    if (
-                                        "usage" in response_json and
-                                        "completion_tokens" in response_json["usage"]
-                                    ):
-                                        completion_tokens = response_json[
-                                            "usage"
-                                        ]["completion_tokens"]
-
-                                    if "choices" in response_json and len(response_json["choices"]) > 0:
-                                        delta = response_json["choices"][0].get("delta", {})
-                                        if "reasoning_content" in delta and delta["reasoning_content"] is not None:
-                                            reasoning_content_buffer += delta["reasoning_content"]
-                                        if "content" in delta and delta["content"] is not None:
-                                            content_buffer += delta["content"]
-
-
-                                    if (
-                                        "usage" in response_json and
-                                        "prompt_tokens" in response_json["usage"]
-                                    ):
-                                        prompt_tokens = response_json[
-                                            "usage"
-                                        ]["prompt_tokens"]
-                                except (
-                                    KeyError,
-                                    ValueError,
-                                    IndexError
-                                ) as e:
-                                    logging.error(
-                                        f"解析流式响应单行 JSON 失败: {e}, "
-                                        f"行内容: {line}"
-                                    )
-
-                        # Format and yield the accumulated content
-                        formatted_reasoning = "\n".join(f"> {line}" for line in reasoning_content_buffer.splitlines())
-                        combined_content = formatted_reasoning + "\n" + content_buffer
-                        yield combined_content.encode("utf-8")
-                        reasoning_content_buffer = ""
-                        content_buffer = ""
+                        yield chunk
 
                 end_time = time.time()
                 first_token_time = (
@@ -496,7 +450,6 @@ def handsome_chat_completions():
 
                 prompt_tokens = 0
                 completion_tokens = 0
-
                 for line in full_response_content.splitlines():
                     if line.startswith("data:"):
                         line = line[5:].strip()
@@ -504,13 +457,28 @@ def handsome_chat_completions():
                            continue
                        try:
                            response_json = json.loads(line)
+
                            if (
                                "usage" in response_json and
                                "completion_tokens" in response_json["usage"]
                            ):
-                                completion_tokens = response_json[
+                                completion_tokens += response_json[
                                    "usage"
                                ]["completion_tokens"]
+
+                            # Special handling for deepseek-reasoner in streaming mode
+                            if model_name == "deepseek-reasoner" and "choices" in response_json and len(response_json["choices"]) > 0:
+                                delta = response_json["choices"][0].get("delta", {})
+                                if "reasoning_content" in delta:
+                                    reasoning_content_accumulated += delta["reasoning_content"]
+                                if "content" in delta:
+                                    content_accumulated += delta["content"]
+                            elif "choices" in response_json and len(response_json["choices"]) > 0:
+                                # Handle other models normally
+                                delta = response_json["choices"][0].get("delta", {})
+                                if "content" in delta:
+                                    content_accumulated += delta["content"]
+
                            if (
                                "usage" in response_json and
                                "prompt_tokens" in response_json["usage"]
@@ -518,6 +486,7 @@ def handsome_chat_completions():
                                prompt_tokens = response_json[
                                    "usage"
                                ]["prompt_tokens"]
+
                        except (
                            KeyError,
                            ValueError,
@@ -527,7 +496,15 @@ def handsome_chat_completions():
                                f"解析流式响应单行 JSON 失败: {e}, "
                                f"行内容: {line}"
                            )
-
+
+                # Format the accumulated reasoning content after processing all chunks
+                if model_name == "deepseek-reasoner":
+                    reasoning_lines = reasoning_content_accumulated.splitlines()
+                    formatted_reasoning = "\n".join(f"> {line}" for line in reasoning_lines)
+                    response_content = formatted_reasoning + "\n" + content_accumulated
+                else:
+                    response_content = content_accumulated
+
                 user_content = ""
                 messages = data.get("messages", [])
                 for message in messages:
@@ -550,7 +527,7 @@ def handsome_chat_completions():
                 user_content_replaced = user_content.replace(
                     '\n', '\\n'
                 ).replace('\r', '\\n')
-                response_content_replaced = (formatted_reasoning + "\n" + content_buffer).replace(
+                response_content_replaced = response_content.replace(
                    '\n', '\\n'
                ).replace('\r', '\\n')
 
@@ -574,6 +551,7 @@ def handsome_chat_completions():
                content_type=response.headers['Content-Type']
            )
        else:
+            # ... (Non-streaming part remains the same as in the previous response)
            response.raise_for_status()
            end_time = time.time()
            response_json = response.json()
@@ -675,6 +653,7 @@ def handsome_chat_completions():
        logging.error(f"请求转发异常: {e}")
        return jsonify({"error": str(e)}), 500
 
+
if __name__ == '__main__':
    logging.info(f"环境变量:{os.environ}")
 
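
For reference, the snippet below is a standalone sketch of the post-stream parsing that this commit moves out of the per-chunk loop: generate() now yields each upstream chunk unchanged while accumulating full_response_content, and only afterwards walks the buffered SSE body to sum completion_tokens, collect reasoning_content/content, and quote the reasoning for deepseek-reasoner. The SSE payload and the model_name value here are fabricated for illustration, and the .get()-based parsing is a simplification of the explicit "in" checks used in app.py.

import json

# Fabricated SSE body; in app.py this is full_response_content, accumulated while
# each raw chunk is yielded to the client as-is.
full_response_content = (
    'data: {"choices":[{"delta":{"reasoning_content":"Step 1\\nStep 2"}}]}\n'
    'data: {"choices":[{"delta":{"content":"Final answer"}}],'
    '"usage":{"prompt_tokens":12,"completion_tokens":7}}\n'
    'data: [DONE]\n'
)
model_name = "deepseek-reasoner"  # in app.py this comes from the request body

prompt_tokens = 0
completion_tokens = 0
reasoning_content_accumulated = ""
content_accumulated = ""

for line in full_response_content.splitlines():
    if not line.startswith("data:"):
        continue
    line = line[5:].strip()
    if line == "[DONE]":
        continue
    try:
        response_json = json.loads(line)
    except ValueError:
        continue
    usage = response_json.get("usage", {})
    completion_tokens += usage.get("completion_tokens", 0)
    prompt_tokens = usage.get("prompt_tokens", prompt_tokens)
    choices = response_json.get("choices", [])
    if choices:
        delta = choices[0].get("delta", {})
        reasoning_content_accumulated += delta.get("reasoning_content") or ""
        content_accumulated += delta.get("content") or ""

# deepseek-reasoner: render the reasoning as a quoted block above the answer.
if model_name == "deepseek-reasoner":
    formatted_reasoning = "\n".join(
        f"> {line}" for line in reasoning_content_accumulated.splitlines()
    )
    response_content = formatted_reasoning + "\n" + content_accumulated
else:
    response_content = content_accumulated

print(response_content)                  # "> Step 1\n> Step 2\nFinal answer"
print(prompt_tokens, completion_tokens)  # 12 7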