yangtb24 committed
Commit f5fbdd1 · verified · 1 Parent(s): 26000f8

Update app.py

Files changed (1):
  1. app.py +53 -27
app.py CHANGED
@@ -431,13 +431,61 @@ def handsome_chat_completions():
         def generate():
             first_chunk_time = None
             full_response_content = ""
-            accumulated_reasoning = "" # Accumulate reasoning content
+            reasoning_content_buffer = ""
+            content_buffer = ""
             for chunk in response.iter_content(chunk_size=1024):
                 if chunk:
                     if first_chunk_time is None:
                         first_chunk_time = time.time()
                     full_response_content += chunk.decode("utf-8")
-                    yield chunk
+
+                    for line in chunk.decode("utf-8").splitlines():
+                        if line.startswith("data:"):
+                            line = line[5:].strip()
+                            if line == "[DONE]":
+                                continue
+                            try:
+                                response_json = json.loads(line)
+
+                                if (
+                                    "usage" in response_json and
+                                    "completion_tokens" in response_json["usage"]
+                                ):
+                                    completion_tokens = response_json[
+                                        "usage"
+                                    ]["completion_tokens"]
+
+                                if "choices" in response_json and len(response_json["choices"]) > 0:
+                                    delta = response_json["choices"][0].get("delta", {})
+                                    if "reasoning_content" in delta and delta["reasoning_content"] is not None:
+                                        reasoning_content_buffer += delta["reasoning_content"]
+                                    if "content" in delta and delta["content"] is not None:
+                                        content_buffer += delta["content"]
+
+
+                                if (
+                                    "usage" in response_json and
+                                    "prompt_tokens" in response_json["usage"]
+                                ):
+                                    prompt_tokens = response_json[
+                                        "usage"
+                                    ]["prompt_tokens"]
+                            except (
+                                KeyError,
+                                ValueError,
+                                IndexError
+                            ) as e:
+                                logging.error(
+                                    f"解析流式响应单行 JSON 失败: {e}, "
+                                    f"行内容: {line}"
+                                )
+
+                    # Format and yield the accumulated content
+                    formatted_reasoning = "\n".join(f"> {line}" for line in reasoning_content_buffer.splitlines())
+                    combined_content = formatted_reasoning + "\n" + content_buffer
+                    yield combined_content.encode("utf-8")
+                    reasoning_content_buffer = ""
+                    content_buffer = ""
 
         end_time = time.time()
         first_token_time = (
@@ -448,7 +496,7 @@ def handsome_chat_completions():
 
         prompt_tokens = 0
         completion_tokens = 0
-        response_content = ""
+
         for line in full_response_content.splitlines():
             if line.startswith("data:"):
                 line = line[5:].strip()
@@ -456,7 +504,6 @@ def handsome_chat_completions():
                     continue
                 try:
                     response_json = json.loads(line)
-
                     if (
                         "usage" in response_json and
                         "completion_tokens" in response_json["usage"]
@@ -464,25 +511,6 @@ def handsome_chat_completions():
                         completion_tokens = response_json[
                             "usage"
                         ]["completion_tokens"]
-
-                    # Improved handling for deepseek-reasoner in streaming mode
-                    if model_name == "deepseek-reasoner" and "choices" in response_json and len(response_json["choices"]) > 0:
-                        delta = response_json["choices"][0].get("delta", {})
-                        if "reasoning_content" in delta:
-                            accumulated_reasoning += delta["reasoning_content"]
-                        if "content" in delta and delta["content"]:
-                            # Prepend accumulated reasoning before content
-                            if accumulated_reasoning:
-                                reasoning_lines = accumulated_reasoning.splitlines()
-                                formatted_reasoning = "\n".join(f"> {line}" for line in reasoning_lines)
-                                response_content += formatted_reasoning + "\n"
-                                accumulated_reasoning = "" # Reset
-                            response_content += delta["content"]
-                    elif "choices" in response_json and len(response_json["choices"]) > 0:
-                        delta = response_json["choices"][0].get("delta", {})
-                        if "content" in delta and delta["content"]:
-                            response_content += delta["content"]
-
                     if (
                         "usage" in response_json and
                         "prompt_tokens" in response_json["usage"]
@@ -490,7 +518,6 @@ def handsome_chat_completions():
                         prompt_tokens = response_json[
                             "usage"
                         ]["prompt_tokens"]
-
                 except (
                     KeyError,
                     ValueError,
@@ -500,7 +527,7 @@ def handsome_chat_completions():
                         f"解析流式响应单行 JSON 失败: {e}, "
                         f"行内容: {line}"
                     )
-
+
         user_content = ""
         messages = data.get("messages", [])
         for message in messages:
@@ -523,7 +550,7 @@ def handsome_chat_completions():
         user_content_replaced = user_content.replace(
            '\n', '\\n'
         ).replace('\r', '\\n')
-        response_content_replaced = response_content.replace(
+        response_content_replaced = (formatted_reasoning + "\n" + content_buffer).replace(
            '\n', '\\n'
         ).replace('\r', '\\n')
 
@@ -547,7 +574,6 @@ def handsome_chat_completions():
            content_type=response.headers['Content-Type']
        )
    else:
-        # ... (rest of the code for non-streaming mode remains the same)
        response.raise_for_status()
        end_time = time.time()
        response_json = response.json()
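For reference, the per-chunk handling introduced in this commit can be exercised outside the Flask app. The sketch below is not part of the commit; it is a minimal, hypothetical reproduction (the helper name `format_stream_chunk` is made up) of the same idea: parse SSE `data:` lines from a chunk, collect `reasoning_content` and `content` deltas, and emit the reasoning as Markdown blockquotes ahead of the answer text.

import json

def format_stream_chunk(raw_chunk: bytes) -> bytes:
    """Hypothetical standalone version of the per-chunk logic added in this
    commit: parse SSE "data:" lines, collect reasoning_content and content
    deltas, and render reasoning as "> " blockquote lines before the text."""
    reasoning_content_buffer = ""
    content_buffer = ""

    for line in raw_chunk.decode("utf-8").splitlines():
        if not line.startswith("data:"):
            continue
        line = line[5:].strip()
        if line == "[DONE]" or not line:
            continue
        try:
            payload = json.loads(line)
        except ValueError:
            continue  # skip malformed lines; the real handler logs and moves on
        choices = payload.get("choices") or []
        if choices:
            delta = choices[0].get("delta", {})
            if delta.get("reasoning_content"):
                reasoning_content_buffer += delta["reasoning_content"]
            if delta.get("content"):
                content_buffer += delta["content"]

    # Reasoning becomes blockquote lines, followed by the regular content.
    formatted_reasoning = "\n".join(
        f"> {line}" for line in reasoning_content_buffer.splitlines()
    )
    return (formatted_reasoning + "\n" + content_buffer).encode("utf-8")


if __name__ == "__main__":
    # Example SSE chunk with one reasoning delta and one content delta.
    chunk = (
        b'data: {"choices": [{"delta": {"reasoning_content": "Step 1\\nStep 2"}}]}\n'
        b'data: {"choices": [{"delta": {"content": "Final answer"}}]}\n'
        b'data: [DONE]\n'
    )
    print(format_stream_chunk(chunk).decode("utf-8"))
    # > Step 1
    # > Step 2
    # Final answer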