yangtb24 committed on
Commit
03b936c
·
verified ·
1 Parent(s): 54ab1ea

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -27
app.py CHANGED
@@ -468,12 +468,38 @@ def handsome_chat_completions():
468
  if model_name == "deepseek-reasoner-openwebui":
469
  first_chunk_time = None
470
  full_response_content = ""
471
- for chunk in response.iter_content(chunk_size=2048):
 
 
 
 
472
  if chunk:
473
  if first_chunk_time is None:
474
  first_chunk_time = time.time()
475
  full_response_content += chunk.decode("utf-8")
476
- yield chunk
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
477
 
478
  end_time = time.time()
479
  first_token_time = (
@@ -484,7 +510,6 @@ def handsome_chat_completions():
484
 
485
  prompt_tokens = 0
486
  completion_tokens = 0
487
- response_content = ""
488
  for line in full_response_content.splitlines():
489
  if line.startswith("data:"):
490
  line = line[5:].strip()
@@ -497,22 +522,9 @@ def handsome_chat_completions():
497
  "usage" in response_json and
498
  "completion_tokens" in response_json["usage"]
499
  ):
500
- completion_tokens = response_json[
501
  "usage"
502
  ]["completion_tokens"]
503
-
504
- if (
505
- "choices" in response_json and
506
- len(response_json["choices"]) > 0 and
507
- "delta" in response_json["choices"][0] and
508
- "content" in response_json[
509
- "choices"
510
- ][0]["delta"]
511
- ):
512
- response_content += response_json[
513
- "choices"
514
- ][0]["delta"]["content"]
515
-
516
  if (
517
  "usage" in response_json and
518
  "prompt_tokens" in response_json["usage"]
@@ -531,12 +543,30 @@ def handsome_chat_completions():
531
  f"行内容: {line}"
532
  )
533
 
534
- user_content = extract_user_content(data.get("messages", []))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
535
 
536
  user_content_replaced = user_content.replace(
537
  '\n', '\\n'
538
  ).replace('\r', '\\n')
539
- response_content_replaced = response_content.replace(
 
540
  '\n', '\\n'
541
  ).replace('\r', '\\n')
542
 
@@ -553,14 +583,14 @@ def handsome_chat_completions():
553
 
554
  with data_lock:
555
  request_timestamps.append(time.time())
556
- token_counts.append(prompt_tokens+completion_tokens)
557
- request_timestamps_day.append(time.time())
558
- token_counts_day.append(prompt_tokens+completion_tokens)
559
 
560
- return Response(
561
- stream_with_context(generate()),
562
- content_type=response.headers['Content-Type']
563
- )
 
 
564
 
565
  first_chunk_time = None
566
  full_response_content = ""
@@ -596,7 +626,7 @@ def handsome_chat_completions():
596
  yield f"data: {json.dumps({'choices': [{'delta': {'content': delta["content"]}, 'index': 0}]})}\n\n"
597
 
598
  except (KeyError, ValueError, json.JSONDecodeError) as e:
599
- logging.error(f"解析流式响应单行 JSON 失败: {e}, 行内容: {line}")
600
  continue
601
 
602
  end_time = time.time()
 
468
  if model_name == "deepseek-reasoner-openwebui":
469
  first_chunk_time = None
470
  full_response_content = ""
471
+ reasoning_content_accumulated = ""
472
+ content_accumulated = ""
473
+ first_reasoning_chunk = True
474
+
475
+ for chunk in response.iter_lines():
476
  if chunk:
477
  if first_chunk_time is None:
478
  first_chunk_time = time.time()
479
  full_response_content += chunk.decode("utf-8")
480
+
481
+ for line in chunk.decode("utf-8").splitlines():
482
+ if line.startswith("data:"):
483
+ try:
484
+ chunk_json = json.loads(line.lstrip("data: ").strip())
485
+ if "choices" in chunk_json and len(chunk_json["choices"]) > 0:
486
+ delta = chunk_json["choices"][0].get("delta", {})
487
+
488
+ if delta.get("reasoning_content") is not None:
489
+ reasoning_chunk = delta["reasoning_content"]
490
+ if first_reasoning_chunk:
491
+ reasoning_chunk = f"<think>\n{reasoning_chunk}"
492
+ first_reasoning_chunk = False
493
+ yield f"data: {json.dumps({'choices': [{'delta': {'content': reasoning_chunk}, 'index': 0}]})}\n\n"
494
+
495
+ if delta.get("content") is not None:
496
+ if not first_reasoning_chunk:
497
+ yield f"data: {json.dumps({'choices': [{'delta': {'content': f'\n<\think>\n'}, 'index': 0}]})}\n\n"
498
+ first_reasoning_chunk = True
499
+ yield f"data: {json.dumps({'choices': [{'delta': {'content': delta["content"]}, 'index': 0}]})}\n\n"
500
+
501
+ except (KeyError, ValueError, json.JSONDecodeError) as e:
502
+ continue
503
 
504
  end_time = time.time()
505
  first_token_time = (
 
510
 
511
  prompt_tokens = 0
512
  completion_tokens = 0
 
513
  for line in full_response_content.splitlines():
514
  if line.startswith("data:"):
515
  line = line[5:].strip()
 
522
  "usage" in response_json and
523
  "completion_tokens" in response_json["usage"]
524
  ):
525
+ completion_tokens += response_json[
526
  "usage"
527
  ]["completion_tokens"]
 
 
 
 
 
 
 
 
 
 
 
 
 
528
  if (
529
  "usage" in response_json and
530
  "prompt_tokens" in response_json["usage"]
 
543
  f"行内容: {line}"
544
  )
545
 
546
+ user_content = ""
547
+ messages = data.get("messages", [])
548
+ for message in messages:
549
+ if message["role"] == "user":
550
+ if isinstance(message["content"], str):
551
+ user_content += message["content"] + " "
552
+ elif isinstance(message["content"], list):
553
+ for item in message["content"]:
554
+ if (
555
+ isinstance(item, dict) and
556
+ item.get("type") == "text"
557
+ ):
558
+ user_content += (
559
+ item.get("text", "") +
560
+ " "
561
+ )
562
+
563
+ user_content = user_content.strip()
564
 
565
  user_content_replaced = user_content.replace(
566
  '\n', '\\n'
567
  ).replace('\r', '\\n')
568
+ response_content_replaced = (f"```Thinking\n{reasoning_content_accumulated}\n```\n" if reasoning_content_accumulated else "") + content_accumulated
569
+ response_content_replaced = response_content_replaced.replace(
570
  '\n', '\\n'
571
  ).replace('\r', '\\n')
572
 
 
583
 
584
  with data_lock:
585
  request_timestamps.append(time.time())
586
+ token_counts.append(prompt_tokens + completion_tokens)
 
 
587
 
588
+ yield "data: [DONE]\n\n"
589
+
590
+ return Response(
591
+ stream_with_context(generate()),
592
+ content_type="text/event-stream"
593
+ )
594
 
595
  first_chunk_time = None
596
  full_response_content = ""
 
626
  yield f"data: {json.dumps({'choices': [{'delta': {'content': delta["content"]}, 'index': 0}]})}\n\n"
627
 
628
  except (KeyError, ValueError, json.JSONDecodeError) as e:
629
+ # logging.error(f"解析流式响应单行 JSON 失败: {e}, 行内容: {line}")
630
  continue
631
 
632
  end_time = time.time()