yangtb24 committed on
Commit
8d0c64e
·
verified ·
1 Parent(s): 529b23d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -22
app.py CHANGED
@@ -421,7 +421,7 @@ def handsome_chat_completions():
421
  return jsonify({"error": "Invalid request data"}), 400
422
 
423
  model_name = data['model']
424
-
425
  api_key = select_key(model_name)
426
 
427
  if not api_key:
@@ -434,7 +434,8 @@ def handsome_chat_completions():
434
  )
435
  }
436
  ), 429
437
-
 
438
  if model_name == "deepseek-reasoner":
439
  for param in ["temperature", "top_p", "presence_penalty", "frequency_penalty", "logprobs", "top_logprobs"]:
440
  if param in data:
@@ -444,7 +445,7 @@ def handsome_chat_completions():
444
  "Authorization": f"Bearer {api_key}",
445
  "Content-Type": "application/json"
446
  }
447
-
448
  try:
449
  start_time = time.time()
450
  response = requests.post(
@@ -495,12 +496,17 @@ def handsome_chat_completions():
495
  "usage"
496
  ]["completion_tokens"]
497
 
498
- if "choices" in response_json and len(response_json["choices"]) > 0:
 
499
  delta = response_json["choices"][0].get("delta", {})
500
  if "reasoning_content" in delta and delta["reasoning_content"]:
501
  reasoning_lines = delta["reasoning_content"].splitlines()
502
  formatted_reasoning = "\n".join(f"> {line}" for line in reasoning_lines)
503
- response_content += formatted_reasoning + "\n" # Add a newline after reasoning
 
 
 
 
504
  if "content" in delta and delta["content"]:
505
  response_content += delta["content"]
506
 
@@ -561,7 +567,7 @@ def handsome_chat_completions():
561
 
562
  with data_lock:
563
  request_timestamps.append(time.time())
564
- token_counts.append(prompt_tokens+completion_tokens)
565
 
566
  return Response(
567
  stream_with_context(generate()),
@@ -575,18 +581,22 @@ def handsome_chat_completions():
575
 
576
  try:
577
  prompt_tokens = response_json["usage"]["prompt_tokens"]
578
- completion_tokens = response_json[
579
- "usage"
580
- ]["completion_tokens"]
581
  response_content = ""
582
- if "choices" in response_json and len(response_json["choices"]) > 0:
 
 
583
  choice = response_json["choices"][0]
584
- if "reasoning_content" in choice:
585
- reasoning_lines = choice["reasoning_content"].splitlines()
586
- formatted_reasoning = "\n".join(f"> {line}" for line in reasoning_lines)
587
- response_content += formatted_reasoning + "\n"
588
- if "message" in choice and "content" in choice["message"]:
589
- response_content += choice["message"]["content"]
 
 
 
 
590
  except (KeyError, ValueError, IndexError) as e:
591
  logging.error(
592
  f"解析非流式响应 JSON 失败: {e}, "
@@ -609,7 +619,8 @@ def handsome_chat_completions():
609
  item.get("type") == "text"
610
  ):
611
  user_content += (
612
- item.get("text", "") + " "
 
613
  )
614
 
615
  user_content = user_content.strip()
@@ -633,11 +644,8 @@ def handsome_chat_completions():
633
  )
634
  with data_lock:
635
  request_timestamps.append(time.time())
636
- if "prompt_tokens" in response_json["usage"] and "completion_tokens" in response_json["usage"]:
637
- token_counts.append(response_json["usage"]["prompt_tokens"] + response_json["usage"]["completion_tokens"])
638
- else:
639
- token_counts.append(0)
640
-
641
  # Reformat the response to standard OpenAI format for non-streaming responses
642
  formatted_response = {
643
  "id": response_json.get("id", ""),
 
421
  return jsonify({"error": "Invalid request data"}), 400
422
 
423
  model_name = data['model']
424
+
425
  api_key = select_key(model_name)
426
 
427
  if not api_key:
 
434
  )
435
  }
436
  ), 429
437
+
438
+ # Special handling for deepseek-reasoner
439
  if model_name == "deepseek-reasoner":
440
  for param in ["temperature", "top_p", "presence_penalty", "frequency_penalty", "logprobs", "top_logprobs"]:
441
  if param in data:
 
445
  "Authorization": f"Bearer {api_key}",
446
  "Content-Type": "application/json"
447
  }
448
+
449
  try:
450
  start_time = time.time()
451
  response = requests.post(
 
496
  "usage"
497
  ]["completion_tokens"]
498
 
499
+ # Special handling for deepseek-reasoner in streaming mode
500
+ if model_name == "deepseek-reasoner" and "choices" in response_json and len(response_json["choices"]) > 0:
501
  delta = response_json["choices"][0].get("delta", {})
502
  if "reasoning_content" in delta and delta["reasoning_content"]:
503
  reasoning_lines = delta["reasoning_content"].splitlines()
504
  formatted_reasoning = "\n".join(f"> {line}" for line in reasoning_lines)
505
+ response_content += formatted_reasoning + "\n"
506
+ if "content" in delta and delta["content"]:
507
+ response_content += delta["content"]
508
+ elif "choices" in response_json and len(response_json["choices"]) > 0:
509
+ delta = response_json["choices"][0].get("delta", {})
510
  if "content" in delta and delta["content"]:
511
  response_content += delta["content"]
512
 
 
567
 
568
  with data_lock:
569
  request_timestamps.append(time.time())
570
+ token_counts.append(prompt_tokens + completion_tokens)
571
 
572
  return Response(
573
  stream_with_context(generate()),
 
581
 
582
  try:
583
  prompt_tokens = response_json["usage"]["prompt_tokens"]
584
+ completion_tokens = response_json["usage"]["completion_tokens"]
 
 
585
  response_content = ""
586
+
587
+ # Special handling for deepseek-reasoner in non-streaming mode
588
+ if model_name == "deepseek-reasoner" and "choices" in response_json and len(response_json["choices"]) > 0:
589
  choice = response_json["choices"][0]
590
+ if "message" in choice:
591
+ if "reasoning_content" in choice["message"]:
592
+ reasoning_lines = choice["message"]["reasoning_content"].splitlines()
593
+ formatted_reasoning = "\n".join(f"> {line}" for line in reasoning_lines)
594
+ response_content += formatted_reasoning + "\n"
595
+ if "content" in choice["message"]:
596
+ response_content += choice["message"]["content"]
597
+ elif "choices" in response_json and len(response_json["choices"]) > 0:
598
+ response_content = response_json["choices"][0]["message"]["content"]
599
+
600
  except (KeyError, ValueError, IndexError) as e:
601
  logging.error(
602
  f"解析非流式响应 JSON 失败: {e}, "
 
619
  item.get("type") == "text"
620
  ):
621
  user_content += (
622
+ item.get("text", "") +
623
+ " "
624
  )
625
 
626
  user_content = user_content.strip()
 
644
  )
645
  with data_lock:
646
  request_timestamps.append(time.time())
647
+ token_counts.append(prompt_tokens + completion_tokens)
648
+
 
 
 
649
  # Reformat the response to standard OpenAI format for non-streaming responses
650
  formatted_response = {
651
  "id": response_json.get("id", ""),