ds

Sleeping

App Files Files Community

yangtb24 committed on Jan 20

Commit

8d0c64e

verified ·

1 Parent(s): 529b23d

Update app.py

Browse files

Files changed (1) hide show

app.py +30 -22

app.py CHANGED Viewed

@@ -421,7 +421,7 @@ def handsome_chat_completions():
         return jsonify({"error": "Invalid request data"}), 400
     model_name = data['model']
     api_key = select_key(model_name)
     if not api_key:
@@ -434,7 +434,8 @@ def handsome_chat_completions():
                 )
             }
         ), 429
     if model_name == "deepseek-reasoner":
         for param in ["temperature", "top_p", "presence_penalty", "frequency_penalty", "logprobs", "top_logprobs"]:
             if param in data:
@@ -444,7 +445,7 @@ def handsome_chat_completions():
         "Authorization": f"Bearer {api_key}",
         "Content-Type": "application/json"
     }
     try:
         start_time = time.time()
         response = requests.post(
@@ -495,12 +496,17 @@ def handsome_chat_completions():
                                     "usage"
                                 ]["completion_tokens"]
-                            if "choices" in response_json and len(response_json["choices"]) > 0:
                                 delta = response_json["choices"][0].get("delta", {})
                                 if "reasoning_content" in delta and delta["reasoning_content"]:
                                     reasoning_lines = delta["reasoning_content"].splitlines()
                                     formatted_reasoning = "\n".join(f"> {line}" for line in reasoning_lines)
-                                    response_content += formatted_reasoning + "\n"  # Add a newline after reasoning
                                 if "content" in delta and delta["content"]:
                                     response_content += delta["content"]
@@ -561,7 +567,7 @@ def handsome_chat_completions():
                 with data_lock:
                     request_timestamps.append(time.time())
-                    token_counts.append(prompt_tokens+completion_tokens)
             return Response(
                 stream_with_context(generate()),
@@ -575,18 +581,22 @@ def handsome_chat_completions():
             try:
                 prompt_tokens = response_json["usage"]["prompt_tokens"]
-                completion_tokens = response_json[
-                    "usage"
-                ]["completion_tokens"]
                 response_content = ""
-                if "choices" in response_json and len(response_json["choices"]) > 0:
                     choice = response_json["choices"][0]
-                    if "reasoning_content" in choice:
-                        reasoning_lines = choice["reasoning_content"].splitlines()
-                        formatted_reasoning = "\n".join(f"> {line}" for line in reasoning_lines)
-                        response_content += formatted_reasoning + "\n"
-                    if "message" in choice and "content" in choice["message"]:
-                        response_content += choice["message"]["content"]
             except (KeyError, ValueError, IndexError) as e:
                 logging.error(
                     f"解析非流式响应 JSON 失败: {e}, "
@@ -609,7 +619,8 @@ def handsome_chat_completions():
                                 item.get("type") == "text"
                             ):
                                 user_content += (
-                                    item.get("text", "") + " "
                                 )
             user_content = user_content.strip()
@@ -633,11 +644,8 @@ def handsome_chat_completions():
             )
             with data_lock:
                 request_timestamps.append(time.time())
-                if "prompt_tokens" in response_json["usage"] and "completion_tokens" in response_json["usage"]:
-                    token_counts.append(response_json["usage"]["prompt_tokens"] + response_json["usage"]["completion_tokens"])
-                else:
-                    token_counts.append(0)
             # Reformat the response to standard OpenAI format for non-streaming responses
             formatted_response = {
                 "id": response_json.get("id", ""),

         return jsonify({"error": "Invalid request data"}), 400
     model_name = data['model']
     api_key = select_key(model_name)
     if not api_key:
                 )
             }
         ), 429
+    # Special handling for deepseek-reasoner
     if model_name == "deepseek-reasoner":
         for param in ["temperature", "top_p", "presence_penalty", "frequency_penalty", "logprobs", "top_logprobs"]:
             if param in data:
         "Authorization": f"Bearer {api_key}",
         "Content-Type": "application/json"
     }
     try:
         start_time = time.time()
         response = requests.post(
                                     "usage"
                                 ]["completion_tokens"]
+                            # Special handling for deepseek-reasoner in streaming mode
+                            if model_name == "deepseek-reasoner" and "choices" in response_json and len(response_json["choices"]) > 0:
                                 delta = response_json["choices"][0].get("delta", {})
                                 if "reasoning_content" in delta and delta["reasoning_content"]:
                                     reasoning_lines = delta["reasoning_content"].splitlines()
                                     formatted_reasoning = "\n".join(f"> {line}" for line in reasoning_lines)
+                                    response_content += formatted_reasoning + "\n"
+                                if "content" in delta and delta["content"]:
+                                    response_content += delta["content"]
+                            elif "choices" in response_json and len(response_json["choices"]) > 0:
+                                delta = response_json["choices"][0].get("delta", {})
                                 if "content" in delta and delta["content"]:
                                     response_content += delta["content"]
                 with data_lock:
                     request_timestamps.append(time.time())
+                    token_counts.append(prompt_tokens + completion_tokens)
             return Response(
                 stream_with_context(generate()),
             try:
                 prompt_tokens = response_json["usage"]["prompt_tokens"]
+                completion_tokens = response_json["usage"]["completion_tokens"]
                 response_content = ""
+                # Special handling for deepseek-reasoner in non-streaming mode
+                if model_name == "deepseek-reasoner" and "choices" in response_json and len(response_json["choices"]) > 0:
                     choice = response_json["choices"][0]
+                    if "message" in choice:
+                        if "reasoning_content" in choice["message"]:
+                            reasoning_lines = choice["message"]["reasoning_content"].splitlines()
+                            formatted_reasoning = "\n".join(f"> {line}" for line in reasoning_lines)
+                            response_content += formatted_reasoning + "\n"
+                        if "content" in choice["message"]:
+                            response_content += choice["message"]["content"]
+                elif "choices" in response_json and len(response_json["choices"]) > 0:
+                    response_content = response_json["choices"][0]["message"]["content"]
             except (KeyError, ValueError, IndexError) as e:
                 logging.error(
                     f"解析非流式响应 JSON 失败: {e}, "
                                 item.get("type") == "text"
                             ):
                                 user_content += (
+                                    item.get("text", "") +
+                                    " "
                                 )
             user_content = user_content.strip()
             )
             with data_lock:
                 request_timestamps.append(time.time())
+                token_counts.append(prompt_tokens + completion_tokens)
             # Reformat the response to standard OpenAI format for non-streaming responses
             formatted_response = {
                 "id": response_json.get("id", ""),