ds

Sleeping

App Files Files Community

yangtb24 commited on Jan 21

Commit

90cb193

verified ·

1 Parent(s): b413541

Update app.py

Browse files

Files changed (1) hide show

app.py +89 -104

app.py CHANGED Viewed

@@ -3,20 +3,13 @@ import time
 import logging
 import requests
 import json
-import random
 import uuid
 import concurrent.futures
 import threading
-import base64
-import io
-from PIL import Image
 from datetime import datetime, timedelta
 from apscheduler.schedulers.background import BackgroundScheduler
 from flask import Flask, request, jsonify, Response, stream_with_context
-os.environ['TZ'] = 'Asia/Shanghai'
-time.tzset()
 logging.basicConfig(level=logging.INFO,
                     format='%(asctime)s - %(levelname)s - %(message)s')
@@ -39,9 +32,6 @@ token_counts = []
 data_lock = threading.Lock()
 def get_credit_summary(api_key):
-    """
-    使用 API 密钥获取额度信息，并将美元余额转换为人民币。
-    """
     headers = {
         "Authorization": f"Bearer {api_key}",
         "Content-Type": "application/json"
@@ -53,7 +43,7 @@ def get_credit_summary(api_key):
         if not data.get("is_available", False):
             logging.warning(f"API Key: {api_key} is not available.")
             return None
         balance_infos = data.get("balance_infos", [])
         total_balance_cny = 0.0
         usd_balance = 0.0
@@ -87,10 +77,6 @@ def get_credit_summary(api_key):
          return None
 def get_usd_to_cny_rate():
-    """
-    获取美元兑人民币的汇率。
-    这里使用一个公共的汇率 API，你可以替换成你自己的。
-    """
     try:
         response = requests.get("https://api.exchangerate-api.com/v4/latest/USD")
         response.raise_for_status()
@@ -105,12 +91,6 @@ def refresh_models():
     logging.info(f"所有文本模型列表：{text_models}")
 def load_keys():
-    """
-    从环境变量中加载 keys，进行去重，
-    并根据额度和模型可用性进行分类，
-    然后记录到日志中。
-    使用线程池并发处理每个 key。
-    """
     keys_str = os.environ.get("KEYS")
     keys = [key.strip() for key in keys_str.split(',')]
     unique_keys = list(set(keys))
@@ -152,9 +132,6 @@ def load_keys():
     valid_keys_global = valid_keys
 def process_key(key):
-    """
-    处理单个 key，判断其类型。
-    """
     credit_summary = get_credit_summary(key)
     if credit_summary is None:
         return "invalid"
@@ -166,10 +143,6 @@ def process_key(key):
             return "valid"
 def select_key(model_name):
-    """
-    根据请求类型和模型名称选择合适的 KEY，
-    并实现轮询和重试机制。
-    """
     available_keys = valid_keys_global
     current_index = model_key_indices.get(model_name, 0)
@@ -184,10 +157,6 @@ def select_key(model_name):
     return None
 def check_authorization(request):
-    """
-    检查请求头中的 Authorization 字段
-    是否匹配环境变量 AUTHORIZATION_KEY。
-    """
     authorization_key = os.environ.get("AUTHORIZATION_KEY")
     if not authorization_key:
         logging.warning("环境变量 AUTHORIZATION_KEY 未设置，请设置后重试。")
@@ -198,9 +167,9 @@ def check_authorization(request):
         logging.warning("请求头中缺少 Authorization 字段。")
         return False
-    if auth_header != f"Bearer {authorization_key}":
-        logging.warning(f"无效的 Authorization 密钥：{auth_header}")
-        return False
     return True
@@ -270,12 +239,12 @@ def check_tokens():
                 )
     return jsonify(results)
 @app.route('/handsome/v1/models', methods=['GET'])
 def list_models():
     if not check_authorization(request):
         return jsonify({"error": "Unauthorized"}), 401
     detailed_models = [
         {
             "id": "deepseek-chat",
@@ -379,38 +348,6 @@ def billing_usage():
         "total_usage": 0
     })
-@app.route('/handsome/v1/dashboard/billing/subscription', methods=['GET'])
-def billing_subscription():
-    if not check_authorization(request):
-        return jsonify({"error": "Unauthorized"}), 401
-    total_balance = get_billing_info()
-    return jsonify({
-        "object": "billing_subscription",
-        "has_payment_method": False,
-        "canceled": False,
-        "canceled_at": None,
-        "delinquent": None,
-        "access_until": int(datetime(9999, 12, 31).timestamp()),
-        "soft_limit": 0,
-        "hard_limit": total_balance,
-        "system_hard_limit": total_balance,
-        "soft_limit_usd": 0,
-        "hard_limit_usd": total_balance,
-        "system_hard_limit_usd": total_balance,
-        "plan": {
-            "name": "SiliconFlow API",
-            "id": "siliconflow-api"
-        },
-        "account_name": "SiliconFlow User",
-        "po_number": None,
-        "billing_email": None,
-        "tax_ids": [],
-        "billing_address": None,
-        "business_address": None
-    })
 @app.route('/handsome/v1/chat/completions', methods=['POST'])
 def handsome_chat_completions():
     if not check_authorization(request):
@@ -421,7 +358,7 @@ def handsome_chat_completions():
         return jsonify({"error": "Invalid request data"}), 400
     model_name = data['model']
     api_key = select_key(model_name)
     if not api_key:
@@ -434,7 +371,7 @@ def handsome_chat_completions():
                 )
             }
         ), 429
     if model_name == "deepseek-reasoner":
         for param in ["temperature", "top_p", "presence_penalty", "frequency_penalty", "logprobs", "top_logprobs"]:
             if param in data:
@@ -444,7 +381,7 @@ def handsome_chat_completions():
         "Authorization": f"Bearer {api_key}",
         "Content-Type": "application/json"
     }
     try:
         start_time = time.time()
         response = requests.post(
@@ -462,12 +399,40 @@ def handsome_chat_completions():
             def generate():
                 first_chunk_time = None
                 full_response_content = ""
                 for chunk in response.iter_content(chunk_size=1024):
                     if chunk:
                         if first_chunk_time is None:
                             first_chunk_time = time.time()
                         full_response_content += chunk.decode("utf-8")
-                        yield chunk
                 end_time = time.time()
                 first_token_time = (
@@ -478,7 +443,6 @@ def handsome_chat_completions():
                 prompt_tokens = 0
                 completion_tokens = 0
-                response_content = ""
                 for line in full_response_content.splitlines():
                     if line.startswith("data:"):
                         line = line[5:].strip()
@@ -491,22 +455,9 @@ def handsome_chat_completions():
                                 "usage" in response_json and
                                 "completion_tokens" in response_json["usage"]
                             ):
-                                completion_tokens = response_json[
                                     "usage"
                                 ]["completion_tokens"]
-                            if (
-                                "choices" in response_json and
-                                len(response_json["choices"]) > 0 and
-                                "delta" in response_json["choices"][0] and
-                                "content" in response_json[
-                                    "choices"
-                                ][0]["delta"]
-                            ):
-                                response_content += response_json[
-                                    "choices"
-                                ][0]["delta"]["content"]
                             if (
                                 "usage" in response_json and
                                 "prompt_tokens" in response_json["usage"]
@@ -547,7 +498,8 @@ def handsome_chat_completions():
                 user_content_replaced = user_content.replace(
                     '\n', '\\n'
                 ).replace('\r', '\\n')
-                response_content_replaced = response_content.replace(
                     '\n', '\\n'
                 ).replace('\r', '\\n')
@@ -564,13 +516,16 @@ def handsome_chat_completions():
                 with data_lock:
                     request_timestamps.append(time.time())
-                    token_counts.append(prompt_tokens+completion_tokens)
             return Response(
                 stream_with_context(generate()),
-                content_type=response.headers['Content-Type']
             )
         else:
             response.raise_for_status()
             end_time = time.time()
             response_json = response.json()
@@ -578,12 +533,21 @@ def handsome_chat_completions():
             try:
                 prompt_tokens = response_json["usage"]["prompt_tokens"]
-                completion_tokens = response_json[
-                    "usage"
-                ]["completion_tokens"]
-                response_content = response_json[
-                    "choices"
-                ][0]["message"]["content"]
             except (KeyError, ValueError, IndexError) as e:
                 logging.error(
                     f"解析非流式响应 JSON 失败: {e}, "
@@ -606,7 +570,8 @@ def handsome_chat_completions():
                                 item.get("type") == "text"
                             ):
                                 user_content += (
-                                    item.get("text", "") + " "
                                 )
             user_content = user_content.strip()
@@ -630,12 +595,32 @@ def handsome_chat_completions():
             )
             with data_lock:
                 request_timestamps.append(time.time())
-                if "prompt_tokens" in response_json["usage"] and "completion_tokens" in response_json["usage"]:
-                    token_counts.append(response_json["usage"]["prompt_tokens"] + response_json["usage"]["completion_tokens"])
-                else:
-                    token_counts.append(0)
-            return jsonify(response_json)
     except requests.exceptions.RequestException as e:
         logging.error(f"请求转发异常: {e}")
@@ -661,4 +646,4 @@ if __name__ == '__main__':
         debug=False,
         host='0.0.0.0',
         port=int(os.environ.get('PORT', 7860))
-    )

 import logging
 import requests
 import json
 import uuid
 import concurrent.futures
 import threading
 from datetime import datetime, timedelta
 from apscheduler.schedulers.background import BackgroundScheduler
 from flask import Flask, request, jsonify, Response, stream_with_context
 logging.basicConfig(level=logging.INFO,
                     format='%(asctime)s - %(levelname)s - %(message)s')
 data_lock = threading.Lock()
 def get_credit_summary(api_key):
     headers = {
         "Authorization": f"Bearer {api_key}",
         "Content-Type": "application/json"
         if not data.get("is_available", False):
             logging.warning(f"API Key: {api_key} is not available.")
             return None
         balance_infos = data.get("balance_infos", [])
         total_balance_cny = 0.0
         usd_balance = 0.0
          return None
 def get_usd_to_cny_rate():
     try:
         response = requests.get("https://api.exchangerate-api.com/v4/latest/USD")
         response.raise_for_status()
     logging.info(f"所有文本模型列表：{text_models}")
 def load_keys():
     keys_str = os.environ.get("KEYS")
     keys = [key.strip() for key in keys_str.split(',')]
     unique_keys = list(set(keys))
     valid_keys_global = valid_keys
 def process_key(key):
     credit_summary = get_credit_summary(key)
     if credit_summary is None:
         return "invalid"
             return "valid"
 def select_key(model_name):
     available_keys = valid_keys_global
     current_index = model_key_indices.get(model_name, 0)
     return None
 def check_authorization(request):
     authorization_key = os.environ.get("AUTHORIZATION_KEY")
     if not authorization_key:
         logging.warning("环境变量 AUTHORIZATION_KEY 未设置，请设置后重试。")
         logging.warning("请求头中缺少 Authorization 字段。")
         return False
+    # if auth_header != f"Bearer {authorization_key}":
+    #     logging.warning(f"无效的 Authorization 密钥：{auth_header}")
+    #     return False
     return True
                 )
     return jsonify(results)
 @app.route('/handsome/v1/models', methods=['GET'])
 def list_models():
     if not check_authorization(request):
         return jsonify({"error": "Unauthorized"}), 401
     detailed_models = [
         {
             "id": "deepseek-chat",
         "total_usage": 0
     })
 @app.route('/handsome/v1/chat/completions', methods=['POST'])
 def handsome_chat_completions():
     if not check_authorization(request):
         return jsonify({"error": "Invalid request data"}), 400
     model_name = data['model']
     api_key = select_key(model_name)
     if not api_key:
                 )
             }
         ), 429
     if model_name == "deepseek-reasoner":
         for param in ["temperature", "top_p", "presence_penalty", "frequency_penalty", "logprobs", "top_logprobs"]:
             if param in data:
         "Authorization": f"Bearer {api_key}",
         "Content-Type": "application/json"
     }
     try:
         start_time = time.time()
         response = requests.post(
             def generate():
                 first_chunk_time = None
                 full_response_content = ""
+                reasoning_content_accumulated = ""
+                content_accumulated = ""
+                first_reasoning_chunk = True
                 for chunk in response.iter_content(chunk_size=1024):
                     if chunk:
                         if first_chunk_time is None:
                             first_chunk_time = time.time()
                         full_response_content += chunk.decode("utf-8")
+                        for line in chunk.decode("utf-8").splitlines():
+                            if line.startswith("data:"):
+                                try:
+                                    chunk_json = json.loads(line.lstrip("data: ").strip())
+                                    if "choices" in chunk_json and len(chunk_json["choices"]) > 0:
+                                        delta = chunk_json["choices"][0].get("delta", {})
+                                        if delta.get("reasoning_content") is not None:
+                                            reasoning_chunk = delta["reasoning_content"]
+                                            if first_reasoning_chunk:
+                                                reasoning_chunk = "```Thinking\n" + reasoning_chunk
+                                                first_reasoning_chunk = False
+                                            yield f"data: {json.dumps({'choices': [{'delta': {'content': reasoning_chunk}, 'index': 0}]})}\n\n"
+                                        if delta.get("content") is not None:
+                                            if not first_reasoning_chunk:
+                                                yield f"data: {json.dumps({'choices': [{'delta': {'content': '\n```'}, 'index': 0}]})}\n\n"
+                                                first_reasoning_chunk = True
+                                            yield f"data: {json.dumps({'choices': [{'delta': {'content': delta["content"]}, 'index': 0}]})}\n\n"
+                                except (KeyError, ValueError, json.JSONDecodeError) as e:
+                                    logging.error(f"解析流式响应单行 JSON 失败: {e}, 行内容: {line}")
+                                    continue
                 end_time = time.time()
                 first_token_time = (
                 prompt_tokens = 0
                 completion_tokens = 0
                 for line in full_response_content.splitlines():
                     if line.startswith("data:"):
                         line = line[5:].strip()
                                 "usage" in response_json and
                                 "completion_tokens" in response_json["usage"]
                             ):
+                                completion_tokens += response_json[
                                     "usage"
                                 ]["completion_tokens"]
                             if (
                                 "usage" in response_json and
                                 "prompt_tokens" in response_json["usage"]
                 user_content_replaced = user_content.replace(
                     '\n', '\\n'
                 ).replace('\r', '\\n')
+                response_content_replaced = (f"```Thinking\n{reasoning_content_accumulated}\n```\n" if reasoning_content_accumulated else "") + content_accumulated
+                response_content_replaced = response_content_replaced.replace(
                     '\n', '\\n'
                 ).replace('\r', '\\n')
                 with data_lock:
                     request_timestamps.append(time.time())
+                    token_counts.append(prompt_tokens + completion_tokens)
+                yield "data: [DONE]\n\n"
             return Response(
                 stream_with_context(generate()),
+                content_type="text/event-stream"
             )
         else:
+            # ... (Non-streaming part remains the same as in the previous response)
             response.raise_for_status()
             end_time = time.time()
             response_json = response.json()
             try:
                 prompt_tokens = response_json["usage"]["prompt_tokens"]
+                completion_tokens = response_json["usage"]["completion_tokens"]
+                response_content = ""
+                # Special handling for deepseek-reasoner in non-streaming mode
+                if model_name == "deepseek-reasoner" and "choices" in response_json and len(response_json["choices"]) > 0:
+                    choice = response_json["choices"][0]
+                    if "message" in choice:
+                        if "reasoning_content" in choice["message"]:
+                            formatted_reasoning = f"```Thinking\n{choice['message']['reasoning_content']}\n```"
+                            response_content += formatted_reasoning + "\n"
+                        if "content" in choice["message"]:
+                            response_content += choice["message"]["content"]
+                elif "choices" in response_json and len(response_json["choices"]) > 0:
+                    response_content = response_json["choices"][0]["message"]["content"]
             except (KeyError, ValueError, IndexError) as e:
                 logging.error(
                     f"解析非流式响应 JSON 失败: {e}, "
                                 item.get("type") == "text"
                             ):
                                 user_content += (
+                                    item.get("text", "") +
+                                    " "
                                 )
             user_content = user_content.strip()
             )
             with data_lock:
                 request_timestamps.append(time.time())
+                token_counts.append(prompt_tokens + completion_tokens)
+            # Reformat the response to standard OpenAI format for non-streaming responses
+            formatted_response = {
+                "id": response_json.get("id", ""),
+                "object": "chat.completion",
+                "created": response_json.get("created", int(time.time())),
+                "model": model_name,
+                "choices": [
+                    {
+                        "index": 0,
+                        "message": {
+                            "role": "assistant",
+                            "content": response_content
+                        },
+                        "finish_reason": "stop"
+                    }
+                ],
+                "usage": {
+                    "prompt_tokens": prompt_tokens,
+                    "completion_tokens": completion_tokens,
+                    "total_tokens": prompt_tokens + completion_tokens
+                }
+            }
+            return jsonify(formatted_response)
     except requests.exceptions.RequestException as e:
         logging.error(f"请求转发异常: {e}")
         debug=False,
         host='0.0.0.0',
         port=int(os.environ.get('PORT', 7860))
+    )