gccnb commited on
Commit
8258a03
·
verified ·
1 Parent(s): a51b2df

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -925
app.py DELETED
@@ -1,925 +0,0 @@
1
- #!/usr/bin/env python3
2
- import json
3
- import base64
4
- import time
5
- import logging
6
- from curl_cffi import requests
7
- import random
8
- from flask import Flask, render_template, request, Response, stream_with_context, jsonify, g
9
- import os
10
- import struct
11
- import ctypes
12
- from wasmtime import Store, Module, Linker
13
- import re
14
- import transformers
15
- import queue
16
- import threading
17
-
18
- # -------------------------- 初始化 tokenizer --------------------------
19
- chat_tokenizer_dir = "THUDM/chatglm2-6b" # 使用现成的模型tokenizer
20
- tokenizer = transformers.AutoTokenizer.from_pretrained(
21
- chat_tokenizer_dir,
22
- trust_remote_code=True,
23
- use_fast=False # 使用慢速tokenizer避免fast tokenizer的转换问题
24
- )
25
-
26
- # ----------------------------------------------------------------------
27
- # =========================== 日志配置 ===========================
28
- logging.basicConfig(
29
- level=logging.INFO,
30
- format='%(asctime)s [%(levelname)s] %(name)s: %(message)s'
31
- )
32
- app = Flask(__name__)
33
-
34
- # -------------------- 全局添加 CORS 支持 --------------------
35
- @app.before_request
36
- def handle_options_request():
37
- if request.method == 'OPTIONS':
38
- response = Response()
39
- response.headers["Access-Control-Allow-Origin"] = "*"
40
- response.headers["Access-Control-Allow-Headers"] = "Content-Type, Authorization"
41
- response.headers["Access-Control-Allow-Methods"] = "GET, POST, OPTIONS, PUT, DELETE"
42
- return response
43
-
44
- @app.after_request
45
- def add_cors_headers(response):
46
- response.headers["Access-Control-Allow-Origin"] = "*"
47
- response.headers["Access-Control-Allow-Headers"] = "Content-Type, Authorization"
48
- response.headers["Access-Control-Allow-Methods"] = "GET, POST, OPTIONS"
49
- return response
50
-
51
- # ----------------------------------------------------------------------
52
- # 全局集合:记录当前正在对话中的账号(以 email 或 phone 标识),保证同一账号同时只进行一个对话
53
- active_accounts = set()
54
-
55
- # ----------------------------------------------------------------------
56
- # (1) 配置文件的读写函数
57
- # ----------------------------------------------------------------------
58
- CONFIG_PATH = "config.json"
59
-
60
- def load_config():
61
- """从环境变量加载配置"""
62
- config = {
63
- "keys": [],
64
- "accounts": []
65
- }
66
-
67
- # 从环境变量读取API keys
68
- api_keys = os.getenv("DEEPSEEK_API_KEYS", "").strip()
69
- if api_keys:
70
- config["keys"] = [k.strip() for k in api_keys.split(",") if k.strip()]
71
-
72
- # 从环境变量读取账号信息
73
- # 格式:
74
- # - 使用email登录: email:password:token(可选)
75
- # - 使用mobile登录: mobile:password:token(可选)
76
- accounts_str = os.getenv("DEEPSEEK_ACCOUNTS", "").strip()
77
- if accounts_str:
78
- for acc in accounts_str.split(","):
79
- parts = [p.strip() for p in acc.split(":") if p.strip()]
80
- if len(parts) >= 2: # 至少需要账号和密码
81
- account = {}
82
- # 根据第一个参数是否包含@判断是email还是mobile
83
- if "@" in parts[0]:
84
- account["email"] = parts[0]
85
- else:
86
- account["mobile"] = parts[0]
87
- account["password"] = parts[1]
88
- # 如果有第三个参数,则为token
89
- if len(parts) > 2:
90
- account["token"] = parts[2]
91
- config["accounts"].append(account)
92
-
93
- return config
94
-
95
- def save_config(cfg):
96
- """
97
- 由于使用环境变量,此函数仅更新内存中的CONFIG
98
- token更新后需要手动同步到环境变量中
99
- """
100
- global CONFIG
101
- CONFIG = cfg
102
- # 可选:打印提示信息
103
- app.logger.info("[save_config] 配置已更新(仅内存)")
104
-
105
- CONFIG = load_config()
106
-
107
- # ----------------------------------------------------------------------
108
- # (2) DeepSeek 相关常量
109
- # ----------------------------------------------------------------------
110
- DEEPSEEK_HOST = "chat.deepseek.com"
111
-
112
- DEEPSEEK_LOGIN_URL = f"https://{DEEPSEEK_HOST}/api/v0/users/login"
113
- DEEPSEEK_CREATE_SESSION_URL = f"https://{DEEPSEEK_HOST}/api/v0/chat_session/create"
114
- DEEPSEEK_CREATE_POW_URL = f"https://{DEEPSEEK_HOST}/api/v0/chat/create_pow_challenge"
115
- DEEPSEEK_COMPLETION_URL = f"https://{DEEPSEEK_HOST}/api/v0/chat/completion"
116
-
117
- BASE_HEADERS = {
118
- 'Host': "chat.deepseek.com",
119
- 'User-Agent': "DeepSeek/1.0.13 Android/35",
120
- 'Accept': "application/json",
121
- 'Accept-Encoding': "gzip",
122
- 'Content-Type': "application/json",
123
- 'x-client-platform': "android",
124
- 'x-client-version': "1.0.13",
125
- 'x-client-locale': "zh_CN",
126
- 'accept-charset': "UTF-8",
127
- }
128
-
129
- # WASM 模块文件路径(请确保文件存在)
130
- WASM_PATH = "sha3_wasm_bg.7b9ca65ddd.wasm"
131
-
132
- # ----------------------------------------------------------------------
133
- # 辅助函数:获取账号唯一标识(优先 email,否则 mobile)
134
- # ----------------------------------------------------------------------
135
- def get_account_identifier(account):
136
- """返回账号的唯一标识,优先使用 email,否则使用 mobile"""
137
- return account.get("email", "").strip() or account.get("mobile", "").strip()
138
-
139
- # ----------------------------------------------------------------------
140
- # (3) 登录函数:支持使用 email 或 mobile 登录
141
- # ----------------------------------------------------------------------
142
- def login_deepseek_via_account(account):
143
- """使用 account 中的 email 或 mobile 登录 DeepSeek,
144
- 成功后将返回的 token 写入 account 并保存至配置文件,返回新 token。"""
145
- email = account.get("email", "").strip()
146
- mobile = account.get("mobile", "").strip()
147
- password = account.get("password", "").strip()
148
- if not password or (not email and not mobile):
149
- raise ValueError("账号缺少必要的登录信息(必须提供 email 或 mobile 以及 password)")
150
-
151
- if email:
152
- app.logger.info(f"[login_deepseek_via_account] 正在使用 email 登录账号:{email}")
153
- payload = {
154
- "email": email,
155
- "mobile": "",
156
- "password": password,
157
- "area_code": "",
158
- "device_id": "deepseek_to_api",
159
- "os": "android"
160
- }
161
- else:
162
- app.logger.info(f"[login_deepseek_via_account] 正在使用 mobile 登录账号:{mobile}")
163
- payload = {
164
- "mobile": mobile,
165
- "area_code": None,
166
- "password": password,
167
- "device_id": "deepseek_to_api",
168
- "os": "android"
169
- }
170
-
171
- # 增加 timeout 参数,防止请求阻塞过久
172
- resp = requests.post(DEEPSEEK_LOGIN_URL, headers=BASE_HEADERS, json=payload, timeout=30)
173
- app.logger.debug(f"[login_deepseek_via_account] 状态码: {resp.status_code}")
174
- app.logger.debug(f"[login_deepseek_via_account] 响应体: {resp.text}")
175
- resp.raise_for_status()
176
- data = resp.json()
177
- if data.get("code") != 0:
178
- raise ValueError(f"登录失败, code={data.get('code')}, msg={data.get('msg')}")
179
-
180
- new_token = data["data"]["biz_data"]["user"]["token"]
181
- account["token"] = new_token
182
- save_config(CONFIG)
183
- identifier = email if email else mobile
184
- app.logger.info(f"[login_deepseek_via_account] 成功登录账号 {identifier},token: {new_token}")
185
- return new_token
186
-
187
- # ----------------------------------------------------------------------
188
- # -------------------------- 全局账号队列 --------------------------
189
- account_queue = [] # 维护所有可用账号
190
-
191
- def init_account_queue():
192
- """初始化时从配置加载账号"""
193
- global account_queue
194
- account_queue = CONFIG.get("accounts", [])[:] # 深拷贝
195
- random.shuffle(account_queue) # 初始随机排序
196
-
197
- init_account_queue()
198
-
199
- def choose_new_account():
200
- """选择策略:
201
- 1. 遍历队列,找到第一个未被 exclude_ids 包含的账号
202
- 2. 从队列中移除该账号
203
- 3. 返回该账号(由后续逻辑保证最终会重新入队)
204
- """
205
- for i in range(len(account_queue)):
206
- acc = account_queue[i]
207
- acc_id = get_account_identifier(acc)
208
- if acc_id:
209
- # 从队列中移除并返回
210
- return account_queue.pop(i)
211
- app.logger.warning("[choose_new_account] 没有可用的账号或所有账号都在使用中")
212
- return None
213
-
214
- def release_account(account):
215
- """将账号重新加入队列末尾"""
216
- account_queue.append(account)
217
-
218
- # ----------------------------------------------------------------------
219
- # (5) 判断调用模式:配置模式 vs 用户自带 token
220
- # ----------------------------------------------------------------------
221
- def determine_mode_and_token():
222
- """根据请求头 Authorization 判断使用哪种模式:
223
- - 如果 Bearer token 出现在 CONFIG["keys"] 中,则为配置模式,从 CONFIG["accounts"] 中随机选择一个账号(排除已尝试账号),
224
- 检查该账号是否已有 token,否则调用登录接口获取;
225
- - 否则,直接使用请求中的 Bearer 值作为 DeepSeek token。
226
- 结果存入 g.deepseek_token;配置模式下同时存入 g.account 与 g.tried_accounts。
227
- """
228
- auth_header = request.headers.get("Authorization", "")
229
- if not auth_header.startswith("Bearer "):
230
- return Response(json.dumps({"error": "Unauthorized: missing Bearer token."}),
231
- status=401, mimetype="application/json")
232
- caller_key = auth_header.replace("Bearer ", "", 1).strip()
233
- config_keys = CONFIG.get("keys", [])
234
- if caller_key in config_keys:
235
- g.use_config_token = True
236
- g.tried_accounts = [] # 初始化已尝试账号
237
- selected_account = choose_new_account()
238
- if not selected_account:
239
- return Response(json.dumps({"error": "No accounts configured or all accounts are busy."}),
240
- status=429, mimetype="application/json")
241
- if not selected_account.get("token", "").strip():
242
- try:
243
- login_deepseek_via_account(selected_account)
244
- except Exception as e:
245
- app.logger.error(f"[determine_mode_and_token] 账号 {get_account_identifier(selected_account)} 登录失败:{e}")
246
- return Response(json.dumps({"error": "Account login failed."}),
247
- status=500, mimetype="application/json")
248
- g.deepseek_token = selected_account.get("token")
249
- g.account = selected_account
250
- else:
251
- g.use_config_token = False
252
- g.deepseek_token = caller_key
253
- return None
254
-
255
- def get_auth_headers():
256
- """返回 DeepSeek 请求所需的公共请求头"""
257
- return { **BASE_HEADERS, "authorization": f"Bearer {g.deepseek_token}" }
258
-
259
- # ----------------------------------------------------------------------
260
- # (6) 封装对话接口调用的重试机制
261
- # ----------------------------------------------------------------------
262
- def call_completion_endpoint(payload, headers, stream, max_attempts=3):
263
- attempts = 0
264
- while attempts < max_attempts:
265
- try:
266
- deepseek_resp = requests.post(DEEPSEEK_COMPLETION_URL, headers=headers, json=payload, stream=stream)
267
- except Exception as e:
268
- app.logger.warning(f"[call_completion_endpoint] 请求异常: {e}")
269
- time.sleep(1)
270
- attempts += 1
271
- continue
272
- if deepseek_resp.status_code == 200:
273
- return deepseek_resp
274
- else:
275
- app.logger.warning(f"[call_completion_endpoint] 调用对话接口失败, 状态码: {deepseek_resp.status_code}")
276
- deepseek_resp.close()
277
- time.sleep(1)
278
- attempts += 1
279
- return None
280
-
281
- # ----------------------------------------------------------------------
282
- # (7) 创建会话 & 获取 PoW(重试时,配置模式下错误会切换账号;用户自带 token 模式下仅重试)
283
- # ----------------------------------------------------------------------
284
- def create_session(max_attempts=3):
285
- attempts = 0
286
- while attempts < max_attempts:
287
- headers = get_auth_headers()
288
- try:
289
- resp = requests.post(DEEPSEEK_CREATE_SESSION_URL, headers=headers, json={"agent": "chat"}, timeout=30)
290
- except Exception as e:
291
- app.logger.error(f"[create_session] 请求异常: {e}")
292
- attempts += 1
293
- continue
294
- try:
295
- data = resp.json()
296
- except Exception as e:
297
- app.logger.error(f"[create_session] JSON解析异常: {e}")
298
- data = {}
299
- if resp.status_code == 200 and data.get("code") == 0:
300
- session_id = data["data"]["biz_data"]["id"]
301
- app.logger.info(f"[create_session] 新会话 chat_session_id={session_id}")
302
- resp.close()
303
- return session_id
304
- else:
305
- code = data.get("code")
306
- app.logger.warning(f"[create_session] 创建会话失败, code={code}, msg={data.get('msg')}")
307
- resp.close()
308
- if g.use_config_token:
309
- current_id = get_account_identifier(g.account)
310
- if not hasattr(g, 'tried_accounts'):
311
- g.tried_accounts = []
312
- if current_id not in g.tried_accounts:
313
- g.tried_accounts.append(current_id)
314
- new_account = choose_new_account()
315
- if new_account is None:
316
- break
317
- try:
318
- login_deepseek_via_account(new_account)
319
- except Exception as e:
320
- app.logger.error(f"[create_session] 账号 {get_account_identifier(new_account)} 登录失败:{e}")
321
- attempts += 1
322
- continue
323
- g.account = new_account
324
- g.deepseek_token = new_account.get("token")
325
- else:
326
- attempts += 1
327
- continue
328
- attempts += 1
329
- return None
330
-
331
- # ----------------------------------------------------------------------
332
- # (7.1) 使用 WASM 模块计算 PoW 答案的辅助函数
333
- # ----------------------------------------------------------------------
334
- def compute_pow_answer(algorithm: str,
335
- challenge_str: str,
336
- salt: str,
337
- difficulty: int,
338
- expire_at: int,
339
- signature: str,
340
- target_path: str,
341
- wasm_path: str) -> int:
342
- """
343
- 使用 WASM 模块计算 DeepSeekHash 答案(answer)。
344
- 根据 JS 逻辑:
345
- - 拼接前缀: "{salt}_{expire_at}_"
346
- - 将 challenge 与前缀写入 wasm 内存后调用 wasm_solve 进行求解,
347
- - 从 wasm 内存中读取状态与求解结果,
348
- - 若状态非 0,则返回整数形式的答案,否则返回 None。
349
- """
350
- if algorithm != "DeepSeekHashV1":
351
- raise ValueError(f"不支持的算法:{algorithm}")
352
-
353
- prefix = f"{salt}_{expire_at}_"
354
-
355
- # --- 加载 wasm 模块 ---
356
- store = Store()
357
- linker = Linker(store.engine)
358
- try:
359
- with open(wasm_path, "rb") as f:
360
- wasm_bytes = f.read()
361
- except Exception as e:
362
- raise RuntimeError(f"加载 wasm 文件失败: {wasm_path}, 错误: {e}")
363
- module = Module(store.engine, wasm_bytes)
364
- instance = linker.instantiate(store, module)
365
- exports = instance.exports(store)
366
- try:
367
- memory = exports["memory"]
368
- add_to_stack = exports["__wbindgen_add_to_stack_pointer"]
369
- alloc = exports["__wbindgen_export_0"]
370
- wasm_solve = exports["wasm_solve"]
371
- except KeyError as e:
372
- raise RuntimeError(f"缺少 wasm 导出函数: {e}")
373
-
374
- def write_memory(offset: int, data: bytes):
375
- size = len(data)
376
- base_addr = ctypes.cast(memory.data_ptr(store), ctypes.c_void_p).value
377
- ctypes.memmove(base_addr + offset, data, size)
378
-
379
- def read_memory(offset: int, size: int) -> bytes:
380
- base_addr = ctypes.cast(memory.data_ptr(store), ctypes.c_void_p).value
381
- return ctypes.string_at(base_addr + offset, size)
382
-
383
- def encode_string(text: str):
384
- data = text.encode("utf-8")
385
- length = len(data)
386
- ptr_val = alloc(store, length, 1)
387
- ptr = int(ptr_val.value) if hasattr(ptr_val, "value") else int(ptr_val)
388
- write_memory(ptr, data)
389
- return ptr, length
390
-
391
- # 1. 申请 16 字节栈空间
392
- retptr = add_to_stack(store, -16)
393
- # 2. 编码 challenge 与 prefix 到 wasm 内存中
394
- ptr_challenge, len_challenge = encode_string(challenge_str)
395
- ptr_prefix, len_prefix = encode_string(prefix)
396
- # 3. 调用 wasm_solve(注意:difficulty 以 float 形式传入)
397
- wasm_solve(store, retptr, ptr_challenge, len_challenge, ptr_prefix, len_prefix, float(difficulty))
398
- # 4. 从 retptr 处读取 4 字节状态和 8 字节求解结果
399
- status_bytes = read_memory(retptr, 4)
400
- if len(status_bytes) != 4:
401
- add_to_stack(store, 16)
402
- raise RuntimeError("读取状态字节失败")
403
- status = struct.unpack("<i", status_bytes)[0]
404
- value_bytes = read_memory(retptr + 8, 8)
405
- if len(value_bytes) != 8:
406
- add_to_stack(store, 16)
407
- raise RuntimeError("读取结果字节失败")
408
- value = struct.unpack("<d", value_bytes)[0]
409
- # 5. 恢复栈指针
410
- add_to_stack(store, 16)
411
- if status == 0:
412
- return None
413
- return int(value)
414
-
415
- # ----------------------------------------------------------------------
416
- # (7.2) 获取 PoW 响应,融合计算 answer 逻辑
417
- # ----------------------------------------------------------------------
418
- def get_pow_response(max_attempts=3):
419
- attempts = 0
420
- while attempts < max_attempts:
421
- headers = get_auth_headers()
422
- try:
423
- resp = requests.post(DEEPSEEK_CREATE_POW_URL, headers=headers, json={"target_path": "/api/v0/chat/completion"}, timeout=30)
424
- except Exception as e:
425
- app.logger.error(f"[get_pow_response] 请求异常: {e}")
426
- attempts += 1
427
- continue
428
- try:
429
- data = resp.json()
430
- except Exception as e:
431
- app.logger.error(f"[get_pow_response] JSON解析异常: {e}")
432
- data = {}
433
- if resp.status_code == 200 and data.get("code") == 0:
434
- challenge = data["data"]["biz_data"]["challenge"]
435
- difficulty = challenge.get("difficulty", 144000)
436
- expire_at = challenge.get("expire_at", 1680000000)
437
- try:
438
- answer = compute_pow_answer(
439
- challenge["algorithm"],
440
- challenge["challenge"],
441
- challenge["salt"],
442
- difficulty,
443
- expire_at,
444
- challenge["signature"],
445
- challenge["target_path"],
446
- WASM_PATH
447
- )
448
- except Exception as e:
449
- app.logger.error(f"[get_pow_response] PoW 答案计算异常: {e}")
450
- answer = None
451
- if answer is None:
452
- app.logger.warning("[get_pow_response] PoW 答案计算失败,重试中...")
453
- resp.close()
454
- attempts += 1
455
- continue
456
-
457
- pow_dict = {
458
- "algorithm": challenge["algorithm"],
459
- "challenge": challenge["challenge"],
460
- "salt": challenge["salt"],
461
- "answer": answer, # 整数形式答案
462
- "signature": challenge["signature"],
463
- "target_path": challenge["target_path"]
464
- }
465
- pow_str = json.dumps(pow_dict, separators=(',', ':'), ensure_ascii=False)
466
- encoded = base64.b64encode(pow_str.encode("utf-8")).decode("utf-8").rstrip("=")
467
- resp.close()
468
- return encoded
469
- else:
470
- code = data.get("code")
471
- app.logger.warning(f"[get_pow_response] 获取 PoW 失败, code={code}, msg={data.get('msg')}")
472
- resp.close()
473
- if g.use_config_token:
474
- current_id = get_account_identifier(g.account)
475
- if not hasattr(g, 'tried_accounts'):
476
- g.tried_accounts = []
477
- if current_id not in g.tried_accounts:
478
- g.tried_accounts.append(current_id)
479
- new_account = choose_new_account()
480
- if new_account is None:
481
- break
482
- try:
483
- login_deepseek_via_account(new_account)
484
- except Exception as e:
485
- app.logger.error(f"[get_pow_response] 账号 {get_account_identifier(new_account)} 登录失败:{e}")
486
- attempts += 1
487
- continue
488
- g.account = new_account
489
- g.deepseek_token = new_account.get("token")
490
- else:
491
- attempts += 1
492
- continue
493
- attempts += 1
494
- return None
495
-
496
- # ----------------------------------------------------------------------
497
- # (8) 路由:/v1/models(模拟 OpenAI 模型列表)
498
- # ----------------------------------------------------------------------
499
- @app.route("/hf/v1/models", methods=["GET"])
500
- def list_models():
501
- app.logger.info("[list_models] 用户请求 /v1/models")
502
- models_list = [
503
- {
504
- "id": "deepseek-chat",
505
- "object": "model",
506
- "created": 1677610602,
507
- "owned_by": "deepseek",
508
- "permission": []
509
- },
510
- {
511
- "id": "deepseek-reasoner",
512
- "object": "model",
513
- "created": 1677610602,
514
- "owned_by": "deepseek",
515
- "permission": []
516
- },
517
- {
518
- "id": "deepseek-chat-search",
519
- "object": "model",
520
- "created": 1677610602,
521
- "owned_by": "deepseek",
522
- "permission": []
523
- },
524
- {
525
- "id": "deepseek-reasoner-search",
526
- "object": "model",
527
- "created": 1677610602,
528
- "owned_by": "deepseek",
529
- "permission": []
530
- }
531
- ]
532
- data = {"object": "list", "data": models_list}
533
- return jsonify(data), 200
534
-
535
- # ----------------------------------------------------------------------
536
- # (新增) 消息预处理函数,将多轮对话合并成最终 prompt
537
- # ----------------------------------------------------------------------
538
- def messages_prepare(messages: list) -> str:
539
- """处理消息列表,合并连续相同角色的消息,并添加角色标签:
540
- - 对于 assistant 消息,加上 <|Assistant|> 前缀及 结束标签;
541
- - 对于 user/system 消息(除第一条外)加上 结束标签;
542
- - 如果消息 content 为数组,则提取其中 type 为 "text" 的部分;
543
- - 最后移除 markdown 图片格式的内容。
544
- """
545
- processed = []
546
- for m in messages:
547
- role = m.get("role", "")
548
- content = m.get("content", "")
549
- if isinstance(content, list):
550
- texts = [item.get("text", "") for item in content if item.get("type") == "text"]
551
- text = "\n".join(texts)
552
- else:
553
- text = str(content)
554
- processed.append({"role": role, "text": text})
555
- if not processed:
556
- return ""
557
- # 合并连续同一角色的消息
558
- merged = [processed[0]]
559
- for msg in processed[1:]:
560
- if msg["role"] == merged[-1]["role"]:
561
- merged[-1]["text"] += "\n\n" + msg["text"]
562
- else:
563
- merged.append(msg)
564
- # 添加标签
565
- parts = []
566
- for idx, block in enumerate(merged):
567
- role = block["role"]
568
- text = block["text"]
569
- if role == "assistant":
570
- parts.append(f"<|Assistant|>{text}")
571
- elif role in ("user", "system"):
572
- if idx > 0:
573
- parts.append(f"结束标签")
574
- else:
575
- parts.append(text)
576
- else:
577
- parts.append(text)
578
- final_prompt = "".join(parts)
579
- # 仅移除 markdown 图片格式(不全部移除 !)
580
- final_prompt = re.sub(r"!\[(.*?)\]\((.*?)\)", r"[\1](\2)", final_prompt)
581
- return final_prompt
582
-
583
- # ----------------------------------------------------------------------
584
- # (10) 路由:/v1/chat/completions
585
- # ----------------------------------------------------------------------
586
- @app.route("/hf/v1/chat/completions", methods=["POST"])
587
- def chat_completions():
588
- mode_resp = determine_mode_and_token()
589
- if mode_resp:
590
- return mode_resp
591
-
592
- try:
593
- req_data = request.json or {}
594
- app.logger.info(f"[chat_completions] 收到请求: {req_data}")
595
- model = req_data.get("model")
596
- messages = req_data.get("messages", [])
597
- if not model or not messages:
598
- return jsonify({"error": "Request must include 'model' and 'messages'."}), 400
599
-
600
- # 判断是否启用"思考"功能(这里根据模型名称判断)
601
- model_lower = model.lower()
602
- if model_lower in ["deepseek-v3", "deepseek-chat"]:
603
- thinking_enabled = False
604
- search_enabled = False
605
- elif model_lower in ["deepseek-r1", "deepseek-reasoner"]:
606
- thinking_enabled = True
607
- search_enabled = False
608
- elif model_lower in ["deepseek-v3-search", "deepseek-chat-search"]:
609
- thinking_enabled = False
610
- search_enabled = True
611
- elif model_lower in ["deepseek-r1-search", "deepseek-reasoner-search"]:
612
- thinking_enabled = True
613
- search_enabled = True
614
- else:
615
- return Response(json.dumps({"error": f"Model '{model}' is not available."}),
616
- status=503, mimetype="application/json")
617
-
618
- # 使用 messages_prepare 函数构造最终 prompt
619
- final_prompt = messages_prepare(messages)
620
- app.logger.debug(f"[chat_completions] 最终 Prompt: {final_prompt}")
621
-
622
- session_id = create_session()
623
- if not session_id:
624
- return jsonify({"error": "invalid token."}), 401
625
-
626
- pow_resp = get_pow_response()
627
- if not pow_resp:
628
- return jsonify({"error": "Failed to get PoW (invalid token or unknown error)."}), 401
629
- app.logger.info(f"获取 PoW 成功: {pow_resp}")
630
-
631
- headers = {
632
- **get_auth_headers(),
633
- "x-ds-pow-response": pow_resp
634
- }
635
- payload = {
636
- "chat_session_id": session_id,
637
- "parent_message_id": None,
638
- "prompt": final_prompt,
639
- "ref_file_ids": [],
640
- "thinking_enabled": thinking_enabled,
641
- "search_enabled": search_enabled
642
- }
643
- app.logger.debug(f"[chat_completions] -> {DEEPSEEK_COMPLETION_URL}, payload={payload}")
644
-
645
- deepseek_resp = call_completion_endpoint(payload, headers, stream=bool(req_data.get("stream", False)), max_attempts=3)
646
- if not deepseek_resp:
647
- return jsonify({"error": "Failed to get completion."}), 500
648
-
649
- created_time = int(time.time())
650
- completion_id = f"{session_id}"
651
-
652
- # 流式响应:SSE 格式返回事件流
653
- if bool(req_data.get("stream", False)):
654
- if deepseek_resp.status_code != 200:
655
- deepseek_resp.close()
656
- return Response(deepseek_resp.content,
657
- status=deepseek_resp.status_code,
658
- mimetype="application/json")
659
-
660
- # 添加保活超时配置(5秒)
661
- KEEP_ALIVE_TIMEOUT = 5
662
-
663
- def sse_stream():
664
- try:
665
- final_text = ""
666
- final_thinking = ""
667
- first_chunk_sent = False
668
- result_queue = queue.Queue()
669
- last_send_time = time.time()
670
- citation_map = {} # 用于存储引用链接的字典
671
-
672
- def process_data():
673
- try:
674
- for raw_line in deepseek_resp.iter_lines():
675
- try:
676
- line = raw_line.decode("utf-8")
677
- except Exception as e:
678
- app.logger.warning(f"[sse_stream] 解码失败: {e}")
679
- busy_content_str = '{"choices":[{"index":0,"delta":{"content":"服务器繁忙,请稍候再试","type":"text"}}],"model":"","chunk_token_usage":1,"created":0,"message_id":-1,"parent_id":-1}'
680
- busy_content = json.loads(busy_content_str)
681
- result_queue.put(busy_content)
682
- result_queue.put(None)
683
- break
684
- if not line:
685
- continue
686
- if line.startswith("data:"):
687
- data_str = line[5:].strip()
688
- if data_str == "[DONE]":
689
- result_queue.put(None) # 结束信号
690
- break
691
- try:
692
- chunk = json.loads(data_str)
693
- # 处理搜索索引数据
694
- if chunk.get("choices", [{}])[0].get("delta", {}).get("type") == "search_index":
695
- search_indexes = chunk["choices"][0]["delta"].get("search_indexes", [])
696
- for idx in search_indexes:
697
- citation_map[str(idx.get("cite_index"))] = idx.get("url", "")
698
- continue
699
- result_queue.put(chunk) # 将数据放入队列
700
- except Exception as e:
701
- app.logger.warning(f"[sse_stream] 无法解析: {data_str}, 错误: {e}")
702
- busy_content_str = '{"choices":[{"index":0,"delta":{"content":"服务器繁忙,请稍候再试","type":"text"}}],"model":"","chunk_token_usage":1,"created":0,"message_id":-1,"parent_id":-1}'
703
- busy_content = json.loads(busy_content_str)
704
- result_queue.put(busy_content)
705
- result_queue.put(None)
706
- break
707
- except Exception as e:
708
- app.logger.warning(f"[sse_stream] 错误: {e}")
709
- busy_content_str = '{"choices":[{"index":0,"delta":{"content":"服务器繁忙,请稍候再试","type":"text"}}],"model":"","chunk_token_usage":1,"created":0,"message_id":-1,"parent_id":-1}'
710
- busy_content = json.loads(busy_content_str)
711
- result_queue.put(busy_content)
712
- result_queue.put(None)
713
- finally:
714
- deepseek_resp.close()
715
-
716
- process_thread = threading.Thread(target=process_data)
717
- process_thread.start()
718
-
719
- while True:
720
- current_time = time.time()
721
- if current_time - last_send_time >= KEEP_ALIVE_TIMEOUT:
722
- yield ": keep-alive\n\n"
723
- last_send_time = current_time
724
- continue
725
- try:
726
- chunk = result_queue.get(timeout=0.1)
727
- if chunk is None:
728
- # 发送最终统计信息
729
- prompt_tokens = len(tokenizer.encode(final_prompt))
730
- completion_tokens = len(tokenizer.encode(final_text))
731
- usage = {
732
- "prompt_tokens": prompt_tokens,
733
- "completion_tokens": completion_tokens,
734
- "total_tokens": prompt_tokens + completion_tokens,
735
- }
736
- finish_chunk = {
737
- "id": completion_id,
738
- "object": "chat.completion.chunk",
739
- "created": created_time,
740
- "model": model,
741
- "choices": [
742
- {
743
- "delta": {},
744
- "index": 0,
745
- "finish_reason": "stop",
746
- }
747
- ],
748
- "usage": usage,
749
- }
750
- yield f"data: {json.dumps(finish_chunk, ensure_ascii=False)}\n\n"
751
- yield "data: [DONE]\n\n"
752
- last_send_time = current_time
753
- break
754
- new_choices = []
755
- for choice in chunk.get("choices", []):
756
- delta = choice.get("delta", {})
757
- ctype = delta.get("type")
758
- ctext = delta.get("content", "")
759
- if choice.get("finish_reason") == "backend_busy":
760
- ctext = '服务器繁忙,请稍候再试'
761
- if search_enabled and ctext.startswith("[citation:"):
762
- ctext = ""
763
- if ctype == "thinking":
764
- if thinking_enabled:
765
- final_thinking += ctext
766
- elif ctype == "text":
767
- final_text += ctext
768
- delta_obj = {}
769
- if not first_chunk_sent:
770
- delta_obj["role"] = "assistant"
771
- first_chunk_sent = True
772
- if ctype == "thinking":
773
- if thinking_enabled:
774
- delta_obj["reasoning_content"] = ctext
775
- elif ctype == "text":
776
- delta_obj["content"] = ctext
777
- if delta_obj:
778
- new_choices.append(
779
- {
780
- "delta": delta_obj,
781
- "index": choice.get("index", 0),
782
- }
783
- )
784
- if new_choices:
785
- out_chunk = {
786
- "id": completion_id,
787
- "object": "chat.completion.chunk",
788
- "created": created_time,
789
- "model": model,
790
- "choices": new_choices,
791
- }
792
- yield f"data: {json.dumps(out_chunk, ensure_ascii=False)}\n\n"
793
- last_send_time = current_time
794
- except queue.Empty:
795
- continue
796
- except Exception as e:
797
- app.logger.error(f"[sse_stream] 异常: {e}")
798
- finally:
799
- deepseek_resp.close()
800
- if g.use_config_token:
801
- release_account(g.account)
802
- return Response(stream_with_context(sse_stream()), content_type="text/event-stream")
803
- else:
804
- # 非流式响应处理
805
- think_list = []
806
- text_list = []
807
- result = None
808
- citation_map = {} # 用于存储引用链接的字典
809
-
810
- data_queue = queue.Queue()
811
-
812
- def collect_data():
813
- nonlocal result
814
- try:
815
- for raw_line in deepseek_resp.iter_lines():
816
- try:
817
- line = raw_line.decode("utf-8")
818
- except Exception as e:
819
- app.logger.warning(f"[chat_completions] 解码失败: {e}")
820
- ctext = '服务器繁忙,请稍候再试'
821
- text_list.append(ctext)
822
- data_queue.put(None)
823
- break
824
- if not line:
825
- continue
826
- if line.startswith("data:"):
827
- data_str = line[5:].strip()
828
- if data_str == "[DONE]":
829
- data_queue.put(None)
830
- break
831
- try:
832
- chunk = json.loads(data_str)
833
- if chunk.get("choices", [{}])[0].get("delta", {}).get("type") == "search_index":
834
- search_indexes = chunk["choices"][0]["delta"].get("search_indexes", [])
835
- for idx in search_indexes:
836
- citation_map[str(idx.get("cite_index"))] = idx.get("url", "")
837
- continue
838
- for choice in chunk.get("choices", []):
839
- delta = choice.get("delta", {})
840
- ctype = delta.get("type")
841
- ctext = delta.get("content", "")
842
- if choice.get("finish_reason") == "backend_busy":
843
- ctext = '服务器繁忙,请稍候再试'
844
- if search_enabled and ctext.startswith("[citation:"):
845
- ctext = ""
846
- if ctype == "thinking" and thinking_enabled:
847
- think_list.append(ctext)
848
- elif ctype == "text":
849
- text_list.append(ctext)
850
- except Exception as e:
851
- app.logger.warning(f"[collect_data] 无法解析: {data_str}, 错误: {e}")
852
- ctext = '服务器繁忙,请稍候再试'
853
- text_list.append(ctext)
854
- data_queue.put(None)
855
- break
856
- except Exception as e:
857
- app.logger.warning(f"[collect_data] 错误: {e}")
858
- ctext = '服务器繁忙,请稍候再试'
859
- text_list.append(ctext)
860
- data_queue.put(None)
861
- finally:
862
- deepseek_resp.close()
863
- final_reasoning = "".join(think_list)
864
- final_content = "".join(text_list)
865
- prompt_tokens = len(tokenizer.encode(final_prompt))
866
- completion_tokens = len(tokenizer.encode(final_content))
867
- result = {
868
- "id": completion_id,
869
- "object": "chat.completion",
870
- "created": created_time,
871
- "model": model,
872
- "choices": [
873
- {
874
- "index": 0,
875
- "message": {
876
- "role": "assistant",
877
- "content": final_content,
878
- "reasoning_content": final_reasoning,
879
- },
880
- "finish_reason": "stop",
881
- }
882
- ],
883
- "usage": {
884
- "prompt_tokens": prompt_tokens,
885
- "completion_tokens": completion_tokens,
886
- "total_tokens": prompt_tokens + completion_tokens,
887
- },
888
- }
889
- data_queue.put("DONE")
890
-
891
- collect_thread = threading.Thread(target=collect_data)
892
- collect_thread.start()
893
-
894
- def generate():
895
- last_send_time = time.time()
896
- while True:
897
- current_time = time.time()
898
- if current_time - last_send_time >= KEEP_ALIVE_TIMEOUT:
899
- yield ""
900
- last_send_time = current_time
901
- if not collect_thread.is_alive() and result is not None:
902
- yield json.dumps(result)
903
- break
904
- time.sleep(0.1)
905
-
906
- return Response(generate(), mimetype="application/json")
907
- except Exception as e:
908
- app.logger.error(f"[chat_completions] 未知异常: {e}")
909
- return jsonify({"error": "Internal Server Error"}), 500
910
- finally:
911
- if g.use_config_token:
912
- release_account(g.account)
913
-
914
- # ----------------------------------------------------------------------
915
- # (11) 路由:/
916
- # ----------------------------------------------------------------------
917
- @app.route("/")
918
- def index():
919
- return render_template("welcome.html")
920
-
921
- # ----------------------------------------------------------------------
922
- # 启动 Flask 应用(直接使用 Flask 内置服务器)
923
- # ----------------------------------------------------------------------
924
- if __name__ == "__main__":
925
- app.run(host="0.0.0.0", port=7860, debug=False)