yxmiler commited on
Commit
1bb46df
·
verified ·
1 Parent(s): 3b6c5db

Upload kilo_proxy.py

Browse files
Files changed (1) hide show
  1. kilo_proxy.py +804 -0
kilo_proxy.py ADDED
@@ -0,0 +1,804 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import aiohttp
3
+ from aiohttp import web
4
+ import json
5
+ import logging
6
+ import os
7
+ import time
8
+ from typing import Dict, List, Optional, Any, Union
9
+ from collections import deque
10
+ from dataclasses import dataclass
11
+ from enum import Enum
12
+ import uuid
13
+ import sys
14
+
15
+ # 配置日志
16
+ logging.basicConfig(
17
+ level=logging.INFO,
18
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
19
+ handlers=[logging.StreamHandler(sys.stdout)]
20
+ )
21
+ logger = logging.getLogger(__name__)
22
+
23
+ class SystemRoleMode(Enum):
24
+ KEEP = "keep" # 保留system角色模式
25
+ CONVERT = "convert" # system角色转换为user模式
26
+
27
+ @dataclass
28
+ class TokenInfo:
29
+ token: str
30
+ failed_count: int = 0
31
+ last_used: float = 0
32
+ last_balance_check: float = 0
33
+
34
+ class ConfigManager:
35
+ """配置管理器"""
36
+
37
+ def __init__(self):
38
+ self.API_KEY = os.getenv('API_KEY', 'sk-123456')
39
+ self.TARGET_URL = os.getenv('TARGET_URL', 'https://kilocode.ai/api/openrouter/chat/completions')
40
+ self.BALANCE_CHECK_URL = os.getenv('BALANCE_CHECK_URL', 'https://kilocode.ai/api/profile/balance')
41
+ self.TARGET_HEADERS = {
42
+ 'Content-Type': 'application/json',
43
+ 'User-Agent': 'Kilo-Code/4.58.0',
44
+ 'Accept': 'application/json',
45
+ 'Accept-Encoding': 'br, gzip, deflate',
46
+ 'X-Stainless-Retry-Count': '0',
47
+ 'X-Stainless-Lang': 'js',
48
+ 'X-Stainless-Package-Version': '5.5.1',
49
+ 'X-Stainless-OS': 'Windows',
50
+ 'X-Stainless-Arch': 'x64',
51
+ 'X-Stainless-Runtime': 'node',
52
+ 'X-Stainless-Runtime-Version': 'v20.19.0',
53
+ 'HTTP-Referer': 'https://kilocode.ai',
54
+ 'X-Title': 'Kilo Code',
55
+ 'X-KiloCode-Version': '4.58.0',
56
+ 'accept-language': '*',
57
+ 'sec-fetch-mode': 'cors'
58
+ }
59
+ # 余额检测专用头部
60
+ self.BALANCE_CHECK_HEADERS = {
61
+ 'User-Agent': 'axios/1.9.0',
62
+ 'Connection': 'close',
63
+ 'Accept': 'application/json, text/plain, */*',
64
+ 'Accept-Encoding': 'gzip, compress, deflate, br',
65
+ 'Content-Type': 'application/json'
66
+ }
67
+ self.MAX_RETRIES = int(os.getenv('MAX_RETRIES', '3'))
68
+ self.MAX_CONCURRENT = int(os.getenv('MAX_CONCURRENT', '10'))
69
+ self.PORT = int(os.getenv('PORT', '25526'))
70
+ self.SYSTEM_ROLE_MODE = SystemRoleMode(os.getenv('SYSTEM_ROLE_MODE', 'keep'))
71
+
72
+ # 模型映射字典 - OpenAI模型映射到Kilo模型
73
+ self.MODEL_MAPPING = {
74
+ 'gemini-2.5-flash':'google/gemini-2.5-flash',
75
+ 'gemini-2.5-flash-thinking':'google/gemini-2.5-flash',
76
+ 'gemini-2.5-pro-thinking':'google/gemini-2.5-pro',
77
+ 'grok-4-07-09-thingking':'x-ai/grok-4',
78
+ 'claude-3-7-sonnet-20250219': 'anthropic/claude-3.7-sonnet',
79
+ 'claude-3-7-sonnet-20250219-thinking': 'anthropic/claude-3.7-sonnet',
80
+ 'claude-opus-4-20250514': 'anthropic/claude-opus-4',
81
+ 'claude-opus-4-20250514-thinking': 'anthropic/claude-opus-4',
82
+ 'claude-sonnet-4-20250514': 'anthropic/claude-sonnet-4',
83
+ 'claude-sonnet-4-20250514-thinking': 'anthropic/claude-sonnet-4'
84
+ }
85
+
86
+ # Token池配置
87
+ self.TOKEN_POOL = self._load_token_pool()
88
+ self.TOKEN_FAILURE_THRESHOLD = int(os.getenv('TOKEN_FAILURE_THRESHOLD', '3'))
89
+ # 余额检测配置
90
+ self.BALANCE_CHECK_INTERVAL = int(os.getenv('BALANCE_CHECK_INTERVAL', '3600')) # 余额检测间隔(秒)
91
+ self.MIN_BALANCE_THRESHOLD = float(os.getenv('MIN_BALANCE_THRESHOLD', '1.0')) # 最小余额阈值
92
+
93
+ def _load_token_pool(self) -> List[str]:
94
+ """加载Token池"""
95
+ tokens = os.getenv('TOKEN_POOL', '').split(',')
96
+ return [token.strip() for token in tokens if token.strip()]
97
+
98
+ class MessageProcessor:
99
+ """消息处理器"""
100
+
101
+ def __init__(self, config: ConfigManager):
102
+ self.config = config
103
+
104
+ def process_messages(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
105
+ """处理消息数组"""
106
+ if self.config.SYSTEM_ROLE_MODE == SystemRoleMode.KEEP:
107
+ return self._process_keep_system_mode(messages)
108
+ else:
109
+ return self._process_convert_system_mode(messages)
110
+
111
+ def _process_keep_system_mode(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
112
+ """保留system角色模式处理"""
113
+ if not messages:
114
+ return messages
115
+
116
+ result = []
117
+ i = 0
118
+
119
+ # 第一阶段:合并开头的连续system消息
120
+ if messages[0].get('role') == 'system':
121
+ merged_content = []
122
+ while i < len(messages) and messages[i].get('role') == 'system':
123
+ content = messages[i].get('content', '')
124
+ if content:
125
+ merged_content.append(self._extract_text_content(content))
126
+ i += 1
127
+
128
+ if merged_content:
129
+ result.append({
130
+ 'role': 'system',
131
+ 'content': [{'type': 'text', 'text': '\n'.join(merged_content)}]
132
+ })
133
+
134
+ # 第二阶段:处理剩余消息
135
+ while i < len(messages):
136
+ current_msg = messages[i].copy()
137
+
138
+ # 将后续的system消息转为user
139
+ if current_msg.get('role') == 'system':
140
+ current_msg['role'] = 'user'
141
+
142
+ # 确保content格式正确
143
+ current_msg = self._normalize_message_content(current_msg)
144
+
145
+ # 检查是否需要与前一个消息合并
146
+ if (result and
147
+ result[-1].get('role') == current_msg.get('role') and
148
+ self._can_merge_content(result[-1].get('content')) and
149
+ self._can_merge_content(current_msg.get('content'))):
150
+
151
+ # 合并内容
152
+ prev_content = self._extract_text_content(result[-1]['content'])
153
+ curr_content = self._extract_text_content(current_msg['content'])
154
+ result[-1]['content'] = [{'type': 'text', 'text': f"{prev_content}\n{curr_content}"}]
155
+ else:
156
+ result.append(current_msg)
157
+
158
+ i += 1
159
+
160
+ return result
161
+
162
+ def _process_convert_system_mode(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
163
+ """转换system角色模式处理"""
164
+ if not messages:
165
+ return messages
166
+
167
+ # 第一阶段:转换所有system为user
168
+ converted_messages = []
169
+ for msg in messages:
170
+ new_msg = msg.copy()
171
+ if new_msg.get('role') == 'system':
172
+ new_msg['role'] = 'user'
173
+ new_msg = self._normalize_message_content(new_msg)
174
+ converted_messages.append(new_msg)
175
+
176
+ # 第二阶段:合并连续的相同角色
177
+ result = []
178
+ for msg in converted_messages:
179
+ if (result and
180
+ result[-1].get('role') == msg.get('role') and
181
+ self._can_merge_content(result[-1].get('content')) and
182
+ self._can_merge_content(msg.get('content'))):
183
+
184
+ # 合并内容
185
+ prev_content = self._extract_text_content(result[-1]['content'])
186
+ curr_content = self._extract_text_content(msg['content'])
187
+ result[-1]['content'] = [{'type': 'text', 'text': f"{prev_content}\n{curr_content}"}]
188
+ else:
189
+ result.append(msg)
190
+
191
+ return result
192
+
193
+ def _normalize_message_content(self, message: Dict[str, Any]) -> Dict[str, Any]:
194
+ """标准化消息内容格式"""
195
+ content = message.get('content')
196
+ role = message.get('role')
197
+ tool_calls = message.get('tool_calls', None)
198
+
199
+ if role == 'tool' or tool_calls is not None:
200
+ return message
201
+ if isinstance(content, str):
202
+ message['content'] = [{'type': 'text', 'text': content}]
203
+ elif isinstance(content, list):
204
+ # 保持原有格式
205
+ pass
206
+ else:
207
+ message['content'] = [{'type': 'text', 'text': str(content)}]
208
+
209
+ return message
210
+
211
+ def _can_merge_content(self, content: Any) -> bool:
212
+ """判断内容是否可以合并"""
213
+ if isinstance(content, list) and len(content) == 1:
214
+ return content[0].get('type') == 'text'
215
+ return False
216
+
217
+ def _extract_text_content(self, content: Any) -> str:
218
+ """提取文本内容"""
219
+ if isinstance(content, str):
220
+ return content
221
+ elif isinstance(content, list) and len(content) == 1 and content[0].get('type') == 'text':
222
+ return content[0].get('text', '')
223
+ return str(content)
224
+
225
+ class TokenManager:
226
+ """Token管理器"""
227
+
228
+ def __init__(self, config: ConfigManager):
229
+ self.config = config
230
+ self.available_tokens = deque([TokenInfo(token) for token in config.TOKEN_POOL])
231
+ self.failed_tokens = deque()
232
+ self.lock = asyncio.Lock()
233
+ self.balance_check_task = None
234
+ self._shutdown_event = asyncio.Event()
235
+
236
+ async def start_balance_checker(self):
237
+ """启动余额检测后台任务"""
238
+ if self.balance_check_task is None:
239
+ self.balance_check_task = asyncio.create_task(self._balance_check_loop())
240
+ logger.info("余额检测后台任务已启动")
241
+
242
+ async def stop_balance_checker(self):
243
+ """停止余额检测后台任务"""
244
+ if self.balance_check_task:
245
+ self._shutdown_event.set()
246
+ try:
247
+ await asyncio.wait_for(self.balance_check_task, timeout=5.0)
248
+ except asyncio.TimeoutError:
249
+ self.balance_check_task.cancel()
250
+ self.balance_check_task = None
251
+ logger.info("余额检测后台任务已停止")
252
+
253
+ async def get_token(self) -> Optional[str]:
254
+ """获取可用token"""
255
+ async with self.lock:
256
+ # 如果没有可用token且有失败token,立即尝试恢复一次
257
+ if not self.available_tokens and self.failed_tokens:
258
+ await self._immediate_recovery_check()
259
+
260
+ if self.available_tokens:
261
+ token_info = self.available_tokens.popleft()
262
+ token_info.last_used = time.time()
263
+ return token_info.token
264
+
265
+ return None
266
+
267
+ async def return_token(self, token: str, success: bool = True):
268
+ """归还token"""
269
+ async with self.lock:
270
+ token_info = TokenInfo(token)
271
+
272
+ if success:
273
+ token_info.failed_count = 0
274
+ self.available_tokens.append(token_info)
275
+ else:
276
+ token_info.failed_count += 1
277
+ if token_info.failed_count >= self.config.TOKEN_FAILURE_THRESHOLD:
278
+ self.failed_tokens.append(token_info)
279
+ logger.warning(f"Token已移至失败池: {token[:10]}...")
280
+ else:
281
+ self.available_tokens.append(token_info)
282
+
283
+ async def _balance_check_loop(self):
284
+ """余额检测循环(后台任务)"""
285
+ logger.info(f"开始余额检测循环,检测间隔: {self.config.BALANCE_CHECK_INTERVAL}秒")
286
+
287
+ while not self._shutdown_event.is_set():
288
+ try:
289
+ await asyncio.wait_for(
290
+ self._shutdown_event.wait(),
291
+ timeout=self.config.BALANCE_CHECK_INTERVAL
292
+ )
293
+ break # 如果事件被设置,退出循环
294
+ except asyncio.TimeoutError:
295
+ pass # 超时是正常的,继续检测
296
+
297
+ # 执行余额检测
298
+ await self._check_failed_tokens_balance()
299
+
300
+ async def _immediate_recovery_check(self):
301
+ """立即恢复检测(当没有可用token时)"""
302
+ logger.info("没有可用token,立即执行恢复检测")
303
+ await self._check_failed_tokens_balance()
304
+
305
+ async def _check_failed_tokens_balance(self):
306
+ """检测失败token的余额状态"""
307
+ if not self.failed_tokens:
308
+ return
309
+
310
+ current_time = time.time()
311
+ tokens_to_check = []
312
+
313
+ # 收集需要检测的token
314
+ async with self.lock:
315
+ for token_info in list(self.failed_tokens):
316
+ # 检查是否需要进行余额检测(避免频繁检测同一个token)
317
+ if current_time - token_info.last_balance_check >= self.config.BALANCE_CHECK_INTERVAL:
318
+ tokens_to_check.append(token_info)
319
+
320
+ if not tokens_to_check:
321
+ return
322
+
323
+ logger.info(f"开始检测 {len(tokens_to_check)} 个失败token的余额")
324
+
325
+ # 并发检测所有失败token的余额
326
+ check_tasks = [
327
+ self._check_single_token_balance(token_info)
328
+ for token_info in tokens_to_check
329
+ ]
330
+
331
+ results = await asyncio.gather(*check_tasks, return_exceptions=True)
332
+
333
+ # 处理检测结果
334
+ recovered_tokens = []
335
+ async with self.lock:
336
+ for token_info, result in zip(tokens_to_check, results):
337
+ token_info.last_balance_check = current_time
338
+
339
+ if isinstance(result, Exception):
340
+ logger.warning(f"Token {token_info.token[:10]}... 余额检测失败: {str(result)}")
341
+ continue
342
+
343
+ if result: # 余额充足
344
+ # 从失败池中移除
345
+ try:
346
+ self.failed_tokens.remove(token_info)
347
+ token_info.failed_count = 0
348
+ recovered_tokens.append(token_info)
349
+ except ValueError:
350
+ pass # token可能已被其他地方移除
351
+
352
+ # 将恢复的token加入可用池
353
+ if recovered_tokens:
354
+ async with self.lock:
355
+ self.available_tokens.extend(recovered_tokens)
356
+ logger.info(f"成功恢复 {len(recovered_tokens)} 个token到可用池")
357
+
358
+ async def _check_single_token_balance(self, token_info: TokenInfo) -> bool:
359
+ """检测单个token的余额"""
360
+ try:
361
+ headers = self.config.BALANCE_CHECK_HEADERS.copy()
362
+ headers['Authorization'] = f'Bearer {token_info.token}'
363
+
364
+ timeout = aiohttp.ClientTimeout(total=10) # 10秒超时
365
+
366
+ async with aiohttp.ClientSession(timeout=timeout) as session:
367
+ async with session.get(
368
+ self.config.BALANCE_CHECK_URL,
369
+ headers=headers
370
+ ) as response:
371
+
372
+ if response.status == 200:
373
+ balance_data = await response.json()
374
+ balance = balance_data.get('balance', 0)
375
+ is_depleted = balance_data.get('isDepleted', True)
376
+
377
+ logger.info(f"Token {token_info.token[:10]}... 余额检测: balance={balance}, isDepleted={is_depleted}")
378
+
379
+ # 检查余额是否大于阈值且未耗尽
380
+ if balance > self.config.MIN_BALANCE_THRESHOLD and not is_depleted:
381
+ logger.info(f"Token {token_info.token[:10]}... 余额充足,可以恢复使用")
382
+ return True
383
+ else:
384
+ logger.info(f"Token {token_info.token[:10]}... 余额不足或已耗尽")
385
+ return False
386
+ else:
387
+ error_text = await response.text()
388
+ logger.warning(f"Token {token_info.token[:10]}... 余额检测失败: 状态码={response.status}, 错误={error_text}")
389
+ return False
390
+
391
+ except Exception as e:
392
+ logger.error(f"Token {token_info.token[:10]}... 余额检测异常: {str(e)}")
393
+ return False
394
+
395
+ class RequestHandler:
396
+ """请求处理器"""
397
+
398
+ def __init__(self, config: ConfigManager, message_processor: MessageProcessor, token_manager: TokenManager):
399
+ self.config = config
400
+ self.message_processor = message_processor
401
+ self.token_manager = token_manager
402
+ self.semaphore = asyncio.Semaphore(config.MAX_CONCURRENT)
403
+
404
+ async def handle_chat_completion(self, request: web.Request) -> web.Response:
405
+ """处理聊天完成请求"""
406
+ async with self.semaphore:
407
+ try:
408
+ # 验证API Key
409
+ if not self._validate_api_key(request):
410
+ logger.warning("API密钥验证失败")
411
+ return web.json_response(
412
+ {"error": {"message": "Invalid API key", "type": "authentication_error"}},
413
+ status=401
414
+ )
415
+
416
+ # 解析请求体
417
+ request_data = await request.json()
418
+ logger.info(f"收到请求: 模型={request_data.get('model', 'unknown')}, 消息数={len(request_data.get('messages', []))}")
419
+
420
+ # 提取和验证参数
421
+ extracted_params = self._extract_openai_params(request_data)
422
+
423
+ # 处理消息
424
+ processed_messages = self.message_processor.process_messages(extracted_params['messages'])
425
+ logger.info(f"处理后的消息: {json.dumps(processed_messages, ensure_ascii=False)}")
426
+
427
+ # 构建目标请求体
428
+ target_request = self._build_target_request(extracted_params, processed_messages)
429
+ logger.info(f"构建的目标请求体: {json.dumps(target_request, ensure_ascii=False, indent=4)}")
430
+
431
+ # 执行请求
432
+ return await self._execute_request(request, target_request, extracted_params.get('stream', False))
433
+
434
+ except Exception as e:
435
+ logger.error(f"请求处理错误: {str(e)}")
436
+ return web.json_response(
437
+ {"error": {"message": "Internal server error", "type": "server_error"}},
438
+ status=500
439
+ )
440
+
441
+ def _validate_api_key(self, request: web.Request) -> bool:
442
+ """验证API Key"""
443
+ auth_header = request.headers.get('Authorization', '')
444
+ if not auth_header.startswith('Bearer '):
445
+ return False
446
+
447
+ api_key = auth_header[7:] # Remove 'Bearer ' prefix
448
+ return api_key in self.config.API_KEY
449
+
450
+ def _extract_openai_params(self, request_data: Dict[str, Any]) -> Dict[str, Any]:
451
+ """提取OpenAI标准参数"""
452
+ params = {}
453
+
454
+ # 必需参数
455
+ params['messages'] = request_data.get('messages', [])
456
+ params['model'] = request_data.get('model', None)
457
+ if params['model'] is None:
458
+ raise ValueError("model 不能为空")
459
+ elif params['model'] not in self.config.MODEL_MAPPING:
460
+ raise ValueError(f"model {params['model']} 不支持")
461
+
462
+ # 可选参数
463
+ optional_params = [
464
+ 'stream', 'max_tokens', 'temperature', 'top_p', 'reasoning',
465
+ 'include_reasoning', 'stop', 'frequency_penalty', 'presence_penalty',
466
+ 'seed', 'repetition_penalty', 'logit_bias', 'tools', 'tool_choice',
467
+ 'stream_options'
468
+ ]
469
+
470
+ for param in optional_params:
471
+ if param in request_data:
472
+ params[param] = request_data[param]
473
+
474
+ return params
475
+
476
+ def _build_target_request(self, params: Dict[str, Any], processed_messages: List[Dict[str, Any]]) -> Dict[str, Any]:
477
+ """构建目标请求体"""
478
+ target_request = {
479
+ 'messages': processed_messages,
480
+ 'model': self.config.MODEL_MAPPING.get(params['model'], params['model'])
481
+ }
482
+
483
+ # 添加其他参数
484
+ for key, value in params.items():
485
+ if key not in ['messages', 'model']:
486
+ target_request[key] = value
487
+
488
+ if "thinking" in params['model']:
489
+ if "max_tokens" in params:
490
+ target_request['reasoning'] ={'max_tokens': int(params['max_tokens'] / 2)}
491
+ else:
492
+ target_request['max_tokens'] = 4096
493
+ target_request['reasoning'] = {'max_tokens': 2048}
494
+ logger.info(f"目标模型: {target_request['model']}")
495
+ return target_request
496
+
497
+ async def _execute_request(self, original_request: web.Request, target_request: Dict[str, Any], is_stream: bool) -> web.Response:
498
+ """执行请求"""
499
+ for attempt in range(self.config.MAX_RETRIES):
500
+ token = await self.token_manager.get_token()
501
+ if not token:
502
+ logger.error("没有可用的token")
503
+ return web.json_response(
504
+ {"error": {"message": "No available tokens", "type": "server_error"}},
505
+ status=503
506
+ )
507
+
508
+ try:
509
+ headers = self.config.TARGET_HEADERS.copy()
510
+ headers['authorization'] = f'Bearer {token}'
511
+ headers['X-KiloCode-TaskId'] = str(uuid.uuid4())
512
+
513
+ timeout = aiohttp.ClientTimeout(total=3000) # 5分钟超时
514
+
515
+ logger.info(f"尝试第 {attempt + 1} 次请求 Kilo API")
516
+
517
+ async with aiohttp.ClientSession(timeout=timeout) as session:
518
+ async with session.post(
519
+ self.config.TARGET_URL,
520
+ json=target_request,
521
+ headers=headers
522
+ ) as response:
523
+
524
+ if response.status == 200:
525
+ await self.token_manager.return_token(token, success=True)
526
+ logger.info(f"请求成功: 状态码={response.status}, 流式={is_stream}")
527
+
528
+ if is_stream:
529
+ return await self._handle_stream_response(original_request, response)
530
+ else:
531
+ return await self._handle_non_stream_response(response)
532
+ else:
533
+ await self.token_manager.return_token(token, success=False)
534
+ error_text = await response.text()
535
+ logger.error(f"请求失败: 状态码={response.status}, 错误={error_text}")
536
+
537
+ if attempt == self.config.MAX_RETRIES - 1:
538
+ return web.json_response(
539
+ {"error": {"message": error_text, "type": "api_error"}},
540
+ status=response.status
541
+ )
542
+
543
+ except Exception as e:
544
+ await self.token_manager.return_token(token, success=False)
545
+ logger.error(f"请求尝试 {attempt + 1} 失败: {str(e)}")
546
+
547
+ if attempt == self.config.MAX_RETRIES - 1:
548
+ return web.json_response(
549
+ {"error": {"message": "Request failed after retries", "type": "server_error"}},
550
+ status=500
551
+ )
552
+
553
+ return web.json_response(
554
+ {"error": {"message": "Max retries exceeded", "type": "server_error"}},
555
+ status=500
556
+ )
557
+
558
+ async def _handle_stream_response(self, original_request: web.Request, response: aiohttp.ClientResponse) -> web.Response:
559
+ """处理流式响应"""
560
+ stream_response = web.StreamResponse(
561
+ status=200,
562
+ headers={
563
+ 'Content-Type': 'text/event-stream',
564
+ 'Cache-Control': 'no-cache',
565
+ 'Connection': 'keep-alive',
566
+ 'Access-Control-Allow-Origin': '*'
567
+ }
568
+ )
569
+
570
+ await stream_response.prepare(original_request)
571
+ logger.info("开始处理流式响应")
572
+
573
+ try:
574
+ async for line in response.content:
575
+ logger.info(f"流式响应: {line}")
576
+ # 检查客户端是否断开连接
577
+ if original_request.transport is None or original_request.transport.is_closing():
578
+ logger.info("客户端在流式传输期间断开连接")
579
+ break
580
+
581
+ if not line:
582
+ continue
583
+
584
+ try:
585
+ line_str = line.decode('utf-8').strip()
586
+
587
+ # 处理SSE格式的数据
588
+ if line_str.startswith('data: '):
589
+ json_str = line_str[6:] # 移除 'data: ' 前缀
590
+
591
+ # 检查是否是结束标记
592
+ if json_str == '[DONE]':
593
+ continue # 跳过,在finally中发送自己的结束标记
594
+
595
+ # 解析JSON数据
596
+ openai_chunk = json.loads(json_str)
597
+ sse_line = f"data: {json.dumps(openai_chunk, ensure_ascii=False)}\n\n"
598
+ await stream_response.write(sse_line.encode('utf-8'))
599
+
600
+ except json.JSONDecodeError:
601
+ # 跳过无法解析的行
602
+ continue
603
+ except Exception as e:
604
+ logger.warning(f"处理流式数据时出错: {str(e)}")
605
+ continue
606
+
607
+ except Exception as e:
608
+ logger.error(f"流式响应错误: {str(e)}")
609
+ finally:
610
+ # 发送结束标记
611
+ await stream_response.write(b"data: [DONE]\n\n")
612
+ logger.info("流式响应处理完成")
613
+
614
+ return stream_response
615
+
616
+ async def _handle_non_stream_response(self, response: aiohttp.ClientResponse) -> web.Response:
617
+ """处理非流式响应"""
618
+ logger.info("开始处理非流式响应")
619
+ response_data = await response.json()
620
+ logger.info("非流式响应处理完成")
621
+ return web.json_response(response_data)
622
+
623
+ def _convert_chunk_to_openai_format(self, kilo_chunk: Dict[str, Any]) -> Dict[str, Any]:
624
+ """转换Kilo流式chunk为OpenAI格式"""
625
+ openai_chunk = {
626
+ "id": kilo_chunk.get("id", ""),
627
+ "object": "chat.completion.chunk",
628
+ "created": kilo_chunk.get("created", int(time.time())),
629
+ "model": kilo_chunk.get("model", "gpt-3.5-turbo"),
630
+ "choices": []
631
+ }
632
+
633
+ if "choices" in kilo_chunk:
634
+ for choice in kilo_chunk["choices"]:
635
+ openai_choice = {
636
+ "index": choice.get("index", 0),
637
+ "delta": {},
638
+ "finish_reason": choice.get("finish_reason")
639
+ }
640
+
641
+ if "delta" in choice:
642
+ delta = choice["delta"]
643
+ if "role" in delta:
644
+ openai_choice["delta"]["role"] = delta["role"]
645
+ if "content" in delta:
646
+ openai_choice["delta"]["content"] = delta["content"]
647
+
648
+ openai_chunk["choices"].append(openai_choice)
649
+
650
+ # 添加usage信息(如果存在)
651
+ if "usage" in kilo_chunk:
652
+ openai_chunk["usage"] = kilo_chunk["usage"]
653
+
654
+ return openai_chunk
655
+
656
+ def _convert_to_openai_format(self, kilo_response: Dict[str, Any]) -> Dict[str, Any]:
657
+ """转换Kilo响应为OpenAI格式"""
658
+ openai_response = {
659
+ "id": kilo_response.get("id", ""),
660
+ "object": "chat.completion",
661
+ "created": kilo_response.get("created", int(time.time())),
662
+ "model": kilo_response.get("model", "gpt-3.5-turbo"),
663
+ "choices": [],
664
+ "usage": kilo_response.get("usage", {})
665
+ }
666
+
667
+ if "choices" in kilo_response:
668
+ for choice in kilo_response["choices"]:
669
+ openai_choice = {
670
+ "index": choice.get("index", 0),
671
+ "message": choice.get("message", {}),
672
+ "finish_reason": choice.get("finish_reason", "stop")
673
+ }
674
+ openai_response["choices"].append(openai_choice)
675
+
676
+ return openai_response
677
+
678
+ class ModelListHandler:
679
+ """模型列表处理器"""
680
+
681
+ def __init__(self, config: ConfigManager):
682
+ self.config = config
683
+
684
+ async def handle_models(self, request: web.Request) -> web.Response:
685
+ """处理模型列表请求"""
686
+ models = []
687
+ current_time = int(time.time())
688
+
689
+ # 返回映射中���所有模型
690
+ for openai_model ,kilo_model in self.config.MODEL_MAPPING.items():
691
+ models.append({
692
+ "id": openai_model,
693
+ "object": "model",
694
+ "created": current_time,
695
+ "owned_by": "kilo-proxy",
696
+ "permission": [],
697
+ "root": openai_model,
698
+ "parent": None
699
+ })
700
+
701
+ logger.info(f"返回 {len(models)} 个可用模型")
702
+ return web.json_response({
703
+ "object": "list",
704
+ "data": models
705
+ })
706
+
707
+ class ProxyServer:
708
+ """代理服务器主类"""
709
+
710
+ def __init__(self):
711
+ self.config = ConfigManager()
712
+ self.message_processor = MessageProcessor(self.config)
713
+ self.token_manager = TokenManager(self.config)
714
+ self.request_handler = RequestHandler(self.config, self.message_processor, self.token_manager)
715
+ self.model_handler = ModelListHandler(self.config)
716
+ self.app = self._create_app()
717
+
718
+ def _create_app(self) -> web.Application:
719
+ """创建应用"""
720
+ app = web.Application()
721
+
722
+ # 添加路由
723
+ app.router.add_post('/v1/chat/completions', self.request_handler.handle_chat_completion)
724
+ app.router.add_get('/v1/models', self.model_handler.handle_models)
725
+
726
+ # 添加CORS中间件
727
+ app.middlewares.append(self._cors_middleware)
728
+
729
+ return app
730
+
731
+ async def _cors_middleware(self, app, handler):
732
+ """CORS中间件"""
733
+ async def middleware_handler(request):
734
+ if request.method == 'OPTIONS':
735
+ return web.Response(
736
+ headers={
737
+ 'Access-Control-Allow-Origin': '*',
738
+ 'Access-Control-Allow-Methods': 'GET, POST, OPTIONS',
739
+ 'Access-Control-Allow-Headers': 'Content-Type, Authorization'
740
+ }
741
+ )
742
+
743
+ response = await handler(request)
744
+ response.headers['Access-Control-Allow-Origin'] = '*'
745
+ return response
746
+
747
+ return middleware_handler
748
+
749
+ async def start(self):
750
+ """启动服务器"""
751
+ runner = web.AppRunner(self.app)
752
+ await runner.setup()
753
+
754
+ site = web.TCPSite(runner, '0.0.0.0', self.config.PORT)
755
+ await site.start()
756
+
757
+ logger.info(f"Kilo代理服务器已启动 http://127.0.0.1:{self.config.PORT}")
758
+ logger.info(f"系统角色模式: {self.config.SYSTEM_ROLE_MODE.value}")
759
+ logger.info(f"可用token数量: {len(self.config.TOKEN_POOL)}")
760
+ logger.info(f"余额检测间隔: {self.config.BALANCE_CHECK_INTERVAL}秒")
761
+ logger.info(f"最小余额阈值: {self.config.MIN_BALANCE_THRESHOLD}")
762
+ logger.info(f"模型映射: {self.config.MODEL_MAPPING}")
763
+ logger.info(f"目标URL: {self.config.TARGET_URL}")
764
+ logger.info(f"余额检测URL: {self.config.BALANCE_CHECK_URL}")
765
+
766
+ # 启动余额检测后台任务
767
+ await self.token_manager.start_balance_checker()
768
+
769
+ # 保持服务器运行
770
+ try:
771
+ await asyncio.Future() # 永远等待
772
+ except KeyboardInterrupt:
773
+ logger.info("正在关闭服务器...")
774
+ finally:
775
+ await self.token_manager.stop_balance_checker()
776
+ await runner.cleanup()
777
+
778
+ def main():
779
+ """主函数"""
780
+ # 检查必要的环境变量
781
+ required_env_vars = ['API_KEY', 'TOKEN_POOL']
782
+ missing_vars = [var for var in required_env_vars if not os.getenv(var)]
783
+
784
+ if missing_vars:
785
+ logger.error(f"缺少必需的环境变量: {missing_vars}")
786
+ logger.error("请设置以下环境变量:")
787
+ logger.error("- API_KEY: 您的API密钥(多个用逗号分隔)")
788
+ logger.error("- TOKEN_POOL: Kilo token池(多个token用逗号分隔)")
789
+ sys.exit(1)
790
+
791
+ # 创建并启动服务器
792
+ server = ProxyServer()
793
+
794
+ try:
795
+ logger.info("正在启动Kilo代理服务器...")
796
+ asyncio.run(server.start())
797
+ except KeyboardInterrupt:
798
+ logger.info("服务器已被用户停止")
799
+ except Exception as e:
800
+ logger.error(f"服务器错误: {str(e)}")
801
+ sys.exit(1)
802
+
803
+ if __name__ == '__main__':
804
+ main()