dan92 committed on
Commit
d0d6353
·
verified ·
1 Parent(s): f58c29b

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +547 -0
app.py CHANGED
@@ -26,3 +26,550 @@ from concurrent.futures import TimeoutError
26
  # 新增导入
27
  import register_bot
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  # 新增导入
27
  import register_bot
28
 
29
# Constants
CHAT_COMPLETION_CHUNK = 'chat.completion.chunk'
CHAT_COMPLETION = 'chat.completion'
CONTENT_TYPE_EVENT_STREAM = 'text/event-stream'
_BASE_URL = "https://chat.notdiamond.ai"
_API_BASE_URL = "https://spuckhogycrxcbomznwo.supabase.co"
_USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36'

# Read the API key and the paste-service settings from the environment.
API_KEY = os.getenv('API_KEY')
_PASTE_API_URL = os.getenv('PASTE_API_URL')
_PASTE_API_PASSWORD = os.getenv('PASTE_API_PASSWORD')

# Fail fast at import time when a mandatory setting is missing
# (API_KEY is checked first, then PASTE_API_URL).
for _name, _value in (('API_KEY', API_KEY), ('PASTE_API_URL', _PASTE_API_URL)):
    if not _value:
        raise ValueError(f"{_name} environment variable must be set")
del _name, _value

# Create the Flask application, its logger, CORS policy and worker pool.
app = Flask(__name__)
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
CORS(app, resources={r"/*": {"origins": "*"}})
executor = ThreadPoolExecutor(max_workers=10)

# Optional proxy plus the host/IP pair used to pin upstream connections.
proxy_url = os.getenv('PROXY_URL')
NOTDIAMOND_IP = os.getenv('NOTDIAMOND_IP')
NOTDIAMOND_DOMAIN = os.getenv('NOTDIAMOND_DOMAIN')

if not NOTDIAMOND_IP:
    logger.error("NOTDIAMOND_IP environment variable is not set!")
    raise ValueError("NOTDIAMOND_IP must be set")

# Other code remains unchanged...
64
+
65
@app.route('/', methods=['GET'])
def root():
    """Return a JSON description of the service and how to call it.

    The payload lists the completion endpoint, the expected headers, an
    example request body and the model names taken from ``MODEL_INFO``.
    """
    model_names = list(MODEL_INFO.keys())

    example_body = {
        "model": f"One of: {', '.join(model_names)}",
        "messages": [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "Hello, who are you?"}
        ],
        "stream": False,
        "temperature": 0.7
    }

    usage = {
        "endpoint": "/ai/v1/chat/completions",
        "method": "POST",
        "headers": {
            "Authorization": "Bearer YOUR_API_KEY"
        },
        "body": example_body
    }

    description = {
        "service": "AI Chat Completion Proxy",
        "usage": usage,
        "availableModels": model_names,
        "note": "API key authentication is required for other endpoints."
    }
    return jsonify(description)
88
+
89
# Startup logic shared by Flask CLI / Gunicorn (module imported) and direct
# execution: the health-check thread is started in both cases, and the
# built-in development server is only launched when run as a script.
#
# NOTE(review): `health_check` is defined further down in this file, and
# `app.run` blocks before the later routes are registered. As far as this
# view shows, this block should live at the very end of the module — confirm
# and move it.
health_check_thread = threading.Thread(target=health_check, daemon=True)
health_check_thread.start()

if __name__ == "__main__":
    port = int(os.environ.get("PORT", 3000))
    app.run(debug=False, host='0.0.0.0', port=port, threaded=True)
100
+
101
# API key verification decorator
def require_api_key(f):
    """Reject requests whose ``Authorization`` header does not carry API_KEY.

    The wrapped view is only called when the header is present and the key
    extracted from it equals ``API_KEY``. Otherwise a JSON error body with
    HTTP status 401 is returned.

    The key is whatever follows the last ``'Bearer '`` marker; a header
    without that marker is treated as the bare key.
    """
    import hmac  # local import: constant-time comparison helper

    @wraps(f)
    def decorated_function(*args, **kwargs):
        auth_header = request.headers.get('Authorization')
        if not auth_header:
            return jsonify({'error': 'No API key provided'}), 401

        # Extract the API key from the Bearer token.
        provided_key = auth_header.split('Bearer ')[-1].strip()

        # Compare in constant time so response timing does not leak how much
        # of the key matched. Bytes are used because compare_digest rejects
        # non-ASCII str arguments.
        keys_match = hmac.compare_digest(
            provided_key.encode('utf-8'),
            API_KEY.encode('utf-8'),
        )
        if not keys_match:
            return jsonify({'error': 'Invalid API key'}), 401

        return f(*args, **kwargs)
    return decorated_function
119
+
120
# Caches for refresh tokens and upstream request headers; entries expire
# after one hour.
refresh_token_cache = TTLCache(maxsize=1000, ttl=3600)
# Header sets are cached under a per-JWT key (see get_notdiamond_headers), so
# the cache needs room for more than one account; with maxsize=1 every
# account switch evicted the previous entry.
headers_cache = TTLCache(maxsize=1000, ttl=3600)
token_refresh_lock = threading.Lock()
123
+
124
# Custom connection function
def patched_create_connection(address, *args, **kwargs):
    """Open a connection, redirecting NOTDIAMOND_DOMAIN to NOTDIAMOND_IP.

    ``address`` is a ``(host, port)`` pair. When the host equals
    ``NOTDIAMOND_DOMAIN`` the connection is made to ``NOTDIAMOND_IP`` on the
    same port; every other address is passed through untouched.
    """
    host, port = address
    if host != NOTDIAMOND_DOMAIN:
        return create_connection(address, *args, **kwargs)

    logger.info(f"Connecting to {NOTDIAMOND_DOMAIN} using IP: {NOTDIAMOND_IP}")
    pinned_address = (NOTDIAMOND_IP, port)
    return create_connection(pinned_address, *args, **kwargs)


# Replace urllib3's default connection function with the patched one.
urllib3.util.connection.create_connection = patched_create_connection
134
+
135
# Custom HTTPAdapter
class CustomHTTPAdapter(HTTPAdapter):
    """HTTPAdapter whose pool manager enables TCP_NODELAY on its sockets."""

    def init_poolmanager(self, *args, **kwargs):
        """Append the TCP_NODELAY option, then defer to the base adapter."""
        options = kwargs.get('socket_options', [])
        options += [(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)]
        kwargs['socket_options'] = options
        return super().init_poolmanager(*args, **kwargs)
141
+
142
# Create a custom Session
def create_custom_session():
    """Return a ``requests.Session`` with CustomHTTPAdapter on both schemes."""
    session = requests.Session()
    adapter = CustomHTTPAdapter()
    # One shared adapter instance serves HTTPS and HTTP alike.
    for scheme in ('https://', 'http://'):
        session.mount(scheme, adapter)
    return session
149
+
150
# Rate-limiting related constants
AUTH_RETRY_DELAY = 60               # delay before retrying authentication (seconds)
AUTH_BACKOFF_FACTOR = 2             # back-off factor
AUTH_MAX_RETRIES = 3                # maximum number of retries
AUTH_CHECK_INTERVAL = 5 * 60        # health-check interval (seconds)
AUTH_RATE_LIMIT_WINDOW = 60 * 60    # rate-limit window (seconds)
AUTH_MAX_REQUESTS = 100             # maximum requests per window
157
+
158
class AuthManager:
    """Credentials, token state and model availability for an account.

    NOTE(review): only part of this class is visible here. The selection
    methods below read ``self.auth_managers`` and call
    ``self.reset_all_model_status()``, neither of which is set up by the
    ``__init__`` shown — presumably they are provided elsewhere; confirm.
    """

    def __init__(self, email: str, password: str):
        """Store the credentials and initialise empty token state."""
        self._email: str = email
        self._password: str = password
        self._max_retries: int = 3
        self._retry_delay: int = 1
        self._api_key: str = ""
        self._user_info: Dict[str, Any] = {}
        self._refresh_token: str = ""
        self._access_token: str = ""
        self._token_expiry: float = 0
        self._session: requests.Session = create_custom_session()
        self._logger: logging.Logger = logging.getLogger(__name__)
        # Every known model starts out marked as available.
        self.model_status = {model: True for model in MODEL_INFO.keys()}
        self.last_successful_index = 0
        self.last_success_date = datetime.now().date()

    def get_next_auth_manager(self, model):
        """Pick an account for ``model``, preferring the last successful one.

        On the first call of a new day the bookkeeping is reset and the first
        account is returned. Otherwise the accounts are scanned in ring
        order, starting at ``last_successful_index``, for one that reports
        the model as available and may attempt authentication.

        Returns:
            The chosen account manager, or ``None`` when the pool is empty or
            no account qualifies.
        """
        current_date = datetime.now().date()

        # On a new day, reset state and start again from the first account.
        if current_date > self.last_success_date:
            self.current_index = 0
            self.last_successful_index = 0
            self.last_success_date = current_date
            self.reset_all_model_status()
            return self.auth_managers[0] if self.auth_managers else None

        total = len(self.auth_managers)
        if total == 0:
            # An empty pool previously raised IndexError / ZeroDivisionError.
            return None
        if not 0 <= self.last_successful_index < total:
            # A stale index (e.g. the pool shrank) falls back to the start.
            self.last_successful_index = 0

        # Offset 0 is the last successful account; the rest follow in order.
        start = self.last_successful_index
        for offset in range(total):
            index = (start + offset) % total
            candidate = self.auth_managers[index]
            if candidate.is_model_available(model) and candidate._should_attempt_auth():
                self.last_successful_index = index
                return candidate

        return None

    def update_last_successful(self, index):
        """Record the index of the account that just succeeded, and the date."""
        self.last_successful_index = index
        self.last_success_date = datetime.now().date()

    # ... (other AuthManager methods remain unchanged)
211
+
212
# One row per supported model:
# (public model name, provider, identifier sent upstream as the model).
_MODEL_TABLE = (
    ("gpt-4o-mini", "openai", "gpt-4o-mini"),
    ("gpt-4o", "openai", "gpt-4o"),
    ("gpt-4-turbo", "openai", "gpt-4-turbo-2024-04-09"),
    ("chatgpt-4o-latest", "openai", "chatgpt-4o-latest"),
    ("gemini-1.5-pro-latest", "google", "models/gemini-1.5-pro-latest"),
    ("gemini-1.5-flash-latest", "google", "models/gemini-1.5-flash-latest"),
    ("llama-3.1-70b-instruct", "togetherai", "meta.llama3-1-70b-instruct-v1:0"),
    ("llama-3.1-405b-instruct", "togetherai", "meta.llama3-1-405b-instruct-v1:0"),
    ("claude-3-5-sonnet-20241022", "anthropic", "anthropic.claude-3-5-sonnet-20241022-v2:0"),
    ("claude-3-5-haiku-20241022", "anthropic", "anthropic.claude-3-5-haiku-20241022-v1:0"),
    ("perplexity", "perplexity", "llama-3.1-sonar-large-128k-online"),
    ("mistral-large-2407", "mistral", "mistral.mistral-large-2407-v1:0"),
)

# Lookup table keyed by public model name, in the order listed above.
MODEL_INFO = {
    name: {"provider": provider, "mapping": mapping}
    for name, provider, mapping in _MODEL_TABLE
}
226
+
227
def stream_notdiamond_response(response, model):
    """Turn the upstream byte stream into OpenAI-style chunk dicts.

    Args:
        response: streamed upstream response; read via ``iter_content``.
        model: model name copied into every chunk.

    Yields:
        One chunk dict per decoded piece of text, then a final chunk whose
        ``finish_reason`` is ``'stop'``, ``'timeout'`` or ``'error'``. Each
        chunk's first choice also carries the text accumulated so far under
        ``'context'``.

    Bytes are decoded incrementally: ``iter_content`` cuts the stream at
    arbitrary 1024-byte boundaries, so a multi-byte UTF-8 character can be
    split across two chunks. Decoding each chunk on its own raised on such
    splits and the text was dropped.
    """
    import codecs  # local import: incremental UTF-8 decoder

    decoder = codecs.getincrementaldecoder('utf-8')(errors='replace')
    full_content = ""
    last_activity = time.time()
    timeout = 30  # maximum allowed gap between two chunks, in seconds

    def _with_context(chunk_data):
        # Attach the text accumulated so far to the chunk's first choice.
        if 'choices' in chunk_data and chunk_data['choices']:
            chunk_data['choices'][0]['context'] = full_content
        return chunk_data

    try:
        for chunk in response.iter_content(chunk_size=1024):
            current_time = time.time()

            # Stop when the gap since the last processed chunk is too long.
            if current_time - last_activity > timeout:
                logger.warning("Stream response timeout, sending partial content")
                if full_content:
                    yield _with_context(create_openai_chunk('', model, 'timeout'))
                return

            if not chunk:
                continue

            try:
                new_content = decoder.decode(chunk)
                if not new_content:
                    # Only part of a multi-byte character has arrived so far.
                    last_activity = current_time
                    continue

                full_content += new_content
                yield _with_context(create_openai_chunk(new_content, model))
                last_activity = current_time

            except Exception as e:
                logger.error(f"Error processing chunk: {e}")
                continue

        # Flush bytes still buffered in the decoder (a truncated character
        # becomes U+FFFD because of errors='replace').
        tail = decoder.decode(b'', final=True)
        if tail:
            full_content += tail
            yield _with_context(create_openai_chunk(tail, model))

        yield _with_context(create_openai_chunk('', model, 'stop'))

    except Exception as e:
        logger.error(f"Stream response error: {e}")
        yield _with_context(create_openai_chunk('', model, 'error'))
278
+
279
def make_request(payload, auth_manager, model_id):
    """Send ``payload`` upstream, rotating through accounts until one works.

    Args:
        payload: JSON-serialisable request body.
        auth_manager: accepted for interface compatibility; the account
            actually used is chosen by ``multi_auth_manager``.
        model_id: public model name, used for account selection.

    Returns:
        The streamed upstream response on success (HTTP 200 with an
        event-stream content type), or ``None`` when every usable account
        has been tried without success.
    """
    global multi_auth_manager
    max_retries = 3
    retry_delay = 1
    request_timeout = 30  # request timeout in seconds

    logger.info(f"尝试发送请求,模型:{model_id}")

    # ... (other code remains unchanged)

    # E-mails of accounts already attempted for this request. The visible
    # code never defined this name before using it.
    tried_accounts = set()
    managers = multi_auth_manager.auth_managers

    while len(tried_accounts) < len(managers):
        auth_manager = multi_auth_manager.get_next_auth_manager(model_id)

        if auth_manager and auth_manager._email in tried_accounts:
            # The selector prefers the last successful account, so it can
            # keep returning one that already failed here. Blindly retrying
            # the selection looped forever; pick an untried account instead.
            auth_manager = next(
                (
                    candidate for candidate in managers
                    if candidate._email not in tried_accounts
                    and candidate.is_model_available(model_id)
                    and candidate._should_attempt_auth()
                ),
                None,
            )

        if not auth_manager:
            break

        tried_accounts.add(auth_manager._email)
        logger.info(f"尝试使用账号 {auth_manager._email}")

        for attempt in range(max_retries):
            try:
                url = get_notdiamond_url()
                headers = get_notdiamond_headers(auth_manager)

                response = executor.submit(
                    requests.post,
                    url,
                    headers=headers,
                    json=payload,
                    stream=True,
                    timeout=request_timeout
                ).result(timeout=request_timeout)

                # Ignore parameters such as "; charset=utf-8" when comparing.
                content_type = (response.headers.get('Content-Type') or '').split(';')[0].strip().lower()
                if response.status_code == 200 and content_type == CONTENT_TYPE_EVENT_STREAM:
                    logger.info(f"请求成功,使用账号 {auth_manager._email}")
                    current_index = managers.index(auth_manager)
                    multi_auth_manager.update_last_successful(current_index)
                    return response

                # Rejected: release the streamed connection before retrying.
                logger.warning(
                    f"Unexpected response for account {auth_manager._email}: "
                    f"status {response.status_code}"
                )
                response.close()

            # TimeoutError is the name this module imports from
            # concurrent.futures (the builtin of the same name on 3.11+).
            except (requests.Timeout, TimeoutError) as e:
                logger.error(f"Request timeout for account {auth_manager._email}: {e}")
                break
            except Exception as e:
                logger.error(f"Request attempt {attempt + 1} failed for account {auth_manager._email}: {e}")
                if attempt < max_retries - 1:
                    time.sleep(retry_delay)
                continue

    # No account produced a usable response.
    return None
329
+
330
def health_check():
    """Background loop that validates one account's token every 60 seconds.

    Accounts are visited round-robin. When the calendar date changes, the
    round-robin position and every account's model status are reset. The
    previous exact 00:00 minute test could be skipped entirely, because each
    pass takes slightly longer than 60 seconds. Errors are logged and the
    loop keeps running; the function never returns.
    """
    check_index = 0
    last_check_date = datetime.now().date()

    while True:
        try:
            if multi_auth_manager:
                current_date = datetime.now().date()
                managers = multi_auth_manager.auth_managers

                # On a new day, restart the rotation and clear model status.
                if current_date > last_check_date:
                    check_index = 0
                    last_check_date = current_date
                    logger.info("New day started, resetting health check index")
                    multi_auth_manager.reset_all_model_status()
                    logger.info("Reset model status for all accounts")
                    continue

                # If the pool shrank, an out-of-range index would never
                # advance again; wrap it back to the start.
                if check_index >= len(managers):
                    check_index = 0

                # Check a single account per pass.
                if managers:
                    auth_manager = managers[check_index]
                    email = auth_manager._email

                    if auth_manager._should_attempt_auth():
                        if not auth_manager.ensure_valid_token():
                            logger.warning(f"Auth token validation failed during health check for {email}")
                            auth_manager.clear_auth()
                        else:
                            logger.info(f"Health check passed for {email}")
                    else:
                        logger.info(f"Skipping health check for {email} due to rate limiting")

                    # Advance to the next account for the following pass.
                    check_index = (check_index + 1) % len(managers)

        except Exception as e:
            logger.error(f"Health check error: {e}")

        # The rest of this file calls time.sleep, so use the same form here
        # rather than a bare `sleep` name.
        time.sleep(60)
374
+
375
def generate_system_fingerprint():
    """Return a fresh fingerprint: ``fp_`` plus ten random hex characters."""
    random_hex = uuid.uuid4().hex
    return "fp_" + random_hex[:10]
378
+
379
def create_openai_chunk(content, model, finish_reason=None, usage=None):
    """Build one streaming response chunk in OpenAI format.

    Args:
        content: text for the chunk's delta; a falsy value yields an empty
            delta.
        model: model name stored in the chunk.
        finish_reason: value stored on the single choice (default ``None``).
        usage: optional usage dict, added only when not ``None``.

    Returns:
        The chunk as a plain dict.
    """
    delta = {}
    if content:
        delta["content"] = content

    choice = {
        "index": 0,
        "delta": delta,
        "logprobs": None,
        "finish_reason": finish_reason
    }

    chunk = {
        "id": f"chatcmpl-{uuid.uuid4()}",
        "object": CHAT_COMPLETION_CHUNK,
        "created": int(time.time()),
        "model": model,
        "system_fingerprint": generate_system_fingerprint(),
        "choices": [choice]
    }

    if usage is None:
        return chunk

    chunk["usage"] = usage
    return chunk
401
+
402
def count_tokens(text, model="gpt-3.5-turbo-0301"):
    """Return the number of tokens in ``text`` for ``model``.

    Falls back to the ``cl100k_base`` encoding when tiktoken does not know
    the model name (``KeyError``).

    ``disallowed_special=()`` makes tiktoken encode strings such as
    ``<|endoftext|>`` as ordinary text. With the default setting, ``encode``
    raises ``ValueError`` on such input, which turned user text containing
    those markers into a server error.
    """
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        encoding = tiktoken.get_encoding("cl100k_base")
    return len(encoding.encode(text, disallowed_special=()))
408
+
409
def count_message_tokens(messages, model="gpt-3.5-turbo-0301"):
    """Return the total token count over the ``str()`` form of each message."""
    total = 0
    for message in messages:
        total += count_tokens(str(message), model)
    return total
412
+
413
_DEFAULT_NOTDIAMOND_URL = 'https://not-diamond-workers.t7-cc4.workers.dev/stream-message'

# Comma-separated upstream endpoints from the environment. Whitespace around
# entries and empty entries (for example from a trailing comma) are dropped,
# so a blank URL is never selected. The default is used when nothing usable
# is configured.
NOTDIAMOND_URLS = [
    url.strip()
    for url in os.getenv('NOTDIAMOND_URLS', _DEFAULT_NOTDIAMOND_URL).split(',')
    if url.strip()
] or [_DEFAULT_NOTDIAMOND_URL]


def get_notdiamond_url():
    """Return one of the configured notdiamond URLs, chosen at random."""
    return random.choice(NOTDIAMOND_URLS)
418
+
419
def get_notdiamond_headers(auth_manager):
    """Return the request headers for a notdiamond API call.

    Headers are cached in ``headers_cache`` under a key derived from the
    account's JWT. The JWT is read once so that the cache key and the
    ``authorization`` header always refer to the same token; reading it twice
    could pair a key with a different token if the value changed in between.

    Args:
        auth_manager: account object exposing ``get_jwt_value()``.

    Returns:
        dict of HTTP headers. The cached dict itself is returned, so callers
        should not mutate it.
    """
    jwt_value = auth_manager.get_jwt_value()
    cache_key = f'notdiamond_headers_{jwt_value}'

    try:
        return headers_cache[cache_key]
    except KeyError:
        headers = {
            'accept': 'text/event-stream',
            'accept-language': 'zh-CN,zh;q=0.9',
            'content-type': 'application/json',
            'user-agent': _USER_AGENT,
            'authorization': f'Bearer {jwt_value}'
        }
        headers_cache[cache_key] = headers
        return headers
435
+
436
def generate_stream_response(response, model, prompt_tokens):
    """Yield server-sent-event lines for a streaming completion.

    Each chunk from ``stream_notdiamond_response`` gets a running ``usage``
    block and is emitted as a ``data:`` line; a ``[DONE]`` marker ends the
    stream.
    """
    completion_tokens = 0

    for event in stream_notdiamond_response(response, model):
        delta = event['choices'][0]['delta']
        completion_tokens += count_tokens(delta.get('content', ''), model)

        event['usage'] = {
            "prompt_tokens": prompt_tokens,
            "completion_tokens": completion_tokens,
            "total_tokens": prompt_tokens + completion_tokens
        }

        serialized = json.dumps(event)
        yield f"data: {serialized}\n\n"

    yield "data: [DONE]\n\n"
453
+
454
def handle_non_stream_response(response, model, prompt_tokens):
    """Collect the whole upstream stream and return one completion response.

    Args:
        response: streamed upstream response; read via ``iter_content``.
        model: model name for the response body and for token counting.
        prompt_tokens: token count of the prompt, reported in ``usage``.

    Returns:
        The ``jsonify`` response holding a ``chat.completion`` object.

    Raises:
        Any exception raised while reading or building the response, after
        it has been logged.

    The body is decoded once after all bytes are collected. Decoding each
    1024-byte chunk separately failed whenever a multi-byte UTF-8 character
    straddled a chunk boundary.
    """
    try:
        raw_body = b"".join(
            chunk for chunk in response.iter_content(chunk_size=1024) if chunk
        )
        # Invalid bytes become U+FFFD instead of failing the whole request.
        full_content = raw_body.decode('utf-8', errors='replace')

        completion_tokens = count_tokens(full_content, model)
        total_tokens = prompt_tokens + completion_tokens

        response_data = {
            "id": f"chatcmpl-{uuid.uuid4()}",
            "object": CHAT_COMPLETION,
            "created": int(time.time()),
            "model": model,
            "system_fingerprint": generate_system_fingerprint(),
            "choices": [
                {
                    "index": 0,
                    "message": {
                        "role": "assistant",
                        "content": full_content
                    },
                    "finish_reason": "stop"
                }
            ],
            "usage": {
                "prompt_tokens": prompt_tokens,
                "completion_tokens": completion_tokens,
                "total_tokens": total_tokens
            }
        }

        return jsonify(response_data)

    except Exception as e:
        logger.error(f"Error processing non-stream response: {e}")
        raise
494
+
495
@app.route('/ai/v1/chat/completions', methods=['POST'])
@require_api_key
def handle_request():
    """Main route handling chat-completion requests.

    Validates the JSON body and the requested model, selects an account,
    forwards the request upstream and returns either an event stream or a
    single JSON completion, depending on the ``stream`` flag.

    Status codes: 400 for a malformed body or unknown model, 401 when no
    account manager is configured, 403 when no account can serve the model,
    503 when the upstream call fails, 500 for anything unexpected.
    """
    global multi_auth_manager
    if not multi_auth_manager:
        return jsonify({'error': 'Unauthorized'}), 401

    try:
        # silent=True returns None instead of raising on a missing or
        # invalid body, so bad input is answered with 400 rather than 500.
        request_data = request.get_json(silent=True)
        if not isinstance(request_data, dict):
            return jsonify({
                'error': {
                    'message': 'Request body must be a JSON object',
                    'type': 'invalid_request_error'
                }
            }), 400

        model_id = request_data.get('model', '')
        # An unknown model previously surfaced as a KeyError -> 500 when the
        # payload was built.
        if not isinstance(model_id, str) or model_id not in MODEL_INFO:
            return jsonify({
                'error': {
                    'message': f'Unknown model: {model_id!r}',
                    'type': 'invalid_request_error'
                }
            }), 400

        auth_manager = multi_auth_manager.ensure_valid_token(model_id)
        if not auth_manager:
            return jsonify({'error': 'No available accounts for this model'}), 403

        stream = request_data.get('stream', False)
        messages = request_data.get('messages', [])
        prompt_tokens = count_message_tokens(messages, model_id)

        payload = {
            'model': MODEL_INFO[model_id]['mapping'],
            'messages': messages,
            'temperature': request_data.get('temperature', 1),
            'max_tokens': request_data.get('max_tokens'),
            'presence_penalty': request_data.get('presence_penalty'),
            'frequency_penalty': request_data.get('frequency_penalty'),
            'top_p': request_data.get('top_p', 1),
        }

        response = make_request(payload, auth_manager, model_id)
        if response is None:
            # make_request ran out of accounts without a usable response.
            return jsonify({
                'error': {
                    'message': 'Error communicating with the API',
                    'type': 'api_error',
                    'details': 'No upstream account returned a successful response'
                }
            }), 503

        if stream:
            return Response(
                stream_with_context(generate_stream_response(response, model_id, prompt_tokens)),
                content_type=CONTENT_TYPE_EVENT_STREAM
            )
        else:
            return handle_non_stream_response(response, model_id, prompt_tokens)

    except requests.RequestException as e:
        logger.error(f"Request error: {e}")
        return jsonify({
            'error': {
                'message': 'Error communicating with the API',
                'type': 'api_error',
                'details': str(e)
            }
        }), 503
    except Exception as e:
        logger.error(f"Unexpected error: {e}")
        return jsonify({
            'error': {
                'message': 'Internal Server Error',
                'type': 'server_error',
                'details': str(e)
            }
        }), 500
555
+
556
@app.route('/ai/v1/models', methods=['GET'])
@require_api_key
def list_models():
    """Return the list of available models, one entry per MODEL_INFO key."""
    models = []
    for model_id in MODEL_INFO.keys():
        entry = {
            "id": model_id,
            "object": "model",
            "created": int(time.time()),
            "owned_by": "notdiamond",
            "permission": [],
            "root": model_id,
            "parent": None,
        }
        models.append(entry)

    listing = {
        "object": "list",
        "data": models
    }
    return jsonify(listing)
575
+