Calmlo committed on
Commit
8f3811c
·
verified ·
1 Parent(s): 46da92e

Update server.js

Browse files
Files changed (1) hide show
  1. server.js +347 -244
server.js CHANGED
@@ -1,46 +1,132 @@
1
  import express from 'express';
2
  import { fal } from '@fal-ai/client';
3
 
4
- // --- Key Management ---
5
- const FAL_KEY_STRING = process.env.FAL_KEY;
6
- const API_KEY = process.env.API_KEY; // 自定义 API Key 环境变量保持不变
7
 
8
- if (!FAL_KEY_STRING) {
9
- console.error("Error: FAL_KEY environment variable is not set.");
10
  process.exit(1);
11
  }
12
 
13
- // 解析 FAL_KEY 字符串为数组,去除空白并过滤空值
14
- const falKeys = FAL_KEY_STRING.split(',')
15
- .map(key => key.trim())
16
- .filter(key => key.length > 0);
 
 
 
 
 
 
 
 
 
 
17
 
18
  if (falKeys.length === 0) {
19
- console.error("Error: FAL_KEY environment variable is set, but no valid keys were found after parsing.");
20
  process.exit(1);
21
  }
22
 
23
- console.log(`Loaded ${falKeys.length} Fal AI Keys initially.`);
24
-
25
- // 不再需要 currentFalKeyIndex
26
- // let currentFalKeyIndex = 0;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
- // --- End Key Management ---
 
 
29
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
- if (!API_KEY) {
32
- console.error("Error: API_KEY environment variable is not set.");
33
- process.exit(1);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  }
35
 
 
36
  const app = express();
37
  app.use(express.json({ limit: '50mb' }));
38
  app.use(express.urlencoded({ extended: true, limit: '50mb' }));
39
 
40
  const PORT = process.env.PORT || 3000;
41
 
42
- // API Key 鉴权中间件 (保持不变)
43
  const apiKeyAuth = (req, res, next) => {
 
44
  const authHeader = req.headers['authorization'];
45
 
46
  if (!authHeader) {
@@ -63,16 +149,15 @@ const apiKeyAuth = (req, res, next) => {
63
  next();
64
  };
65
 
66
- // 应用 API Key 鉴权中间件到所有 API 路由 (保持不变)
67
  app.use(['/v1/models', '/v1/chat/completions'], apiKeyAuth);
68
 
69
- // === 全局定义限制 === (保持不变)
70
  const PROMPT_LIMIT = 4800;
71
  const SYSTEM_PROMPT_LIMIT = 4800;
72
  // === 限制定义结束 ===
73
 
74
- // 定义 fal-ai/any-llm 支持的模型列表 (保持不变)
75
- const FAL_SUPPORTED_MODELS = [
76
  "anthropic/claude-3.7-sonnet",
77
  "anthropic/claude-3.5-sonnet",
78
  "anthropic/claude-3-5-haiku",
@@ -92,16 +177,16 @@ const FAL_SUPPORTED_MODELS = [
92
  "meta-llama/llama-4-scout"
93
  ];
94
 
95
- // Helper function to get owner from model ID (保持不变)
96
- const getOwner = (modelId) => {
97
  if (modelId && modelId.includes('/')) {
98
  return modelId.split('/')[0];
99
  }
100
  return 'fal-ai';
101
- }
102
 
103
- // GET /v1/models endpoint (保持不变)
104
- app.get('/v1/models', (req, res) => {
105
  console.log("Received request for GET /v1/models");
106
  try {
107
  const modelsData = FAL_SUPPORTED_MODELS.map(modelId => ({
@@ -115,11 +200,11 @@ app.get('/v1/models', (req, res) => {
115
  }
116
  });
117
 
118
- // === convertMessagesToFalPrompt 函数 (保持不变) ===
119
- function convertMessagesToFalPrompt(messages) {
120
  let fixed_system_prompt_content = "";
121
  const conversation_message_blocks = [];
122
- // console.log(`Original messages count: ${messages.length}`); // Reduced logging verbosity
123
 
124
  // 1. 分离 System 消息,格式化 User/Assistant 消息
125
  for (const message of messages) {
@@ -204,8 +289,12 @@ function convertMessagesToFalPrompt(messages) {
204
  // 5. *** 组合最终的 prompt 和 system_prompt (包含分隔符逻辑) ***
205
  const system_prompt_history_content = system_prompt_history_blocks.join('').trim();
206
  const final_prompt = prompt_history_blocks.join('').trim();
 
 
207
  const SEPARATOR = "\n\n-------下面是比较早之前的对话内容-----\n\n";
 
208
  let final_system_prompt = "";
 
209
  const hasFixedSystem = fixed_system_prompt_content.length > 0;
210
  const hasSystemHistory = system_prompt_history_content.length > 0;
211
 
@@ -220,259 +309,273 @@ function convertMessagesToFalPrompt(messages) {
220
  // console.log("Using only history in system prompt slot.");
221
  }
222
 
 
223
  const result = {
224
  system_prompt: final_system_prompt,
225
  prompt: final_prompt
226
  };
227
 
228
- // console.log(`Final system_prompt length (Sys+Separator+Hist): ${result.system_prompt.length}`);
229
- // console.log(`Final prompt length (Hist): ${result.prompt.length}`);
230
 
231
  return result;
232
  }
233
  // === convertMessagesToFalPrompt 函数结束 ===
234
 
235
 
236
- // POST /v1/chat/completions endpoint (随机 Key + 失败排除)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
237
  app.post('/v1/chat/completions', async (req, res) => {
238
  const { model, messages, stream = false, reasoning = false, ...restOpenAIParams } = req.body;
239
- const requestId = `req-${Date.now()}`;
240
 
241
- console.log(`[${requestId}] Received chat completion request for model: ${model}, stream: ${stream}. Strategy: Random key with exclusion.`);
242
 
243
  if (!FAL_SUPPORTED_MODELS.includes(model)) {
244
- console.warn(`[${requestId}] Warning: Requested model '${model}' is not in the explicitly supported list.`);
 
245
  }
246
  if (!model || !messages || !Array.isArray(messages) || messages.length === 0) {
247
- console.error(`[${requestId}] Invalid request parameters:`, { model, messages: Array.isArray(messages) ? messages.length : typeof messages });
248
  return res.status(400).json({ error: 'Missing or invalid parameters: model and messages array are required.' });
249
  }
250
 
251
- let lastError = null;
252
- let success = false;
253
- let attempt = 0;
254
- const maxAttempts = falKeys.length; // Safety limit
255
-
256
- // *** 為此請求創建可用的 Key 列表副本 ***
257
- let availableKeysForRequest = [...falKeys];
258
-
259
- // 准备 Fal Input (只需要准备一次)
260
- const { prompt, system_prompt } = convertMessagesToFalPrompt(messages);
261
- const falInput = {
262
- model: model,
263
- prompt: prompt,
264
- ...(system_prompt && { system_prompt: system_prompt }),
265
- reasoning: !!reasoning,
266
- };
267
- console.log(`[${requestId}] Fal Input prepared. System Prompt Length: ${system_prompt?.length || 0}, Prompt Length: ${prompt?.length || 0}`);
268
-
269
-
270
- // *** 重试循环:只要还有可用的 Key 且未达最大尝试次数 ***
271
- while (availableKeysForRequest.length > 0 && attempt < maxAttempts) {
272
- attempt++;
273
- // *** 隨機選擇一個 Key ***
274
- const randomIndex = Math.floor(Math.random() * availableKeysForRequest.length);
275
- const selectedFalKey = availableKeysForRequest[randomIndex];
276
- // Mask key in logs for security
277
- const maskedKey = selectedFalKey.length > 8 ? `${selectedFalKey.substring(0, 4)}...${selectedFalKey.substring(selectedFalKey.length - 4)}` : selectedFalKey;
278
- console.log(`[${requestId}] Attempt ${attempt}/${maxAttempts}: Trying random key (masked: ${maskedKey}). ${availableKeysForRequest.length} keys available.`);
279
-
280
- try {
281
- fal.config({ credentials: selectedFalKey });
282
-
283
- if (stream) {
284
- // --- 流式处理 ---
285
- res.setHeader('Content-Type', 'text/event-stream; charset=utf-8');
286
- res.setHeader('Cache-Control', 'no-cache');
287
- res.setHeader('Connection', 'keep-alive');
288
- res.setHeader('Access-Control-Allow-Origin', '*');
289
-
290
- let previousOutput = '';
291
- let firstEventProcessed = false;
292
- let streamFailedMidway = false;
293
- let keyConfirmedWorking = false;
294
-
295
- const falStream = await fal.stream("fal-ai/any-llm", { input: falInput });
296
-
297
- try {
298
- for await (const event of falStream) {
299
- const currentOutput = (event && typeof event.output === 'string') ? event.output : '';
300
- const isPartial = (event && typeof event.partial === 'boolean') ? event.partial : true;
301
- const errorInfo = (event && event.error) ? event.error : null;
302
- const eventStatus = errorInfo?.status;
303
-
304
- if (errorInfo) {
305
- console.warn(`[${requestId}] Error in stream event (Key: ${maskedKey}):`, errorInfo);
306
- lastError = errorInfo;
307
-
308
- if (!firstEventProcessed && (eventStatus === 401 || eventStatus === 403 || eventStatus === 429)) {
309
- console.warn(`[${requestId}] Key ${maskedKey} failed (${eventStatus}) on first event. Excluding it for this request.`);
310
- availableKeysForRequest.splice(randomIndex, 1); // 从可用列表移除
311
- console.log(`[${requestId}] ${availableKeysForRequest.length} keys remaining for this request.`);
312
- break; // 中断内部循环,外部循环将尝试下一个随机 key
313
- } else {
314
- console.error(`[${requestId}] Unrecoverable stream error or error after stream start (Key: ${maskedKey}).`);
315
- streamFailedMidway = true;
316
- if (!res.headersSent) {
317
- res.status(500).json({ object: "error", message: `Fal Stream Error: ${JSON.stringify(errorInfo)}`, type:"fal_stream_error"});
318
- } else if (!res.writableEnded) {
319
- const errorChunk = { id: `chatcmpl-${Date.now()}-error`, object: "chat.completion.chunk", created: Math.floor(Date.now() / 1000), model: model, choices: [{ index: 0, delta: {}, finish_reason: "error", message: { role: 'assistant', content: `Fal Stream Error: ${JSON.stringify(errorInfo)}` } }] };
320
- res.write(`data: ${JSON.stringify(errorChunk)}\n\n`);
321
- }
322
- break; // 中断内部循环
323
- }
324
- }
325
-
326
- if (!keyConfirmedWorking && !errorInfo) {
327
- success = true;
328
- keyConfirmedWorking = true;
329
- console.log(`[${requestId}] Key ${maskedKey} confirmed working (stream).`);
330
- if (!res.headersSent) {
331
- res.flushHeaders();
332
- console.log(`[${requestId}] Stream headers flushed.`);
333
- }
334
- firstEventProcessed = true;
335
- }
336
-
337
- if (!errorInfo) {
338
- let deltaContent = '';
339
- if (currentOutput.startsWith(previousOutput)) {
340
- deltaContent = currentOutput.substring(previousOutput.length);
341
- } else if (currentOutput.length > 0) {
342
- deltaContent = currentOutput; previousOutput = '';
343
- }
344
- previousOutput = currentOutput;
345
-
346
- if (deltaContent || !isPartial) {
347
- const openAIChunk = { id: `chatcmpl-${Date.now()}`, object: "chat.completion.chunk", created: Math.floor(Date.now() / 1000), model: model, choices: [{ index: 0, delta: { content: deltaContent }, finish_reason: isPartial === false ? "stop" : null }] };
348
- if (!res.writableEnded) {
349
- res.write(`data: ${JSON.stringify(openAIChunk)}\n\n`);
350
- }
351
- }
352
- }
353
- } // End `for await...of` loop
354
-
355
- if (streamFailedMidway) {
356
- if (!res.writableEnded) {
357
- res.write(`data: [DONE]\n\n`); res.end();
358
- }
359
- break; // 中断外部循环,因为流在中途失败
360
- } else if (keyConfirmedWorking) {
361
- if (!res.writableEnded) {
362
- res.write(`data: [DONE]\n\n`); res.end();
363
- }
364
- break; // 中断外部循环,因为成功
365
  }
366
- // 如果内部循环因为首个事件是 key 错误而中断,外部循环会继续
367
-
368
- } catch (streamProcessingError) {
369
- console.error(`[${requestId}] Error during fal stream processing loop logic (Key: ${maskedKey}):`, streamProcessingError);
370
- lastError = streamProcessingError;
371
- if (!res.headersSent) {
372
- res.status(500).json({ object: "error", message: `Proxy Stream Processing Error: ${streamProcessingError.message}`, type:"proxy_internal_error"});
373
- } else if (!res.writableEnded) {
374
- try {
375
- res.write(`data: ${JSON.stringify({ error: { message: "Proxy Stream processing error", type: "proxy_internal_error", details: streamProcessingError.message } })}\n\n`);
376
- res.write(`data: [DONE]\n\n`); res.end();
377
- } catch (finalError) { if (!res.writableEnded) { res.end(); } }
378
  }
379
- break; // 中断外部循环
380
- }
381
 
382
- // 如果流成功或中途失败,外部循环会 break;如果因首个 key 错误而中断内部循环,则外部循环继续
383
- if (success || streamFailedMidway) {
384
- break;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
385
  }
386
-
387
-
388
- } else {
389
- // --- 非流式处理 ---
390
- console.log(`[${requestId}] Executing non-stream request (Key: ${maskedKey})...`);
391
- const result = await fal.subscribe("fal-ai/any-llm", { input: falInput, logs: true });
392
-
393
- if (result && result.error) {
394
- console.error(`[${requestId}] Fal-ai returned a business error (Key: ${maskedKey}):`, result.error);
395
- lastError = new Error(`Fal-ai error: ${JSON.stringify(result.error)}`);
396
- lastError.status = result.status || 500;
397
- lastError.type = "fal_ai_error";
398
- break; // 业务错误,中断重试
399
- }
400
-
401
- console.log(`[${requestId}] Received non-stream result (Key: ${maskedKey}).`);
402
- success = true; // 标记成功
403
- const openAIResponse = {
404
- id: `chatcmpl-${result.requestId || Date.now()}`, object: "chat.completion", created: Math.floor(Date.now() / 1000), model: model,
405
- choices: [{ index: 0, message: { role: "assistant", content: result.output || "" }, finish_reason: "stop" }],
406
- usage: { prompt_tokens: null, completion_tokens: null, total_tokens: null },
407
- system_fingerprint: null,
408
- ...(result.reasoning && { fal_reasoning: result.reasoning }),
409
- };
410
- res.json(openAIResponse);
411
- break; // 成功,中断外部循环
412
  }
413
 
414
- } catch (error) {
415
- // Catch errors from fal.config, fal.stream/subscribe setup
416
- lastError = error;
417
- const status = error?.status;
418
- const errorMessage = error?.body?.detail || error?.message || 'Unknown setup error';
419
- console.warn(`[${requestId}] Attempt ${attempt} with key ${maskedKey} failed during setup. Status: ${status || 'N/A'}, Message: ${errorMessage}`);
420
- // console.error("Setup Error details:", error); // Log full error for debug if needed
421
-
422
- if (status === 401 || status === 403 || status === 429) {
423
- console.warn(`[${requestId}] Key ${maskedKey} failed (${status}) during setup. Excluding it for this request.`);
424
- availableKeysForRequest.splice(randomIndex, 1); // 从可用列表移除
425
- console.log(`[${requestId}] ${availableKeysForRequest.length} keys remaining for this request.`);
426
- // 继续外部循环
427
- } else {
428
- console.error(`[${requestId}] Unrecoverable setup error encountered (Key: ${maskedKey}). Status: ${status || 'N/A'}. Stopping retries.`);
429
- break; // 中断外部循环
430
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
431
  }
432
- } // --- 结束重试循环 ---
433
 
434
- // 如果循环结束了还没有成功
435
- if (!success) {
436
- console.error(`[${requestId}] All attempts failed or an unrecoverable error occurred. No available keys left or max attempts reached.`);
437
  if (!res.headersSent) {
438
- const statusCode = lastError?.status || 503;
439
- const errorMessage = (lastError instanceof Error) ? lastError.message : JSON.stringify(lastError);
440
- const detailMessage = lastError?.body?.detail || errorMessage;
441
- const errorType = lastError?.type || (statusCode === 401 || statusCode === 403 || statusCode === 429 ? "key_error" : "proxy_error");
442
- console.error(`[${requestId}] Sending final error response. Status: ${statusCode}, Type: ${errorType}, Message: ${detailMessage}`);
443
- res.status(statusCode).json({
444
- object: "error",
445
- message: `All attempts failed. Last error: ${detailMessage}`,
446
- type: errorType,
447
- param: null,
448
- code: statusCode === 429 ? "rate_limit_exceeded" : (statusCode === 401 || statusCode === 403 ? "invalid_api_key" : "service_unavailable")
449
- });
450
  } else if (!res.writableEnded) {
451
- console.error(`[${requestId}] Headers potentially sent, but request failed. Ending stream.`);
452
- try {
453
- res.write(`data: [DONE]\n\n`); res.end();
454
- } catch (e) { if (!res.writableEnded) res.end(); }
455
- } else {
456
- console.error(`[${requestId}] Request failed, but response stream was already fully ended.`);
457
  }
458
  }
459
-
460
  });
461
 
462
- // 启动服务器
463
  app.listen(PORT, () => {
464
- console.log(`===================================================`);
465
- console.log(` Fal OpenAI Proxy Server (Random Key + Exclusion Strategy)`); // 更新描述
466
  console.log(` Listening on port: ${PORT}`);
467
- console.log(` Loaded ${falKeys.length} Fal AI Keys initially.`);
468
- console.log(` Using Limits: System Prompt=${SYSTEM_PROMPT_LIMIT}, Prompt=${PROMPT_LIMIT}`);
469
  console.log(` API Key Auth Enabled: ${API_KEY ? 'Yes' : 'No'}`);
470
- console.log(` Chat Completions Endpoint: POST http://localhost:${PORT}/v1/chat/completions`);
 
471
  console.log(` Models Endpoint: GET http://localhost:${PORT}/v1/models`);
472
- console.log(`===================================================`);
473
  });
474
 
475
- // 根路径响应
476
  app.get('/', (req, res) => {
477
- res.send('Fal OpenAI Proxy (Random Key + Exclusion Strategy) is running.'); // 更新描述
478
  });
 
1
  import express from 'express';
2
  import { fal } from '@fal-ai/client';
3
 
4
// --- Multi-Key Configuration ---
// The proxy rotates across several fal.ai credentials supplied in FAL_KEYS.
const rawFalKeys = process.env.FAL_KEYS; // Expect comma-separated keys: key1,key2,key3
const API_KEY = process.env.API_KEY; // Custom API key clients must present to this proxy

// Both variables are mandatory; abort startup loudly when either is missing.
if (!rawFalKeys) {
  console.error("Error: FAL_KEYS environment variable is not set (should be comma-separated).");
  process.exit(1);
}

if (!API_KEY) {
  console.error("Error: API_KEY environment variable is not set.");
  process.exit(1);
}

// Parse the raw string into per-key state records. Each record tracks
// whether the key is currently considered failed and when it failed,
// which drives the cooldown logic in getNextKey()/markKeyFailed().
let falKeys = rawFalKeys.split(',')
  .map(key => key.trim())
  .filter(key => key.length > 0)
  .map(key => ({
    key: key,
    failed: false, // Track if the key is currently considered failed
    failedTimestamp: 0 // Epoch ms when the key was marked as failed (0 = never)
  }));

if (falKeys.length === 0) {
  console.error("Error: No valid FAL_KEYS found after processing the environment variable.");
  process.exit(1);
}

// Round-robin cursor into falKeys; advanced by getNextKey().
let currentKeyIndex = 0;
// How long (ms) a failed key is skipped before it becomes eligible again.
const failedKeyCooldown = 60 * 1000; // 60 seconds

console.log(`Loaded ${falKeys.length} FAL API Key(s).`);
console.log(`Failed key cooldown period: ${failedKeyCooldown / 1000} seconds.`);

// NOTE: The fal client is (re)configured per request with the selected key,
// so there is intentionally no global fal.config(...) call at startup.
41
+
42
+ // --- Key Management Functions ---
43
+
44
/**
 * Picks the next usable FAL key via round-robin, skipping keys whose
 * failure cooldown has not yet elapsed. A key whose cooldown has expired
 * is re-enabled on the spot and handed out.
 * Mutates module state: advances `currentKeyIndex` and may reset key flags.
 * @returns {object | null} A key info object ({ key, failed, failedTimestamp }),
 *   or null when every key is failed and still cooling down.
 */
function getNextKey() {
  const poolSize = falKeys.length;
  if (poolSize === 0) return null;

  for (let skipped = 0; skipped < poolSize; skipped++) {
    const slot = currentKeyIndex % poolSize;
    const candidate = falKeys[slot];
    // Advance the rotor so the next call starts at the following key.
    currentKeyIndex = (currentKeyIndex + 1) % poolSize;

    if (!candidate.failed) {
      return candidate;
    }

    const elapsed = Date.now() - candidate.failedTimestamp;
    if (elapsed < failedKeyCooldown) {
      continue; // Still cooling down — try the next slot.
    }

    // Cooldown has passed: clear the failure mark and hand the key out.
    console.log(`Cooldown finished for key index ${slot}. Resetting failure status.`);
    candidate.failed = false;
    candidate.failedTimestamp = 0;
    return candidate;
  }

  console.warn("All FAL keys are currently marked as failed and in cooldown.");
  return null;
}
79
 
80
/**
 * Flags a key as failed and stamps the failure time, starting its cooldown.
 * A key that is already flagged is left untouched, so the original failure
 * timestamp (and thus its cooldown window) is preserved.
 * Safe to call with a null/undefined argument.
 * @param {object} keyInfo - The key info object to mark as failed.
 */
function markKeyFailed(keyInfo) {
  if (!keyInfo || keyInfo.failed) {
    return; // Nothing to do: missing key or already marked.
  }
  keyInfo.failed = true;
  keyInfo.failedTimestamp = Date.now();
  // Look the key up by value purely for a readable log line.
  const idx = falKeys.findIndex((entry) => entry.key === keyInfo.key);
  console.warn(`Marking key index ${idx} (ending ...${keyInfo.key.slice(-4)}) as failed.`);
}
92
 
93
/**
 * Heuristically decides whether an error from the fal client points at a
 * problem with the API key itself — an auth failure (401/403) or rate/quota
 * exhaustion (429) — rather than a transient or model-level error.
 * Extend the phrase lists as new fal.ai error shapes are observed.
 * @param {Error} error - The error object caught from the fal client.
 * @returns {boolean} True when the key should be rotated out, false otherwise.
 */
function isKeyRelatedError(error) {
  const status = error?.status; // The error object may carry an HTTP status
  const text = error?.message?.toLowerCase() || '';

  // Authentication / authorization failures.
  const authStatuses = [401, 403]; // Unauthorized, Forbidden
  const authPhrases = ['authentication failed', 'invalid api key', 'permission denied'];
  if (authStatuses.includes(status) || authPhrases.some((p) => text.includes(p))) {
    return true;
  }

  // Rate limiting / quota exhaustion.
  const quotaPhrases = ['rate limit exceeded', 'quota exceeded'];
  if (status === 429 || quotaPhrases.some((p) => text.includes(p))) {
    return true;
  }

  // Anything else is treated as unrelated to the key.
  return false;
}
119
 
120
+ // --- Express App Setup ---
121
  const app = express();
122
  app.use(express.json({ limit: '50mb' }));
123
  app.use(express.urlencoded({ extended: true, limit: '50mb' }));
124
 
125
  const PORT = process.env.PORT || 3000;
126
 
127
+ // API Key 鉴权中间件 (Remains the same, checks custom API_KEY)
128
  const apiKeyAuth = (req, res, next) => {
129
+ // ... (Keep existing apiKeyAuth middleware code) ...
130
  const authHeader = req.headers['authorization'];
131
 
132
  if (!authHeader) {
 
149
  next();
150
  };
151
 
 
152
  app.use(['/v1/models', '/v1/chat/completions'], apiKeyAuth);
153
 
154
+ // === 全局定义限制 === (Remains the same)
155
  const PROMPT_LIMIT = 4800;
156
  const SYSTEM_PROMPT_LIMIT = 4800;
157
  // === 限制定义结束 ===
158
 
159
+ // 定义 fal-ai/any-llm 支持的模型列表 (Remains the same)
160
+ const FAL_SUPPORTED_MODELS = [ /* ... model list ... */
161
  "anthropic/claude-3.7-sonnet",
162
  "anthropic/claude-3.5-sonnet",
163
  "anthropic/claude-3-5-haiku",
 
177
  "meta-llama/llama-4-scout"
178
  ];
179
 
180
// Helper: derive the "owner" (organization) segment from a model ID such as
// "anthropic/claude-3.5-sonnet" -> "anthropic". Falls back to "fal-ai" when
// no owner can be determined: nullish/empty input, no "/" present, or a
// degenerate ID like "/model" whose owner segment is empty (previously this
// returned the empty string).
const getOwner = (modelId) => {
  if (modelId && modelId.includes('/')) {
    const owner = modelId.split('/')[0].trim();
    if (owner.length > 0) {
      return owner;
    }
  }
  return 'fal-ai';
};
187
 
188
+ // GET /v1/models endpoint (Remains the same)
189
+ app.get('/v1/models', (req, res) => { /* ... */
190
  console.log("Received request for GET /v1/models");
191
  try {
192
  const modelsData = FAL_SUPPORTED_MODELS.map(modelId => ({
 
200
  }
201
  });
202
 
203
+ // === convertMessagesToFalPrompt 函数 (Remains the same) ===
204
+ function convertMessagesToFalPrompt(messages) { /* ... */
205
  let fixed_system_prompt_content = "";
206
  const conversation_message_blocks = [];
207
+ // console.log(`Original messages count: ${messages.length}`); // Less verbose logging
208
 
209
  // 1. 分离 System 消息,格式化 User/Assistant 消息
210
  for (const message of messages) {
 
289
  // 5. *** 组合最终的 prompt 和 system_prompt (包含分隔符逻辑) ***
290
  const system_prompt_history_content = system_prompt_history_blocks.join('').trim();
291
  const final_prompt = prompt_history_blocks.join('').trim();
292
+
293
+ // 定义分隔符
294
  const SEPARATOR = "\n\n-------下面是比较早之前的对话内容-----\n\n";
295
+
296
  let final_system_prompt = "";
297
+
298
  const hasFixedSystem = fixed_system_prompt_content.length > 0;
299
  const hasSystemHistory = system_prompt_history_content.length > 0;
300
 
 
309
  // console.log("Using only history in system prompt slot.");
310
  }
311
 
312
+ // 6. 返回结果
313
  const result = {
314
  system_prompt: final_system_prompt,
315
  prompt: final_prompt
316
  };
317
 
318
+ console.log(`Final system_prompt length: ${result.system_prompt.length}, Final prompt length: ${result.prompt.length}`);
 
319
 
320
  return result;
321
  }
322
  // === convertMessagesToFalPrompt 函数结束 ===
323
 
324
 
325
/**
 * Wraps a fal.ai API call with key-rotation failover: tries each available
 * key (selected by getNextKey) at most once, marking keys that fail with an
 * auth/quota error (per isKeyRelatedError) so they enter cooldown, and
 * aborting immediately on any non-key-related error.
 * @param {'stream' | 'subscribe'} operation - The fal operation to perform.
 * @param {string} functionId - The fal function ID (e.g., "fal-ai/any-llm").
 * @param {object} params - The parameters for the fal function call (input, logs, etc.).
 * @returns {Promise<any>} - The result from the successful fal call: the stream
 *   iterator for 'stream', or the subscription result object for 'subscribe'.
 * @throws {Error} - When all keys fail with key-related errors, when no key is
 *   available at all, or rethrows the first non-key-related error encountered.
 */
async function tryFalCallWithFailover(operation, functionId, params) {
  const maxRetries = falKeys.length; // Try each key at most once per request cycle
  let lastError = null; // Most recent key-related failure, surfaced in the final error

  for (let i = 0; i < maxRetries; i++) {
    const keyInfo = getNextKey();
    if (!keyInfo) {
      // No key is currently usable (all failed / in cooldown) — give up now.
      throw new Error(lastError ? `All FAL keys failed. Last error: ${lastError.message}` : "All FAL keys are currently unavailable (failed or in cooldown).");
    }

    const currentFalKey = keyInfo.key;
    console.log(`Attempt ${i + 1}/${maxRetries}: Using key ending in ...${currentFalKey.slice(-4)}`);

    try {
      // --- Configure fal client with the selected key for this attempt ---
      // WARNING: fal.config mutates GLOBAL client state; concurrent requests
      // could race on credentials if the library does not isolate requests.
      // Per-request credentials would be preferable if the library supported it.
      fal.config({ credentials: currentFalKey });

      if (operation === 'stream') {
        // For streams, failover only covers *initiating* the stream.
        // If the stream starts but fails later, this loop won't restart it.
        const streamResult = await fal.stream(functionId, params);
        console.log(`Successfully initiated stream with key ending in ...${currentFalKey.slice(-4)}`);
        // If successful, return the stream iterator
        return streamResult;
      } else { // 'subscribe' (non-stream)
        const result = await fal.subscribe(functionId, params);
        console.log(`Successfully completed subscribe request with key ending in ...${currentFalKey.slice(-4)}`);

        // Application-level errors *returned* inside the result object are
        // usually model errors, not key errors: log them but let the caller
        // decide, and do NOT mark the key as failed for them.
        if (result && result.error) {
          console.warn(`Fal-ai returned an application error (non-stream) with key ...${currentFalKey.slice(-4)}: ${JSON.stringify(result.error)}`);
        }
        // Return the result object (which might contain an error)
        return result;
      }
    } catch (error) {
      console.error(`Error using key ending in ...${currentFalKey.slice(-4)}:`, error.message || error);
      lastError = error; // Remember for the final "all keys failed" message

      if (isKeyRelatedError(error)) {
        // Auth/quota failure: sideline this key and let the loop rotate on.
        markKeyFailed(keyInfo);
        console.log(`Key marked as failed. Trying next key if available...`);
        // Continue the loop to try the next key
      } else {
        // Non-key-related error (e.g. network issue, fal internal error):
        // retrying with a different key won't help, so propagate immediately.
        console.error("Non-key related error occurred. Aborting retries.");
        throw error; // Re-throw the error
      }
    }
  }

  // If the loop finishes, all keys were tried and failed with key-related errors.
  console.error("All FAL keys failed after attempting each one.");
  throw new Error(lastError ? `All FAL keys failed. Last error: ${lastError.message}` : "All FAL API keys failed.");
}
395
+
396
+
397
+ // POST /v1/chat/completions endpoint (Modified to use tryFalCallWithFailover)
398
  app.post('/v1/chat/completions', async (req, res) => {
399
  const { model, messages, stream = false, reasoning = false, ...restOpenAIParams } = req.body;
 
400
 
401
+ console.log(`Received chat completion request for model: ${model}, stream: ${stream}`);
402
 
403
  if (!FAL_SUPPORTED_MODELS.includes(model)) {
404
+ console.warn(`Warning: Requested model '${model}' is not in the explicitly supported list.`);
405
+ // Allow proceeding, maybe fal-ai/any-llm supports it dynamically
406
  }
407
  if (!model || !messages || !Array.isArray(messages) || messages.length === 0) {
408
+ console.error("Invalid request parameters:", { model, messages: Array.isArray(messages) ? messages.length : typeof messages });
409
  return res.status(400).json({ error: 'Missing or invalid parameters: model and messages array are required.' });
410
  }
411
 
412
+ try {
413
+ const { prompt, system_prompt } = convertMessagesToFalPrompt(messages);
414
+
415
+ const falInput = {
416
+ model: model,
417
+ prompt: prompt,
418
+ ...(system_prompt && { system_prompt: system_prompt }),
419
+ reasoning: !!reasoning, // Ensure boolean
420
+ // Spread any other OpenAI compatible params if needed, though fal might ignore them
421
+ // ...restOpenAIParams // Be careful with spreading unknown params
422
+ };
423
+
424
+ console.log("Prepared Fal Input (lengths):", { system_prompt: system_prompt?.length, prompt: prompt?.length });
425
+ // Optional: Log full input for debugging (can be verbose)
426
+ // console.log("Full Fal Input:", JSON.stringify(falInput, null, 2));
427
+
428
+ // --- Use the failover wrapper for the Fal API call ---
429
+ if (stream) {
430
+ res.setHeader('Content-Type', 'text/event-stream; charset=utf-8');
431
+ res.setHeader('Cache-Control', 'no-cache');
432
+ res.setHeader('Connection', 'keep-alive');
433
+ res.setHeader('Access-Control-Allow-Origin', '*'); // Keep CORS header if needed
434
+ res.flushHeaders();
435
+
436
+ let previousOutput = '';
437
+ let falStream;
438
+
439
+ try {
440
+ // --- Initiate stream using failover ---
441
+ falStream = await tryFalCallWithFailover('stream', "fal-ai/any-llm", { input: falInput });
442
+
443
+ // --- Process the stream (existing logic) ---
444
+ for await (const event of falStream) {
445
+ const currentOutput = (event && typeof event.output === 'string') ? event.output : '';
446
+ const isPartial = (event && typeof event.partial === 'boolean') ? event.partial : true;
447
+ const errorInfo = (event && event.error) ? event.error : null;
448
+
449
+ if (errorInfo) {
450
+ console.error("Error received *during* fal stream:", errorInfo);
451
+ // Note: This error happened *after* successful stream initiation.
452
+ // We send an error chunk, but don't mark the key failed here as the connection worked initially.
453
+ const errorChunk = { id: `chatcmpl-${Date.now()}-error`, object: "chat.completion.chunk", created: Math.floor(Date.now() / 1000), model: model, choices: [{ index: 0, delta: {}, finish_reason: "error", message: { role: 'assistant', content: `Fal Stream Error: ${JSON.stringify(errorInfo)}` } }] };
454
+ res.write(`data: ${JSON.stringify(errorChunk)}\n\n`);
455
+ break; // Stop processing this stream
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
456
  }
457
+
458
+ let deltaContent = '';
459
+ if (currentOutput.startsWith(previousOutput)) {
460
+ deltaContent = currentOutput.substring(previousOutput.length);
461
+ } else if (currentOutput.length > 0) {
462
+ console.warn("Fal stream output mismatch detected. Sending full current output as delta.", { previousLength: previousOutput.length, currentLength: currentOutput.length });
463
+ deltaContent = currentOutput;
464
+ previousOutput = ''; // Reset previous since we sent full
 
 
 
 
465
  }
466
+ previousOutput = currentOutput; // Update previousOutput for next iteration
 
467
 
468
+ if (deltaContent || !isPartial) { // Send delta or final chunk
469
+ const openAIChunk = { id: `chatcmpl-${Date.now()}`, object: "chat.completion.chunk", created: Math.floor(Date.now() / 1000), model: model, choices: [{ index: 0, delta: { content: deltaContent }, finish_reason: isPartial === false ? "stop" : null }] };
470
+ res.write(`data: ${JSON.stringify(openAIChunk)}\n\n`);
471
+ }
472
+ }
473
+ res.write(`data: [DONE]\n\n`);
474
+ res.end();
475
+ console.log("Stream finished successfully.");
476
+
477
+ } catch (streamError) {
478
+ // This catch handles errors from tryFalCallWithFailover OR the stream processing loop
479
+ console.error('Error during stream processing:', streamError);
480
+ // Don't try to write to response if headers already sent and stream failed mid-way uncleanly
481
+ if (!res.writableEnded) {
482
+ try {
483
+ // Send a final error chunk if possible
484
+ const errorDetails = (streamError instanceof Error) ? streamError.message : JSON.stringify(streamError);
485
+ const finalErrorChunk = { error: { message: "Stream failed", type: "proxy_error", details: errorDetails } };
486
+ res.write(`data: ${JSON.stringify(finalErrorChunk)}\n\n`);
487
+ res.write(`data: [DONE]\n\n`);
488
+ res.end();
489
+ } catch (finalError) {
490
+ console.error('Error sending final stream error message to client:', finalError);
491
+ if (!res.writableEnded) { res.end(); } // Ensure response ends
492
+ }
493
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
494
  }
495
 
496
+ } else { // Non-stream
497
+ console.log("Executing non-stream request with failover...");
498
+ // --- Call subscribe using failover ---
499
+ const result = await tryFalCallWithFailover('subscribe', "fal-ai/any-llm", { input: falInput, logs: true });
500
+
501
+ console.log("Received non-stream result from fal-ai via failover wrapper.");
502
+ // Optional: Log full result for debugging
503
+ // console.log("Full non-stream result:", JSON.stringify(result, null, 2));
504
+
505
+ // Check for application-level errors *within* the successful response
506
+ if (result && result.error) {
507
+ console.error("Fal-ai returned an application error in non-stream mode (after successful API call):", result.error);
508
+ // Return a 500 status but format it like OpenAI error if possible
509
+ return res.status(500).json({
510
+ object: "error",
511
+ message: `Fal-ai application error: ${JSON.stringify(result.error)}`,
512
+ type: "fal_ai_error",
513
+ param: null,
514
+ code: result.error.code || null // Include code if available
515
+ });
516
+ }
517
+
518
+ // --- Format successful non-stream response (existing logic) ---
519
+ const openAIResponse = {
520
+ id: `chatcmpl-${result?.requestId || Date.now()}`, // Use requestId if available
521
+ object: "chat.completion",
522
+ created: Math.floor(Date.now() / 1000),
523
+ model: model, // Use the requested model ID
524
+ choices: [{
525
+ index: 0,
526
+ message: {
527
+ role: "assistant",
528
+ content: result?.output || "" // Safely access output
529
+ },
530
+ finish_reason: "stop" // Assume stop for non-stream
531
+ }],
532
+ usage: { // Fal doesn't provide token usage
533
+ prompt_tokens: null,
534
+ completion_tokens: null,
535
+ total_tokens: null
536
+ },
537
+ system_fingerprint: null, // Not provided by fal
538
+ ...(result?.reasoning && { fal_reasoning: result.reasoning }), // Include reasoning if present
539
+ };
540
+ res.json(openAIResponse);
541
+ console.log("Returned non-stream response successfully.");
542
  }
 
543
 
544
+ } catch (error) {
545
+ // This catches errors from setup, convertMessagesToFalPrompt, or tryFalCallWithFailover (if all keys failed or non-key error occurred)
546
+ console.error('Unhandled error in /v1/chat/completions:', error);
547
  if (!res.headersSent) {
548
+ const errorMessage = (error instanceof Error) ? error.message : JSON.stringify(error);
549
+ // Provide a more informative error message
550
+ const errorType = error.message?.includes("All FAL keys failed") ? "api_key_error" : "proxy_internal_error";
551
+ res.status(500).json({
552
+ error: {
553
+ message: `Internal Server Error in Proxy: ${errorMessage}`,
554
+ type: errorType,
555
+ details: error.stack // Optional: include stack in dev/debug mode
556
+ }
557
+ });
 
 
558
  } else if (!res.writableEnded) {
559
+ console.error("Headers already sent, attempting to end response after error.");
560
+ res.end(); // Try to end the response if possible
 
 
 
 
561
  }
562
  }
 
563
  });
564
 
565
// --- Server Start ---
// Boot the HTTP server and print a startup banner summarizing the proxy
// configuration: loaded key count, auth status, prompt limits, endpoints.
app.listen(PORT, () => {
    const banner = [
        `===========================================================`,
        ` Fal OpenAI Proxy Server (Multi-Key Failover)`,
        ` Listening on port: ${PORT}`,
        ` Loaded ${falKeys.length} FAL API Key(s).`,
        ` API Key Auth Enabled: ${API_KEY ? 'Yes' : 'No'}`,
        ` Limits: System Prompt=${SYSTEM_PROMPT_LIMIT}, Prompt=${PROMPT_LIMIT}`,
        ` Chat Completions: POST http://localhost:${PORT}/v1/chat/completions`,
        ` Models Endpoint: GET http://localhost:${PORT}/v1/models`,
        `===========================================================`,
    ];
    // Emit each banner line exactly as before (same text, same order).
    banner.forEach((line) => console.log(line));
});
577
 
578
// Root path: simple liveness/info message for health checks and browsers.
app.get('/', (req, res) => {
    const statusMessage = 'Fal OpenAI Proxy (Multi-Key Failover) is running.';
    res.send(statusMessage);
});