Calmlo committed
Commit e6d9a41 · verified · 1 Parent(s): 713f2f6

Update server.js

Files changed (1): server.js (+166, -283)
server.js CHANGED
@@ -1,5 +1,6 @@
  import express from 'express';
- import { fal } from '@fal-ai/client';
+ // **CRITICAL: Import the 'fal' object directly for configuration**
+ import { fal } from '@fal-ai/client'; // Make sure 'fal' is the object you configure
 
  // --- Key Management Setup ---
  // Read comma-separated keys from the SINGLE environment variable FAL_KEY
@@ -29,63 +30,91 @@ if (!API_KEY) {
  }
 
  let currentKeyIndex = 0;
- const invalidKeys = new Set(); // Keep track of keys that failed
+ // **NEW: Keep track of keys that failed persistently during runtime**
+ const invalidKeys = new Set();
 
  console.log(`Loaded ${falKeys.length} Fal AI Key(s) from the FAL_KEY environment variable.`);
 
- // Function to get the next valid key in a round-robin fashion
+ // **MODIFIED: Function to get the next *valid* key**
  function getNextValidKey() {
+   // Check if all keys have been marked as invalid
    if (invalidKeys.size >= falKeys.length) {
      console.error("All Fal AI keys are marked as invalid.");
      return null; // No valid keys left
    }
 
    const initialIndex = currentKeyIndex;
-   let attempts = 0;
+   let attempts = 0; // Prevent infinite loops in edge cases
    while (attempts < falKeys.length) {
      const keyIndex = currentKeyIndex % falKeys.length;
      const key = falKeys[keyIndex];
 
-     // Move to the next index for the *next* call
+     // Move to the next index for the *next* call, regardless of validity
      currentKeyIndex = (keyIndex + 1) % falKeys.length;
 
+     // Check if the current key is NOT in the invalid set
      if (!invalidKeys.has(key)) {
        // Found a valid key
        console.log(`Using Fal Key index: ${keyIndex} (from FAL_KEY list)`);
-       return { key, index: keyIndex };
+       return { key, index: keyIndex }; // Return the key and its original index
+     } else {
+       console.log(`Skipping invalid Fal Key index: ${keyIndex}`);
      }
 
      attempts++;
-     // Continue loop to check the next key
+     // If the loop started at an invalid key and wrapped around,
+     // check if we've checked all keys since the start
+     if (currentKeyIndex === initialIndex && attempts > 0) {
+       // This check might be redundant due to the invalidKeys.size check at the top,
+       // but serves as an extra safeguard.
+       console.warn("Looped through all keys, potentially all are invalid.");
+       break;
+     }
    }
 
-   // Should not be reached if invalidKeys.size check is correct, but as a safeguard
-   console.error("Could not find a valid Fal AI key after checking all.");
+   // If we exit the loop, it means no valid key was found
+   console.error("Could not find a valid Fal AI key after checking all potentially available keys.");
    return null;
  }
 
- // Function to check if an error is likely related to a bad key
+ // **NEW/MODIFIED: Function to check if an error is likely related to a bad key**
  // NOTE: This is a heuristic. You might need to adjust based on actual errors from Fal AI.
  function isKeyRelatedError(error) {
-   const message = error?.message?.toLowerCase() || '';
-   const status = error?.status; // Check if the error object has a status code
+   // Check if error itself is null/undefined
+   if (!error) return false;
+
+   const message = error.message?.toLowerCase() || '';
+   // Check if the error object has a 'status' property (common in HTTP errors)
+   const status = error.status || error.statusCode; // Check common status properties
 
    // Check for specific HTTP status codes indicative of auth/permission issues
-   if (status === 401 || status === 403) {
+   // 401: Unauthorized, 403: Forbidden, 429: Too Many Requests (often quota related)
+   if (status === 401 || status === 403 || status === 429) {
      console.warn(`Detected potential key-related error (HTTP Status: ${status}).`);
      return true;
    }
-   // Check for common error message patterns
+
+   // Check for common error message patterns (case-insensitive)
+   // Add more specific Fal AI error messages as you encounter them
    if (message.includes('invalid api key') ||
        message.includes('authentication failed') ||
        message.includes('permission denied') ||
-       message.includes('quota exceeded') || // Include quota errors as key-related for rotation
+       message.includes('quota exceeded') || // Treat quota errors as key-related for rotation
        message.includes('forbidden') ||
-       message.includes('unauthorized')) { // Add 'unauthorized'
-     console.warn(`Detected potential key-related error (message: ${message})`);
+       message.includes('unauthorized') ||
+       message.includes('rate limit') || // Often linked to key limits
+       message.includes('credentials')) // Generic credential errors
+   {
+     console.warn(`Detected potential key-related error (message: "${message}")`);
      return true;
    }
+
    // Add more specific checks based on observed Fal AI errors if needed
+   // Example: Check for specific error codes if Fal AI provides them
+   // if (error.code === 'SOME_FAL_AUTH_ERROR_CODE') {
+   //   return true;
+   // }
+
    return false;
  }
  // --- End Key Management Setup ---
@@ -128,247 +157,119 @@ const SYSTEM_PROMPT_LIMIT = 4800;
  // === End of limit definitions ===
 
  // List of models supported by fal-ai/any-llm (unchanged)
- const FAL_SUPPORTED_MODELS = [
-   "anthropic/claude-3.7-sonnet",
-   "anthropic/claude-3.5-sonnet",
-   "anthropic/claude-3-5-haiku",
-   "anthropic/claude-3-haiku",
-   "google/gemini-pro-1.5",
-   "google/gemini-flash-1.5",
-   "google/gemini-flash-1.5-8b",
-   "google/gemini-2.0-flash-001",
-   "meta-llama/llama-3.2-1b-instruct",
-   "meta-llama/llama-3.2-3b-instruct",
-   "meta-llama/llama-3.1-8b-instruct",
-   "meta-llama/llama-3.1-70b-instruct",
-   "openai/gpt-4o-mini",
-   "openai/gpt-4o",
-   "deepseek/deepseek-r1",
-   "meta-llama/llama-4-maverick",
-   "meta-llama/llama-4-scout"
- ];
-
+ const FAL_SUPPORTED_MODELS = [ /* ... model list ... */ ];
  // Helper function getOwner (unchanged)
- const getOwner = (modelId) => {
-   if (modelId && modelId.includes('/')) {
-     return modelId.split('/')[0];
-   }
-   return 'fal-ai';
- }
-
+ const getOwner = (modelId) => { /* ... */ };
  // GET /v1/models endpoint (unchanged)
- app.get('/v1/models', (req, res) => {
-   console.log("Received request for GET /v1/models");
-   try {
-     const modelsData = FAL_SUPPORTED_MODELS.map(modelId => ({
-       id: modelId, object: "model", created: Math.floor(Date.now() / 1000), owned_by: getOwner(modelId) // Use current time for created
-     }));
-     res.json({ object: "list", data: modelsData });
-     console.log("Successfully returned model list.");
-   } catch (error) {
-     console.error("Error processing GET /v1/models:", error);
-     res.status(500).json({ error: "Failed to retrieve model list." });
-   }
- });
-
+ app.get('/v1/models', (req, res) => { /* ... */ });
  // convertMessagesToFalPrompt function (unchanged)
- function convertMessagesToFalPrompt(messages) {
-   // ... (keep existing conversion logic)
-   let fixed_system_prompt_content = "";
-   const conversation_message_blocks = [];
-   // console.log(`Original messages count: ${messages.length}`); // Less verbose logging
-
-   // 1. Separate system messages and format user/assistant messages
-   for (const message of messages) {
-     let content = (message.content === null || message.content === undefined) ? "" : String(message.content);
-     switch (message.role) {
-       case 'system':
-         fixed_system_prompt_content += `System: ${content}\n\n`;
-         break;
-       case 'user':
-         conversation_message_blocks.push(`Human: ${content}\n\n`);
-         break;
-       case 'assistant':
-         conversation_message_blocks.push(`Assistant: ${content}\n\n`);
-         break;
-       default:
-         console.warn(`Unsupported role: ${message.role}`);
-         continue;
-     }
-   }
-
-   // 2. Truncate the combined system messages if they exceed the limit
-   if (fixed_system_prompt_content.length > SYSTEM_PROMPT_LIMIT) {
-     const originalLength = fixed_system_prompt_content.length;
-     fixed_system_prompt_content = fixed_system_prompt_content.substring(0, SYSTEM_PROMPT_LIMIT);
-     console.warn(`Combined system messages truncated from ${originalLength} to ${SYSTEM_PROMPT_LIMIT}`);
-   }
-   fixed_system_prompt_content = fixed_system_prompt_content.trim();
-
-   // 3. Calculate the space left in system_prompt for conversation history
-   let space_occupied_by_fixed_system = 0;
-   if (fixed_system_prompt_content.length > 0) {
-     space_occupied_by_fixed_system = fixed_system_prompt_content.length + 4; // reserve room for the \n\n...\n\n separator
-   }
-   const remaining_system_limit = Math.max(0, SYSTEM_PROMPT_LIMIT - space_occupied_by_fixed_system);
-   // console.log(`Trimmed fixed system prompt length: ${fixed_system_prompt_content.length}. Approx remaining system history limit: ${remaining_system_limit}`);
-
-   // 4. Fill user/assistant history backwards, most recent first
-   const prompt_history_blocks = [];
-   const system_prompt_history_blocks = [];
-   let current_prompt_length = 0;
-   let current_system_history_length = 0;
-   let promptFull = false;
-   let systemHistoryFull = (remaining_system_limit <= 0);
-
-   // console.log(`Processing ${conversation_message_blocks.length} user/assistant messages for recency filling.`);
-   for (let i = conversation_message_blocks.length - 1; i >= 0; i--) {
-     const message_block = conversation_message_blocks[i];
-     const block_length = message_block.length;
-
-     if (promptFull && systemHistoryFull) {
-       // console.log(`Both prompt and system history slots full. Omitting older messages from index ${i}.`);
-       break;
-     }
-
-     if (!promptFull) {
-       if (current_prompt_length + block_length <= PROMPT_LIMIT) {
-         prompt_history_blocks.unshift(message_block);
-         current_prompt_length += block_length;
-         continue;
-       } else {
-         promptFull = true;
-         // console.log(`Prompt limit (${PROMPT_LIMIT}) reached. Trying system history slot.`);
-       }
-     }
-
-     if (!systemHistoryFull) {
-       if (current_system_history_length + block_length <= remaining_system_limit) {
-         system_prompt_history_blocks.unshift(message_block);
-         current_system_history_length += block_length;
-         continue;
-       } else {
-         systemHistoryFull = true;
-         // console.log(`System history limit (${remaining_system_limit}) reached.`);
-       }
-     }
-   }
-
-   // 5. Assemble the final prompt and system_prompt
-   const system_prompt_history_content = system_prompt_history_blocks.join('').trim();
-   const final_prompt = prompt_history_blocks.join('').trim();
-   const SEPARATOR = "\n\n-------下面是比较早之前的对话内容-----\n\n";
-   let final_system_prompt = "";
-   const hasFixedSystem = fixed_system_prompt_content.length > 0;
-   const hasSystemHistory = system_prompt_history_content.length > 0;
-
-   if (hasFixedSystem && hasSystemHistory) {
-     final_system_prompt = fixed_system_prompt_content + SEPARATOR + system_prompt_history_content;
-     // console.log("Combining fixed system prompt and history with separator.");
-   } else if (hasFixedSystem) {
-     final_system_prompt = fixed_system_prompt_content;
-     // console.log("Using only fixed system prompt.");
-   } else if (hasSystemHistory) {
-     final_system_prompt = system_prompt_history_content;
-     // console.log("Using only history in system prompt slot.");
-   }
-
-   const result = {
-     system_prompt: final_system_prompt,
-     prompt: final_prompt
-   };
-
-   // console.log(`Final system_prompt length (Sys+Separator+Hist): ${result.system_prompt.length}`);
-   // console.log(`Final prompt length (Hist): ${result.prompt.length}`);
-
-   return result;
- }
+ function convertMessagesToFalPrompt(messages) { /* ... */ }
  // === End of convertMessagesToFalPrompt ===
 
 
- // --- Helper function to make Fal AI request with retries ---
+ // --- NEW: Helper function to make Fal AI request with retries ---
  async function makeFalRequestWithRetry(falInput, stream = false) {
    let attempts = 0;
-   const maxAttempts = falKeys.length; // Try each key at most once per request
-   const attemptedKeysInThisRequest = new Set(); // Track keys tried for *this* specific request
+   // Max attempts should be the total number of keys initially available
+   const maxAttempts = falKeys.length;
+   // Keep track of keys tried *within this specific request attempt*
+   // This prevents infinite loops if getNextValidKey had issues,
+   // and ensures we try each *available* key at most once per request.
+   const attemptedKeysInThisRequest = new Set();
 
    while (attempts < maxAttempts) {
-     const keyInfo = getNextValidKey();
+     const keyInfo = getNextValidKey(); // Get the next *valid* key info { key, index }
 
      if (!keyInfo) {
-       // This happens if all keys are currently in the invalidKeys set
+       // This should only happen if all keys are in the invalidKeys set
+       console.error("makeFalRequestWithRetry: No valid Fal AI keys remaining.");
        throw new Error("No valid Fal AI keys available (all marked as invalid).");
     }
 
-     // Avoid retrying the *exact same key* within the *same request attempt cycle*
-     // This guards against potential infinite loops if getNextValidKey had issues
+     // Prevent retrying the exact same key if getNextValidKey logic had an issue
+     // or if a key wasn't marked invalid correctly on a previous attempt within this request.
     if (attemptedKeysInThisRequest.has(keyInfo.key)) {
-       console.warn(`Key at index ${keyInfo.index} already attempted for this request cycle. Skipping.`);
-       // Don't increment attempts here, as we didn't actually *use* the key.
-       // Let the loop continue to find the next *different* valid key.
-       // If all keys are invalid, the check at the start of the loop handles it.
+       console.warn(`Key at index ${keyInfo.index} was already attempted for this request. Skipping to find next.`);
+       // Don't increment 'attempts' here as we didn't use the key. Let the loop find the next.
+       // If all keys end up being skipped, the `!keyInfo` check or `attempts < maxAttempts` will eventually handle it.
       continue;
     }
     attemptedKeysInThisRequest.add(keyInfo.key);
-     attempts++; // Count this as a distinct attempt with a key
+     attempts++; // Count this as a distinct attempt with a unique key for this request
 
     try {
       console.log(`Attempt ${attempts}/${maxAttempts}: Trying Fal Key index ${keyInfo.index}...`);
 
-       // *** CRITICAL: Reconfigure fal client with the selected key ***
-       console.warn("Concurrency Warning: Reconfiguring global fal client. Ensure sufficient instance isolation if under high load.");
-       fal.config({ credentials: keyInfo.key });
+       // *** CRITICAL: Reconfigure the global fal client with the selected key for this attempt ***
+       // Warning: This reconfigures the GLOBAL client. If you have many concurrent requests,
+       // this could lead to race conditions. Consider instance isolation or a pool manager
+       // for high-concurrency scenarios if the library doesn't support per-request credentials easily.
+       console.warn(`Configuring GLOBAL fal client with key index ${keyInfo.index}. Ensure this is safe for your concurrency model.`);
+       fal.config({ credentials: keyInfo.key }); // Use the specific key for this attempt
 
       if (stream) {
-         // Return the stream directly for the caller to handle
+         // Use the configured global 'fal' object for the stream request
         const falStream = await fal.stream("fal-ai/any-llm", { input: falInput });
         console.log(`Successfully initiated stream with key index ${keyInfo.index}.`);
-         return falStream; // Success, let the caller handle iteration
+         // Success! Return the stream iterator directly
+         return falStream;
       } else {
-         // For non-stream, wait for the result here
+         // Use the configured global 'fal' object for the non-stream request
         console.log(`Executing non-stream request with key index ${keyInfo.index}...`);
+         // Assuming fal.subscribe or similar method for non-streaming
+         // Adapt this line if your non-stream method is different
         const result = await fal.subscribe("fal-ai/any-llm", { input: falInput, logs: true });
         console.log(`Successfully received non-stream result with key index ${keyInfo.index}.`);
 
-         // Check for errors *within* the successful response structure
-         if (result && result.error) {
-           console.error(`Fal-ai returned an error in non-stream result (Key Index ${keyInfo.index}):`, result.error);
-           // Treat this like a general Fal error, not necessarily a key error unless message indicates it
-           // Convert it to a standard Error object to be caught below
-           throw new Error(`Fal-ai error in result: ${JSON.stringify(result.error)}`);
+         // Check for errors *within* the successful response structure (if applicable)
+         if (result && result.error) {
+           console.error(`Fal-ai returned an error in the result payload (Key Index ${keyInfo.index}):`, result.error);
+           // Decide if this specific payload error should also invalidate the key
+           if (isKeyRelatedError(result.error)) { // Reuse the checker
+             console.warn(`Marking Fal Key index ${keyInfo.index} as invalid due to error in response payload.`);
+             invalidKeys.add(keyInfo.key);
+             // Continue the loop to try the next key
+             continue; // Go to the next iteration of the while loop
+           } else {
+             // Throw an error that will be caught by the outer handler, not retried
+             throw new Error(`Fal-ai error in result payload: ${JSON.stringify(result.error)}`);
+           }
         }
-         return result; // Success
+         // Success! Return the result
+         return result;
       }
     } catch (error) {
       console.error(`Error using Fal Key index ${keyInfo.index}:`, error.message || error);
 
+       // Check if the caught error indicates the key is invalid
       if (isKeyRelatedError(error)) {
-         console.warn(`Marking Fal Key index ${keyInfo.index} as invalid due to error.`);
+         console.warn(`Marking Fal Key index ${keyInfo.index} as invalid due to caught error.`);
+         // **ACTION: Add the failed key to the set of invalid keys**
         invalidKeys.add(keyInfo.key);
-         // Continue to the next iteration to try another key
+         // Continue to the next iteration of the while loop to try another key
       } else {
-         // Not identified as a key-related error (e.g., network issue, bad input, internal Fal error)
-         // Fail the request immediately, don't retry with other keys for this type of error.
-         console.error("Error does not appear to be key-related. Failing request without further retries.");
-         throw error; // Re-throw the original error to be caught by the main handler
+         // Error is not key-related (e.g., network issue, bad input, internal Fal error)
+         // Don't retry with other keys for this type of error. Fail the request immediately.
+         console.error("Error does not appear to be key-related. Failing request without further key retries.");
+         throw error; // Re-throw the original error to be caught by the main endpoint handler
       }
     }
-   }
+   } // End while loop
 
-   // If the loop finishes, it means all keys were tried and marked invalid *within this request cycle*
+   // If the loop finishes, it means all available keys were tried and failed with key-related errors
    throw new Error(`Request failed after trying ${attempts} unique Fal key(s). All failed with key-related errors or were already marked invalid.`);
  }
 
 
- // POST /v1/chat/completions endpoint (Modified to use retry logic)
+ // POST /v1/chat/completions endpoint (MODIFIED to use retry logic)
  app.post('/v1/chat/completions', async (req, res) => {
-   const { model, messages, stream = false, reasoning = false, ...restOpenAIParams } = req.body;
+   const { model, messages, stream = false, reasoning = false, /* ...restOpenAIParams */ } = req.body;
 
-   // Basic logging for request entry
    console.log(`--> POST /v1/chat/completions | Model: ${model} | Stream: ${stream}`);
 
+   // Basic Validation (unchanged)
   if (!FAL_SUPPORTED_MODELS.includes(model)) {
-     console.warn(`Warning: Requested model '${model}' is not in the explicitly supported list. Proxy will still attempt.`);
+     console.warn(`Warning: Requested model '${model}' is not in the explicitly supported list. Proxy will still attempt.`);
   }
   if (!model || !messages || !Array.isArray(messages) || messages.length === 0) {
     console.error("Invalid request: Missing 'model' or 'messages' array.");
@@ -376,111 +277,105 @@ app.post('/v1/chat/completions', async (req, res) => {
   }
 
   try {
-     // --- Prepare Input ---
+     // --- Prepare Input (unchanged) ---
     const { prompt, system_prompt } = convertMessagesToFalPrompt(messages);
     const falInput = {
       model: model,
       prompt: prompt,
       ...(system_prompt && { system_prompt: system_prompt }),
-       reasoning: !!reasoning, // Ensure boolean
+       reasoning: !!reasoning,
     };
-     // console.log("Fal Input:", JSON.stringify(falInput, null, 2)); // Verbose logging
     console.log("Attempting Fal request with key rotation/retry...");
 
-     // --- Handle Stream vs Non-Stream ---
+     // --- Handle Stream vs Non-Stream using the retry helper ---
     if (stream) {
       res.setHeader('Content-Type', 'text/event-stream; charset=utf-8');
       res.setHeader('Cache-Control', 'no-cache');
       res.setHeader('Connection', 'keep-alive');
-       res.setHeader('Access-Control-Allow-Origin', '*'); // Consider restricting in production
+       res.setHeader('Access-Control-Allow-Origin', '*');
       res.flushHeaders();
 
       let previousOutput = '';
-       let falStream;
+       let falStream; // Declare falStream here
 
       try {
-         // Initiate stream using the retry logic
-         falStream = await makeFalRequestWithRetry(falInput, true);
+         // **MODIFIED: Initiate stream using the retry logic helper**
+         falStream = await makeFalRequestWithRetry(falInput, true);
 
-         // Process the stream events
+         // Process the stream events (logic mostly unchanged)
         for await (const event of falStream) {
           const currentOutput = (event && typeof event.output === 'string') ? event.output : '';
           const isPartial = (event && typeof event.partial === 'boolean') ? event.partial : true;
           const errorInfo = (event && event.error) ? event.error : null;
 
-           if (errorInfo) {
-             // Log error from within the stream, but continue processing if possible
+           // Handle errors *within* the stream payload
+           if (errorInfo) {
             console.error("Error received *within* fal stream event:", errorInfo);
-             // Send an error chunk to the client (optional, depends on desired behavior)
-             const errorChunk = { id: `chatcmpl-${Date.now()}-error`, object: "chat.completion.chunk", created: Math.floor(Date.now() / 1000), model: model, choices: [{ index: 0, delta: {}, finish_reason: "error", message: { role: 'assistant', content: `Fal Stream Event Error: ${JSON.stringify(errorInfo)}` } }] };
-             // Safety check before writing
+             // Optionally send an error chunk (check if needed)
+             const errorChunk = { /* ... error chunk details ... */ };
             if (!res.writableEnded) {
               res.write(`data: ${JSON.stringify(errorChunk)}\n\n`);
-             } else {
-               console.warn("Stream already ended when trying to write stream event error.");
-             }
-             // Decide whether to break or continue based on error severity if needed
+             } else { console.warn("Stream ended before writing event error."); }
+             // Decide if this stream error should also invalidate the key
+             // Note: The key might have already been marked invalid if the error happened during initial connection
+             // You might need more context to decide if an *in-stream* error means the key is bad.
+             // For now, we just log it. If it causes the stream to terminate prematurely,
+             // the outer catch might handle it, but key invalidation might not occur unless the
+             // error object passed to the catch block triggers isKeyRelatedError.
           }
 
-           // Calculate delta (same logic as before)
+           // Calculate delta (logic unchanged)
           let deltaContent = '';
          if (currentOutput.startsWith(previousOutput)) {
-             deltaContent = currentOutput.substring(previousOutput.length);
+             deltaContent = currentOutput.substring(previousOutput.length);
          } else if (currentOutput.length > 0) {
-             // console.warn("Fal stream output mismatch. Sending full current output as delta."); // Less verbose
             deltaContent = currentOutput;
-             previousOutput = ''; // Reset previous output on mismatch
+             previousOutput = '';
          }
          previousOutput = currentOutput;
 
-           // Send OpenAI compatible chunk
-           if (deltaContent || !isPartial) { // Send even if delta is empty when finishing
+           // Send OpenAI compatible chunk (logic unchanged)
+           if (deltaContent || !isPartial) {
            const openAIChunk = {
-               id: `chatcmpl-${Date.now()}`, // Consider more unique ID if needed
+               id: `chatcmpl-${Date.now()}`,
              object: "chat.completion.chunk",
              created: Math.floor(Date.now() / 1000),
-               model: model, // Echo back the requested model
-               choices: [{
-                 index: 0,
-                 delta: { content: deltaContent },
-                 finish_reason: isPartial === false ? "stop" : null
-               }]
+               model: model,
+               choices: [{ index: 0, delta: { content: deltaContent }, finish_reason: isPartial === false ? "stop" : null }]
            };
-             // Safety check before writing
           if (!res.writableEnded) {
             res.write(`data: ${JSON.stringify(openAIChunk)}\n\n`);
-             } else {
-               console.warn("Stream already ended when trying to write data chunk.");
-             }
+             } else { console.warn("Stream ended before writing data chunk."); }
          }
-         } // End for-await loop
-
-         // Send the final [DONE] marker
-         if (!res.writableEnded) {
-           res.write(`data: [DONE]\n\n`);
-           res.end();
-           console.log("<-- Stream finished successfully.");
-         } else {
-           console.log("<-- Stream finished, but connection was already ended.");
-         }
+         } // End for-await loop
+
+         // Send the final [DONE] marker (logic unchanged)
+         if (!res.writableEnded) {
+           res.write(`data: [DONE]\n\n`);
+           res.end();
+           console.log("<-- Stream finished successfully.");
+         } else {
+           console.log("<-- Stream finished, but connection was already ended.");
+         }
 
       } catch (streamError) {
-         // Catches errors from makeFalRequestWithRetry OR the stream iteration itself
+         // **MODIFIED: Catches errors from makeFalRequestWithRetry OR stream iteration**
+         // This error could be a non-key-related error, or the "all keys failed" error.
        console.error('Error during stream request processing:', streamError.message || streamError);
        try {
          if (!res.headersSent) {
            // Error likely occurred in makeFalRequestWithRetry before stream started
-             res.status(502).json({ // 502 Bad Gateway might be appropriate
+             const errorMessage = (streamError instanceof Error) ? streamError.message : JSON.stringify(streamError);
+             res.status(502).json({ // 502 Bad Gateway is appropriate for upstream failure
              error: 'Failed to initiate Fal stream',
-               details: streamError.message || 'Underlying Fal request failed or timed out.'
+               details: errorMessage
            });
            console.log("<-- Stream initiation failed response sent.");
          } else if (!res.writableEnded) {
            // Stream started but failed during processing
            const errorDetails = (streamError instanceof Error) ? streamError.message : JSON.stringify(streamError);
-             // Send error details in the stream if possible
           res.write(`data: ${JSON.stringify({ error: { message: "Stream processing error after initiation", type: "proxy_error", details: errorDetails } })}\n\n`);
-             res.write(`data: [DONE]\n\n`); // Still send DONE after error for client handling
+             res.write(`data: [DONE]\n\n`); // Still send DONE for client handling
           res.end();
           console.log("<-- Stream error sent, stream ended.");
         } else {
@@ -488,7 +383,6 @@ app.post('/v1/chat/completions', async (req, res) => {
          }
        } catch (finalError) {
          console.error('Error sending stream error message to client:', finalError);
-           // Ensure response is ended if possible
          if (!res.writableEnded) { res.end(); }
        }
      }
@@ -496,50 +390,38 @@ app.post('/v1/chat/completions', async (req, res) => {
    } else {
      // --- Non-Stream ---
      try {
-         // Get the result using the retry logic
+         // **MODIFIED: Get the result using the retry logic helper**
        const result = await makeFalRequestWithRetry(falInput, false);
-         // console.log("Received non-stream result via retry function:", JSON.stringify(result, null, 2)); // Verbose
+         // console.log("Received non-stream result via retry function:", JSON.stringify(result, null, 2));
 
-         // Construct OpenAI compatible response
+         // Construct OpenAI compatible response (logic unchanged)
        const openAIResponse = {
          id: `chatcmpl-${result.requestId || Date.now()}`,
          object: "chat.completion",
          created: Math.floor(Date.now() / 1000),
-           model: model, // Echo back requested model
-           choices: [{
-             index: 0,
-             message: {
-               role: "assistant",
-               content: result.output || "" // Ensure content is string
-             },
-             finish_reason: "stop" // Assume stop for non-stream success
-           }],
-           usage: { // Provide null usage as Fal doesn't return it
-             prompt_tokens: null,
-             completion_tokens: null,
-             total_tokens: null
-           },
-           system_fingerprint: null, // Fal doesn't provide this
-           ...(result.reasoning && { fal_reasoning: result.reasoning }), // Include Fal specific reasoning if present
+           model: model,
+           choices: [{ index: 0, message: { role: "assistant", content: result.output || "" }, finish_reason: "stop" }],
+           usage: { prompt_tokens: null, completion_tokens: null, total_tokens: null },
+           system_fingerprint: null,
+           ...(result.reasoning && { fal_reasoning: result.reasoning }),
        };
 
        res.json(openAIResponse);
        console.log("<-- Non-stream response sent successfully.");
 
      } catch (error) {
-         // Catches errors from makeFalRequestWithRetry (e.g., all keys failed or non-key error)
+         // **MODIFIED: Catches errors from makeFalRequestWithRetry**
+         // This error could be a non-key-related error, or the "all keys failed" error.
        console.error('Error during non-stream request processing:', error.message || error);
        if (!res.headersSent) {
          const errorMessage = (error instanceof Error) ? error.message : JSON.stringify(error);
          // Check if it was the "all keys failed" error
          const finalMessage = errorMessage.includes("No valid Fal AI keys available") || errorMessage.includes("Request failed after trying")
-             ? `Fal request failed after trying all available keys: ${errorMessage}`
+             ? `Fal request failed: ${errorMessage}` // Simplified message
            : `Internal Server Error processing Fal request: ${errorMessage}`;
-           // Use 502 Bad Gateway if it's likely an upstream (Fal) failure
-           res.status(502).json({ error: 'Fal Request Failed', details: finalMessage });
+           res.status(502).json({ error: 'Fal Request Failed', details: finalMessage }); // 502 Bad Gateway
          console.log("<-- Non-stream error response sent.");
        } else {
-           // Should be rare for non-stream, but handle just in case
          console.error("Headers already sent for non-stream error? This is unexpected.");
          if (!res.writableEnded) { res.end(); }
        }
@@ -568,9 +450,10 @@ app.listen(PORT, () => {
   console.log(` Listening on port : ${PORT}`);
   console.log(` Reading Fal Keys from : FAL_KEY environment variable (comma-separated)`);
   console.log(` Loaded Keys Count : ${falKeys.length}`);
+   console.log(` Invalid Keys Set : Initialized (size: ${invalidKeys.size})`); // Show invalid set size
   console.log(` API Key Auth : ${API_KEY ? 'Enabled (using API_KEY env var)' : 'Disabled'}`);
   console.log(` Input Limits : System Prompt=${SYSTEM_PROMPT_LIMIT}, Prompt=${PROMPT_LIMIT}`);
-   console.log(` Concurrency Warning : Global Fal client reconfigured per request.`);
+   console.log(` Concurrency Warning : Global Fal client reconfigured per request attempt!`);
   console.log(`---------------------------------------------------------------------`);
   console.log(` Endpoints:`);
   console.log(` POST http://localhost:${PORT}/v1/chat/completions`);
@@ -580,5 +463,5 @@ app.listen(PORT, () => {
 
  // Root path response (updated message)
  app.get('/', (req, res) => {
-   res.send(`Fal OpenAI Proxy (Multi-Key Rotation from FAL_KEY) is running. Loaded ${falKeys.length} key(s).`);
+   res.send(`Fal OpenAI Proxy (Multi-Key Rotation from FAL_KEY) is running. Loaded ${falKeys.length} key(s). Currently ${invalidKeys.size} key(s) marked as invalid.`); // Show invalid count
  });
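
Note on the concurrency warning in this commit: every attempt reconfigures the shared `fal` singleton via `fal.config({ credentials })`, which the new code itself flags as a potential race under concurrent requests. Below is a minimal sketch of the alternative the comments hint at, assuming the installed `@fal-ai/client` exposes `createFalClient` (present in recent 1.x releases; verify against your version before relying on it). It reuses `falKeys`, `getNextValidKey`, `invalidKeys`, and `isKeyRelatedError` as defined in server.js above; `makeFalRequestWithRetryIsolated` is a hypothetical drop-in replacement for `makeFalRequestWithRetry`, not part of this commit.

// Sketch only: per-attempt client isolation instead of mutating the global `fal` singleton.
// ASSUMPTION: the installed @fal-ai/client exports createFalClient (recent 1.x releases do).
import { createFalClient } from '@fal-ai/client';

async function makeFalRequestWithRetryIsolated(falInput, stream = false) {
  const attemptedKeys = new Set(); // keys already tried for this request
  let attempts = 0;

  while (attempts < falKeys.length) {
    const keyInfo = getNextValidKey(); // same rotation helper as above
    if (!keyInfo) {
      throw new Error("No valid Fal AI keys available (all marked as invalid).");
    }
    if (attemptedKeys.has(keyInfo.key)) continue; // already tried in this request
    attemptedKeys.add(keyInfo.key);
    attempts++;

    // A dedicated client per attempt: no shared mutable credentials, so concurrent
    // requests cannot observe each other's key.
    const client = createFalClient({ credentials: keyInfo.key });

    try {
      if (stream) {
        return await client.stream("fal-ai/any-llm", { input: falInput });
      }
      return await client.subscribe("fal-ai/any-llm", { input: falInput, logs: true });
    } catch (error) {
      if (isKeyRelatedError(error)) {
        invalidKeys.add(keyInfo.key); // mark the bad key and rotate to the next one
        continue;
      }
      throw error; // non-key errors are not retried
    }
  }
  throw new Error(`Request failed after trying ${attempts} unique Fal key(s).`);
}

If this variant were used, the two existing call sites (`makeFalRequestWithRetry(falInput, true)` and `makeFalRequestWithRetry(falInput, false)`) could call it unchanged, and the startup "Concurrency Warning" log line would no longer apply.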