Calmlo commited on
Commit
9df5dcc
·
verified ·
1 Parent(s): e6d9a41

Update server.js

Browse files
Files changed (1) hide show
  1. server.js +434 -175
server.js CHANGED
@@ -1,16 +1,17 @@
1
  import express from 'express';
2
- // **CRITICAL: Import the 'fal' object directly for configuration**
3
- import { fal } from '@fal-ai/client'; // Make sure 'fal' is the object you configure
4
 
5
  // --- Key Management Setup ---
6
  // Read comma-separated keys from the SINGLE environment variable FAL_KEY
7
  const FAL_KEY_STRING = process.env.FAL_KEY;
8
- const API_KEY = process.env.API_KEY; // Custom API Key for proxy auth
 
9
 
10
  if (!FAL_KEY_STRING) {
11
- console.error("Error: FAL_KEY environment variable is not set.");
12
  console.error("Ensure FAL_KEY contains a comma-separated list of your Fal AI keys.");
13
- process.exit(1);
14
  }
15
 
16
  // Parse the comma-separated keys from FAL_KEY_STRING
@@ -19,23 +20,27 @@ const falKeys = FAL_KEY_STRING.split(',')
19
  .filter(key => key.length > 0); // Remove any empty strings resulting from extra commas
20
 
21
  if (falKeys.length === 0) {
22
- console.error("Error: No valid FAL keys found in the FAL_KEY environment variable after parsing.");
23
  console.error("Ensure FAL_KEY is a comma-separated list, e.g., 'key1,key2,key3'.");
24
- process.exit(1);
25
  }
26
 
27
  if (!API_KEY) {
28
- console.error("Error: API_KEY environment variable is not set.");
29
- process.exit(1);
30
  }
31
 
32
  let currentKeyIndex = 0;
33
- // **NEW: Keep track of keys that failed persistently during runtime**
34
  const invalidKeys = new Set();
35
 
36
  console.log(`Loaded ${falKeys.length} Fal AI Key(s) from the FAL_KEY environment variable.`);
37
 
38
- // **MODIFIED: Function to get the next *valid* key**
 
 
 
 
39
  function getNextValidKey() {
40
  // Check if all keys have been marked as invalid
41
  if (invalidKeys.size >= falKeys.length) {
@@ -58,16 +63,14 @@ function getNextValidKey() {
58
  console.log(`Using Fal Key index: ${keyIndex} (from FAL_KEY list)`);
59
  return { key, index: keyIndex }; // Return the key and its original index
60
  } else {
61
- console.log(`Skipping invalid Fal Key index: ${keyIndex}`);
62
  }
63
 
64
  attempts++;
65
- // If the loop started at an invalid key and wrapped around,
66
- // check if we've checked all keys since the start
67
  if (currentKeyIndex === initialIndex && attempts > 0) {
68
- // This check might be redundant due to the invalidKeys.size check at the top,
69
- // but serves as an extra safeguard.
70
- console.warn("Looped through all keys, potentially all are invalid.");
71
  break;
72
  }
73
  }
@@ -77,55 +80,51 @@ function getNextValidKey() {
77
  return null;
78
  }
79
 
80
- // **NEW/MODIFIED: Function to check if an error is likely related to a bad key**
81
- // NOTE: This is a heuristic. You might need to adjust based on actual errors from Fal AI.
 
 
 
 
82
  function isKeyRelatedError(error) {
83
- // Check if error itself is null/undefined
84
- if (!error) return false;
85
 
86
  const message = error.message?.toLowerCase() || '';
87
- // Check if the error object has a 'status' property (common in HTTP errors)
88
- const status = error.status || error.statusCode; // Check common status properties
89
 
90
- // Check for specific HTTP status codes indicative of auth/permission issues
91
- // 401: Unauthorized, 403: Forbidden, 429: Too Many Requests (often quota related)
92
  if (status === 401 || status === 403 || status === 429) {
93
  console.warn(`Detected potential key-related error (HTTP Status: ${status}).`);
94
  return true;
95
  }
96
 
97
  // Check for common error message patterns (case-insensitive)
98
- // Add more specific Fal AI error messages as you encounter them
99
- if (message.includes('invalid api key') ||
100
- message.includes('authentication failed') ||
101
- message.includes('permission denied') ||
102
- message.includes('quota exceeded') || // Treat quota errors as key-related for rotation
103
- message.includes('forbidden') ||
104
- message.includes('unauthorized') ||
105
- message.includes('rate limit') || // Often linked to key limits
106
- message.includes('credentials')) // Generic credential errors
107
- {
108
- console.warn(`Detected potential key-related error (message: "${message}")`);
109
  return true;
110
  }
111
 
112
  // Add more specific checks based on observed Fal AI errors if needed
113
- // Example: Check for specific error codes if Fal AI provides them
114
- // if (error.code === 'SOME_FAL_AUTH_ERROR_CODE') {
115
- // return true;
116
- // }
117
 
118
  return false;
119
  }
120
  // --- End Key Management Setup ---
121
 
122
  const app = express();
 
123
  app.use(express.json({ limit: '50mb' }));
124
  app.use(express.urlencoded({ extended: true, limit: '50mb' }));
125
 
126
  const PORT = process.env.PORT || 3000;
127
 
128
- // API Key 鉴权中间件 (unchanged)
129
  const apiKeyAuth = (req, res, next) => {
130
  const authHeader = req.headers['authorization'];
131
 
@@ -134,65 +133,279 @@ const apiKeyAuth = (req, res, next) => {
134
  return res.status(401).json({ error: 'Unauthorized: No API Key provided' });
135
  }
136
 
 
137
  const authParts = authHeader.split(' ');
138
  if (authParts.length !== 2 || authParts[0].toLowerCase() !== 'bearer') {
139
- console.warn('Unauthorized: Invalid Authorization header format');
140
  return res.status(401).json({ error: 'Unauthorized: Invalid Authorization header format' });
141
  }
142
 
143
  const providedKey = authParts[1];
144
  if (providedKey !== API_KEY) {
145
- console.warn('Unauthorized: Invalid API Key');
146
  return res.status(401).json({ error: 'Unauthorized: Invalid API Key' });
147
  }
148
 
 
149
  next();
150
  };
151
 
 
152
  app.use(['/v1/models', '/v1/chat/completions'], apiKeyAuth);
153
 
154
- // === 全局定义限制 === (unchanged)
155
- const PROMPT_LIMIT = 4800;
156
- const SYSTEM_PROMPT_LIMIT = 4800;
157
- // === 限制定义结束 ===
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
 
159
- // 定义 fal-ai/any-llm 支持的模型列表 (unchanged)
160
- const FAL_SUPPORTED_MODELS = [ /* ... model list ... */ ];
161
- // Helper function getOwner (unchanged)
162
- const getOwner = (modelId) => { /* ... */ };
163
- // GET /v1/models endpoint (unchanged)
164
- app.get('/v1/models', (req, res) => { /* ... */ });
165
- // convertMessagesToFalPrompt 函数 (unchanged)
166
- function convertMessagesToFalPrompt(messages) { /* ... */ }
167
- // === convertMessagesToFalPrompt 函数结束 ===
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
169
 
170
- // --- NEW: Helper function to make Fal AI request with retries ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171
  async function makeFalRequestWithRetry(falInput, stream = false) {
172
  let attempts = 0;
173
- // Max attempts should be the total number of keys initially available
174
  const maxAttempts = falKeys.length;
175
- // Keep track of keys tried *within this specific request attempt*
176
- // This prevents infinite loops if getNextValidKey had issues,
177
- // and ensures we try each *available* key at most once per request.
178
  const attemptedKeysInThisRequest = new Set();
179
 
180
  while (attempts < maxAttempts) {
181
  const keyInfo = getNextValidKey(); // Get the next *valid* key info { key, index }
182
 
183
  if (!keyInfo) {
184
- // This should only happen if all keys are in the invalidKeys set
185
  console.error("makeFalRequestWithRetry: No valid Fal AI keys remaining.");
186
  throw new Error("No valid Fal AI keys available (all marked as invalid).");
187
  }
188
 
189
- // Prevent retrying the exact same key if getNextValidKey logic had an issue
190
- // or if a key wasn't marked invalid correctly on a previous attempt within this request.
191
  if (attemptedKeysInThisRequest.has(keyInfo.key)) {
192
- console.warn(`Key at index ${keyInfo.index} was already attempted for this request. Skipping to find next.`);
193
- // Don't increment 'attempts' here as we didn't use the key. Let the loop find the next.
194
- // If all keys end up being skipped, the `!keyInfo` check or `attempts < maxAttempts` will eventually handle it.
195
- continue;
196
  }
197
  attemptedKeysInThisRequest.add(keyInfo.key);
198
  attempts++; // Count this as a distinct attempt with a unique key for this request
@@ -200,46 +413,47 @@ async function makeFalRequestWithRetry(falInput, stream = false) {
200
  try {
201
  console.log(`Attempt ${attempts}/${maxAttempts}: Trying Fal Key index ${keyInfo.index}...`);
202
 
203
- // *** CRITICAL: Reconfigure the global fal client with the selected key for this attempt ***
204
- // Warning: This reconfigures the GLOBAL client. If you have many concurrent requests,
205
- // this could lead to race conditions. Consider instance isolation or a pool manager
206
- // for high-concurrency scenarios if the library doesn't support per-request credentials easily.
207
- console.warn(`Configuring GLOBAL fal client with key index ${keyInfo.index}. Ensure this is safe for your concurrency model.`);
208
  fal.config({ credentials: keyInfo.key }); // Use the specific key for this attempt
209
 
210
  if (stream) {
211
- // Use the configured global 'fal' object for the stream request
212
  const falStream = await fal.stream("fal-ai/any-llm", { input: falInput });
213
  console.log(`Successfully initiated stream with key index ${keyInfo.index}.`);
214
- // Success! Return the stream iterator directly
215
  return falStream;
216
  } else {
217
- // Use the configured global 'fal' object for the non-stream request
218
  console.log(`Executing non-stream request with key index ${keyInfo.index}...`);
219
- // Assuming fal.subscribe or similar method for non-streaming
220
- // Adapt this line if your non-stream method is different
221
- const result = await fal.subscribe("fal-ai/any-llm", { input: falInput, logs: true });
 
 
222
  console.log(`Successfully received non-stream result with key index ${keyInfo.index}.`);
223
 
224
- // Check for errors *within* the successful response structure (if applicable)
225
  if (result && result.error) {
226
- console.error(`Fal-ai returned an error in the result payload (Key Index ${keyInfo.index}):`, result.error);
227
  // Decide if this specific payload error should also invalidate the key
228
  if (isKeyRelatedError(result.error)) { // Reuse the checker
229
  console.warn(`Marking Fal Key index ${keyInfo.index} as invalid due to error in response payload.`);
230
  invalidKeys.add(keyInfo.key);
231
- // Continue the loop to try the next key
232
- continue; // Go to the next iteration of the while loop
233
  } else {
234
- // Throw an error that will be caught by the outer handler, not retried
235
- throw new Error(`Fal-ai error in result payload: ${JSON.stringify(result.error)}`);
236
  }
237
  }
238
- // Success! Return the result
239
  return result;
240
  }
241
  } catch (error) {
242
- console.error(`Error using Fal Key index ${keyInfo.index}:`, error.message || error);
243
 
244
  // Check if the caught error indicates the key is invalid
245
  if (isKeyRelatedError(error)) {
@@ -248,161 +462,202 @@ async function makeFalRequestWithRetry(falInput, stream = false) {
248
  invalidKeys.add(keyInfo.key);
249
  // Continue to the next iteration of the while loop to try another key
250
  } else {
251
- // Error is not key-related (e.g., network issue, bad input, internal Fal error)
252
- // Don't retry with other keys for this type of error. Fail the request immediately.
253
  console.error("Error does not appear to be key-related. Failing request without further key retries.");
254
  throw error; // Re-throw the original error to be caught by the main endpoint handler
255
  }
256
  }
257
  } // End while loop
258
 
259
- // If the loop finishes, it means all available keys were tried and failed with key-related errors
260
  throw new Error(`Request failed after trying ${attempts} unique Fal key(s). All failed with key-related errors or were already marked invalid.`);
261
  }
262
 
263
 
264
- // POST /v1/chat/completions endpoint (MODIFIED to use retry logic)
265
  app.post('/v1/chat/completions', async (req, res) => {
266
- const { model, messages, stream = false, reasoning = false, /* ...restOpenAIParams */ } = req.body;
 
267
 
268
  console.log(`--> POST /v1/chat/completions | Model: ${model} | Stream: ${stream}`);
269
 
270
- // Basic Validation (unchanged)
271
  if (!FAL_SUPPORTED_MODELS.includes(model)) {
272
- console.warn(`Warning: Requested model '${model}' is not in the explicitly supported list. Proxy will still attempt.`);
 
273
  }
274
  if (!model || !messages || !Array.isArray(messages) || messages.length === 0) {
275
- console.error("Invalid request: Missing 'model' or 'messages' array.");
276
- return res.status(400).json({ error: 'Missing or invalid parameters: model and messages array are required.' });
277
  }
278
 
279
  try {
280
- // --- Prepare Input (unchanged) ---
 
281
  const { prompt, system_prompt } = convertMessagesToFalPrompt(messages);
 
282
  const falInput = {
283
- model: model,
284
- prompt: prompt,
285
- ...(system_prompt && { system_prompt: system_prompt }),
286
- reasoning: !!reasoning,
 
287
  };
288
- console.log("Attempting Fal request with key rotation/retry...");
289
 
290
- // --- Handle Stream vs Non-Stream using the retry helper ---
 
 
 
 
291
  if (stream) {
 
292
  res.setHeader('Content-Type', 'text/event-stream; charset=utf-8');
293
  res.setHeader('Cache-Control', 'no-cache');
294
  res.setHeader('Connection', 'keep-alive');
295
- res.setHeader('Access-Control-Allow-Origin', '*');
296
- res.flushHeaders();
297
 
298
- let previousOutput = '';
299
- let falStream; // Declare falStream here
300
 
301
  try {
302
- // **MODIFIED: Initiate stream using the retry logic helper**
303
  falStream = await makeFalRequestWithRetry(falInput, true);
304
 
305
- // Process the stream events (logic mostly unchanged)
306
  for await (const event of falStream) {
 
307
  const currentOutput = (event && typeof event.output === 'string') ? event.output : '';
 
308
  const isPartial = (event && typeof event.partial === 'boolean') ? event.partial : true;
309
- const errorInfo = (event && event.error) ? event.error : null;
310
 
311
- // Handle errors *within* the stream payload
312
  if (errorInfo) {
313
- console.error("Error received *within* fal stream event:", errorInfo);
314
- // Optionally send an error chunk (check if needed)
315
- const errorChunk = { /* ... error chunk details ... */ };
 
 
 
 
316
  if (!res.writableEnded) {
317
  res.write(`data: ${JSON.stringify(errorChunk)}\n\n`);
318
- } else { console.warn("Stream ended before writing event error."); }
319
- // Decide if this stream error should also invalidate the key
320
- // Note: The key might have already been marked invalid if the error happened during initial connection
321
- // You might need more context to decide if an *in-stream* error means the key is bad.
322
- // For now, we just log it. If it causes the stream to terminate prematurely,
323
- // the outer catch might handle it, but key invalidation might not occur unless the
324
- // error object passed to the catch block triggers isKeyRelatedError.
325
  }
326
 
327
- // Calculate delta (logic unchanged)
328
  let deltaContent = '';
329
  if (currentOutput.startsWith(previousOutput)) {
330
- deltaContent = currentOutput.substring(previousOutput.length);
 
331
  } else if (currentOutput.length > 0) {
332
- deltaContent = currentOutput;
333
- previousOutput = '';
334
- }
335
- previousOutput = currentOutput;
336
-
337
- // Send OpenAI compatible chunk (logic unchanged)
 
 
338
  if (deltaContent || !isPartial) {
339
  const openAIChunk = {
340
- id: `chatcmpl-${Date.now()}`,
341
  object: "chat.completion.chunk",
342
  created: Math.floor(Date.now() / 1000),
343
- model: model,
344
- choices: [{ index: 0, delta: { content: deltaContent }, finish_reason: isPartial === false ? "stop" : null }]
 
 
 
 
 
 
345
  };
 
346
  if (!res.writableEnded) {
347
  res.write(`data: ${JSON.stringify(openAIChunk)}\n\n`);
348
- } else { console.warn("Stream ended before writing data chunk."); }
 
 
349
  }
350
- } // End for-await loop
351
 
352
- // Send the final [DONE] marker (logic unchanged)
353
  if (!res.writableEnded) {
354
  res.write(`data: [DONE]\n\n`);
355
- res.end();
356
- console.log("<-- Stream finished successfully.");
357
  } else {
358
- console.log("<-- Stream finished, but connection was already ended.");
359
  }
360
 
361
  } catch (streamError) {
362
- // **MODIFIED: Catches errors from makeFalRequestWithRetry OR stream iteration**
363
- // This error could be a non-key-related error, or the "all keys failed" error.
364
  console.error('Error during stream request processing:', streamError.message || streamError);
365
  try {
 
366
  if (!res.headersSent) {
367
- // Error likely occurred in makeFalRequestWithRetry before stream started
368
  const errorMessage = (streamError instanceof Error) ? streamError.message : JSON.stringify(streamError);
369
- res.status(502).json({ // 502 Bad Gateway is appropriate for upstream failure
 
370
  error: 'Failed to initiate Fal stream',
371
- details: errorMessage
372
  });
373
- console.log("<-- Stream initiation failed response sent.");
374
  } else if (!res.writableEnded) {
375
- // Stream started but failed during processing
376
  const errorDetails = (streamError instanceof Error) ? streamError.message : JSON.stringify(streamError);
 
377
  res.write(`data: ${JSON.stringify({ error: { message: "Stream processing error after initiation", type: "proxy_error", details: errorDetails } })}\n\n`);
378
- res.write(`data: [DONE]\n\n`); // Still send DONE for client handling
379
  res.end();
380
- console.log("<-- Stream error sent, stream ended.");
381
  } else {
382
- console.log("<-- Stream error occurred, but connection already ended.");
 
383
  }
384
  } catch (finalError) {
 
385
  console.error('Error sending stream error message to client:', finalError);
 
386
  if (!res.writableEnded) { res.end(); }
387
  }
388
  }
389
 
390
  } else {
391
- // --- Non-Stream ---
392
  try {
393
- // **MODIFIED: Get the result using the retry logic helper**
394
  const result = await makeFalRequestWithRetry(falInput, false);
395
- // console.log("Received non-stream result via retry function:", JSON.stringify(result, null, 2));
396
 
397
- // Construct OpenAI compatible response (logic unchanged)
398
  const openAIResponse = {
399
- id: `chatcmpl-${result.requestId || Date.now()}`,
400
  object: "chat.completion",
401
  created: Math.floor(Date.now() / 1000),
402
- model: model,
403
- choices: [{ index: 0, message: { role: "assistant", content: result.output || "" }, finish_reason: "stop" }],
404
- usage: { prompt_tokens: null, completion_tokens: null, total_tokens: null },
405
- system_fingerprint: null,
 
 
 
 
 
 
 
 
 
 
 
 
406
  ...(result.reasoning && { fal_reasoning: result.reasoning }),
407
  };
408
 
@@ -410,31 +665,35 @@ app.post('/v1/chat/completions', async (req, res) => {
410
  console.log("<-- Non-stream response sent successfully.");
411
 
412
  } catch (error) {
413
- // **MODIFIED: Catches errors from makeFalRequestWithRetry**
414
- // This error could be a non-key-related error, or the "all keys failed" error.
415
  console.error('Error during non-stream request processing:', error.message || error);
 
416
  if (!res.headersSent) {
417
  const errorMessage = (error instanceof Error) ? error.message : JSON.stringify(error);
418
- // Check if it was the "all keys failed" error
419
  const finalMessage = errorMessage.includes("No valid Fal AI keys available") || errorMessage.includes("Request failed after trying")
420
- ? `Fal request failed: ${errorMessage}` // Simplified message
421
  : `Internal Server Error processing Fal request: ${errorMessage}`;
422
- res.status(502).json({ error: 'Fal Request Failed', details: finalMessage }); // 502 Bad Gateway
423
- console.log("<-- Non-stream error response sent.");
 
424
  } else {
425
- console.error("Headers already sent for non-stream error? This is unexpected.");
426
- if (!res.writableEnded) { res.end(); }
 
427
  }
428
  }
429
  }
430
 
431
  } catch (error) {
432
- // Catch errors from parameter validation or prompt conversion *before* calling Fal
433
- console.error('Unhandled error before initiating Fal request:', error.message || error);
 
434
  if (!res.headersSent) {
435
  const errorMessage = (error instanceof Error) ? error.message : JSON.stringify(error);
 
436
  res.status(500).json({ error: 'Internal Server Error in Proxy Setup', details: errorMessage });
437
- console.log("<-- Proxy setup error response sent.");
438
  } else {
439
  console.error("Headers already sent when catching setup error. Ending response.");
440
  if (!res.writableEnded) { res.end(); }
@@ -442,26 +701,26 @@ app.post('/v1/chat/completions', async (req, res) => {
442
  }
443
  });
444
 
445
- // 启动服务器 (Updated startup message)
446
  app.listen(PORT, () => {
447
  console.log(`=====================================================================`);
448
  console.log(` Fal OpenAI Proxy Server (Multi-Key Rotation & Failover)`);
449
  console.log(`---------------------------------------------------------------------`);
450
- console.log(` Listening on port : ${PORT}`);
451
  console.log(` Reading Fal Keys from : FAL_KEY environment variable (comma-separated)`);
452
  console.log(` Loaded Keys Count : ${falKeys.length}`);
453
- console.log(` Invalid Keys Set : Initialized (size: ${invalidKeys.size})`); // Show invalid set size
454
- console.log(` API Key Auth : ${API_KEY ? 'Enabled (using API_KEY env var)' : 'Disabled'}`);
455
  console.log(` Input Limits : System Prompt=${SYSTEM_PROMPT_LIMIT}, Prompt=${PROMPT_LIMIT}`);
456
  console.log(` Concurrency Warning : Global Fal client reconfigured per request attempt!`);
457
  console.log(`---------------------------------------------------------------------`);
458
- console.log(` Endpoints:`);
459
  console.log(` POST http://localhost:${PORT}/v1/chat/completions`);
460
  console.log(` GET http://localhost:${PORT}/v1/models`);
461
  console.log(`=====================================================================`);
462
  });
463
 
464
- // 根路径响应 (Updated message)
465
  app.get('/', (req, res) => {
466
- res.send(`Fal OpenAI Proxy (Multi-Key Rotation from FAL_KEY) is running. Loaded ${falKeys.length} key(s). Currently ${invalidKeys.size} key(s) marked as invalid.`); // Show invalid count
467
  });
 
1
  import express from 'express';
2
+ // Import the 'fal' object directly for configuration within the retry loop
3
+ import { fal } from '@fal-ai/client';
4
 
5
  // --- Key Management Setup ---
6
  // Read comma-separated keys from the SINGLE environment variable FAL_KEY
7
  const FAL_KEY_STRING = process.env.FAL_KEY;
8
+ // Read the custom API Key for proxy authentication
9
+ const API_KEY = process.env.API_KEY;
10
 
11
  if (!FAL_KEY_STRING) {
12
+ console.error("ERROR: FAL_KEY environment variable is not set.");
13
  console.error("Ensure FAL_KEY contains a comma-separated list of your Fal AI keys.");
14
+ process.exit(1); // Exit if no Fal keys are provided
15
  }
16
 
17
  // Parse the comma-separated keys from FAL_KEY_STRING
 
20
  .filter(key => key.length > 0); // Remove any empty strings resulting from extra commas
21
 
22
  if (falKeys.length === 0) {
23
+ console.error("ERROR: No valid Fal keys found in the FAL_KEY environment variable after parsing.");
24
  console.error("Ensure FAL_KEY is a comma-separated list, e.g., 'key1,key2,key3'.");
25
+ process.exit(1); // Exit if parsing results in zero valid keys
26
  }
27
 
28
  if (!API_KEY) {
29
+ console.error("ERROR: API_KEY environment variable is not set.");
30
+ process.exit(1); // Exit if the proxy auth key is missing
31
  }
32
 
33
  let currentKeyIndex = 0;
34
+ // Keep track of keys that failed persistently during runtime
35
  const invalidKeys = new Set();
36
 
37
  console.log(`Loaded ${falKeys.length} Fal AI Key(s) from the FAL_KEY environment variable.`);
38
 
39
+ /**
40
+ * Gets the next available valid Fal AI key in a round-robin fashion.
41
+ * Skips keys that have been marked as invalid.
42
+ * @returns {object|null} An object containing the key and its original index { key, index }, or null if no valid keys remain.
43
+ */
44
  function getNextValidKey() {
45
  // Check if all keys have been marked as invalid
46
  if (invalidKeys.size >= falKeys.length) {
 
63
  console.log(`Using Fal Key index: ${keyIndex} (from FAL_KEY list)`);
64
  return { key, index: keyIndex }; // Return the key and its original index
65
  } else {
66
+ console.log(`Skipping invalid Fal Key index: ${keyIndex}`);
67
  }
68
 
69
  attempts++;
70
+ // Safety check: If we've looped back to the start after trying, break.
71
+ // This is mostly redundant due to the invalidKeys.size check, but acts as a safeguard.
72
  if (currentKeyIndex === initialIndex && attempts > 0) {
73
+ console.warn("Looped through all keys, potentially all are invalid.");
 
 
74
  break;
75
  }
76
  }
 
80
  return null;
81
  }
82
 
83
+ /**
84
+ * Checks if an error object likely indicates an issue with the Fal AI API key.
85
+ * This is heuristic-based and may need refinement based on observed Fal errors.
86
+ * @param {Error|object} error - The error object caught.
87
+ * @returns {boolean} True if the error seems key-related, false otherwise.
88
+ */
89
  function isKeyRelatedError(error) {
90
+ if (!error) return false; // Handle null/undefined errors
 
91
 
92
  const message = error.message?.toLowerCase() || '';
93
+ // Check common HTTP status properties
94
+ const status = error.status || error.statusCode;
95
 
96
+ // Check for specific HTTP status codes (401: Unauthorized, 403: Forbidden, 429: Too Many Requests/Quota)
 
97
  if (status === 401 || status === 403 || status === 429) {
98
  console.warn(`Detected potential key-related error (HTTP Status: ${status}).`);
99
  return true;
100
  }
101
 
102
  // Check for common error message patterns (case-insensitive)
103
+ const keyErrorPatterns = [
104
+ 'invalid api key', 'authentication failed', 'permission denied',
105
+ 'quota exceeded', 'forbidden', 'unauthorized', 'rate limit',
106
+ 'credentials', 'api key missing', 'invalid credential'
107
+ ];
108
+ if (keyErrorPatterns.some(pattern => message.includes(pattern))) {
109
+ console.warn(`Detected potential key-related error (message contains relevant pattern: "${message}")`);
 
 
 
 
110
  return true;
111
  }
112
 
113
  // Add more specific checks based on observed Fal AI errors if needed
114
+ // e.g., if (error.code === 'FAL_AUTH_FAILURE') return true;
 
 
 
115
 
116
  return false;
117
  }
118
  // --- End Key Management Setup ---
119
 
120
  const app = express();
121
+ // Increase payload size limits if needed
122
  app.use(express.json({ limit: '50mb' }));
123
  app.use(express.urlencoded({ extended: true, limit: '50mb' }));
124
 
125
  const PORT = process.env.PORT || 3000;
126
 
127
+ // API Key Authentication Middleware
128
  const apiKeyAuth = (req, res, next) => {
129
  const authHeader = req.headers['authorization'];
130
 
 
133
  return res.status(401).json({ error: 'Unauthorized: No API Key provided' });
134
  }
135
 
136
+ // Expecting "Bearer YOUR_API_KEY"
137
  const authParts = authHeader.split(' ');
138
  if (authParts.length !== 2 || authParts[0].toLowerCase() !== 'bearer') {
139
+ console.warn('Unauthorized: Invalid Authorization header format. Expected "Bearer <key>".');
140
  return res.status(401).json({ error: 'Unauthorized: Invalid Authorization header format' });
141
  }
142
 
143
  const providedKey = authParts[1];
144
  if (providedKey !== API_KEY) {
145
+ console.warn('Unauthorized: Invalid API Key provided.');
146
  return res.status(401).json({ error: 'Unauthorized: Invalid API Key' });
147
  }
148
 
149
+ // Key is valid, proceed to the next middleware or route handler
150
  next();
151
  };
152
 
153
+ // Apply API Key Authentication to relevant endpoints
154
  app.use(['/v1/models', '/v1/chat/completions'], apiKeyAuth);
155
 
156
+ // === Global Limits Definition ===
157
+ const PROMPT_LIMIT = 4800; // Max length for the main 'prompt' field
158
+ const SYSTEM_PROMPT_LIMIT = 4800; // Max length for the 'system_prompt' field
159
+ // === End Limits Definition ===
160
+
161
+ // Define the list of models supported by fal-ai/any-llm (Update as needed)
162
+ const FAL_SUPPORTED_MODELS = [
163
+ "anthropic/claude-3.7-sonnet",
164
+ "anthropic/claude-3.5-sonnet",
165
+ "anthropic/claude-3-5-haiku",
166
+ "anthropic/claude-3-haiku",
167
+ "google/gemini-pro-1.5",
168
+ "google/gemini-flash-1.5",
169
+ "google/gemini-flash-1.5-8b",
170
+ "google/gemini-2.0-flash-001",
171
+ "meta-llama/llama-3.2-1b-instruct",
172
+ "meta-llama/llama-3.2-3b-instruct",
173
+ "meta-llama/llama-3.1-8b-instruct",
174
+ "meta-llama/llama-3.1-70b-instruct",
175
+ "openai/gpt-4o-mini",
176
+ "openai/gpt-4o",
177
+ "deepseek/deepseek-r1",
178
+ "meta-llama/llama-4-maverick",
179
+ "meta-llama/llama-4-scout"
180
+ // Add or remove models here
181
+ ];
182
+
183
+ // Helper function to extract the owner/organization from a model ID string
184
+ const getOwner = (modelId) => {
185
+ if (modelId && typeof modelId === 'string' && modelId.includes('/')) {
186
+ return modelId.split('/')[0];
187
+ }
188
+ // Default owner if format is unexpected or missing
189
+ return 'fal-ai';
190
+ }
191
 
192
+ // GET /v1/models endpoint - Returns the list of supported models
193
+ app.get('/v1/models', (req, res) => {
194
+ console.log("Received request for GET /v1/models");
195
+ try {
196
+ const modelsData = FAL_SUPPORTED_MODELS.map(modelId => ({
197
+ id: modelId,
198
+ object: "model",
199
+ created: Math.floor(Date.now() / 1000), // Use current timestamp
200
+ owned_by: getOwner(modelId)
201
+ }));
202
+ res.json({ object: "list", data: modelsData });
203
+ console.log("Successfully returned model list.");
204
+ } catch (error) {
205
+ console.error("Error processing GET /v1/models:", error);
206
+ res.status(500).json({ error: "Failed to retrieve model list." });
207
+ }
208
+ });
209
+
210
+
211
+ /**
212
+ * Converts OpenAI-style messages array to Fal AI's prompt and system_prompt format.
213
+ * Implements System prompt top-priority, separator, and recency-based history filling.
214
+ * Includes robustness checks for input validation and internal errors.
215
+ * @param {Array<object>} messages - Array of message objects ({ role: string, content: string }).
216
+ * @returns {object} An object containing { system_prompt: string, prompt: string }.
217
+ * @throws {Error} If input is invalid or an internal processing error occurs.
218
+ */
219
+ function convertMessagesToFalPrompt(messages) {
220
+ // console.log("Entering convertMessagesToFalPrompt with messages:", JSON.stringify(messages, null, 2)); // Debug log
221
+
222
+ // --- Input Validation ---
223
+ if (!Array.isArray(messages)) {
224
+ console.error("Error in convertMessagesToFalPrompt: Input 'messages' is not an array.");
225
+ throw new Error("Invalid input: 'messages' must be an array.");
226
+ }
227
+ if (messages.length === 0) {
228
+ console.warn("Warning in convertMessagesToFalPrompt: Input 'messages' array is empty.");
229
+ return { system_prompt: "", prompt: "" }; // Return empty if no messages
230
+ }
231
+ // --- End Input Validation ---
232
+
233
+ try { // Wrap main logic in try...catch for internal errors
234
+ let fixed_system_prompt_content = "";
235
+ const conversation_message_blocks = [];
236
+ // console.log(`Original messages count: ${messages.length}`);
237
+
238
+ // 1. Separate System messages, format User/Assistant messages
239
+ for (const message of messages) {
240
+ // Validate individual message structure
241
+ if (!message || typeof message !== 'object' || typeof message.role !== 'string') {
242
+ console.warn(`Skipping invalid message object in convertMessagesToFalPrompt: ${JSON.stringify(message)}`);
243
+ continue; // Skip malformed message
244
+ }
245
 
246
+ // Safely handle content (null/undefined become empty string)
247
+ let content = (message.content === null || message.content === undefined) ? "" : String(message.content);
248
+
249
+ switch (message.role) {
250
+ case 'system':
251
+ // Append all system messages together
252
+ fixed_system_prompt_content += `System: ${content}\n\n`;
253
+ break;
254
+ case 'user':
255
+ conversation_message_blocks.push(`Human: ${content}\n\n`);
256
+ break;
257
+ case 'assistant':
258
+ conversation_message_blocks.push(`Assistant: ${content}\n\n`);
259
+ break;
260
+ default:
261
+ // Log unsupported roles but continue processing
262
+ console.warn(`Unsupported role encountered in convertMessagesToFalPrompt: ${message.role}. Skipping message.`);
263
+ continue;
264
+ }
265
+ }
266
+
267
+ // 2. Truncate combined system messages if they exceed the limit
268
+ if (fixed_system_prompt_content.length > SYSTEM_PROMPT_LIMIT) {
269
+ const originalLength = fixed_system_prompt_content.length;
270
+ fixed_system_prompt_content = fixed_system_prompt_content.substring(0, SYSTEM_PROMPT_LIMIT);
271
+ console.warn(`Combined system messages truncated from ${originalLength} to ${SYSTEM_PROMPT_LIMIT} characters.`);
272
+ }
273
+ // Trim trailing whitespace from the fixed system content
274
+ fixed_system_prompt_content = fixed_system_prompt_content.trim();
275
+
276
+ // 3. Calculate remaining space in system_prompt for history
277
+ // Consider potential separator length later if needed
278
+ let space_occupied_by_fixed_system = 0;
279
+ if (fixed_system_prompt_content.length > 0) {
280
+ // Approximate space: content + potential separator overhead (\n\n...\n\n)
281
+ space_occupied_by_fixed_system = fixed_system_prompt_content.length + 4; // Heuristic for spacing
282
+ }
283
+ const remaining_system_limit = Math.max(0, SYSTEM_PROMPT_LIMIT - space_occupied_by_fixed_system);
284
+ // console.log(`Trimmed fixed system prompt length: ${fixed_system_prompt_content.length}. Approx remaining system history limit: ${remaining_system_limit}`);
285
+
286
+ // 4. Fill history backwards (recency): Prioritize 'prompt', then 'system_prompt' overflow
287
+ const prompt_history_blocks = []; // For the main 'prompt' field
288
+ const system_prompt_history_blocks = []; // For history overflowing into 'system_prompt'
289
+ let current_prompt_length = 0;
290
+ let current_system_history_length = 0;
291
+ let promptFull = (PROMPT_LIMIT <= 0); // Handle zero limit case
292
+ let systemHistoryFull = (remaining_system_limit <= 0);
293
+
294
+ // console.log(`Processing ${conversation_message_blocks.length} user/assistant messages for recency filling.`);
295
+ for (let i = conversation_message_blocks.length - 1; i >= 0; i--) {
296
+ const message_block = conversation_message_blocks[i];
297
+ // Ensure message_block is a string before getting length
298
+ const block_length = (typeof message_block === 'string') ? message_block.length : 0;
299
+
300
+ if (block_length === 0) continue; // Skip empty blocks
301
+
302
+ // If both slots are full, stop processing older messages
303
+ if (promptFull && systemHistoryFull) {
304
+ // console.log(`Both prompt and system history slots full. Omitting older messages from index ${i}.`);
305
+ break;
306
+ }
307
+
308
+ // Try fitting into the main 'prompt' first
309
+ if (!promptFull) {
310
+ if (current_prompt_length + block_length <= PROMPT_LIMIT) {
311
+ prompt_history_blocks.unshift(message_block); // Add to beginning
312
+ current_prompt_length += block_length;
313
+ continue; // Message placed, move to next older message
314
+ } else {
315
+ promptFull = true; // Main prompt is now full
316
+ // console.log(`Prompt limit (${PROMPT_LIMIT}) reached. Trying system history slot.`);
317
+ }
318
+ }
319
 
320
+ // If prompt is full, try fitting into the 'system_prompt' remaining space
321
+ if (!systemHistoryFull) {
322
+ if (current_system_history_length + block_length <= remaining_system_limit) {
323
+ system_prompt_history_blocks.unshift(message_block); // Add to beginning
324
+ current_system_history_length += block_length;
325
+ continue; // Message placed, move to next older message
326
+ } else {
327
+ systemHistoryFull = true; // System history slot is now full
328
+ // console.log(`System history limit (${remaining_system_limit}) reached.`);
329
+ }
330
+ }
331
+ }
332
+
333
+ // 5. Combine the final prompt and system_prompt parts
334
+ const system_prompt_history_content = system_prompt_history_blocks.join('').trim();
335
+ const final_prompt = prompt_history_blocks.join('').trim(); // Main prompt content
336
+
337
+ // Separator to distinguish fixed system prompt from overflow history
338
+ const SEPARATOR = "\n\n------- Earlier Conversation History -------\n\n";
339
+
340
+ let final_system_prompt = "";
341
+
342
+ // Check if we have content for each part
343
+ const hasFixedSystem = fixed_system_prompt_content.length > 0;
344
+ const hasSystemHistory = system_prompt_history_content.length > 0;
345
+
346
+ if (hasFixedSystem && hasSystemHistory) {
347
+ // Both parts exist: Combine with separator
348
+ final_system_prompt = fixed_system_prompt_content + SEPARATOR + system_prompt_history_content;
349
+ // console.log("Combining fixed system prompt and history with separator.");
350
+ } else if (hasFixedSystem) {
351
+ // Only fixed system prompt exists
352
+ final_system_prompt = fixed_system_prompt_content;
353
+ // console.log("Using only fixed system prompt.");
354
+ } else if (hasSystemHistory) {
355
+ // Only overflow history exists (fixed system prompt was empty)
356
+ final_system_prompt = system_prompt_history_content;
357
+ // console.log("Using only history in system prompt slot.");
358
+ }
359
+ // If both are empty, final_system_prompt remains ""
360
+
361
+ // 6. Return the structured result
362
+ const result = {
363
+ system_prompt: final_system_prompt,
364
+ prompt: final_prompt
365
+ };
366
+
367
+ // console.log(`Final system_prompt length: ${result.system_prompt.length}`); // Debug log
368
+ // console.log(`Final prompt length: ${result.prompt.length}`); // Debug log
369
+ return result;
370
+
371
+ } catch (internalError) {
372
+ console.error("!!! CRITICAL ERROR inside convertMessagesToFalPrompt:", internalError);
373
+ console.error("!!! Failing messages input was:", JSON.stringify(messages, null, 2)); // Log the problematic input
374
+ // Re-throw the error to be caught by the main handler, indicating a setup failure
375
+ throw new Error(`Failed to process messages internally: ${internalError.message}`);
376
+ }
377
+ }
378
+ // === End convertMessagesToFalPrompt function ===
379
+
380
+
381
+ /**
382
+ * Makes a request to the Fal AI API, handling key rotation and retries on key-related errors.
383
+ * @param {object} falInput - The input object for the Fal AI API call.
384
+ * @param {boolean} [stream=false] - Whether to make a streaming request.
385
+ * @returns {Promise<object|AsyncIterable<object>>} The result object or async iterator for streams.
386
+ * @throws {Error} If the request fails after trying all valid keys, or if a non-key-related error occurs.
387
+ */
388
  async function makeFalRequestWithRetry(falInput, stream = false) {
389
  let attempts = 0;
390
+ // Maximum attempts equals the initial number of keys
391
  const maxAttempts = falKeys.length;
392
+ // Track keys attempted *within this specific request* to avoid infinite loops on retry logic issues
 
 
393
  const attemptedKeysInThisRequest = new Set();
394
 
395
  while (attempts < maxAttempts) {
396
  const keyInfo = getNextValidKey(); // Get the next *valid* key info { key, index }
397
 
398
  if (!keyInfo) {
399
+ // This should only happen if all keys are currently in the invalidKeys set
400
  console.error("makeFalRequestWithRetry: No valid Fal AI keys remaining.");
401
  throw new Error("No valid Fal AI keys available (all marked as invalid).");
402
  }
403
 
404
+ // Prevent retrying the exact same key within this single request flow
 
405
  if (attemptedKeysInThisRequest.has(keyInfo.key)) {
406
+ console.warn(`Key at index ${keyInfo.index} was already attempted for this request. Skipping to find next different key.`);
407
+ // Do not increment 'attempts' here as we didn't actually use the key. Let the loop find the next.
408
+ continue;
 
409
  }
410
  attemptedKeysInThisRequest.add(keyInfo.key);
411
  attempts++; // Count this as a distinct attempt with a unique key for this request
 
413
  try {
414
  console.log(`Attempt ${attempts}/${maxAttempts}: Trying Fal Key index ${keyInfo.index}...`);
415
 
416
+ // *** CRITICAL: Reconfigure the global fal client with the selected key for THIS attempt ***
417
+ // WARNING: This reconfigures the GLOBAL client instance. In high-concurrency scenarios,
418
+ // this might lead to race conditions where one request configures the key just before another uses it.
419
+ // Consider instance pooling or check if fal-ai client offers per-request credentials if this becomes an issue.
420
+ console.warn(`Configuring GLOBAL fal client with key index ${keyInfo.index}. Review concurrency implications.`);
421
  fal.config({ credentials: keyInfo.key }); // Use the specific key for this attempt
422
 
423
  if (stream) {
424
+ // Use the now-configured global 'fal' object for the stream request
425
  const falStream = await fal.stream("fal-ai/any-llm", { input: falInput });
426
  console.log(`Successfully initiated stream with key index ${keyInfo.index}.`);
427
+ // Success! Return the stream iterator directly for the caller to handle
428
  return falStream;
429
  } else {
430
+ // Use the now-configured global 'fal' object for the non-stream request
431
  console.log(`Executing non-stream request with key index ${keyInfo.index}...`);
432
+ // Use fal.subscribe (or appropriate non-stream method)
433
+ const result = await fal.subscribe("fal-ai/any-llm", {
434
+ input: falInput,
435
+ logs: true // Enable logs if helpful for debugging Fal side
436
+ });
437
  console.log(`Successfully received non-stream result with key index ${keyInfo.index}.`);
438
 
439
+ // Optional: Check for errors *within* a successful-looking response structure
440
  if (result && result.error) {
441
+ console.error(`Fal AI returned an error object within the non-stream result payload (Key Index ${keyInfo.index}):`, result.error);
442
  // Decide if this specific payload error should also invalidate the key
443
  if (isKeyRelatedError(result.error)) { // Reuse the checker
444
  console.warn(`Marking Fal Key index ${keyInfo.index} as invalid due to error in response payload.`);
445
  invalidKeys.add(keyInfo.key);
446
+ continue; // Go to the next iteration of the while loop (try next key)
 
447
  } else {
448
+ // Throw an error that will be caught by the outer handler, not retried with other keys
449
+ throw new Error(`Fal AI error reported in result payload: ${JSON.stringify(result.error)}`);
450
  }
451
  }
452
+ // Success! Return the result
453
  return result;
454
  }
455
  } catch (error) {
456
+ console.error(`Error caught using Fal Key index ${keyInfo.index}:`, error.message || error);
457
 
458
  // Check if the caught error indicates the key is invalid
459
  if (isKeyRelatedError(error)) {
 
462
  invalidKeys.add(keyInfo.key);
463
  // Continue to the next iteration of the while loop to try another key
464
  } else {
465
+ // Error does not appear key-related (e.g., network issue, bad input format, internal Fal server error)
466
+ // Do not retry with other keys for this type of error. Fail the request immediately.
467
  console.error("Error does not appear to be key-related. Failing request without further key retries.");
468
  throw error; // Re-throw the original error to be caught by the main endpoint handler
469
  }
470
  }
471
  } // End while loop
472
 
473
+ // If the loop finishes without returning/throwing earlier, it means all available keys were tried and failed with key-related errors
474
  throw new Error(`Request failed after trying ${attempts} unique Fal key(s). All failed with key-related errors or were already marked invalid.`);
475
  }
476
 
477
 
478
+ // POST /v1/chat/completions endpoint - Handles chat requests, uses key rotation/failover
479
  app.post('/v1/chat/completions', async (req, res) => {
480
+ // Extract parameters from request body
481
+ const { model, messages, stream = false, reasoning = false, ...restOpenAIParams } = req.body; // restOpenAIParams currently ignored but captured
482
 
483
  console.log(`--> POST /v1/chat/completions | Model: ${model} | Stream: ${stream}`);
484
 
485
+ // Basic Input Validation
486
  if (!FAL_SUPPORTED_MODELS.includes(model)) {
487
+ // Log warning but allow attempt if model isn't in the known list
488
+ console.warn(`Warning: Requested model '${model}' is not in the explicitly supported list. Proxy will still attempt the request.`);
489
  }
490
  if (!model || !messages || !Array.isArray(messages) || messages.length === 0) {
491
+ console.error("Invalid request: Missing 'model' or 'messages' array is empty/invalid.");
492
+ return res.status(400).json({ error: 'Bad Request: `model` and a non-empty `messages` array are required.' });
493
  }
494
 
495
  try {
496
+ // --- Prepare Fal AI Input using the conversion function ---
497
+ // This step might throw an error if messages are invalid, caught by the outer catch block
498
  const { prompt, system_prompt } = convertMessagesToFalPrompt(messages);
499
+
500
  const falInput = {
501
+ model: model, // Pass the requested model
502
+ prompt: prompt, // The main prompt constructed from recent history
503
+ // Only include system_prompt if it has content
504
+ ...(system_prompt && system_prompt.length > 0 && { system_prompt: system_prompt }),
505
+ reasoning: !!reasoning, // Ensure boolean, pass reasoning flag if provided
506
  };
 
507
 
508
+ // console.debug("Prepared Fal Input:", JSON.stringify(falInput, null, 2)); // Verbose debug log
509
+ console.log("Attempting Fal request with key rotation/retry logic...");
510
+ console.log(`Prepared Input Lengths - System Prompt: ${system_prompt?.length || 0}, Prompt: ${prompt?.length || 0}`);
511
+
512
+ // --- Handle Stream vs Non-Stream using the retry helper function ---
513
  if (stream) {
514
+ // Set headers for Server-Sent Events (SSE)
515
  res.setHeader('Content-Type', 'text/event-stream; charset=utf-8');
516
  res.setHeader('Cache-Control', 'no-cache');
517
  res.setHeader('Connection', 'keep-alive');
518
+ res.setHeader('Access-Control-Allow-Origin', '*'); // Adjust CORS for production if needed
519
+ res.flushHeaders(); // Send headers immediately
520
 
521
+ let previousOutput = ''; // Track previous output for delta calculation
522
+ let falStream; // Variable to hold the stream iterator
523
 
524
  try {
525
+ // **Initiate the stream using the retry helper**
526
  falStream = await makeFalRequestWithRetry(falInput, true);
527
 
528
+ // Process the stream events asynchronously
529
  for await (const event of falStream) {
530
+ // Safely extract data from the event
531
  const currentOutput = (event && typeof event.output === 'string') ? event.output : '';
532
+ // Default to partial=true if missing
533
  const isPartial = (event && typeof event.partial === 'boolean') ? event.partial : true;
534
+ const errorInfo = (event && event.error) ? event.error : null; // Check for errors within the stream event
535
 
536
+ // Handle errors reported *within* a stream event payload
537
  if (errorInfo) {
538
+ console.error("Error received *within* fal stream event payload:", errorInfo);
539
+ // Optionally send an error chunk to the client
540
+ const errorChunk = {
541
+ id: `chatcmpl-${Date.now()}-error`, object: "chat.completion.chunk", created: Math.floor(Date.now() / 1000), model: model,
542
+ choices: [{ index: 0, delta: {}, finish_reason: "error", message: { role: 'assistant', content: `Fal Stream Event Error: ${JSON.stringify(errorInfo)}` } }]
543
+ };
544
+ // Check if stream is still writable before sending
545
  if (!res.writableEnded) {
546
  res.write(`data: ${JSON.stringify(errorChunk)}\n\n`);
547
+ } else {
548
+ console.warn("Stream already ended when trying to write stream event error.");
549
+ }
550
+ // Depending on the error, you might want to break or continue
551
+ // break; // Uncomment to stop processing on first stream error
 
 
552
  }
553
 
554
+ // Calculate the delta (new content) since the last event
555
  let deltaContent = '';
556
  if (currentOutput.startsWith(previousOutput)) {
557
+ // Normal case: current output contains previous plus new content
558
+ deltaContent = currentOutput.substring(previousOutput.length);
559
  } else if (currentOutput.length > 0) {
560
+ // Output mismatch or reset: send the entire current output as delta
561
+ console.warn("Fal stream output mismatch or reset detected. Sending full current output as delta.");
562
+ deltaContent = currentOutput;
563
+ previousOutput = ''; // Reset comparison base on mismatch
564
+ } // If currentOutput is empty, deltaContent remains empty
565
+ previousOutput = currentOutput; // Update for the next iteration
566
+
567
+ // Send OpenAI-compatible SSE chunk if there's new content or if it's the final chunk
568
  if (deltaContent || !isPartial) {
569
  const openAIChunk = {
570
+ id: `chatcmpl-${Date.now()}`, // Consider more robust ID generation if needed
571
  object: "chat.completion.chunk",
572
  created: Math.floor(Date.now() / 1000),
573
+ model: model, // Echo back the requested model
574
+ choices: [{
575
+ index: 0,
576
+ delta: { content: deltaContent }, // The new part of the content
577
+ // Set finish_reason only on the final chunk
578
+ finish_reason: isPartial === false ? "stop" : null
579
+ }]
580
+ // system_fingerprint is not provided by Fal, so omit or set to null
581
  };
582
+ // Check if stream is still writable before sending
583
  if (!res.writableEnded) {
584
  res.write(`data: ${JSON.stringify(openAIChunk)}\n\n`);
585
+ } else {
586
+ console.warn("Stream already ended when trying to write data chunk.");
587
+ }
588
  }
589
+ } // End for-await loop over falStream
590
 
591
+ // Send the final [DONE] marker to indicate stream completion
592
  if (!res.writableEnded) {
593
  res.write(`data: [DONE]\n\n`);
594
+ res.end(); // Close the connection
595
+ console.log("<-- Stream finished successfully and [DONE] sent.");
596
  } else {
597
+ console.log("<-- Stream processing finished, but connection was already ended before [DONE].");
598
  }
599
 
600
  } catch (streamError) {
601
+ // Catches errors from makeFalRequestWithRetry OR the stream iteration itself (e.g., network drop)
 
602
  console.error('Error during stream request processing:', streamError.message || streamError);
603
  try {
604
+ // If headers haven't been sent, the error likely happened during initial connection (makeFalRequestWithRetry)
605
  if (!res.headersSent) {
 
606
  const errorMessage = (streamError instanceof Error) ? streamError.message : JSON.stringify(streamError);
607
+ // Use 502 Bad Gateway for upstream failures (like all keys failing)
608
+ res.status(502).json({
609
  error: 'Failed to initiate Fal stream',
610
+ details: errorMessage // Include the error message from the helper
611
  });
612
+ console.log("<-- Stream initiation failed response sent (502).");
613
  } else if (!res.writableEnded) {
614
+ // Stream started but failed mid-way. Try to send an error message within the stream context.
615
  const errorDetails = (streamError instanceof Error) ? streamError.message : JSON.stringify(streamError);
616
+ // Send an error object in the SSE stream format
617
  res.write(`data: ${JSON.stringify({ error: { message: "Stream processing error after initiation", type: "proxy_error", details: errorDetails } })}\n\n`);
618
+ res.write(`data: [DONE]\n\n`); // Still send DONE after error for robust client handling
619
  res.end();
620
+ console.log("<-- Stream error sent within stream, stream ended.");
621
  } else {
622
+ // Stream already ended, just log the error server-side.
623
+ console.log("<-- Stream error occurred, but connection was already ended.");
624
  }
625
  } catch (finalError) {
626
+ // Error trying to send the error message itself (rare)
627
  console.error('Error sending stream error message to client:', finalError);
628
+ // Ensure response is ended if possible
629
  if (!res.writableEnded) { res.end(); }
630
  }
631
  }
632
 
633
  } else {
634
+ // --- Non-Stream Request ---
635
  try {
636
+ // **Get the result using the retry helper**
637
  const result = await makeFalRequestWithRetry(falInput, false);
638
+ // console.debug("Received non-stream result via retry function:", JSON.stringify(result, null, 2)); // Verbose debug
639
 
640
+ // --- Construct OpenAI compatible response ---
641
  const openAIResponse = {
642
+ id: `chatcmpl-${result.requestId || Date.now()}`, // Use Fal's requestId if available
643
  object: "chat.completion",
644
  created: Math.floor(Date.now() / 1000),
645
+ model: model, // Echo back the requested model
646
+ choices: [{
647
+ index: 0,
648
+ message: {
649
+ role: "assistant",
650
+ content: result.output || "" // Ensure content is a string, default to empty if missing
651
+ },
652
+ finish_reason: "stop" // Assume 'stop' for successful non-stream completion
653
+ }],
654
+ usage: { // Fal doesn't provide token usage, return nulls
655
+ prompt_tokens: null,
656
+ completion_tokens: null,
657
+ total_tokens: null
658
+ },
659
+ system_fingerprint: null, // Fal doesn't provide this
660
+ // Include Fal specific reasoning if present and requested
661
  ...(result.reasoning && { fal_reasoning: result.reasoning }),
662
  };
663
 
 
665
  console.log("<-- Non-stream response sent successfully.");
666
 
667
  } catch (error) {
668
+ // Catches errors from makeFalRequestWithRetry (e.g., all keys failed or a non-key-related Fal error)
 
669
  console.error('Error during non-stream request processing:', error.message || error);
670
+ // Check if response can still be sent
671
  if (!res.headersSent) {
672
  const errorMessage = (error instanceof Error) ? error.message : JSON.stringify(error);
673
+ // Customize error message if it's the specific "all keys failed" error
674
  const finalMessage = errorMessage.includes("No valid Fal AI keys available") || errorMessage.includes("Request failed after trying")
675
+ ? `Fal request failed: ${errorMessage}` // More direct message
676
  : `Internal Server Error processing Fal request: ${errorMessage}`;
677
+ // Use 502 Bad Gateway to indicate upstream failure
678
+ res.status(502).json({ error: 'Fal Request Failed', details: finalMessage });
679
+ console.log("<-- Non-stream error response sent (502).");
680
  } else {
681
+ // Should be rare for non-stream, but log if headers were already sent
682
+ console.error("Headers already sent for non-stream error response? This is unexpected.");
683
+ if (!res.writableEnded) { res.end(); } // Attempt to end response if possible
684
  }
685
  }
686
  }
687
 
688
  } catch (error) {
689
+ // Catch errors occurring *before* the Fal request attempt
690
+ // (e.g., error during `convertMessagesToFalPrompt`, JSON parsing errors)
691
+ console.error('Unhandled error before initiating Fal request (likely setup or input conversion):', error.message || error);
692
  if (!res.headersSent) {
693
  const errorMessage = (error instanceof Error) ? error.message : JSON.stringify(error);
694
+ // Use 500 Internal Server Error for issues within the proxy itself
695
  res.status(500).json({ error: 'Internal Server Error in Proxy Setup', details: errorMessage });
696
+ console.log("<-- Proxy setup error response sent (500).");
697
  } else {
698
  console.error("Headers already sent when catching setup error. Ending response.");
699
  if (!res.writableEnded) { res.end(); }
 
701
  }
702
  });
703
 
704
+ // Start the Express server
705
  app.listen(PORT, () => {
706
  console.log(`=====================================================================`);
707
  console.log(` Fal OpenAI Proxy Server (Multi-Key Rotation & Failover)`);
708
  console.log(`---------------------------------------------------------------------`);
709
+ console.log(` Listening on port : ${PORT}`);
710
  console.log(` Reading Fal Keys from : FAL_KEY environment variable (comma-separated)`);
711
  console.log(` Loaded Keys Count : ${falKeys.length}`);
712
+ console.log(` Invalid Keys Set : Initialized (size: ${invalidKeys.size})`);
713
+ console.log(` Proxy API Key Auth : ${API_KEY ? 'Enabled (using API_KEY env var)' : 'DISABLED'}`);
714
  console.log(` Input Limits : System Prompt=${SYSTEM_PROMPT_LIMIT}, Prompt=${PROMPT_LIMIT}`);
715
  console.log(` Concurrency Warning : Global Fal client reconfigured per request attempt!`);
716
  console.log(`---------------------------------------------------------------------`);
717
+ console.log(` Endpoints Available:`);
718
  console.log(` POST http://localhost:${PORT}/v1/chat/completions`);
719
  console.log(` GET http://localhost:${PORT}/v1/models`);
720
  console.log(`=====================================================================`);
721
  });
722
 
723
+ // Root path handler for basic health check / info
724
  app.get('/', (req, res) => {
725
+ res.send(`Fal OpenAI Proxy (Multi-Key Rotation/Failover from FAL_KEY) is running. Loaded ${falKeys.length} key(s). Currently ${invalidKeys.size} key(s) marked as invalid.`);
726
  });