Calmlo committed
Commit 984e8a0 · verified · 1 Parent(s): 9df5dcc

Update server.js

Files changed (1)
  1. server.js +144 -483
server.js CHANGED
@@ -1,5 +1,4 @@
1
  import express from 'express';
2
- // Import the 'fal' object directly for configuration within the retry loop
3
  import { fal } from '@fal-ai/client';
4
 
5
  // --- Key Management Setup ---
@@ -8,690 +7,372 @@ const FAL_KEY_STRING = process.env.FAL_KEY;
8
  // Read the custom API Key for proxy authentication
9
  const API_KEY = process.env.API_KEY;
10
 
 
11
  if (!FAL_KEY_STRING) {
12
  console.error("ERROR: FAL_KEY environment variable is not set.");
13
  console.error("Ensure FAL_KEY contains a comma-separated list of your Fal AI keys.");
14
- process.exit(1); // Exit if no Fal keys are provided
15
  }
16
-
17
- // Parse the comma-separated keys from FAL_KEY_STRING
18
  const falKeys = FAL_KEY_STRING.split(',')
19
- .map(key => key.trim()) // Remove leading/trailing whitespace
20
- .filter(key => key.length > 0); // Remove any empty strings resulting from extra commas
21
-
22
  if (falKeys.length === 0) {
23
  console.error("ERROR: No valid Fal keys found in the FAL_KEY environment variable after parsing.");
24
  console.error("Ensure FAL_KEY is a comma-separated list, e.g., 'key1,key2,key3'.");
25
- process.exit(1); // Exit if parsing results in zero valid keys
26
  }
27
-
28
  if (!API_KEY) {
29
  console.error("ERROR: API_KEY environment variable is not set.");
30
- process.exit(1); // Exit if the proxy auth key is missing
31
  }
 
 
32
 
33
  let currentKeyIndex = 0;
34
- // Keep track of keys that failed persistently during runtime
35
  const invalidKeys = new Set();
36
-
37
  console.log(`Loaded ${falKeys.length} Fal AI Key(s) from the FAL_KEY environment variable.`);
38
 
39
- /**
40
- * Gets the next available valid Fal AI key in a round-robin fashion.
41
- * Skips keys that have been marked as invalid.
42
- * @returns {object|null} An object containing the key and its original index { key, index }, or null if no valid keys remain.
43
- */
44
  function getNextValidKey() {
45
- // Check if all keys have been marked as invalid
46
  if (invalidKeys.size >= falKeys.length) {
47
  console.error("All Fal AI keys are marked as invalid.");
48
- return null; // No valid keys left
49
  }
50
-
51
  const initialIndex = currentKeyIndex;
52
- let attempts = 0; // Prevent infinite loops in edge cases
53
  while (attempts < falKeys.length) {
54
  const keyIndex = currentKeyIndex % falKeys.length;
55
  const key = falKeys[keyIndex];
56
-
57
- // Move to the next index for the *next* call, regardless of validity
58
  currentKeyIndex = (keyIndex + 1) % falKeys.length;
59
-
60
- // Check if the current key is NOT in the invalid set
61
  if (!invalidKeys.has(key)) {
62
- // Found a valid key
63
  console.log(`Using Fal Key index: ${keyIndex} (from FAL_KEY list)`);
64
- return { key, index: keyIndex }; // Return the key and its original index
65
  } else {
66
  console.log(`Skipping invalid Fal Key index: ${keyIndex}`);
67
  }
68
-
69
  attempts++;
70
- // Safety check: If we've looped back to the start after trying, break.
71
- // This is mostly redundant due to the invalidKeys.size check, but acts as a safeguard.
72
  if (currentKeyIndex === initialIndex && attempts > 0) {
73
  console.warn("Looped through all keys, potentially all are invalid.");
74
  break;
75
  }
76
  }
77
-
78
- // If we exit the loop, it means no valid key was found
79
  console.error("Could not find a valid Fal AI key after checking all potentially available keys.");
80
  return null;
81
  }
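For context, a small dry-run sketch of the rotation behaviour above, assuming the placeholder value FAL_KEY="A,B,C" was loaded and the second key has already failed (values are illustrative, not real credentials):

// Dry-run sketch only: it mutates module state, so run it in isolation, not in the live proxy.
invalidKeys.add(falKeys[1]);        // pretend key index 1 was marked invalid earlier
console.log(getNextValidKey());     // -> { key: falKeys[0], index: 0 }
console.log(getNextValidKey());     // -> { key: falKeys[2], index: 2 }  (index 1 is skipped)
console.log(getNextValidKey());     // -> { key: falKeys[0], index: 0 }  (wrapped around)
// Once invalidKeys.size === falKeys.length, getNextValidKey() logs an error and returns null.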
82
 
83
- /**
84
- * Checks if an error object likely indicates an issue with the Fal AI API key.
85
- * This is heuristic-based and may need refinement based on observed Fal errors.
86
- * @param {Error|object} error - The error object caught.
87
- * @returns {boolean} True if the error seems key-related, false otherwise.
88
- */
89
  function isKeyRelatedError(error) {
90
- if (!error) return false; // Handle null/undefined errors
91
-
92
  const message = error.message?.toLowerCase() || '';
93
- // Check common HTTP status properties
94
  const status = error.status || error.statusCode;
95
-
96
- // Check for specific HTTP status codes (401: Unauthorized, 403: Forbidden, 429: Too Many Requests/Quota)
97
  if (status === 401 || status === 403 || status === 429) {
98
  console.warn(`Detected potential key-related error (HTTP Status: ${status}).`);
99
  return true;
100
  }
101
-
102
- // Check for common error message patterns (case-insensitive)
103
  const keyErrorPatterns = [
104
  'invalid api key', 'authentication failed', 'permission denied',
105
  'quota exceeded', 'forbidden', 'unauthorized', 'rate limit',
106
- 'credentials', 'api key missing', 'invalid credential'
 
107
  ];
108
  if (keyErrorPatterns.some(pattern => message.includes(pattern))) {
109
  console.warn(`Detected potential key-related error (message contains relevant pattern: "${message}")`);
110
  return true;
111
  }
112
-
113
- // Add more specific checks based on observed Fal AI errors if needed
114
- // e.g., if (error.code === 'FAL_AUTH_FAILURE') return true;
115
-
116
  return false;
117
  }
118
  // --- End Key Management Setup ---
119
 
120
  const app = express();
121
- // Increase payload size limits if needed
122
  app.use(express.json({ limit: '50mb' }));
123
  app.use(express.urlencoded({ extended: true, limit: '50mb' }));
124
-
125
  const PORT = process.env.PORT || 3000;
126
 
127
- // API Key Authentication Middleware
128
  const apiKeyAuth = (req, res, next) => {
129
  const authHeader = req.headers['authorization'];
130
-
131
  if (!authHeader) {
132
  console.warn('Unauthorized: No Authorization header provided');
133
  return res.status(401).json({ error: 'Unauthorized: No API Key provided' });
134
  }
135
-
136
- // Expecting "Bearer YOUR_API_KEY"
137
  const authParts = authHeader.split(' ');
138
  if (authParts.length !== 2 || authParts[0].toLowerCase() !== 'bearer') {
139
  console.warn('Unauthorized: Invalid Authorization header format. Expected "Bearer <key>".');
140
  return res.status(401).json({ error: 'Unauthorized: Invalid Authorization header format' });
141
  }
142
-
143
  const providedKey = authParts[1];
144
  if (providedKey !== API_KEY) {
145
  console.warn('Unauthorized: Invalid API Key provided.');
146
  return res.status(401).json({ error: 'Unauthorized: Invalid API Key' });
147
  }
148
-
149
- // Key is valid, proceed to the next middleware or route handler
150
  next();
151
  };
152
-
153
- // Apply API Key Authentication to relevant endpoints
154
  app.use(['/v1/models', '/v1/chat/completions'], apiKeyAuth);
155
 
156
- // === Global Limits Definition ===
157
- const PROMPT_LIMIT = 4800; // Max length for the main 'prompt' field
158
- const SYSTEM_PROMPT_LIMIT = 4800; // Max length for the 'system_prompt' field
159
- // === End Limits Definition ===
160
-
161
- // Define the list of models supported by fal-ai/any-llm (Update as needed)
162
- const FAL_SUPPORTED_MODELS = [
163
- "anthropic/claude-3.7-sonnet",
164
- "anthropic/claude-3.5-sonnet",
165
- "anthropic/claude-3-5-haiku",
166
- "anthropic/claude-3-haiku",
167
- "google/gemini-pro-1.5",
168
- "google/gemini-flash-1.5",
169
- "google/gemini-flash-1.5-8b",
170
- "google/gemini-2.0-flash-001",
171
- "meta-llama/llama-3.2-1b-instruct",
172
- "meta-llama/llama-3.2-3b-instruct",
173
- "meta-llama/llama-3.1-8b-instruct",
174
- "meta-llama/llama-3.1-70b-instruct",
175
- "openai/gpt-4o-mini",
176
- "openai/gpt-4o",
177
- "deepseek/deepseek-r1",
178
- "meta-llama/llama-4-maverick",
179
- "meta-llama/llama-4-scout"
180
- // Add or remove models here
181
- ];
182
-
183
- // Helper function to extract the owner/organization from a model ID string
184
- const getOwner = (modelId) => {
185
- if (modelId && typeof modelId === 'string' && modelId.includes('/')) {
186
- return modelId.split('/')[0];
187
- }
188
- // Default owner if format is unexpected or missing
189
- return 'fal-ai';
190
- }
191
 
192
- // GET /v1/models endpoint - Returns the list of supported models
193
- app.get('/v1/models', (req, res) => {
194
- console.log("Received request for GET /v1/models");
195
- try {
196
- const modelsData = FAL_SUPPORTED_MODELS.map(modelId => ({
197
- id: modelId,
198
- object: "model",
199
- created: Math.floor(Date.now() / 1000), // Use current timestamp
200
- owned_by: getOwner(modelId)
201
- }));
202
- res.json({ object: "list", data: modelsData });
203
- console.log("Successfully returned model list.");
204
- } catch (error) {
205
- console.error("Error processing GET /v1/models:", error);
206
- res.status(500).json({ error: "Failed to retrieve model list." });
207
- }
208
- });
209
 
 
 
210
 
211
- /**
212
- * Converts OpenAI-style messages array to Fal AI's prompt and system_prompt format.
213
- * Implements System prompt top-priority, separator, and recency-based history filling.
214
- * Includes robustness checks for input validation and internal errors.
215
- * @param {Array<object>} messages - Array of message objects ({ role: string, content: string }).
216
- * @returns {object} An object containing { system_prompt: string, prompt: string }.
217
- * @throws {Error} If input is invalid or an internal processing error occurs.
218
- */
219
- function convertMessagesToFalPrompt(messages) {
220
- // console.log("Entering convertMessagesToFalPrompt with messages:", JSON.stringify(messages, null, 2)); // Debug log
221
-
222
- // --- Input Validation ---
223
- if (!Array.isArray(messages)) {
224
- console.error("Error in convertMessagesToFalPrompt: Input 'messages' is not an array.");
225
- throw new Error("Invalid input: 'messages' must be an array.");
226
- }
227
- if (messages.length === 0) {
228
- console.warn("Warning in convertMessagesToFalPrompt: Input 'messages' array is empty.");
229
- return { system_prompt: "", prompt: "" }; // Return empty if no messages
230
- }
231
- // --- End Input Validation ---
232
-
233
- try { // Wrap main logic in try...catch for internal errors
234
- let fixed_system_prompt_content = "";
235
- const conversation_message_blocks = [];
236
- // console.log(`Original messages count: ${messages.length}`);
237
-
238
- // 1. Separate System messages, format User/Assistant messages
239
- for (const message of messages) {
240
- // Validate individual message structure
241
- if (!message || typeof message !== 'object' || typeof message.role !== 'string') {
242
- console.warn(`Skipping invalid message object in convertMessagesToFalPrompt: ${JSON.stringify(message)}`);
243
- continue; // Skip malformed message
244
- }
245
-
246
- // Safely handle content (null/undefined become empty string)
247
- let content = (message.content === null || message.content === undefined) ? "" : String(message.content);
248
-
249
- switch (message.role) {
250
- case 'system':
251
- // Append all system messages together
252
- fixed_system_prompt_content += `System: ${content}\n\n`;
253
- break;
254
- case 'user':
255
- conversation_message_blocks.push(`Human: ${content}\n\n`);
256
- break;
257
- case 'assistant':
258
- conversation_message_blocks.push(`Assistant: ${content}\n\n`);
259
- break;
260
- default:
261
- // Log unsupported roles but continue processing
262
- console.warn(`Unsupported role encountered in convertMessagesToFalPrompt: ${message.role}. Skipping message.`);
263
- continue;
264
- }
265
- }
266
-
267
- // 2. Truncate combined system messages if they exceed the limit
268
- if (fixed_system_prompt_content.length > SYSTEM_PROMPT_LIMIT) {
269
- const originalLength = fixed_system_prompt_content.length;
270
- fixed_system_prompt_content = fixed_system_prompt_content.substring(0, SYSTEM_PROMPT_LIMIT);
271
- console.warn(`Combined system messages truncated from ${originalLength} to ${SYSTEM_PROMPT_LIMIT} characters.`);
272
- }
273
- // Trim trailing whitespace from the fixed system content
274
- fixed_system_prompt_content = fixed_system_prompt_content.trim();
275
-
276
- // 3. Calculate remaining space in system_prompt for history
277
- // Consider potential separator length later if needed
278
- let space_occupied_by_fixed_system = 0;
279
- if (fixed_system_prompt_content.length > 0) {
280
- // Approximate space: content + potential separator overhead (\n\n...\n\n)
281
- space_occupied_by_fixed_system = fixed_system_prompt_content.length + 4; // Heuristic for spacing
282
- }
283
- const remaining_system_limit = Math.max(0, SYSTEM_PROMPT_LIMIT - space_occupied_by_fixed_system);
284
- // console.log(`Trimmed fixed system prompt length: ${fixed_system_prompt_content.length}. Approx remaining system history limit: ${remaining_system_limit}`);
285
-
286
- // 4. Fill history backwards (recency): Prioritize 'prompt', then 'system_prompt' overflow
287
- const prompt_history_blocks = []; // For the main 'prompt' field
288
- const system_prompt_history_blocks = []; // For history overflowing into 'system_prompt'
289
- let current_prompt_length = 0;
290
- let current_system_history_length = 0;
291
- let promptFull = (PROMPT_LIMIT <= 0); // Handle zero limit case
292
- let systemHistoryFull = (remaining_system_limit <= 0);
293
-
294
- // console.log(`Processing ${conversation_message_blocks.length} user/assistant messages for recency filling.`);
295
- for (let i = conversation_message_blocks.length - 1; i >= 0; i--) {
296
- const message_block = conversation_message_blocks[i];
297
- // Ensure message_block is a string before getting length
298
- const block_length = (typeof message_block === 'string') ? message_block.length : 0;
299
-
300
- if (block_length === 0) continue; // Skip empty blocks
301
-
302
- // If both slots are full, stop processing older messages
303
- if (promptFull && systemHistoryFull) {
304
- // console.log(`Both prompt and system history slots full. Omitting older messages from index ${i}.`);
305
- break;
306
- }
307
-
308
- // Try fitting into the main 'prompt' first
309
- if (!promptFull) {
310
- if (current_prompt_length + block_length <= PROMPT_LIMIT) {
311
- prompt_history_blocks.unshift(message_block); // Add to beginning
312
- current_prompt_length += block_length;
313
- continue; // Message placed, move to next older message
314
- } else {
315
- promptFull = true; // Main prompt is now full
316
- // console.log(`Prompt limit (${PROMPT_LIMIT}) reached. Trying system history slot.`);
317
- }
318
- }
319
-
320
- // If prompt is full, try fitting into the 'system_prompt' remaining space
321
- if (!systemHistoryFull) {
322
- if (current_system_history_length + block_length <= remaining_system_limit) {
323
- system_prompt_history_blocks.unshift(message_block); // Add to beginning
324
- current_system_history_length += block_length;
325
- continue; // Message placed, move to next older message
326
- } else {
327
- systemHistoryFull = true; // System history slot is now full
328
- // console.log(`System history limit (${remaining_system_limit}) reached.`);
329
- }
330
- }
331
- }
332
-
333
- // 5. Combine the final prompt and system_prompt parts
334
- const system_prompt_history_content = system_prompt_history_blocks.join('').trim();
335
- const final_prompt = prompt_history_blocks.join('').trim(); // Main prompt content
336
-
337
- // Separator to distinguish fixed system prompt from overflow history
338
- const SEPARATOR = "\n\n------- Earlier Conversation History -------\n\n";
339
-
340
- let final_system_prompt = "";
341
-
342
- // Check if we have content for each part
343
- const hasFixedSystem = fixed_system_prompt_content.length > 0;
344
- const hasSystemHistory = system_prompt_history_content.length > 0;
345
-
346
- if (hasFixedSystem && hasSystemHistory) {
347
- // Both parts exist: Combine with separator
348
- final_system_prompt = fixed_system_prompt_content + SEPARATOR + system_prompt_history_content;
349
- // console.log("Combining fixed system prompt and history with separator.");
350
- } else if (hasFixedSystem) {
351
- // Only fixed system prompt exists
352
- final_system_prompt = fixed_system_prompt_content;
353
- // console.log("Using only fixed system prompt.");
354
- } else if (hasSystemHistory) {
355
- // Only overflow history exists (fixed system prompt was empty)
356
- final_system_prompt = system_prompt_history_content;
357
- // console.log("Using only history in system prompt slot.");
358
- }
359
- // If both are empty, final_system_prompt remains ""
360
-
361
- // 6. Return the structured result
362
- const result = {
363
- system_prompt: final_system_prompt,
364
- prompt: final_prompt
365
- };
366
-
367
- // console.log(`Final system_prompt length: ${result.system_prompt.length}`); // Debug log
368
- // console.log(`Final prompt length: ${result.prompt.length}`); // Debug log
369
- return result;
370
-
371
- } catch (internalError) {
372
- console.error("!!! CRITICAL ERROR inside convertMessagesToFalPrompt:", internalError);
373
- console.error("!!! Failing messages input was:", JSON.stringify(messages, null, 2)); // Log the problematic input
374
- // Re-throw the error to be caught by the main handler, indicating a setup failure
375
- throw new Error(`Failed to process messages internally: ${internalError.message}`);
376
- }
377
- }
378
- // === End convertMessagesToFalPrompt function ===
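To make the mapping concrete, a short illustrative sketch of the output this function is intended to produce when everything fits inside the limits (expected values shown as comments, not captured from a real run):

// Illustrative only: all strings are far below PROMPT_LIMIT / SYSTEM_PROMPT_LIMIT.
const { system_prompt, prompt } = convertMessagesToFalPrompt([
  { role: 'system',    content: 'You are terse.' },
  { role: 'user',      content: 'Hi' },
  { role: 'assistant', content: 'Hello.' },
  { role: 'user',      content: 'What is 2 + 2?' }
]);
// system_prompt === 'System: You are terse.'
// prompt        === 'Human: Hi\n\nAssistant: Hello.\n\nHuman: What is 2 + 2?'
// Only when recent history overflows PROMPT_LIMIT does older history spill into
// system_prompt after the "Earlier Conversation History" separator.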
379
 
380
 
381
  /**
382
- * Makes a request to the Fal AI API, handling key rotation and retries on key-related errors.
 
383
  * @param {object} falInput - The input object for the Fal AI API call.
384
  * @param {boolean} [stream=false] - Whether to make a streaming request.
385
- * @returns {Promise<object|AsyncIterable<object>>} The result object or async iterator for streams.
386
- * @throws {Error} If the request fails after trying all valid keys, or if a non-key-related error occurs.
 
387
  */
388
  async function makeFalRequestWithRetry(falInput, stream = false) {
389
  let attempts = 0;
390
- // Maximum attempts equals the initial number of keys
391
  const maxAttempts = falKeys.length;
392
- // Track keys attempted *within this specific request* to avoid infinite loops on retry logic issues
393
  const attemptedKeysInThisRequest = new Set();
394
 
395
  while (attempts < maxAttempts) {
396
- const keyInfo = getNextValidKey(); // Get the next *valid* key info { key, index }
397
-
398
  if (!keyInfo) {
399
- // This should only happen if all keys are currently in the invalidKeys set
400
  console.error("makeFalRequestWithRetry: No valid Fal AI keys remaining.");
401
  throw new Error("No valid Fal AI keys available (all marked as invalid).");
402
  }
403
-
404
- // Prevent retrying the exact same key within this single request flow
405
  if (attemptedKeysInThisRequest.has(keyInfo.key)) {
406
  console.warn(`Key at index ${keyInfo.index} was already attempted for this request. Skipping to find next different key.`);
407
- // Do not increment 'attempts' here as we didn't actually use the key. Let the loop find the next.
408
  continue;
409
  }
410
  attemptedKeysInThisRequest.add(keyInfo.key);
411
- attempts++; // Count this as a distinct attempt with a unique key for this request
412
 
413
  try {
414
  console.log(`Attempt ${attempts}/${maxAttempts}: Trying Fal Key index ${keyInfo.index}...`);
415
-
416
- // *** CRITICAL: Reconfigure the global fal client with the selected key for THIS attempt ***
417
- // WARNING: This reconfigures the GLOBAL client instance. In high-concurrency scenarios,
418
- // this might lead to race conditions where one request configures the key just before another uses it.
419
- // Consider instance pooling or check if fal-ai client offers per-request credentials if this becomes an issue.
420
  console.warn(`Configuring GLOBAL fal client with key index ${keyInfo.index}. Review concurrency implications.`);
421
- fal.config({ credentials: keyInfo.key }); // Use the specific key for this attempt
422
 
423
  if (stream) {
424
- // Use the now-configured global 'fal' object for the stream request
425
  const falStream = await fal.stream("fal-ai/any-llm", { input: falInput });
426
  console.log(`Successfully initiated stream with key index ${keyInfo.index}.`);
427
- // Success! Return the stream iterator directly for the caller to handle
428
- return falStream;
429
  } else {
430
- // Use the now-configured global 'fal' object for the non-stream request
431
  console.log(`Executing non-stream request with key index ${keyInfo.index}...`);
432
- // Use fal.subscribe (or appropriate non-stream method)
433
- const result = await fal.subscribe("fal-ai/any-llm", {
434
- input: falInput,
435
- logs: true // Enable logs if helpful for debugging Fal side
436
- });
437
  console.log(`Successfully received non-stream result with key index ${keyInfo.index}.`);
438
-
439
- // Optional: Check for errors *within* a successful-looking response structure
440
  if (result && result.error) {
441
  console.error(`Fal AI returned an error object within the non-stream result payload (Key Index ${keyInfo.index}):`, result.error);
442
- // Decide if this specific payload error should also invalidate the key
443
- if (isKeyRelatedError(result.error)) { // Reuse the checker
444
  console.warn(`Marking Fal Key index ${keyInfo.index} as invalid due to error in response payload.`);
445
  invalidKeys.add(keyInfo.key);
446
- continue; // Go to the next iteration of the while loop (try next key)
447
  } else {
448
- // Throw an error that will be caught by the outer handler, not retried with other keys
449
  throw new Error(`Fal AI error reported in result payload: ${JSON.stringify(result.error)}`);
450
  }
451
  }
452
- // Success! Return the result
453
- return result;
454
  }
455
  } catch (error) {
456
- console.error(`Error caught using Fal Key index ${keyInfo.index}:`, error.message || error);
457
-
458
- // Check if the caught error indicates the key is invalid
459
  if (isKeyRelatedError(error)) {
460
- console.warn(`Marking Fal Key index ${keyInfo.index} as invalid due to caught error.`);
461
- // **ACTION: Add the failed key to the set of invalid keys**
462
  invalidKeys.add(keyInfo.key);
463
- // Continue to the next iteration of the while loop to try another key
464
  } else {
465
- // Error does not appear key-related (e.g., network issue, bad input format, internal Fal server error)
466
- // Do not retry with other keys for this type of error. Fail the request immediately.
467
- console.error("Error does not appear to be key-related. Failing request without further key retries.");
468
- throw error; // Re-throw the original error to be caught by the main endpoint handler
469
  }
470
  }
471
  } // End while loop
472
 
473
- // If the loop finishes without returning/throwing earlier, it means all available keys were tried and failed with key-related errors
474
- throw new Error(`Request failed after trying ${attempts} unique Fal key(s). All failed with key-related errors or were already marked invalid.`);
475
  }
476
 
477
 
478
- // POST /v1/chat/completions endpoint - Handles chat requests, uses key rotation/failover
479
  app.post('/v1/chat/completions', async (req, res) => {
480
- // Extract parameters from request body
481
- const { model, messages, stream = false, reasoning = false, ...restOpenAIParams } = req.body; // restOpenAIParams currently ignored but captured
482
-
483
  console.log(`--> POST /v1/chat/completions | Model: ${model} | Stream: ${stream}`);
484
 
485
- // Basic Input Validation
486
- if (!FAL_SUPPORTED_MODELS.includes(model)) {
487
- // Log warning but allow attempt if model isn't in the known list
488
- console.warn(`Warning: Requested model '${model}' is not in the explicitly supported list. Proxy will still attempt the request.`);
489
- }
490
- if (!model || !messages || !Array.isArray(messages) || messages.length === 0) {
491
  console.error("Invalid request: Missing 'model' or 'messages' array is empty/invalid.");
492
  return res.status(400).json({ error: 'Bad Request: `model` and a non-empty `messages` array are required.' });
493
  }
494
 
 
 
 
495
  try {
496
- // --- Prepare Fal AI Input using the conversion function ---
497
- // This step might throw an error if messages are invalid, caught by the outer catch block
498
  const { prompt, system_prompt } = convertMessagesToFalPrompt(messages);
499
 
500
- const falInput = {
501
- model: model, // Pass the requested model
502
- prompt: prompt, // The main prompt constructed from recent history
503
- // Only include system_prompt if it has content
504
- ...(system_prompt && system_prompt.length > 0 && { system_prompt: system_prompt }),
505
- reasoning: !!reasoning, // Ensure boolean, pass reasoning flag if provided
506
- };
507
-
508
- // console.debug("Prepared Fal Input:", JSON.stringify(falInput, null, 2)); // Verbose debug log
509
  console.log("Attempting Fal request with key rotation/retry logic...");
510
  console.log(`Prepared Input Lengths - System Prompt: ${system_prompt?.length || 0}, Prompt: ${prompt?.length || 0}`);
511
 
512
- // --- Handle Stream vs Non-Stream using the retry helper function ---
513
  if (stream) {
514
- // Set headers for Server-Sent Events (SSE)
515
  res.setHeader('Content-Type', 'text/event-stream; charset=utf-8');
 
516
  res.setHeader('Cache-Control', 'no-cache');
517
  res.setHeader('Connection', 'keep-alive');
518
- res.setHeader('Access-Control-Allow-Origin', '*'); // Adjust CORS for production if needed
519
- res.flushHeaders(); // Send headers immediately
520
 
521
- let previousOutput = ''; // Track previous output for delta calculation
522
- let falStream; // Variable to hold the stream iterator
523
 
524
  try {
525
- // **Initiate the stream using the retry helper**
526
- falStream = await makeFalRequestWithRetry(falInput, true);
 
 
 
527
 
528
  // Process the stream events asynchronously
529
  for await (const event of falStream) {
530
- // Safely extract data from the event
531
  const currentOutput = (event && typeof event.output === 'string') ? event.output : '';
532
- // Default to partial=true if missing
533
  const isPartial = (event && typeof event.partial === 'boolean') ? event.partial : true;
534
- const errorInfo = (event && event.error) ? event.error : null; // Check for errors within the stream event
535
 
536
- // Handle errors reported *within* a stream event payload
537
  if (errorInfo) {
 
538
  console.error("Error received *within* fal stream event payload:", errorInfo);
539
- // Optionally send an error chunk to the client
540
- const errorChunk = {
541
- id: `chatcmpl-${Date.now()}-error`, object: "chat.completion.chunk", created: Math.floor(Date.now() / 1000), model: model,
542
- choices: [{ index: 0, delta: {}, finish_reason: "error", message: { role: 'assistant', content: `Fal Stream Event Error: ${JSON.stringify(errorInfo)}` } }]
543
- };
544
- // Check if stream is still writable before sending
545
- if (!res.writableEnded) {
546
- res.write(`data: ${JSON.stringify(errorChunk)}\n\n`);
547
- } else {
548
- console.warn("Stream already ended when trying to write stream event error.");
549
- }
550
- // Depending on the error, you might want to break or continue
551
- // break; // Uncomment to stop processing on first stream error
552
  }
553
-
554
- // Calculate the delta (new content) since the last event
555
  let deltaContent = '';
556
  if (currentOutput.startsWith(previousOutput)) {
557
- // Normal case: current output contains previous plus new content
558
  deltaContent = currentOutput.substring(previousOutput.length);
559
  } else if (currentOutput.length > 0) {
560
- // Output mismatch or reset: send the entire current output as delta
561
- console.warn("Fal stream output mismatch or reset detected. Sending full current output as delta.");
562
- deltaContent = currentOutput;
563
- previousOutput = ''; // Reset comparison base on mismatch
564
- } // If currentOutput is empty, deltaContent remains empty
565
- previousOutput = currentOutput; // Update for the next iteration
566
-
567
- // Send OpenAI-compatible SSE chunk if there's new content or if it's the final chunk
568
  if (deltaContent || !isPartial) {
569
- const openAIChunk = {
570
- id: `chatcmpl-${Date.now()}`, // Consider more robust ID generation if needed
571
- object: "chat.completion.chunk",
572
- created: Math.floor(Date.now() / 1000),
573
- model: model, // Echo back the requested model
574
- choices: [{
575
- index: 0,
576
- delta: { content: deltaContent }, // The new part of the content
577
- // Set finish_reason only on the final chunk
578
- finish_reason: isPartial === false ? "stop" : null
579
- }]
580
- // system_fingerprint is not provided by Fal, so omit or set to null
581
- };
582
- // Check if stream is still writable before sending
583
- if (!res.writableEnded) {
584
- res.write(`data: ${JSON.stringify(openAIChunk)}\n\n`);
585
- } else {
586
- console.warn("Stream already ended when trying to write data chunk.");
587
- }
588
  }
589
- } // End for-await loop over falStream
 
590
 
591
- // Send the final [DONE] marker to indicate stream completion
592
  if (!res.writableEnded) {
593
  res.write(`data: [DONE]\n\n`);
594
- res.end(); // Close the connection
595
  console.log("<-- Stream finished successfully and [DONE] sent.");
596
  } else {
597
- console.log("<-- Stream processing finished, but connection was already ended before [DONE].");
598
  }
599
 
600
  } catch (streamError) {
601
- // Catches errors from makeFalRequestWithRetry OR the stream iteration itself (e.g., network drop)
 
 
602
  console.error('Error during stream request processing:', streamError.message || streamError);
603
  try {
604
- // If headers haven't been sent, the error likely happened during initial connection (makeFalRequestWithRetry)
605
  if (!res.headersSent) {
 
606
  const errorMessage = (streamError instanceof Error) ? streamError.message : JSON.stringify(streamError);
607
- // Use 502 Bad Gateway for upstream failures (like all keys failing)
608
- res.status(502).json({
609
- error: 'Failed to initiate Fal stream',
610
- details: errorMessage // Include the error message from the helper
611
- });
612
  console.log("<-- Stream initiation failed response sent (502).");
613
  } else if (!res.writableEnded) {
614
- // Stream started but failed mid-way. Try to send an error message within the stream context.
615
  const errorDetails = (streamError instanceof Error) ? streamError.message : JSON.stringify(streamError);
616
- // Send an error object in the SSE stream format
617
  res.write(`data: ${JSON.stringify({ error: { message: "Stream processing error after initiation", type: "proxy_error", details: errorDetails } })}\n\n`);
618
- res.write(`data: [DONE]\n\n`); // Still send DONE after error for robust client handling
619
  res.end();
620
  console.log("<-- Stream error sent within stream, stream ended.");
621
  } else {
622
- // Stream already ended, just log the error server-side.
623
  console.log("<-- Stream error occurred, but connection was already ended.");
624
  }
625
  } catch (finalError) {
626
- // Error trying to send the error message itself (rare)
627
  console.error('Error sending stream error message to client:', finalError);
628
- // Ensure response is ended if possible
629
  if (!res.writableEnded) { res.end(); }
630
  }
 
631
  }
632
 
633
  } else {
634
- // --- Non-Stream Request ---
635
  try {
636
- // **Get the result using the retry helper**
637
  const result = await makeFalRequestWithRetry(falInput, false);
638
- // console.debug("Received non-stream result via retry function:", JSON.stringify(result, null, 2)); // Verbose debug
639
-
640
- // --- Construct OpenAI compatible response ---
641
- const openAIResponse = {
642
- id: `chatcmpl-${result.requestId || Date.now()}`, // Use Fal's requestId if available
643
- object: "chat.completion",
644
- created: Math.floor(Date.now() / 1000),
645
- model: model, // Echo back the requested model
646
- choices: [{
647
- index: 0,
648
- message: {
649
- role: "assistant",
650
- content: result.output || "" // Ensure content is a string, default to empty if missing
651
- },
652
- finish_reason: "stop" // Assume 'stop' for successful non-stream completion
653
- }],
654
- usage: { // Fal doesn't provide token usage, return nulls
655
- prompt_tokens: null,
656
- completion_tokens: null,
657
- total_tokens: null
658
- },
659
- system_fingerprint: null, // Fal doesn't provide this
660
- // Include Fal specific reasoning if present and requested
661
- ...(result.reasoning && { fal_reasoning: result.reasoning }),
662
- };
663
 
664
  res.json(openAIResponse);
665
  console.log("<-- Non-stream response sent successfully.");
666
-
667
  } catch (error) {
668
- // Catches errors from makeFalRequestWithRetry (e.g., all keys failed or a non-key-related Fal error)
669
- console.error('Error during non-stream request processing:', error.message || error);
670
- // Check if response can still be sent
671
  if (!res.headersSent) {
672
  const errorMessage = (error instanceof Error) ? error.message : JSON.stringify(error);
673
- // Customize error message if it's the specific "all keys failed" error
674
  const finalMessage = errorMessage.includes("No valid Fal AI keys available") || errorMessage.includes("Request failed after trying")
675
- ? `Fal request failed: ${errorMessage}` // More direct message
676
  : `Internal Server Error processing Fal request: ${errorMessage}`;
677
- // Use 502 Bad Gateway to indicate upstream failure
678
  res.status(502).json({ error: 'Fal Request Failed', details: finalMessage });
679
  console.log("<-- Non-stream error response sent (502).");
680
  } else {
681
- // Should be rare for non-stream, but log if headers were already sent
682
  console.error("Headers already sent for non-stream error response? This is unexpected.");
683
- if (!res.writableEnded) { res.end(); } // Attempt to end response if possible
684
  }
685
  }
686
  }
687
 
688
  } catch (error) {
689
- // Catch errors occurring *before* the Fal request attempt
690
- // (e.g., error during `convertMessagesToFalPrompt`, JSON parsing errors)
691
  console.error('Unhandled error before initiating Fal request (likely setup or input conversion):', error.message || error);
692
  if (!res.headersSent) {
693
  const errorMessage = (error instanceof Error) ? error.message : JSON.stringify(error);
694
- // Use 500 Internal Server Error for issues within the proxy itself
695
  res.status(500).json({ error: 'Internal Server Error in Proxy Setup', details: errorMessage });
696
  console.log("<-- Proxy setup error response sent (500).");
697
  } else {
@@ -701,26 +382,6 @@ app.post('/v1/chat/completions', async (req, res) => {
701
  }
702
  });
703
 
704
- // Start the Express server
705
- app.listen(PORT, () => {
706
- console.log(`=====================================================================`);
707
- console.log(` Fal OpenAI Proxy Server (Multi-Key Rotation & Failover)`);
708
- console.log(`---------------------------------------------------------------------`);
709
- console.log(` Listening on port : ${PORT}`);
710
- console.log(` Reading Fal Keys from : FAL_KEY environment variable (comma-separated)`);
711
- console.log(` Loaded Keys Count : ${falKeys.length}`);
712
- console.log(` Invalid Keys Set : Initialized (size: ${invalidKeys.size})`);
713
- console.log(` Proxy API Key Auth : ${API_KEY ? 'Enabled (using API_KEY env var)' : 'DISABLED'}`);
714
- console.log(` Input Limits : System Prompt=${SYSTEM_PROMPT_LIMIT}, Prompt=${PROMPT_LIMIT}`);
715
- console.log(` Concurrency Warning : Global Fal client reconfigured per request attempt!`);
716
- console.log(`---------------------------------------------------------------------`);
717
- console.log(` Endpoints Available:`);
718
- console.log(` POST http://localhost:${PORT}/v1/chat/completions`);
719
- console.log(` GET http://localhost:${PORT}/v1/models`);
720
- console.log(`=====================================================================`);
721
- });
722
-
723
- // Root path handler for basic health check / info
724
- app.get('/', (req, res) => {
725
- res.send(`Fal OpenAI Proxy (Multi-Key Rotation/Failover from FAL_KEY) is running. Loaded ${falKeys.length} key(s). Currently ${invalidKeys.size} key(s) marked as invalid.`);
726
- });
 
1
  import express from 'express';
 
2
  import { fal } from '@fal-ai/client';
3
 
4
  // --- Key Management Setup ---
 
7
  // Read the custom API Key for proxy authentication
8
  const API_KEY = process.env.API_KEY;
9
 
10
+ // --- (Initial checks for FAL_KEY_STRING, API_KEY, parsing falKeys remain the same) ---
11
  if (!FAL_KEY_STRING) {
12
  console.error("ERROR: FAL_KEY environment variable is not set.");
13
  console.error("Ensure FAL_KEY contains a comma-separated list of your Fal AI keys.");
14
+ process.exit(1);
15
  }
 
 
16
  const falKeys = FAL_KEY_STRING.split(',')
17
+ .map(key => key.trim())
18
+ .filter(key => key.length > 0);
 
19
  if (falKeys.length === 0) {
20
  console.error("ERROR: No valid Fal keys found in the FAL_KEY environment variable after parsing.");
21
  console.error("Ensure FAL_KEY is a comma-separated list, e.g., 'key1,key2,key3'.");
22
+ process.exit(1);
23
  }
 
24
  if (!API_KEY) {
25
  console.error("ERROR: API_KEY environment variable is not set.");
26
+ process.exit(1);
27
  }
28
+ // --- (End initial checks) ---
29
+
30
 
31
  let currentKeyIndex = 0;
 
32
  const invalidKeys = new Set();
 
33
  console.log(`Loaded ${falKeys.length} Fal AI Key(s) from the FAL_KEY environment variable.`);
34
 
35
+ // --- (getNextValidKey function remains the same) ---
36
  function getNextValidKey() {
 
37
  if (invalidKeys.size >= falKeys.length) {
38
  console.error("All Fal AI keys are marked as invalid.");
39
+ return null;
40
  }
 
41
  const initialIndex = currentKeyIndex;
42
+ let attempts = 0;
43
  while (attempts < falKeys.length) {
44
  const keyIndex = currentKeyIndex % falKeys.length;
45
  const key = falKeys[keyIndex];
 
 
46
  currentKeyIndex = (keyIndex + 1) % falKeys.length;
 
 
47
  if (!invalidKeys.has(key)) {
 
48
  console.log(`Using Fal Key index: ${keyIndex} (from FAL_KEY list)`);
49
+ return { key, index: keyIndex };
50
  } else {
51
  console.log(`Skipping invalid Fal Key index: ${keyIndex}`);
52
  }
 
53
  attempts++;
 
 
54
  if (currentKeyIndex === initialIndex && attempts > 0) {
55
  console.warn("Looped through all keys, potentially all are invalid.");
56
  break;
57
  }
58
  }
 
 
59
  console.error("Could not find a valid Fal AI key after checking all potentially available keys.");
60
  return null;
61
  }
62
 
63
+ // --- (isKeyRelatedError function remains the same) ---
64
  function isKeyRelatedError(error) {
65
+ if (!error) return false;
 
66
  const message = error.message?.toLowerCase() || '';
 
67
  const status = error.status || error.statusCode;
 
 
68
  if (status === 401 || status === 403 || status === 429) {
69
  console.warn(`Detected potential key-related error (HTTP Status: ${status}).`);
70
  return true;
71
  }
 
 
72
  const keyErrorPatterns = [
73
  'invalid api key', 'authentication failed', 'permission denied',
74
  'quota exceeded', 'forbidden', 'unauthorized', 'rate limit',
75
+ 'credentials', 'api key missing', 'invalid credential',
76
+ 'exhausted balance', 'user is locked' // Add specific messages if observed
77
  ];
78
  if (keyErrorPatterns.some(pattern => message.includes(pattern))) {
79
  console.warn(`Detected potential key-related error (message contains relevant pattern: "${message}")`);
80
  return true;
81
  }
82
+ // Also check the body.detail if status is 403, as seen in the logs
83
+ if (status === 403 && error.body?.detail) {
84
+ const detailMessage = error.body.detail.toLowerCase();
85
+ if (keyErrorPatterns.some(pattern => detailMessage.includes(pattern))) {
86
+ console.warn(`Detected potential key-related error (body.detail contains relevant pattern: "${detailMessage}")`);
87
+ return true;
88
+ }
89
+ }
90
  return false;
91
  }
92
  // --- End Key Management Setup ---
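For reference, a minimal sketch of the error shapes this heuristic is meant to classify; the sample objects are hypothetical (only status/statusCode, message and body.detail are actually inspected), so adjust them to whatever @fal-ai/client really surfaces:

// Hypothetical error objects, shown only to illustrate the classification:
console.log(isKeyRelatedError({ status: 429, message: 'Rate limit exceeded' }));          // true  -> rotate to next key
console.log(isKeyRelatedError({ statusCode: 401, message: 'Invalid API key provided' })); // true  -> rotate to next key
console.log(isKeyRelatedError({ status: 403, body: { detail: 'Exhausted balance' } }));   // true  -> rotate to next key
console.log(isKeyRelatedError({ status: 500, message: 'Upstream model timeout' }));       // false -> fail the request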
93
 
94
  const app = express();
 
95
  app.use(express.json({ limit: '50mb' }));
96
  app.use(express.urlencoded({ extended: true, limit: '50mb' }));
 
97
  const PORT = process.env.PORT || 3000;
98
 
99
+ // --- (apiKeyAuth middleware remains the same) ---
100
  const apiKeyAuth = (req, res, next) => {
101
  const authHeader = req.headers['authorization'];
 
102
  if (!authHeader) {
103
  console.warn('Unauthorized: No Authorization header provided');
104
  return res.status(401).json({ error: 'Unauthorized: No API Key provided' });
105
  }
 
 
106
  const authParts = authHeader.split(' ');
107
  if (authParts.length !== 2 || authParts[0].toLowerCase() !== 'bearer') {
108
  console.warn('Unauthorized: Invalid Authorization header format. Expected "Bearer <key>".');
109
  return res.status(401).json({ error: 'Unauthorized: Invalid Authorization header format' });
110
  }
 
111
  const providedKey = authParts[1];
112
  if (providedKey !== API_KEY) {
113
  console.warn('Unauthorized: Invalid API Key provided.');
114
  return res.status(401).json({ error: 'Unauthorized: Invalid API Key' });
115
  }
 
 
116
  next();
117
  };
 
 
118
  app.use(['/v1/models', '/v1/chat/completions'], apiKeyAuth);
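Since both routes sit behind this middleware, every call must send the proxy key as a bearer token. A minimal client-side sketch (Node 18+ global fetch; the URL, token and model are placeholders, not values from this repo):

// Placeholder values throughout; the bearer token must match the proxy's API_KEY env var.
const resp = await fetch('http://localhost:3000/v1/chat/completions', {
  method: 'POST',
  headers: {
    'Authorization': 'Bearer YOUR_PROXY_API_KEY',
    'Content-Type': 'application/json'
  },
  body: JSON.stringify({
    model: 'openai/gpt-4o-mini',
    messages: [{ role: 'user', content: 'Hello!' }]
  })
});
console.log(resp.status, await resp.json());  // 401 on a missing/wrong key; 200 if the upstream Fal call succeeds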
119
 
120
 
121
+ // --- (Global Limits, FAL_SUPPORTED_MODELS, getOwner remain the same) ---
122
+ const PROMPT_LIMIT = 4800;
123
+ const SYSTEM_PROMPT_LIMIT = 4800;
124
+ const FAL_SUPPORTED_MODELS = [ /* ... model list ... */ ];
125
+ const getOwner = (modelId) => { /* ... */ };
 
126
 
127
+ // --- (GET /v1/models endpoint remains the same) ---
128
+ app.get('/v1/models', (req, res) => { /* ... */ });
129
 
130
+ // --- (convertMessagesToFalPrompt function remains the same) ---
131
+ function convertMessagesToFalPrompt(messages) { /* ... */ }
132
 
133
 
134
  /**
135
+ * MODIFIED: Makes a request to the Fal AI API, handling key rotation and retries on key-related errors.
136
+ * For stream requests, returns the stream AND the key info used.
137
  * @param {object} falInput - The input object for the Fal AI API call.
138
  * @param {boolean} [stream=false] - Whether to make a streaming request.
139
+ * @returns {Promise<object|{stream: AsyncIterable<object>, keyUsed: string, indexUsed: number}>}
140
+ * The result object for non-stream, or an object containing the stream and key info for stream.
141
+ * @throws {Error} If the request fails after trying all valid keys, or if a non-key-related error occurs during *initiation*.
142
  */
143
  async function makeFalRequestWithRetry(falInput, stream = false) {
144
  let attempts = 0;
 
145
  const maxAttempts = falKeys.length;
 
146
  const attemptedKeysInThisRequest = new Set();
147
 
148
  while (attempts < maxAttempts) {
149
+ const keyInfo = getNextValidKey();
 
150
  if (!keyInfo) {
 
151
  console.error("makeFalRequestWithRetry: No valid Fal AI keys remaining.");
152
  throw new Error("No valid Fal AI keys available (all marked as invalid).");
153
  }
 
 
154
  if (attemptedKeysInThisRequest.has(keyInfo.key)) {
155
  console.warn(`Key at index ${keyInfo.index} was already attempted for this request. Skipping to find next different key.`);
 
156
  continue;
157
  }
158
  attemptedKeysInThisRequest.add(keyInfo.key);
159
+ attempts++;
160
 
161
  try {
162
  console.log(`Attempt ${attempts}/${maxAttempts}: Trying Fal Key index ${keyInfo.index}...`);
163
  console.warn(`Configuring GLOBAL fal client with key index ${keyInfo.index}. Review concurrency implications.`);
164
+ fal.config({ credentials: keyInfo.key });
165
 
166
  if (stream) {
 
167
  const falStream = await fal.stream("fal-ai/any-llm", { input: falInput });
168
  console.log(`Successfully initiated stream with key index ${keyInfo.index}.`);
169
+ // **MODIFIED: Return stream AND key info**
170
+ return { stream: falStream, keyUsed: keyInfo.key, indexUsed: keyInfo.index };
171
  } else {
172
+ // Non-stream logic remains the same
173
  console.log(`Executing non-stream request with key index ${keyInfo.index}...`);
174
+ const result = await fal.subscribe("fal-ai/any-llm", { input: falInput, logs: true });
175
  console.log(`Successfully received non-stream result with key index ${keyInfo.index}.`);
 
 
176
  if (result && result.error) {
177
  console.error(`Fal AI returned an error object within the non-stream result payload (Key Index ${keyInfo.index}):`, result.error);
178
+ if (isKeyRelatedError(result.error)) {
 
179
  console.warn(`Marking Fal Key index ${keyInfo.index} as invalid due to error in response payload.`);
180
  invalidKeys.add(keyInfo.key);
181
+ continue; // Try next key
182
  } else {
 
183
  throw new Error(`Fal AI error reported in result payload: ${JSON.stringify(result.error)}`);
184
  }
185
  }
186
+ return result; // Return only result for non-stream
 
187
  }
188
  } catch (error) {
189
+ // This catch block now primarily handles errors during *request initiation*
190
+ console.error(`Error caught during request initiation using Fal Key index ${keyInfo.index}:`, error.message || error);
 
191
  if (isKeyRelatedError(error)) {
192
+ console.warn(`Marking Fal Key index ${keyInfo.index} as invalid due to caught initiation error.`);
 
193
  invalidKeys.add(keyInfo.key);
194
+ // Continue loop to try the next key
195
  } else {
196
+ console.error("Initiation error does not appear to be key-related. Failing request without further key retries.");
197
+ throw error; // Re-throw non-key-related initiation error
 
 
198
  }
199
  }
200
  } // End while loop
201
 
202
+ // If loop finishes, all keys failed during initiation
203
+ throw new Error(`Request initiation failed after trying ${attempts} unique Fal key(s). All failed with key-related errors or were already marked invalid.`);
204
  }
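The warning above flags that fal.config() mutates one shared client, so concurrent requests can race on credentials. One low-cost mitigation, sketched here on the assumption that serialising the "configure + initiate" section is acceptable for the expected traffic, is a promise-based lock; the withFalLock helper is hypothetical and not part of this commit:

// Hypothetical helper: serialises the configure-and-start section on the shared client.
let falLock = Promise.resolve();
function withFalLock(task) {
  const run = falLock.then(task, task);   // run after whatever is already queued
  falLock = run.catch(() => {});          // keep the chain alive even if the task rejects
  return run;
}

// Possible use inside the retry loop (stream initiation shown). Note this only narrows the race
// window; if the client re-reads credentials after initiation, a per-request client instance or
// one worker per key would be the safer design.
//
//   const falStream = await withFalLock(() => {
//     fal.config({ credentials: keyInfo.key });
//     return fal.stream("fal-ai/any-llm", { input: falInput });
//   });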
205
 
206
 
207
+ // POST /v1/chat/completions endpoint - Handles chat requests
208
  app.post('/v1/chat/completions', async (req, res) => {
209
+ const { model, messages, stream = false, reasoning = false, ...restOpenAIParams } = req.body;
 
 
210
  console.log(`--> POST /v1/chat/completions | Model: ${model} | Stream: ${stream}`);
211
 
212
+ // --- (Input validation for model, messages remains the same) ---
213
+ if (!model || !messages || !Array.isArray(messages) || messages.length === 0) {
214
  console.error("Invalid request: Missing 'model' or 'messages' array is empty/invalid.");
215
  return res.status(400).json({ error: 'Bad Request: `model` and a non-empty `messages` array are required.' });
216
  }
217
 
218
+ let keyUsedForRequest = null; // Variable to store the key used for this request, if successful initiation
219
+ let indexUsedForRequest = null;
220
+
221
  try {
 
 
222
  const { prompt, system_prompt } = convertMessagesToFalPrompt(messages);
223
+ const falInput = { /* ... falInput setup ... */ };
224
+ falInput.model = model;
225
+ falInput.prompt = prompt;
226
+ if (system_prompt && system_prompt.length > 0) {
227
+ falInput.system_prompt = system_prompt;
228
+ }
229
+ falInput.reasoning = !!reasoning;
230
 
231
  console.log("Attempting Fal request with key rotation/retry logic...");
232
  console.log(`Prepared Input Lengths - System Prompt: ${system_prompt?.length || 0}, Prompt: ${prompt?.length || 0}`);
233
 
 
234
  if (stream) {
 
235
  res.setHeader('Content-Type', 'text/event-stream; charset=utf-8');
236
+ /* ... other headers ... */
237
  res.setHeader('Cache-Control', 'no-cache');
238
  res.setHeader('Connection', 'keep-alive');
239
+ res.setHeader('Access-Control-Allow-Origin', '*');
240
+ res.flushHeaders();
241
 
242
+ let previousOutput = '';
243
+ let streamResult; // To hold the object { stream, keyUsed, indexUsed }
244
 
245
  try {
246
+ // **MODIFIED: Get stream and key info**
247
+ streamResult = await makeFalRequestWithRetry(falInput, true);
248
+ const falStream = streamResult.stream;
249
+ keyUsedForRequest = streamResult.keyUsed; // Store the key used for this stream
250
+ indexUsedForRequest = streamResult.indexUsed;
251
 
252
  // Process the stream events asynchronously
253
  for await (const event of falStream) {
254
+ // --- (Stream event processing logic remains the same) ---
255
  const currentOutput = (event && typeof event.output === 'string') ? event.output : '';
 
256
  const isPartial = (event && typeof event.partial === 'boolean') ? event.partial : true;
257
+ const errorInfo = (event && event.error) ? event.error : null;
258
 
 
259
  if (errorInfo) {
260
+ // Log error from within the stream, but might continue processing
261
  console.error("Error received *within* fal stream event payload:", errorInfo);
262
+ const errorChunk = { /* ... error chunk details ... */ };
263
+ if (!res.writableEnded) { res.write(`data: ${JSON.stringify(errorChunk)}\n\n`); }
264
+ else { console.warn("Stream ended before writing event error."); }
265
+ // Decide whether to break or continue based on error severity if needed
266
  }
 
 
267
  let deltaContent = '';
268
  if (currentOutput.startsWith(previousOutput)) {
 
269
  deltaContent = currentOutput.substring(previousOutput.length);
270
  } else if (currentOutput.length > 0) {
271
+ console.warn("Fal stream output mismatch/reset. Sending full current output as delta.");
272
+ deltaContent = currentOutput;
273
+ previousOutput = '';
274
+ }
275
+ previousOutput = currentOutput;
 
 
 
276
  if (deltaContent || !isPartial) {
277
+ const openAIChunk = { /* ... chunk details ... */ };
278
+ openAIChunk.id = `chatcmpl-${Date.now()}`;
279
+ openAIChunk.object = "chat.completion.chunk";
280
+ openAIChunk.created = Math.floor(Date.now() / 1000);
281
+ openAIChunk.model = model;
282
+ openAIChunk.choices = [{ index: 0, delta: { content: deltaContent }, finish_reason: isPartial === false ? "stop" : null }];
283
+ if (!res.writableEnded) { res.write(`data: ${JSON.stringify(openAIChunk)}\n\n`); }
284
+ else { console.warn("Stream ended before writing data chunk."); }
285
  }
286
+ // --- (End stream event processing) ---
287
+ } // End for-await loop
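The loop above forwards only the new suffix of Fal's cumulative output. A tiny illustration of that delta computation (strings are placeholders):

// Mirrors the delta logic above: Fal streams the full text so far; the proxy emits only the new part.
const previous = 'The answer is';
const current  = 'The answer is 4.';
const delta    = current.startsWith(previous) ? current.slice(previous.length) : current;
console.log(JSON.stringify(delta));   // " 4." -- this is what ends up in choices[0].delta.content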
288
 
289
+ // Send [DONE] marker
290
  if (!res.writableEnded) {
291
  res.write(`data: [DONE]\n\n`);
292
+ res.end();
293
  console.log("<-- Stream finished successfully and [DONE] sent.");
294
  } else {
295
+ console.log("<-- Stream processing finished, but connection was already ended before [DONE].");
296
  }
297
 
298
  } catch (streamError) {
299
+ // **MODIFIED CATCH BLOCK for stream processing errors**
300
+ // This catches errors from makeFalRequestWithRetry (initiation failure)
301
+ // OR errors thrown during the 'for await...of falStream' loop.
302
  console.error('Error during stream request processing:', streamError.message || streamError);
303
+
304
+ // **NEW: Check if the error is key-related and invalidate the key if needed**
305
+ // We only do this if keyUsedForRequest has been set (meaning initiation succeeded)
306
+ // And if the error occurred *during* the stream processing, not during initiation
307
+ // (initiation errors are handled inside makeFalRequestWithRetry)
308
+ // The check `keyUsedForRequest !== null` helps distinguish.
309
+ if (keyUsedForRequest && isKeyRelatedError(streamError)) {
310
+ console.warn(`Marking Fal Key index ${indexUsedForRequest} as invalid due to error during stream processing.`);
311
+ invalidKeys.add(keyUsedForRequest);
312
+ }
313
+ // else: The error was either not key-related, or occurred during initiation (already handled)
314
+
315
+ // --- (Error reporting logic to client remains the same) ---
316
  try {
 
317
  if (!res.headersSent) {
318
+ // Error likely during initiation (caught from makeFalRequestWithRetry)
319
  const errorMessage = (streamError instanceof Error) ? streamError.message : JSON.stringify(streamError);
320
+ res.status(502).json({ error: 'Failed to initiate Fal stream', details: errorMessage });
321
  console.log("<-- Stream initiation failed response sent (502).");
322
  } else if (!res.writableEnded) {
323
+ // Error during stream processing after headers sent
324
  const errorDetails = (streamError instanceof Error) ? streamError.message : JSON.stringify(streamError);
 
325
  res.write(`data: ${JSON.stringify({ error: { message: "Stream processing error after initiation", type: "proxy_error", details: errorDetails } })}\n\n`);
326
+ res.write(`data: [DONE]\n\n`);
327
  res.end();
328
  console.log("<-- Stream error sent within stream, stream ended.");
329
  } else {
 
330
  console.log("<-- Stream error occurred, but connection was already ended.");
331
  }
332
  } catch (finalError) {
 
333
  console.error('Error sending stream error message to client:', finalError);
 
334
  if (!res.writableEnded) { res.end(); }
335
  }
336
+ // --- (End error reporting) ---
337
  }
338
 
339
  } else {
340
+ // --- Non-Stream Logic (remains the same, uses makeFalRequestWithRetry directly) ---
341
  try {
 
342
  const result = await makeFalRequestWithRetry(falInput, false);
343
+ const openAIResponse = { /* ... construct response ... */ };
344
+ openAIResponse.id = `chatcmpl-${result.requestId || Date.now()}`;
345
+ openAIResponse.object = "chat.completion";
346
+ openAIResponse.created = Math.floor(Date.now() / 1000);
347
+ openAIResponse.model = model;
348
+ openAIResponse.choices = [{ index: 0, message: { role: "assistant", content: result.output || "" }, finish_reason: "stop" }];
349
+ openAIResponse.usage = { prompt_tokens: null, completion_tokens: null, total_tokens: null };
350
+ openAIResponse.system_fingerprint = null;
351
+ if (result.reasoning) { openAIResponse.fal_reasoning = result.reasoning; }
352
 
353
  res.json(openAIResponse);
354
  console.log("<-- Non-stream response sent successfully.");
 
355
  } catch (error) {
356
+ console.error('Error during non-stream request processing:', error.message || error);
 
 
357
  if (!res.headersSent) {
358
  const errorMessage = (error instanceof Error) ? error.message : JSON.stringify(error);
 
359
  const finalMessage = errorMessage.includes("No valid Fal AI keys available") || errorMessage.includes("Request failed after trying")
360
+ ? `Fal request failed: ${errorMessage}`
361
  : `Internal Server Error processing Fal request: ${errorMessage}`;
 
362
  res.status(502).json({ error: 'Fal Request Failed', details: finalMessage });
363
  console.log("<-- Non-stream error response sent (502).");
364
  } else {
 
365
  console.error("Headers already sent for non-stream error response? This is unexpected.");
366
+ if (!res.writableEnded) { res.end(); }
367
  }
368
  }
369
  }
370
 
371
  } catch (error) {
372
+ // --- (Outer catch block for setup errors remains the same) ---
 
373
  console.error('Unhandled error before initiating Fal request (likely setup or input conversion):', error.message || error);
374
  if (!res.headersSent) {
375
  const errorMessage = (error instanceof Error) ? error.message : JSON.stringify(error);
 
376
  res.status(500).json({ error: 'Internal Server Error in Proxy Setup', details: errorMessage });
377
  console.log("<-- Proxy setup error response sent (500).");
378
  } else {
 
382
  }
383
  });
384
 
385
+ // --- (Server listen and root path handler remain the same) ---
386
+ app.listen(PORT, () => { /* ... startup messages ... */ });
387
+ app.get('/', (req, res) => { /* ... root message ... */ });
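Finally, because the streaming branch emits OpenAI-style SSE chunks terminated by a [DONE] line, here is a minimal consumer sketch (Node 18+ fetch; URL and key are placeholders, and the line splitting is deliberately naive -- it assumes each SSE event arrives within one chunk):

// Placeholder URL/key; prints the assistant text as it streams in.
const resp = await fetch('http://localhost:3000/v1/chat/completions', {
  method: 'POST',
  headers: { 'Authorization': 'Bearer YOUR_PROXY_API_KEY', 'Content-Type': 'application/json' },
  body: JSON.stringify({
    model: 'openai/gpt-4o-mini',
    stream: true,
    messages: [{ role: 'user', content: 'Write a haiku about failover.' }]
  })
});
const decoder = new TextDecoder();
for await (const chunk of resp.body) {
  for (const line of decoder.decode(chunk, { stream: true }).split('\n')) {
    if (!line.startsWith('data: ')) continue;
    const payload = line.slice('data: '.length).trim();
    if (payload === '[DONE]') continue;
    const delta = JSON.parse(payload).choices?.[0]?.delta?.content;
    if (delta) process.stdout.write(delta);
  }
}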