Spaces:

Calmlo
/

fal-openai-proxy

Running

App Files Files Community

Calmlo committed on Apr 12

Commit

8f3811c

verified ·

1 Parent(s): 46da92e

Update server.js

Browse files

Files changed (1) hide show

server.js +347 -244

server.js CHANGED Viewed

@@ -1,46 +1,132 @@
 import express from 'express';
 import { fal } from '@fal-ai/client';
-// --- Key Management ---
-const FAL_KEY_STRING = process.env.FAL_KEY;
-const API_KEY = process.env.API_KEY; // 自定义 API Key 环境变量保持不变
-if (!FAL_KEY_STRING) {
-    console.error("Error: FAL_KEY environment variable is not set.");
     process.exit(1);
 }
-// 解析 FAL_KEY 字符串为数组，去除空白并过滤空值
-const falKeys = FAL_KEY_STRING.split(',')
-                             .map(key => key.trim())
-                             .filter(key => key.length > 0);
 if (falKeys.length === 0) {
-    console.error("Error: FAL_KEY environment variable is set, but no valid keys were found after parsing.");
     process.exit(1);
 }
-console.log(`Loaded ${falKeys.length} Fal AI Keys initially.`);
-// 不再需要 currentFalKeyIndex
-// let currentFalKeyIndex = 0;
-// --- End Key Management ---
-if (!API_KEY) {
-    console.error("Error: API_KEY environment variable is not set.");
-    process.exit(1);
 }
 const app = express();
 app.use(express.json({ limit: '50mb' }));
 app.use(express.urlencoded({ extended: true, limit: '50mb' }));
 const PORT = process.env.PORT || 3000;
-// API Key 鉴权中间件 (保持不变)
 const apiKeyAuth = (req, res, next) => {
     const authHeader = req.headers['authorization'];
     if (!authHeader) {
@@ -63,16 +149,15 @@ const apiKeyAuth = (req, res, next) => {
     next();
 };
-// 应用 API Key 鉴权中间件到所有 API 路由 (保持不变)
 app.use(['/v1/models', '/v1/chat/completions'], apiKeyAuth);
-// === 全局定义限制 === (保持不变)
 const PROMPT_LIMIT = 4800;
 const SYSTEM_PROMPT_LIMIT = 4800;
 // === 限制定义结束 ===
-// 定义 fal-ai/any-llm 支持的模型列表 (保持不变)
-const FAL_SUPPORTED_MODELS = [
     "anthropic/claude-3.7-sonnet",
     "anthropic/claude-3.5-sonnet",
     "anthropic/claude-3-5-haiku",
@@ -92,16 +177,16 @@ const FAL_SUPPORTED_MODELS = [
     "meta-llama/llama-4-scout"
 ];
-// Helper function to get owner from model ID (保持不变)
-const getOwner = (modelId) => {
     if (modelId && modelId.includes('/')) {
         return modelId.split('/')[0];
     }
     return 'fal-ai';
-}
-// GET /v1/models endpoint (保持不变)
-app.get('/v1/models', (req, res) => {
     console.log("Received request for GET /v1/models");
     try {
         const modelsData = FAL_SUPPORTED_MODELS.map(modelId => ({
@@ -115,11 +200,11 @@ app.get('/v1/models', (req, res) => {
     }
 });
-// === convertMessagesToFalPrompt 函数 (保持不变) ===
-function convertMessagesToFalPrompt(messages) {
     let fixed_system_prompt_content = "";
     const conversation_message_blocks = [];
-    // console.log(`Original messages count: ${messages.length}`); // Reduced logging verbosity
     // 1. 分离 System 消息，格式化 User/Assistant 消息
     for (const message of messages) {
@@ -204,8 +289,12 @@ function convertMessagesToFalPrompt(messages) {
     // 5. *** 组合最终的 prompt 和 system_prompt (包含分隔符逻辑) ***
     const system_prompt_history_content = system_prompt_history_blocks.join('').trim();
     const final_prompt = prompt_history_blocks.join('').trim();
     const SEPARATOR = "\n\n-------下面是比较早之前的对话内容-----\n\n";
     let final_system_prompt = "";
     const hasFixedSystem = fixed_system_prompt_content.length > 0;
     const hasSystemHistory = system_prompt_history_content.length > 0;
@@ -220,259 +309,273 @@ function convertMessagesToFalPrompt(messages) {
         // console.log("Using only history in system prompt slot.");
     }
     const result = {
         system_prompt: final_system_prompt,
         prompt: final_prompt
     };
-    // console.log(`Final system_prompt length (Sys+Separator+Hist): ${result.system_prompt.length}`);
-    // console.log(`Final prompt length (Hist): ${result.prompt.length}`);
     return result;
 }
 // === convertMessagesToFalPrompt 函数结束 ===
-// POST /v1/chat/completions endpoint (随机 Key + 失败排除)
 app.post('/v1/chat/completions', async (req, res) => {
     const { model, messages, stream = false, reasoning = false, ...restOpenAIParams } = req.body;
-    const requestId = `req-${Date.now()}`;
-    console.log(`[${requestId}] Received chat completion request for model: ${model}, stream: ${stream}. Strategy: Random key with exclusion.`);
     if (!FAL_SUPPORTED_MODELS.includes(model)) {
-        console.warn(`[${requestId}] Warning: Requested model '${model}' is not in the explicitly supported list.`);
     }
     if (!model || !messages || !Array.isArray(messages) || messages.length === 0) {
-        console.error(`[${requestId}] Invalid request parameters:`, { model, messages: Array.isArray(messages) ? messages.length : typeof messages });
         return res.status(400).json({ error: 'Missing or invalid parameters: model and messages array are required.' });
     }
-    let lastError = null;
-    let success = false;
-    let attempt = 0;
-    const maxAttempts = falKeys.length; // Safety limit
-    // *** 為此請求創建可用的 Key 列表副本 ***
-    let availableKeysForRequest = [...falKeys];
-    // 准备 Fal Input (只需要准备一次)
-    const { prompt, system_prompt } = convertMessagesToFalPrompt(messages);
-    const falInput = {
-        model: model,
-        prompt: prompt,
-        ...(system_prompt && { system_prompt: system_prompt }),
-        reasoning: !!reasoning,
-    };
-    console.log(`[${requestId}] Fal Input prepared. System Prompt Length: ${system_prompt?.length || 0}, Prompt Length: ${prompt?.length || 0}`);
-    // *** 重试循环：只要还有可用的 Key 且未达最大尝试次数 ***
-    while (availableKeysForRequest.length > 0 && attempt < maxAttempts) {
-        attempt++;
-        // *** 隨機選擇一個 Key ***
-        const randomIndex = Math.floor(Math.random() * availableKeysForRequest.length);
-        const selectedFalKey = availableKeysForRequest[randomIndex];
-        // Mask key in logs for security
-        const maskedKey = selectedFalKey.length > 8 ? `${selectedFalKey.substring(0, 4)}...${selectedFalKey.substring(selectedFalKey.length - 4)}` : selectedFalKey;
-        console.log(`[${requestId}] Attempt ${attempt}/${maxAttempts}: Trying random key (masked: ${maskedKey}). ${availableKeysForRequest.length} keys available.`);
-        try {
-            fal.config({ credentials: selectedFalKey });
-            if (stream) {
-                // --- 流式处理 ---
-                res.setHeader('Content-Type', 'text/event-stream; charset=utf-8');
-                res.setHeader('Cache-Control', 'no-cache');
-                res.setHeader('Connection', 'keep-alive');
-                res.setHeader('Access-Control-Allow-Origin', '*');
-                let previousOutput = '';
-                let firstEventProcessed = false;
-                let streamFailedMidway = false;
-                let keyConfirmedWorking = false;
-                const falStream = await fal.stream("fal-ai/any-llm", { input: falInput });
-                try {
-                    for await (const event of falStream) {
-                        const currentOutput = (event && typeof event.output === 'string') ? event.output : '';
-                        const isPartial = (event && typeof event.partial === 'boolean') ? event.partial : true;
-                        const errorInfo = (event && event.error) ? event.error : null;
-                        const eventStatus = errorInfo?.status;
-                        if (errorInfo) {
-                            console.warn(`[${requestId}] Error in stream event (Key: ${maskedKey}):`, errorInfo);
-                            lastError = errorInfo;
-                            if (!firstEventProcessed && (eventStatus === 401 || eventStatus === 403 || eventStatus === 429)) {
-                                console.warn(`[${requestId}] Key ${maskedKey} failed (${eventStatus}) on first event. Excluding it for this request.`);
-                                availableKeysForRequest.splice(randomIndex, 1); // 从可用列表移除
-                                console.log(`[${requestId}] ${availableKeysForRequest.length} keys remaining for this request.`);
-                                break; // 中断内部循环，外部循环将尝试下一个随机 key
-                            } else {
-                                console.error(`[${requestId}] Unrecoverable stream error or error after stream start (Key: ${maskedKey}).`);
-                                streamFailedMidway = true;
-                                if (!res.headersSent) {
-                                     res.status(500).json({ object: "error", message: `Fal Stream Error: ${JSON.stringify(errorInfo)}`, type:"fal_stream_error"});
-                                } else if (!res.writableEnded) {
-                                    const errorChunk = { id: `chatcmpl-${Date.now()}-error`, object: "chat.completion.chunk", created: Math.floor(Date.now() / 1000), model: model, choices: [{ index: 0, delta: {}, finish_reason: "error", message: { role: 'assistant', content: `Fal Stream Error: ${JSON.stringify(errorInfo)}` } }] };
-                                    res.write(`data: ${JSON.stringify(errorChunk)}\n\n`);
-                                }
-                                break; // 中断内部循环
-                            }
-                        }
-                        if (!keyConfirmedWorking && !errorInfo) {
-                             success = true;
-                             keyConfirmedWorking = true;
-                             console.log(`[${requestId}] Key ${maskedKey} confirmed working (stream).`);
-                             if (!res.headersSent) {
-                                 res.flushHeaders();
-                                 console.log(`[${requestId}] Stream headers flushed.`);
-                             }
-                             firstEventProcessed = true;
-                        }
-                        if (!errorInfo) {
-                            let deltaContent = '';
-                            if (currentOutput.startsWith(previousOutput)) {
-                                deltaContent = currentOutput.substring(previousOutput.length);
-                            } else if (currentOutput.length > 0) {
-                                deltaContent = currentOutput; previousOutput = '';
-                            }
-                            previousOutput = currentOutput;
-                            if (deltaContent || !isPartial) {
-                                const openAIChunk = { id: `chatcmpl-${Date.now()}`, object: "chat.completion.chunk", created: Math.floor(Date.now() / 1000), model: model, choices: [{ index: 0, delta: { content: deltaContent }, finish_reason: isPartial === false ? "stop" : null }] };
-                                 if (!res.writableEnded) {
-                                    res.write(`data: ${JSON.stringify(openAIChunk)}\n\n`);
-                                 }
-                            }
-                        }
-                    } // End `for await...of` loop
-                    if (streamFailedMidway) {
-                         if (!res.writableEnded) {
-                              res.write(`data: [DONE]\n\n`); res.end();
-                         }
-                         break; // 中断外部循环，因为流在中途失败
-                    } else if (keyConfirmedWorking) {
-                         if (!res.writableEnded) {
-                            res.write(`data: [DONE]\n\n`); res.end();
-                         }
-                         break; // 中断外部循环，因为成功
                     }
-                    // 如果内部循环因为首个事件是 key 错误而中断，外部循环会继续
-                } catch (streamProcessingError) {
-                    console.error(`[${requestId}] Error during fal stream processing loop logic (Key: ${maskedKey}):`, streamProcessingError);
-                    lastError = streamProcessingError;
-                    if (!res.headersSent) {
-                        res.status(500).json({ object: "error", message: `Proxy Stream Processing Error: ${streamProcessingError.message}`, type:"proxy_internal_error"});
-                    } else if (!res.writableEnded) {
-                        try {
-                             res.write(`data: ${JSON.stringify({ error: { message: "Proxy Stream processing error", type: "proxy_internal_error", details: streamProcessingError.message } })}\n\n`);
-                             res.write(`data: [DONE]\n\n`); res.end();
-                        } catch (finalError) { if (!res.writableEnded) { res.end(); } }
                     }
-                    break; // 中断外部循环
-                }
-                 // 如果流成功或中途失败，外部循环会 break；如果因首个 key 错误而中断内部循环，则外部循环继续
-                 if (success || streamFailedMidway) {
-                    break;
                  }
-            } else {
-                // --- 非流式处理 ---
-                console.log(`[${requestId}] Executing non-stream request (Key: ${maskedKey})...`);
-                const result = await fal.subscribe("fal-ai/any-llm", { input: falInput, logs: true });
-                if (result && result.error) {
-                     console.error(`[${requestId}] Fal-ai returned a business error (Key: ${maskedKey}):`, result.error);
-                     lastError = new Error(`Fal-ai error: ${JSON.stringify(result.error)}`);
-                     lastError.status = result.status || 500;
-                     lastError.type = "fal_ai_error";
-                     break; // 业务错误，中断重试
-                }
-                console.log(`[${requestId}] Received non-stream result (Key: ${maskedKey}).`);
-                success = true; // 标记成功
-                const openAIResponse = {
-                    id: `chatcmpl-${result.requestId || Date.now()}`, object: "chat.completion", created: Math.floor(Date.now() / 1000), model: model,
-                    choices: [{ index: 0, message: { role: "assistant", content: result.output || "" }, finish_reason: "stop" }],
-                    usage: { prompt_tokens: null, completion_tokens: null, total_tokens: null },
-                    system_fingerprint: null,
-                    ...(result.reasoning && { fal_reasoning: result.reasoning }),
-                };
-                res.json(openAIResponse);
-                break; // 成功，中断外部循环
             }
-        } catch (error) {
-            // Catch errors from fal.config, fal.stream/subscribe setup
-            lastError = error;
-            const status = error?.status;
-            const errorMessage = error?.body?.detail || error?.message || 'Unknown setup error';
-            console.warn(`[${requestId}] Attempt ${attempt} with key ${maskedKey} failed during setup. Status: ${status || 'N/A'}, Message: ${errorMessage}`);
-            // console.error("Setup Error details:", error); // Log full error for debug if needed
-            if (status === 401 || status === 403 || status === 429) {
-                console.warn(`[${requestId}] Key ${maskedKey} failed (${status}) during setup. Excluding it for this request.`);
-                availableKeysForRequest.splice(randomIndex, 1); // 从可用列表移除
-                console.log(`[${requestId}] ${availableKeysForRequest.length} keys remaining for this request.`);
-                // 继续外部循环
-            } else {
-                 console.error(`[${requestId}] Unrecoverable setup error encountered (Key: ${maskedKey}). Status: ${status || 'N/A'}. Stopping retries.`);
-                 break; // 中断外部循环
-            }
         }
-    } // --- 结束重试循环 ---
-    // 如果循环结束了还没有成功
-    if (!success) {
-        console.error(`[${requestId}] All attempts failed or an unrecoverable error occurred. No available keys left or max attempts reached.`);
         if (!res.headersSent) {
-            const statusCode = lastError?.status || 503;
-            const errorMessage = (lastError instanceof Error) ? lastError.message : JSON.stringify(lastError);
-            const detailMessage = lastError?.body?.detail || errorMessage;
-            const errorType = lastError?.type || (statusCode === 401 || statusCode === 403 || statusCode === 429 ? "key_error" : "proxy_error");
-             console.error(`[${requestId}] Sending final error response. Status: ${statusCode}, Type: ${errorType}, Message: ${detailMessage}`);
-             res.status(statusCode).json({
-                 object: "error",
-                 message: `All attempts failed. Last error: ${detailMessage}`,
-                 type: errorType,
-                 param: null,
-                 code: statusCode === 429 ? "rate_limit_exceeded" : (statusCode === 401 || statusCode === 403 ? "invalid_api_key" : "service_unavailable")
-             });
         } else if (!res.writableEnded) {
-            console.error(`[${requestId}] Headers potentially sent, but request failed. Ending stream.`);
-             try {
-                 res.write(`data: [DONE]\n\n`); res.end();
-             } catch (e) { if (!res.writableEnded) res.end(); }
-        } else {
-            console.error(`[${requestId}] Request failed, but response stream was already fully ended.`);
         }
     }
 });
-// 启动服务器
 app.listen(PORT, () => {
-    console.log(`===================================================`);
-    console.log(` Fal OpenAI Proxy Server (Random Key + Exclusion Strategy)`); // 更新描述
     console.log(` Listening on port: ${PORT}`);
-    console.log(` Loaded ${falKeys.length} Fal AI Keys initially.`);
-    console.log(` Using Limits: System Prompt=${SYSTEM_PROMPT_LIMIT}, Prompt=${PROMPT_LIMIT}`);
     console.log(` API Key Auth Enabled: ${API_KEY ? 'Yes' : 'No'}`);
-    console.log(` Chat Completions Endpoint: POST http://localhost:${PORT}/v1/chat/completions`);
     console.log(` Models Endpoint: GET http://localhost:${PORT}/v1/models`);
-    console.log(`===================================================`);
 });
-// 根路径响应
 app.get('/', (req, res) => {
-    res.send('Fal OpenAI Proxy (Random Key + Exclusion Strategy) is running.'); // 更新描述
 });

 import express from 'express';
 import { fal } from '@fal-ai/client';
+// --- Multi-key configuration: read settings from the environment and build the in-memory key pool ---
+const rawFalKeys = process.env.FAL_KEYS; // Comma-separated list of fal keys, e.g. key1,key2,key3
+const API_KEY = process.env.API_KEY; // Custom API key for proxy auth; NOTE(review): presumably compared in apiKeyAuth, whose body is not visible here
+if (!rawFalKeys) {
+    console.error("Error: FAL_KEYS environment variable is not set (should be comma-separated).");
     process.exit(1);
 }
+if (!API_KEY) {
+    console.error("Error: API_KEY environment variable is not set.");
+    process.exit(1);
+}
+// Split the raw string on commas, trim each entry, drop empty entries, and wrap every key in a state record
+let falKeys = rawFalKeys.split(',')
+    .map(key => key.trim())
+    .filter(key => key.length > 0)
+    .map(key => ({
+        key: key,
+        failed: false,          // True while the key is considered failed (set by markKeyFailed, cleared by getNextKey)
+        failedTimestamp: 0      // Date.now() value recorded when the key was marked failed; 0 when not failed
+    }));
 if (falKeys.length === 0) {
+    console.error("Error: No valid FAL_KEYS found after processing the environment variable.");
     process.exit(1);
 }
+let currentKeyIndex = 0; // Round-robin cursor into falKeys; advanced by getNextKey()
+const failedKeyCooldown = 60 * 1000; // Milliseconds (60 seconds) a failed key is skipped before getNextKey() hands it out again
+console.log(`Loaded ${falKeys.length} FAL API Key(s).`);
+console.log(`Failed key cooldown period: ${failedKeyCooldown / 1000} seconds.`);
+// NOTE: No fal.config() call is made at startup any more; tryFalCallWithFailover() calls
+// fal.config({ credentials }) before each attempt with whichever key was selected.
+// --- Key Management Functions ---
+/**
+ * Picks the next usable FAL key in round-robin order, skipping keys that are still cooling down.
+ *
+ * Starting at the module-level cursor currentKeyIndex, entries of falKeys are inspected one by one
+ * and the cursor is advanced past every inspected entry (wrapping at the end of the array).
+ * A key flagged as failed is skipped while fewer than failedKeyCooldown milliseconds have passed
+ * since its failedTimestamp; once that time has elapsed the flag and timestamp are cleared and the
+ * key is handed out again.
+ *
+ * Side effects: mutates currentKeyIndex, may reset failed / failedTimestamp on a key, and logs.
+ *
+ * @returns {object | null} The key info object { key, failed, failedTimestamp } taken directly from
+ *   falKeys (not a copy), or null when falKeys is empty or every key is still in cooldown.
+ */
+function getNextKey() {
+    const totalKeys = falKeys.length;
+    if (totalKeys === 0) return null;
+    let attempts = 0; // Number of keys skipped so far because they are still cooling down
+    while (attempts < totalKeys) {
+        const keyIndex = currentKeyIndex % totalKeys;
+        const keyInfo = falKeys[keyIndex];
+        // Advance the shared cursor now so the next inspection (or next call) starts at the following key
+        currentKeyIndex = (currentKeyIndex + 1) % totalKeys;
+        // A failed key is only usable again once its cooldown window has elapsed
+        if (keyInfo.failed) {
+            const now = Date.now();
+            if (now - keyInfo.failedTimestamp < failedKeyCooldown) {
+                // Still cooling down: count it as a skipped key and move on.
+                attempts++;
+                continue; // Try the next key in the rotation
+            } else {
+                console.log(`Cooldown finished for key index ${keyIndex}. Resetting failure status.`);
+                keyInfo.failed = false; // Cooldown expired, so the key is eligible again
+                keyInfo.failedTimestamp = 0;
+            }
+        }
+        // Reaching this point means the key was not flagged, or its cooldown has just expired.
+        return keyInfo; // Same object that is stored in falKeys, so callers can pass it to markKeyFailed()
+    }
+    console.warn("All FAL keys are currently marked as failed and in cooldown.");
+    return null; // Every key was skipped: all are failed and within cooldown
+}
+/**
+ * Flags a key as failed and records the time, so getNextKey() skips it during the cooldown.
+ *
+ * Does nothing when keyInfo is falsy or the key is already flagged; in the latter case the
+ * original failedTimestamp is kept, so the cooldown is measured from the first failure.
+ * Logs a warning containing the key position in falKeys and the last four characters of the key.
+ *
+ * @param {object} keyInfo - The key info object to mark as failed (mutated in place).
+ */
+function markKeyFailed(keyInfo) {
+    if (keyInfo && !keyInfo.failed) { // Only mark if not already marked
+        keyInfo.failed = true;
+        keyInfo.failedTimestamp = Date.now();
+        const keyIndex = falKeys.findIndex(k => k.key === keyInfo.key); // Matches on the key string, so a duplicated key reports its first position
+        console.warn(`Marking key index ${keyIndex} (ending ...${keyInfo.key.slice(-4)}) as failed.`);
+    }
+}
+/**
+ * Determines if an error likely indicates an API key issue (auth, quota, etc.).
+ *
+ * Returns true when error.status is 401, 403 or 429, or when the lower-cased error.message
+ * contains one of: "authentication failed", "invalid api key", "permission denied",
+ * "rate limit exceeded", "quota exceeded". A missing error or message is treated as an empty
+ * message. The substring list is a heuristic and needs refinement based on actual errors from fal.ai.
+ * NOTE(review): assumes message is a string whenever it is present - confirm against the fal client.
+ *
+ * @param {Error} error - The error object caught from the fal client.
+ * @returns {boolean} - True if the error suggests a key failure, false otherwise.
+ */
+function isKeyRelatedError(error) {
+    const errorMessage = error?.message?.toLowerCase() || '';
+    const errorStatus = error?.status; // NOTE(review): assumes fal client errors expose an HTTP status here - confirm
+    // Authentication / authorization failures
+    if (errorStatus === 401 || errorStatus === 403 || // Unauthorized, Forbidden
+        errorMessage.includes('authentication failed') ||
+        errorMessage.includes('invalid api key') ||
+        errorMessage.includes('permission denied')) {
+        return true;
+    }
+    if (errorStatus === 429 || // Too Many Requests (Rate Limit / Quota)
+        errorMessage.includes('rate limit exceeded') ||
+        errorMessage.includes('quota exceeded')) {
+        return true;
+    }
+    // Add more specific error messages or codes from fal.ai if known
+    // Anything else (no matching status or message) is treated as unrelated to the key.
+    return false;
 }
+// --- Express app setup: create the app, register body parsers, choose the port ---
 const app = express();
 app.use(express.json({ limit: '50mb' })); // Parse JSON request bodies, with the size limit raised to 50mb
 app.use(express.urlencoded({ extended: true, limit: '50mb' })); // Parse URL-encoded bodies with the same 50mb limit
 const PORT = process.env.PORT || 3000; // Falls back to 3000 when PORT is unset or empty
+// API key authentication middleware (unchanged; checks the custom API_KEY)
 const apiKeyAuth = (req, res, next) => {
+    // ... (Keep existing apiKeyAuth middleware code) ...
     const authHeader = req.headers['authorization'];
     if (!authHeader) {
     next();
 };
 app.use(['/v1/models', '/v1/chat/completions'], apiKeyAuth);
+// === 全局定义限制 === (Remains the same)
 const PROMPT_LIMIT = 4800;
 const SYSTEM_PROMPT_LIMIT = 4800;
 // === 限制定义结束 ===
+// List of models supported by fal-ai/any-llm (unchanged)
+const FAL_SUPPORTED_MODELS = [ /* ... model list ... */
     "anthropic/claude-3.7-sonnet",
     "anthropic/claude-3.5-sonnet",
     "anthropic/claude-3-5-haiku",
     "meta-llama/llama-4-scout"
 ];
+// Helper function to get owner from model ID (Remains the same)
+const getOwner = (modelId) => { /* ... */
     if (modelId && modelId.includes('/')) {
         return modelId.split('/')[0];
     }
     return 'fal-ai';
+};
+// GET /v1/models endpoint (Remains the same)
+app.get('/v1/models', (req, res) => { /* ... */
     console.log("Received request for GET /v1/models");
     try {
         const modelsData = FAL_SUPPORTED_MODELS.map(modelId => ({
     }
 });
+// === convertMessagesToFalPrompt 函数 (Remains the same) ===
+function convertMessagesToFalPrompt(messages) { /* ... */
     let fixed_system_prompt_content = "";
     const conversation_message_blocks = [];
+    // console.log(`Original messages count: ${messages.length}`); // Less verbose logging
     // 1. 分离 System 消息，格式化 User/Assistant 消息
     for (const message of messages) {
     // 5. *** 组合最终的 prompt 和 system_prompt (包含分隔符逻辑) ***
     const system_prompt_history_content = system_prompt_history_blocks.join('').trim();
     const final_prompt = prompt_history_blocks.join('').trim();
+    // 定义分隔符
     const SEPARATOR = "\n\n-------下面是比较早之前的对话内容-----\n\n";
     let final_system_prompt = "";
     const hasFixedSystem = fixed_system_prompt_content.length > 0;
     const hasSystemHistory = system_prompt_history_content.length > 0;
         // console.log("Using only history in system prompt slot.");
     }
+    // 6. 返回结果
     const result = {
         system_prompt: final_system_prompt,
         prompt: final_prompt
     };
+    console.log(`Final system_prompt length: ${result.system_prompt.length}, Final prompt length: ${result.prompt.length}`);
     return result;
 }
 // === convertMessagesToFalPrompt 函数结束 ===
+/**
+ * Runs one fal client call, rotating to another API key when the failure looks key-related.
+ *
+ * Up to falKeys.length attempts are made. Each attempt takes a key from getNextKey(), installs it
+ * with fal.config({ credentials }), and awaits fal.stream() (when operation is 'stream') or
+ * fal.subscribe() (for any other operation value). An exception classified by isKeyRelatedError()
+ * marks the key as failed and moves on to the next attempt; any other exception is re-thrown at once.
+ *
+ * @param {'stream' | 'subscribe'} operation - The fal operation to perform.
+ * @param {string} functionId - The fal function ID (e.g., "fal-ai/any-llm").
+ * @param {object} params - Passed through unchanged as the second argument of the fal call (input, logs, etc.).
+ * @returns {Promise<any>} - Whatever fal.stream() or fal.subscribe() resolved to. A subscribe result
+ *   that carries an error property is logged and still returned, so callers must check for it.
+ * @throws {Error} - The original error when it is not key-related. Otherwise a new Error (embedding the
+ *   last error message when there is one), thrown when getNextKey() returns null or every attempt failed.
+ *   NOTE(review): that new Error does not copy status or other fields from the last error.
+ */
+async function tryFalCallWithFailover(operation, functionId, params) {
+    const maxRetries = falKeys.length; // Upper bound on attempts: one per configured key
+    let lastError = null;
+    for (let i = 0; i < maxRetries; i++) {
+        const keyInfo = getNextKey();
+        if (!keyInfo) {
+            throw new Error(lastError ? `All FAL keys failed. Last error: ${lastError.message}` : "All FAL keys are currently unavailable (failed or in cooldown).");
+        }
+        const currentFalKey = keyInfo.key;
+        console.log(`Attempt ${i + 1}/${maxRetries}: Using key ending in ...${currentFalKey.slice(-4)}`);
+        try {
+            // --- Configure fal client with the selected key for this attempt ---
+            // WARNING: fal.config() is called on the shared fal client, so this credential change is
+            // global; overlapping requests may interfere with each other unless the client isolates calls.
+            // A better approach would be per-request credentials if the library supported it.
+            fal.config({ credentials: currentFalKey });
+            if (operation === 'stream') {
+                // For streams, the retry logic only covers *initiating* the stream.
+                // If the stream starts but fails later, this loop does not restart it.
+                const streamResult = await fal.stream(functionId, params);
+                console.log(`Successfully initiated stream with key ending in ...${currentFalKey.slice(-4)}`);
+                // Hand the stream object back; the caller is responsible for iterating it
+                return streamResult;
+            } else { // 'subscribe' (non-stream); also reached for any operation value other than 'stream'
+                const result = await fal.subscribe(functionId, params);
+                console.log(`Successfully completed subscribe request with key ending in ...${currentFalKey.slice(-4)}`);
+                // Check for application-level errors *returned* by fal within the result object.
+                // NOTE(review): presumably these are model errors rather than key errors, so they are only logged.
+                if (result && result.error) {
+                     console.warn(`Fal-ai returned an application error (non-stream) with key ...${currentFalKey.slice(-4)}: ${JSON.stringify(result.error)}`);
+                     // The key is deliberately not marked as failed for application errors.
+                }
+                // Return the result object (which might contain an error)
+                return result;
+            }
+        } catch (error) {
+            console.error(`Error using key ending in ...${currentFalKey.slice(-4)}:`, error.message || error);
+            lastError = error; // Remembered so the final failure message can include it
+            // Decide whether rotating to another key could help
+            if (isKeyRelatedError(error)) {
+                markKeyFailed(keyInfo);
+                console.log(`Key marked as failed. Trying next key if available...`);
+                // Fall through to the next loop iteration, which asks getNextKey() for another key
+            } else {
+                // If the error is not key-related (e.g., network issue, fal internal error),
+                // stop retrying and propagate the error immediately.
+                console.error("Non-key related error occurred. Aborting retries.");
+                throw error; // Re-throw the original error object unchanged
+            }
+        }
+    }
+    // If the loop finishes, every attempt ended in a key-related error.
+    console.error("All FAL keys failed after attempting each one.");
+    throw new Error(lastError ? `All FAL keys failed. Last error: ${lastError.message}` : "All FAL API keys failed.");
+}
+// POST /v1/chat/completions endpoint (Modified to use tryFalCallWithFailover)
 app.post('/v1/chat/completions', async (req, res) => {
     const { model, messages, stream = false, reasoning = false, ...restOpenAIParams } = req.body;
+    console.log(`Received chat completion request for model: ${model}, stream: ${stream}`);
     if (!FAL_SUPPORTED_MODELS.includes(model)) {
+         console.warn(`Warning: Requested model '${model}' is not in the explicitly supported list.`);
+         // Allow proceeding, maybe fal-ai/any-llm supports it dynamically
     }
     if (!model || !messages || !Array.isArray(messages) || messages.length === 0) {
+        console.error("Invalid request parameters:", { model, messages: Array.isArray(messages) ? messages.length : typeof messages });
         return res.status(400).json({ error: 'Missing or invalid parameters: model and messages array are required.' });
     }
+    try {
+        const { prompt, system_prompt } = convertMessagesToFalPrompt(messages);
+        const falInput = {
+            model: model,
+            prompt: prompt,
+            ...(system_prompt && { system_prompt: system_prompt }),
+            reasoning: !!reasoning, // Ensure boolean
+            // Spread any other OpenAI compatible params if needed, though fal might ignore them
+            // ...restOpenAIParams // Be careful with spreading unknown params
+        };
+        console.log("Prepared Fal Input (lengths):", { system_prompt: system_prompt?.length, prompt: prompt?.length });
+        // Optional: Log full input for debugging (can be verbose)
+        // console.log("Full Fal Input:", JSON.stringify(falInput, null, 2));
+        // --- Use the failover wrapper for the Fal API call ---
+        if (stream) {
+            res.setHeader('Content-Type', 'text/event-stream; charset=utf-8');
+            res.setHeader('Cache-Control', 'no-cache');
+            res.setHeader('Connection', 'keep-alive');
+            res.setHeader('Access-Control-Allow-Origin', '*'); // Keep CORS header if needed
+            res.flushHeaders();
+            let previousOutput = '';
+            let falStream;
+            try {
+                 // --- Initiate stream using failover ---
+                 falStream = await tryFalCallWithFailover('stream', "fal-ai/any-llm", { input: falInput });
+                 // --- Process the stream (existing logic) ---
+                 for await (const event of falStream) {
+                    const currentOutput = (event && typeof event.output === 'string') ? event.output : '';
+                    const isPartial = (event && typeof event.partial === 'boolean') ? event.partial : true;
+                    const errorInfo = (event && event.error) ? event.error : null;
+                    if (errorInfo) {
+                        console.error("Error received *during* fal stream:", errorInfo);
+                        // Note: This error happened *after* successful stream initiation.
+                        // We send an error chunk, but don't mark the key failed here as the connection worked initially.
+                        const errorChunk = { id: `chatcmpl-${Date.now()}-error`, object: "chat.completion.chunk", created: Math.floor(Date.now() / 1000), model: model, choices: [{ index: 0, delta: {}, finish_reason: "error", message: { role: 'assistant', content: `Fal Stream Error: ${JSON.stringify(errorInfo)}` } }] };
+                        res.write(`data: ${JSON.stringify(errorChunk)}\n\n`);
+                        break; // Stop processing this stream
                     }
+                    let deltaContent = '';
+                    if (currentOutput.startsWith(previousOutput)) {
+                        deltaContent = currentOutput.substring(previousOutput.length);
+                    } else if (currentOutput.length > 0) {
+                        console.warn("Fal stream output mismatch detected. Sending full current output as delta.", { previousLength: previousOutput.length, currentLength: currentOutput.length });
+                        deltaContent = currentOutput;
+                        previousOutput = ''; // Reset previous since we sent full
                     }
+                    previousOutput = currentOutput; // Update previousOutput for next iteration
+                    if (deltaContent || !isPartial) { // Send delta or final chunk
+                        const openAIChunk = { id: `chatcmpl-${Date.now()}`, object: "chat.completion.chunk", created: Math.floor(Date.now() / 1000), model: model, choices: [{ index: 0, delta: { content: deltaContent }, finish_reason: isPartial === false ? "stop" : null }] };
+                        res.write(`data: ${JSON.stringify(openAIChunk)}\n\n`);
+                    }
+                 }
+                 res.write(`data: [DONE]\n\n`);
+                 res.end();
+                 console.log("Stream finished successfully.");
+            } catch (streamError) {
+                // This catch handles errors from tryFalCallWithFailover OR the stream processing loop
+                console.error('Error during stream processing:', streamError);
+                // Don't try to write to response if headers already sent and stream failed mid-way uncleanly
+                if (!res.writableEnded) {
+                     try {
+                         // Send a final error chunk if possible
+                         const errorDetails = (streamError instanceof Error) ? streamError.message : JSON.stringify(streamError);
+                         const finalErrorChunk = { error: { message: "Stream failed", type: "proxy_error", details: errorDetails } };
+                         res.write(`data: ${JSON.stringify(finalErrorChunk)}\n\n`);
+                         res.write(`data: [DONE]\n\n`);
+                         res.end();
+                     } catch (finalError) {
+                         console.error('Error sending final stream error message to client:', finalError);
+                         if (!res.writableEnded) { res.end(); } // Ensure response ends
+                     }
                  }
             }
+        } else { // Non-stream
+             console.log("Executing non-stream request with failover...");
+             // --- Call subscribe using failover ---
+             const result = await tryFalCallWithFailover('subscribe', "fal-ai/any-llm", { input: falInput, logs: true });
+             console.log("Received non-stream result from fal-ai via failover wrapper.");
+             // Optional: Log full result for debugging
+             // console.log("Full non-stream result:", JSON.stringify(result, null, 2));
+             // Check for application-level errors *within* the successful response
+             if (result && result.error) {
+                 console.error("Fal-ai returned an application error in non-stream mode (after successful API call):", result.error);
+                 // Return a 500 status but format it like OpenAI error if possible
+                 return res.status(500).json({
+                      object: "error",
+                      message: `Fal-ai application error: ${JSON.stringify(result.error)}`,
+                      type: "fal_ai_error",
+                      param: null,
+                      code: result.error.code || null // Include code if available
+                 });
+             }
+             // --- Format successful non-stream response (existing logic) ---
+             const openAIResponse = {
+                 id: `chatcmpl-${result?.requestId || Date.now()}`, // Use requestId if available
+                 object: "chat.completion",
+                 created: Math.floor(Date.now() / 1000),
+                 model: model, // Use the requested model ID
+                 choices: [{
+                     index: 0,
+                     message: {
+                         role: "assistant",
+                         content: result?.output || "" // Safely access output
+                     },
+                     finish_reason: "stop" // Assume stop for non-stream
+                 }],
+                 usage: { // Fal doesn't provide token usage
+                     prompt_tokens: null,
+                     completion_tokens: null,
+                     total_tokens: null
+                 },
+                 system_fingerprint: null, // Not provided by fal
+                 ...(result?.reasoning && { fal_reasoning: result.reasoning }), // Include reasoning if present
+             };
+             res.json(openAIResponse);
+             console.log("Returned non-stream response successfully.");
         }
+    } catch (error) {
+        // This catches errors from setup, convertMessagesToFalPrompt, or tryFalCallWithFailover (if all keys failed or non-key error occurred)
+        console.error('Unhandled error in /v1/chat/completions:', error);
         if (!res.headersSent) {
+            const errorMessage = (error instanceof Error) ? error.message : JSON.stringify(error);
+            // Provide a more informative error message
+            const errorType = error.message?.includes("All FAL keys failed") ? "api_key_error" : "proxy_internal_error";
+            res.status(500).json({
+                 error: {
+                     message: `Internal Server Error in Proxy: ${errorMessage}`,
+                     type: errorType,
+                     details: error.stack // Optional: include stack in dev/debug mode
+                 }
+            });
         } else if (!res.writableEnded) {
+             console.error("Headers already sent, attempting to end response after error.");
+             res.end(); // Try to end the response if possible
         }
     }
 });
+// --- Server Start ---
 // Bind the Express app to PORT. The callback passed to app.listen prints a
 // startup banner summarising the proxy's configuration and endpoints.
 app.listen(PORT, () => {
+    console.log(`===========================================================`);
+    console.log(` Fal OpenAI Proxy Server (Multi-Key Failover)`);
     console.log(` Listening on port: ${PORT}`);
     // Logs only the number of entries in falKeys, not the keys themselves.
+    console.log(` Loaded ${falKeys.length} FAL API Key(s).`);
     // Logs whether API_KEY is truthy ('Yes'/'No'); the key value is not printed.
     console.log(` API Key Auth Enabled: ${API_KEY ? 'Yes' : 'No'}`);
+    console.log(` Limits: System Prompt=${SYSTEM_PROMPT_LIMIT}, Prompt=${PROMPT_LIMIT}`);
     // The two URLs below use a literal "localhost" host: they are display hints,
     // not derived from the address the server is actually reachable at.
+    console.log(` Chat Completions: POST http://localhost:${PORT}/v1/chat/completions`);
     console.log(` Models Endpoint: GET http://localhost:${PORT}/v1/models`);
+    console.log(`===========================================================`);
 });
+// Root path response (Remains the same)
 // GET / : replies with a fixed text message stating that the proxy is running.
 // NOTE(review): this path is not in the list passed to app.use(..., apiKeyAuth),
 // so it appears to be reachable without an API key — confirm this is intended.
 app.get('/', (req, res) => {
+    res.send('Fal OpenAI Proxy (Multi-Key Failover) is running.');
 });