Spaces:

Calmlo
/

fal-openai-proxy

Running

File size: 18,232 Bytes

ed8e0ee
9df5dcc
ed8e0ee
5de0798
713f2f6
5de0798
2cae477
713f2f6
5de0798
 
713f2f6
c9e0fd6
5de0798
713f2f6
5de0798
 
c9e0fd6
713f2f6
5de0798
 
ed8e0ee
c9e0fd6
5de0798
c9e0fd6
5de0798
c9e0fd6
5de0798
 
 
 
 
 
 
713f2f6
5de0798
713f2f6
c9e0fd6
5de0798
 
 
713f2f6
5de0798
 
 
 
 
ed8e0ee
 
 
 
c9e0fd6
ed8e0ee
 
5de0798
2cae477
 
c9e0fd6
2cae477
 
 
 
c9e0fd6
2cae477
 
5de0798
2cae477
 
c9e0fd6
2cae477
 
5de0798
2cae477
 
c9e0fd6
2cae477
 
c9e0fd6
5de0798
2cae477
 
5de0798
 
 
 
c9e0fd6
5de0798
c9e0fd6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5de0798
c9e0fd6
5de0798
c9e0fd6
 
 
 
ed8e0ee
5de0798
c9e0fd6
 
 
 
5de0798
c9e0fd6
 
 
 
 
 
 
 
9df5dcc
5de0798
c9e0fd6
5de0798
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c9e0fd6
5de0798
 
 
 
 
 
c9e0fd6
5de0798
 
c9e0fd6
 
5de0798
 
 
 
 
 
 
c9e0fd6
 
5de0798
 
 
 
 
 
 
c9e0fd6
5de0798
 
 
 
c9e0fd6
5de0798
 
 
713f2f6
5de0798
 
 
 
 
 
 
713f2f6
5de0798
 
713f2f6
5de0798
 
 
 
 
 
 
 
713f2f6
5de0798
 
713f2f6
 
5de0798
 
 
 
 
713f2f6
5de0798
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
713f2f6
5de0798
713f2f6
 
5de0798
ed8e0ee
984e8a0
ed8e0ee
5de0798
 
c9e0fd6
5de0798
 
 
 
 
c9e0fd6
984e8a0
ed8e0ee
5de0798
 
 
 
 
 
 
ed8e0ee
c9e0fd6
 
 
 
5de0798
c9e0fd6
 
5de0798
 
 
 
 
 
 
 
 
 
 
 
 
ed8e0ee
 
 
 
5de0798
 
 
 
ed8e0ee
5de0798
ed8e0ee
 
 
 
 
984e8a0
ed8e0ee
e6d9a41
5de0798
 
 
 
ed8e0ee
c9e0fd6
ed8e0ee
 
9df5dcc
ed8e0ee
5de0798
 
 
984e8a0
 
c9e0fd6
5de0798
e6d9a41
5de0798
 
ed8e0ee
5de0798
 
e6d9a41
 
5de0798
 
e6d9a41
ed8e0ee
 
5de0798
 
 
 
 
 
 
 
 
 
 
 
 
984e8a0
ed8e0ee
 
5de0798
 
 
 
 
 
 
 
ed8e0ee
5de0798
 
 
 
 
 
 
 
 
 
ed8e0ee
 
5de0798
ed8e0ee
 
5de0798
 
 
 
ed8e0ee
 
 
 
5de0798
c9e0fd6
5de0798
 
 
 
 
 
 
 
 
c9e0fd6
 
5de0798
c9e0fd6
5de0798
c9e0fd6

import { createHash, timingSafeEqual } from 'node:crypto';

import express from 'express';
import { fal } from '@fal-ai/client';

// --- Key Management ---
const FAL_KEY_STRING = process.env.FAL_KEY; // Comma-separated list of Fal keys.
const API_KEY = process.env.API_KEY; // Client-facing key for this proxy; validated further down.

if (!FAL_KEY_STRING) {
    console.error("Error: FAL_KEY environment variable is not set.");
    process.exit(1);
}

// Split FAL_KEY on commas, trim each entry and keep only the non-empty ones.
const falKeys = [];
for (const rawKey of FAL_KEY_STRING.split(',')) {
    const candidateKey = rawKey.trim();
    if (candidateKey.length > 0) {
        falKeys.push(candidateKey);
    }
}

if (falKeys.length === 0) {
    console.error("Error: FAL_KEY environment variable is set, but no valid keys were found after parsing.");
    process.exit(1);
}

console.log(`Loaded ${falKeys.length} Fal AI Keys.`);

// Position in falKeys of the key that the next call to getNextFalKey() hands out.
let currentFalKeyIndex = 0;

/**
 * Hands out the Fal keys in round-robin order.
 *
 * Returns the key at the current rotation index, then advances the index,
 * wrapping back to 0 after the last key. The index used is logged.
 *
 * @returns {string} The Fal key selected for this call.
 */
function getNextFalKey() {
    const selectedIndex = currentFalKeyIndex;
    // Advance (with wrap-around) before returning so the next caller gets the next key.
    currentFalKeyIndex = (selectedIndex + 1) % falKeys.length;
    console.log(`Using Fal Key at index: ${selectedIndex}`);
    return falKeys[selectedIndex];
}
// --- End Key Management ---


// Fail fast at startup: the auth middleware compares every request against API_KEY,
// so the server refuses to run without it.
if (!API_KEY) {
    console.error("Error: API_KEY environment variable is not set.");
    process.exit(1);
}

// Note: fal credentials are intentionally not configured globally here; the chat
// completions handler calls fal.config() with a rotated key for each request.

// Express application; JSON and URL-encoded bodies are accepted up to 50mb.
const app = express();
app.use(express.json({ limit: '50mb' }));
app.use(express.urlencoded({ extended: true, limit: '50mb' }));

// Listening port: PORT from the environment, otherwise 3000.
const PORT = process.env.PORT || 3000;

/**
 * Express middleware that enforces `Authorization: Bearer <API_KEY>`.
 *
 * Responds with HTTP 401 and a JSON `{ error }` body when the header is missing,
 * is not exactly `<scheme> <token>` with a case-insensitive `bearer` scheme, or
 * carries a token that differs from API_KEY. Calls `next()` only on a match.
 *
 * @param {object} req - Express request; only `req.headers['authorization']` is read.
 * @param {object} res - Express response, used to send the 401 replies.
 * @param {Function} next - Invoked with no arguments when the key is valid.
 */
const apiKeyAuth = (req, res, next) => {
    const authHeader = req.headers['authorization'];

    if (!authHeader) {
        console.warn('Unauthorized: No Authorization header provided');
        return res.status(401).json({ error: 'Unauthorized: No API Key provided' });
    }

    const authParts = authHeader.split(' ');
    if (authParts.length !== 2 || authParts[0].toLowerCase() !== 'bearer') {
        console.warn('Unauthorized: Invalid Authorization header format');
        return res.status(401).json({ error: 'Unauthorized: Invalid Authorization header format' });
    }

    // Compare in constant time so response timing does not reveal how much of the key
    // matched. Both sides are hashed first because timingSafeEqual throws when its two
    // buffers differ in length, and the digests are always the same size.
    const providedDigest = createHash('sha256').update(authParts[1]).digest();
    const expectedDigest = createHash('sha256').update(API_KEY).digest();
    if (!timingSafeEqual(providedDigest, expectedDigest)) {
        console.warn('Unauthorized: Invalid API Key');
        return res.status(401).json({ error: 'Unauthorized: Invalid API Key' });
    }

    next();
};

// Require a valid API key on every API route.
app.use(['/v1/models', '/v1/chat/completions'], apiKeyAuth);

// === Global length limits ===
// Upper bounds (measured with String.length) that convertMessagesToFalPrompt applies
// to the `prompt` and `system_prompt` strings it builds.
const PROMPT_LIMIT = 4800;
const SYSTEM_PROMPT_LIMIT = 4800;
// === End of limits ===

// Model IDs advertised by GET /v1/models. The chat completions endpoint only logs a
// warning for a model outside this list; it does not reject the request.
const FAL_SUPPORTED_MODELS = [
    "anthropic/claude-3.7-sonnet",
    "anthropic/claude-3.5-sonnet",
    "anthropic/claude-3-5-haiku",
    "anthropic/claude-3-haiku",
    "google/gemini-pro-1.5",
    "google/gemini-flash-1.5",
    "google/gemini-flash-1.5-8b",
    "google/gemini-2.0-flash-001",
    "meta-llama/llama-3.2-1b-instruct",
    "meta-llama/llama-3.2-3b-instruct",
    "meta-llama/llama-3.1-8b-instruct",
    "meta-llama/llama-3.1-70b-instruct",
    "openai/gpt-4o-mini",
    "openai/gpt-4o",
    "deepseek/deepseek-r1",
    "meta-llama/llama-4-maverick",
    "meta-llama/llama-4-scout"
];

/**
 * Derives the `owned_by` value for a model ID.
 *
 * @param {string} modelId - Model identifier, normally of the form "vendor/name".
 * @returns {string} The text before the first '/', or 'fal-ai' when the ID is
 *   empty/missing or contains no '/'.
 */
const getOwner = (modelId) => {
    const hasVendorPrefix = Boolean(modelId) && modelId.includes('/');
    if (!hasVendorPrefix) {
        return 'fal-ai';
    }
    const [vendor] = modelId.split('/');
    return vendor;
};

// GET /v1/models: returns the supported model IDs as an OpenAI-style model list.
app.get('/v1/models', (req, res) => {
    console.log("Received request for GET /v1/models");
    try {
        const data = [];
        for (const id of FAL_SUPPORTED_MODELS) {
            // `created` is a fixed placeholder timestamp shared by every entry.
            data.push({ id, object: "model", created: 1700000000, owned_by: getOwner(id) });
        }
        res.json({ object: "list", data });
        console.log("Successfully returned model list.");
    } catch (error) {
        console.error("Error processing GET /v1/models:", error);
        res.status(500).json({ error: "Failed to retrieve model list." });
    }
});

// === convertMessagesToFalPrompt ===

/**
 * Flattens an OpenAI message `content` value into plain text.
 *
 * @param {*} content - A string, null/undefined, or an array of content parts
 *   (plain strings or objects carrying a string `text` field).
 * @returns {string} "" for null/undefined; for arrays, the non-empty text parts
 *   joined with "\n" (parts without text, e.g. images, are dropped); otherwise
 *   `String(content)`.
 */
function extractMessageText(content) {
    if (content === null || content === undefined) {
        return "";
    }
    if (Array.isArray(content)) {
        // String(array) would yield "[object Object]" for content-part objects.
        return content
            .map((part) => {
                if (typeof part === 'string') {
                    return part;
                }
                return (part && typeof part.text === 'string') ? part.text : "";
            })
            .filter((text) => text.length > 0)
            .join("\n");
    }
    return String(content);
}

/**
 * Converts an OpenAI-style message list into the `{ system_prompt, prompt }` pair
 * sent to fal-ai/any-llm.
 *
 * Strategy:
 *  - All `system` messages are concatenated (each prefixed "System: ") and truncated
 *    to SYSTEM_PROMPT_LIMIT.
 *  - `user` / `assistant` messages become "Human: ..." / "Assistant: ..." blocks and
 *    are placed newest-first: into `prompt` until PROMPT_LIMIT would be exceeded,
 *    then into whatever room is left in `system_prompt`. Once a slot rejects a
 *    block it is closed, so older blocks never jump ahead of newer ones. Blocks
 *    that fit in neither slot are dropped.
 *  - When both system text and overflowed history exist they are joined with a
 *    fixed separator line, whose length is counted against SYSTEM_PROMPT_LIMIT.
 *
 * All lengths are measured with String.length. Messages with any other role are
 * skipped with a warning.
 *
 * @param {Array<{role: string, content: *}>} messages - Chat messages, oldest first.
 * @returns {{system_prompt: string, prompt: string}} Strings for the fal input;
 *   `system_prompt` is "" when there is neither system text nor overflow history.
 */
function convertMessagesToFalPrompt(messages) {
    // Marker placed between the caller's system text and older conversation history.
    const SEPARATOR = "\n\n-------下面是比较早之前的对话内容-----\n\n";

    let fixed_system_prompt_content = "";
    const conversation_message_blocks = [];
    console.log(`Original messages count: ${messages.length}`);

    // 1. Separate system messages; format user/assistant messages as blocks.
    for (const message of messages) {
        const content = extractMessageText(message.content);
        switch (message.role) {
            case 'system':
                fixed_system_prompt_content += `System: ${content}\n\n`;
                break;
            case 'user':
                conversation_message_blocks.push(`Human: ${content}\n\n`);
                break;
            case 'assistant':
                conversation_message_blocks.push(`Assistant: ${content}\n\n`);
                break;
            default:
                console.warn(`Unsupported role: ${message.role}`);
                continue;
        }
    }

    // 2. Truncate the combined system text if it is too long.
    if (fixed_system_prompt_content.length > SYSTEM_PROMPT_LIMIT) {
        const originalLength = fixed_system_prompt_content.length;
        fixed_system_prompt_content = fixed_system_prompt_content.substring(0, SYSTEM_PROMPT_LIMIT);
        console.warn(`Combined system messages truncated from ${originalLength} to ${SYSTEM_PROMPT_LIMIT}`);
    }
    // Drop surrounding whitespace so the emptiness checks and joins below are exact.
    fixed_system_prompt_content = fixed_system_prompt_content.trim();

    // 3. Work out how much of the system slot is left for conversation history.
    // When system text exists, history can only be appended after SEPARATOR, so the
    // separator's real length is reserved; otherwise the final string could exceed
    // SYSTEM_PROMPT_LIMIT.
    const space_occupied_by_fixed_system = fixed_system_prompt_content.length > 0
        ? fixed_system_prompt_content.length + SEPARATOR.length
        : 0;
    const remaining_system_limit = Math.max(0, SYSTEM_PROMPT_LIMIT - space_occupied_by_fixed_system);
    console.log(`Trimmed fixed system prompt length: ${fixed_system_prompt_content.length}. Approx remaining system history limit: ${remaining_system_limit}`);

    // 4. Fill both slots with user/assistant history, newest message first.
    const prompt_history_blocks = [];
    const system_prompt_history_blocks = [];
    let current_prompt_length = 0;
    let current_system_history_length = 0;
    let promptFull = false;
    let systemHistoryFull = (remaining_system_limit <= 0);

    console.log(`Processing ${conversation_message_blocks.length} user/assistant messages for recency filling.`);
    for (let i = conversation_message_blocks.length - 1; i >= 0; i--) {
        const message_block = conversation_message_blocks[i];
        const block_length = message_block.length;

        if (promptFull && systemHistoryFull) {
            console.log(`Both prompt and system history slots full. Omitting older messages from index ${i}.`);
            break;
        }

        // Prefer the prompt slot. unshift keeps chronological order while iterating backwards.
        if (!promptFull) {
            if (current_prompt_length + block_length <= PROMPT_LIMIT) {
                prompt_history_blocks.unshift(message_block);
                current_prompt_length += block_length;
                continue;
            } else {
                promptFull = true;
                console.log(`Prompt limit (${PROMPT_LIMIT}) reached. Trying system history slot.`);
            }
        }

        // Prompt slot is closed: try the room left in the system slot.
        if (!systemHistoryFull) {
            if (current_system_history_length + block_length <= remaining_system_limit) {
                system_prompt_history_blocks.unshift(message_block);
                current_system_history_length += block_length;
                continue;
            } else {
                systemHistoryFull = true;
                console.log(`System history limit (${remaining_system_limit}) reached.`);
            }
        }
    }

    // 5. Assemble the final prompt and system_prompt (with separator when both parts exist).
    const system_prompt_history_content = system_prompt_history_blocks.join('').trim();
    const final_prompt = prompt_history_blocks.join('').trim();

    let final_system_prompt = "";

    const hasFixedSystem = fixed_system_prompt_content.length > 0;
    const hasSystemHistory = system_prompt_history_content.length > 0;

    if (hasFixedSystem && hasSystemHistory) {
        final_system_prompt = fixed_system_prompt_content + SEPARATOR + system_prompt_history_content;
        console.log("Combining fixed system prompt and history with separator.");
    } else if (hasFixedSystem) {
        final_system_prompt = fixed_system_prompt_content;
        console.log("Using only fixed system prompt.");
    } else if (hasSystemHistory) {
        final_system_prompt = system_prompt_history_content;
        console.log("Using only history in system prompt slot.");
    }

    const result = {
        system_prompt: final_system_prompt,
        prompt: final_prompt
    };

    console.log(`Final system_prompt length (Sys+Separator+Hist): ${result.system_prompt.length}`);
    console.log(`Final prompt length (Hist): ${result.prompt.length}`);

    return result;
}
// === end of convertMessagesToFalPrompt ===


// POST /v1/chat/completions: OpenAI-compatible chat endpoint backed by fal-ai/any-llm.
//
// Request body fields read: `model` (required), `messages` (required, non-empty array),
// `stream` (default false), `reasoning` (default false). Other fields are ignored.
//
// Responses:
//  - 400 JSON when `model` / `messages` are missing or invalid.
//  - stream=true: Server-Sent Events carrying OpenAI "chat.completion.chunk" objects,
//    always terminated by `data: [DONE]`.
//  - stream=false: a single OpenAI "chat.completion" JSON object.
//  - 500 JSON on a fal error or unexpected failure, if headers were not sent yet.
app.post('/v1/chat/completions', async (req, res) => {
    try {
        // Guard against an absent body so a malformed request reaches the 400 path below
        // instead of throwing during destructuring.
        const { model, messages, stream = false, reasoning = false } = req.body ?? {};

        console.log(`Received chat completion request for model: ${model}, stream: ${stream}`);

        if (!model || !messages || !Array.isArray(messages) || messages.length === 0) {
            console.error("Invalid request parameters:", { model, messages: Array.isArray(messages) ? messages.length : typeof messages });
            return res.status(400).json({ error: 'Missing or invalid parameters: model and messages array are required.' });
        }
        // Unknown models are forwarded anyway; the list is advisory.
        if (!FAL_SUPPORTED_MODELS.includes(model)) {
            console.warn(`Warning: Requested model '${model}' is not in the explicitly supported list.`);
        }

        // Rotate to the next Fal key and configure the fal client for this request.
        // NOTE(review): fal.config() changes the shared client; no await separates it from
        // the fal call below, so concurrent requests presumably cannot interleave here.
        // Confirm against how the client captures credentials.
        const selectedFalKey = getNextFalKey();
        fal.config({
            credentials: selectedFalKey,
        });

        const { prompt, system_prompt } = convertMessagesToFalPrompt(messages);

        const falInput = {
            model: model,
            prompt: prompt,
            ...(system_prompt && { system_prompt: system_prompt }),
            reasoning: !!reasoning,
        };
        console.log("Fal Input:", JSON.stringify(falInput, null, 2));
        console.log("Forwarding request to fal-ai with system-priority + separator + recency input:");
        console.log("System Prompt Length:", system_prompt?.length || 0);
        console.log("Prompt Length:", prompt?.length || 0);
        console.log("--- System Prompt Start ---");
        console.log(system_prompt);
        console.log("--- System Prompt End ---");
        console.log("--- Prompt Start ---");
        console.log(prompt);
        console.log("--- Prompt End ---");

        if (stream) {
            // --- Streaming path ---
            res.setHeader('Content-Type', 'text/event-stream; charset=utf-8');
            res.setHeader('Cache-Control', 'no-cache');
            res.setHeader('Connection', 'keep-alive');
            res.setHeader('Access-Control-Allow-Origin', '*');
            res.flushHeaders();

            // Each event's `output` is diffed against the previous one to get an
            // OpenAI-style delta (the code treats `output` as the accumulated text so far).
            let previousOutput = '';

            try {
                // Opened inside this try: headers are already flushed, so a failure here
                // must be reported as an SSE error event rather than an HTTP status.
                const falStream = await fal.stream("fal-ai/any-llm", { input: falInput });

                for await (const event of falStream) {
                    const hasOutput = Boolean(event) && typeof event.output === 'string';
                    const currentOutput = hasOutput ? event.output : '';
                    const isPartial = (event && typeof event.partial === 'boolean') ? event.partial : true;
                    const errorInfo = (event && event.error) ? event.error : null;

                    if (errorInfo) {
                        console.error("Error received in fal stream event:", errorInfo);
                        const errorChunk = {
                            id: `chatcmpl-${Date.now()}-error`,
                            object: "chat.completion.chunk",
                            created: Math.floor(Date.now() / 1000),
                            model: model,
                            choices: [{ index: 0, delta: {}, finish_reason: "error", message: { role: 'assistant', content: `Fal Stream Error: ${JSON.stringify(errorInfo)}` } }]
                        };
                        res.write(`data: ${JSON.stringify(errorChunk)}\n\n`);
                        break; // Stop processing on error; [DONE] is still sent below.
                    }

                    let deltaContent = '';
                    // Only events that carry text move the baseline. An event without
                    // `output` must not reset it, or the next event would re-send the
                    // whole accumulated text as a delta.
                    if (hasOutput) {
                        if (currentOutput.startsWith(previousOutput)) {
                            deltaContent = currentOutput.substring(previousOutput.length);
                        } else if (currentOutput.length > 0) {
                            console.warn("Fal stream output mismatch detected. Sending full current output as delta.", { previousLength: previousOutput.length, currentLength: currentOutput.length });
                            deltaContent = currentOutput;
                        }
                        previousOutput = currentOutput;
                    }

                    // Send a chunk if there is new content or this is the final event (partial === false).
                    if (deltaContent || !isPartial) {
                        const openAIChunk = {
                            id: `chatcmpl-${Date.now()}`,
                            object: "chat.completion.chunk",
                            created: Math.floor(Date.now() / 1000),
                            model: model,
                            choices: [{ index: 0, delta: { content: deltaContent }, finish_reason: isPartial === false ? "stop" : null }]
                        };
                        res.write(`data: ${JSON.stringify(openAIChunk)}\n\n`);
                    }
                }

                // After the loop, terminate the SSE stream.
                if (!res.writableEnded) {
                    res.write(`data: [DONE]\n\n`);
                    res.end();
                    console.log("Stream finished and [DONE] sent.");
                }
            } catch (streamError) {
                console.error('Error during fal stream processing loop:', streamError);
                try {
                    if (!res.writableEnded) { // Check if we can still write to the response
                        const errorDetails = (streamError instanceof Error) ? streamError.message : JSON.stringify(streamError);
                        res.write(`data: ${JSON.stringify({ error: { message: "Stream processing error", type: "proxy_error", details: errorDetails } })}\n\n`);
                        res.write(`data: [DONE]\n\n`); // Send DONE even after error
                        res.end();
                    } else {
                        console.error("Stream already ended, cannot send error message.");
                    }
                } catch (finalError) {
                    console.error('Error sending stream error message to client:', finalError);
                    if (!res.writableEnded) { res.end(); }
                }
            }
        } else {
            // --- Non-streaming path ---
            console.log("Executing non-stream request...");
            const result = await fal.subscribe("fal-ai/any-llm", { input: falInput, logs: true });
            console.log("Received non-stream result from fal-ai:", JSON.stringify(result, null, 2));

            // NOTE(review): per the @fal-ai/client docs, subscribe() resolves to
            // { data, requestId } with the model output under `data`; confirm against the
            // installed version. A result without `data` is used as the payload itself.
            const payload = result?.data ?? result ?? {};
            const falError = payload.error ?? result?.error;

            if (falError) {
                console.error("Fal-ai returned an error in non-stream mode:", falError);
                return res.status(500).json({ object: "error", message: `Fal-ai error: ${JSON.stringify(falError)}`, type: "fal_ai_error", param: null, code: null });
            }

            const openAIResponse = {
                id: `chatcmpl-${result?.requestId || Date.now()}`, object: "chat.completion", created: Math.floor(Date.now() / 1000), model: model,
                choices: [{ index: 0, message: { role: "assistant", content: payload.output || "" }, finish_reason: "stop" }],
                // Token counts are not derived here, so usage is reported as nulls.
                usage: { prompt_tokens: null, completion_tokens: null, total_tokens: null }, system_fingerprint: null,
                ...(payload.reasoning && { fal_reasoning: payload.reasoning }),
            };
            res.json(openAIResponse);
            console.log("Returned non-stream response.");
        }

    } catch (error) {
        console.error('Unhandled error in /v1/chat/completions:', error);
        if (!res.headersSent) {
            const errorMessage = (error instanceof Error) ? error.message : JSON.stringify(error);
            res.status(500).json({ error: 'Internal Server Error in Proxy', details: errorMessage });
        } else if (!res.writableEnded) {
            // Too late for a status code; just close the response.
            console.error("Headers already sent, ending response.");
            res.end();
        }
    }
});

// Start the HTTP server and print a startup banner describing the configuration.
app.listen(PORT, () => {
    const divider = `===================================================`;
    const bannerLines = [
        divider,
        ` Fal OpenAI Proxy Server (Key Rotation + System Top + Separator + Recency)`,
        ` Listening on port: ${PORT}`,
        ` Loaded ${falKeys.length} Fal AI Keys for rotation.`,
        ` Using Limits: System Prompt=${SYSTEM_PROMPT_LIMIT}, Prompt=${PROMPT_LIMIT}`,
        ` API Key Auth Enabled: ${API_KEY ? 'Yes' : 'No'}`,
        ` Chat Completions Endpoint: POST http://localhost:${PORT}/v1/chat/completions`,
        ` Models Endpoint: GET http://localhost:${PORT}/v1/models`,
        divider,
    ];
    // One console.log call per line, in order.
    bannerLines.forEach((line) => {
        console.log(line);
    });
});

// Root path: plain-text status message (this route is not behind the API key check).
app.get('/', (_req, res) => {
    res.send('Fal OpenAI Proxy (Key Rotation + System Top + Separator + Recency Strategy) is running.');
});