Calmlo committed on
Commit
713f2f6
·
verified ·
1 Parent(s): 2cae477

Update server.js

Browse files
Files changed (1) hide show
  1. server.js +338 -135
server.js CHANGED
@@ -1,12 +1,25 @@
1
  import express from 'express';
2
  import { fal } from '@fal-ai/client';
3
 
4
- // 从环境变量读取 Fal AI API Key 和自定义 API Key
5
- const FAL_KEY = process.env.FAL_KEY;
6
- const API_KEY = process.env.API_KEY; // 添加自定义 API Key 环境变量
 
7
 
8
- if (!FAL_KEY) {
9
  console.error("Error: FAL_KEY environment variable is not set.");
 
 
 
 
 
 
 
 
 
 
 
 
10
  process.exit(1);
11
  }
12
 
@@ -15,10 +28,67 @@ if (!API_KEY) {
15
  process.exit(1);
16
  }
17
 
18
- // 配置 fal 客户端
19
- fal.config({
20
- credentials: FAL_KEY,
21
- });
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
  const app = express();
24
  app.use(express.json({ limit: '50mb' }));
@@ -26,39 +96,38 @@ app.use(express.urlencoded({ extended: true, limit: '50mb' }));
26
 
27
  const PORT = process.env.PORT || 3000;
28
 
29
- // API Key 鉴权中间件
30
// Express middleware: require a "Bearer <API_KEY>" Authorization header,
// otherwise answer 401 and stop the chain.
const apiKeyAuth = (req, res, next) => {
  // Log the server-side reason, then reply 401 with the client-facing message.
  const reject = (logMessage, clientError) => {
    console.warn(logMessage);
    return res.status(401).json({ error: clientError });
  };

  const header = req.headers['authorization'];
  if (!header) {
    return reject(
      'Unauthorized: No Authorization header provided',
      'Unauthorized: No API Key provided'
    );
  }

  // Expect exactly two space-separated parts: a case-insensitive "Bearer"
  // scheme followed by the key itself.
  const [scheme, token, ...extra] = header.split(' ');
  if (token === undefined || extra.length > 0 || scheme.toLowerCase() !== 'bearer') {
    return reject(
      'Unauthorized: Invalid Authorization header format',
      'Unauthorized: Invalid Authorization header format'
    );
  }

  if (token !== API_KEY) {
    return reject('Unauthorized: Invalid API Key', 'Unauthorized: Invalid API Key');
  }

  next();
};
52
 
53
- // 应用 API Key 鉴权中间件到所有 API 路由
54
  app.use(['/v1/models', '/v1/chat/completions'], apiKeyAuth);
55
 
56
- // === 全局定义限制 ===
57
  const PROMPT_LIMIT = 4800;
58
  const SYSTEM_PROMPT_LIMIT = 4800;
59
  // === 限制定义结束 ===
60
 
61
- // 定义 fal-ai/any-llm 支持的模型列表
62
  const FAL_SUPPORTED_MODELS = [
63
  "anthropic/claude-3.7-sonnet",
64
  "anthropic/claude-3.5-sonnet",
@@ -79,7 +148,7 @@ const FAL_SUPPORTED_MODELS = [
79
  "meta-llama/llama-4-scout"
80
  ];
81
 
82
- // Helper function to get owner from model ID
83
  const getOwner = (modelId) => {
84
  if (modelId && modelId.includes('/')) {
85
  return modelId.split('/')[0];
@@ -87,12 +156,12 @@ const getOwner = (modelId) => {
87
  return 'fal-ai';
88
  }
89
 
90
- // GET /v1/models endpoint
91
  app.get('/v1/models', (req, res) => {
92
  console.log("Received request for GET /v1/models");
93
  try {
94
  const modelsData = FAL_SUPPORTED_MODELS.map(modelId => ({
95
- id: modelId, object: "model", created: 1700000000, owned_by: getOwner(modelId)
96
  }));
97
  res.json({ object: "list", data: modelsData });
98
  console.log("Successfully returned model list.");
@@ -102,11 +171,12 @@ app.get('/v1/models', (req, res) => {
102
  }
103
  });
104
 
105
- // === 修改后的 convertMessagesToFalPrompt 函数 (System置顶 + 分隔符 + 对话历史Recency) ===
106
  function convertMessagesToFalPrompt(messages) {
 
107
  let fixed_system_prompt_content = "";
108
  const conversation_message_blocks = [];
109
- console.log(`Original messages count: ${messages.length}`);
110
 
111
  // 1. 分离 System 消息,格式化 User/Assistant 消息
112
  for (const message of messages) {
@@ -133,22 +203,15 @@ function convertMessagesToFalPrompt(messages) {
133
  fixed_system_prompt_content = fixed_system_prompt_content.substring(0, SYSTEM_PROMPT_LIMIT);
134
  console.warn(`Combined system messages truncated from ${originalLength} to ${SYSTEM_PROMPT_LIMIT}`);
135
  }
136
- // 清理末尾可能多余的空白,以便后续判断和拼接
137
  fixed_system_prompt_content = fixed_system_prompt_content.trim();
138
 
139
-
140
  // 3. 计算 system_prompt 中留给对话历史的剩余空间
141
- // 注意:这里计算时要考虑分隔符可能占用的长度,但分隔符只在需要时添加
142
- // 因此先计算不含分隔符的剩余空间
143
  let space_occupied_by_fixed_system = 0;
144
  if (fixed_system_prompt_content.length > 0) {
145
- // 如果固定内容不为空,计算其长度 + 后面可能的分隔符的长度(如果需要)
146
- // 暂时只计算内容长度,分隔符在组合时再考虑
147
  space_occupied_by_fixed_system = fixed_system_prompt_content.length + 4; // 预留 \n\n...\n\n 的长度
148
  }
149
  const remaining_system_limit = Math.max(0, SYSTEM_PROMPT_LIMIT - space_occupied_by_fixed_system);
150
- console.log(`Trimmed fixed system prompt length: ${fixed_system_prompt_content.length}. Approx remaining system history limit: ${remaining_system_limit}`);
151
-
152
 
153
  // 4. 反向填充 User/Assistant 对话历史
154
  const prompt_history_blocks = [];
@@ -158,17 +221,16 @@ function convertMessagesToFalPrompt(messages) {
158
  let promptFull = false;
159
  let systemHistoryFull = (remaining_system_limit <= 0);
160
 
161
- console.log(`Processing ${conversation_message_blocks.length} user/assistant messages for recency filling.`);
162
  for (let i = conversation_message_blocks.length - 1; i >= 0; i--) {
163
  const message_block = conversation_message_blocks[i];
164
  const block_length = message_block.length;
165
 
166
  if (promptFull && systemHistoryFull) {
167
- console.log(`Both prompt and system history slots full. Omitting older messages from index ${i}.`);
168
  break;
169
  }
170
 
171
- // 优先尝试放入 prompt
172
  if (!promptFull) {
173
  if (current_prompt_length + block_length <= PROMPT_LIMIT) {
174
  prompt_history_blocks.unshift(message_block);
@@ -176,11 +238,10 @@ function convertMessagesToFalPrompt(messages) {
176
  continue;
177
  } else {
178
  promptFull = true;
179
- console.log(`Prompt limit (${PROMPT_LIMIT}) reached. Trying system history slot.`);
180
  }
181
  }
182
 
183
- // 如果 prompt 满了,尝试放入 system_prompt 的剩余空间
184
  if (!systemHistoryFull) {
185
  if (current_system_history_length + block_length <= remaining_system_limit) {
186
  system_prompt_history_blocks.unshift(message_block);
@@ -188,194 +249,336 @@ function convertMessagesToFalPrompt(messages) {
188
  continue;
189
  } else {
190
  systemHistoryFull = true;
191
- console.log(`System history limit (${remaining_system_limit}) reached.`);
192
  }
193
  }
194
  }
195
 
196
- // 5. *** 组合最终的 prompt 和 system_prompt (包含分隔符逻辑) ***
197
  const system_prompt_history_content = system_prompt_history_blocks.join('').trim();
198
  const final_prompt = prompt_history_blocks.join('').trim();
199
-
200
- // 定义分隔符
201
  const SEPARATOR = "\n\n-------下面是比较早之前的对话内容-----\n\n";
202
-
203
  let final_system_prompt = "";
204
-
205
- // 检查各部分是否有内容 (使用 trim 后的固定部分)
206
  const hasFixedSystem = fixed_system_prompt_content.length > 0;
207
  const hasSystemHistory = system_prompt_history_content.length > 0;
208
 
209
  if (hasFixedSystem && hasSystemHistory) {
210
- // 两部分都有,用分隔符连接
211
  final_system_prompt = fixed_system_prompt_content + SEPARATOR + system_prompt_history_content;
212
- console.log("Combining fixed system prompt and history with separator.");
213
  } else if (hasFixedSystem) {
214
- // 只有固定部分
215
  final_system_prompt = fixed_system_prompt_content;
216
- console.log("Using only fixed system prompt.");
217
  } else if (hasSystemHistory) {
218
- // 只有历史部分 (固定部分为空)
219
  final_system_prompt = system_prompt_history_content;
220
- console.log("Using only history in system prompt slot.");
221
  }
222
- // 如果两部分都为空,final_system_prompt 保持空字符串 ""
223
 
224
- // 6. 返回结果
225
  const result = {
226
- system_prompt: final_system_prompt, // 最终结果不需要再 trim
227
- prompt: final_prompt // final_prompt 在组合前已 trim
228
  };
229
 
230
- console.log(`Final system_prompt length (Sys+Separator+Hist): ${result.system_prompt.length}`);
231
- console.log(`Final prompt length (Hist): ${result.prompt.length}`);
232
 
233
  return result;
234
  }
235
  // === convertMessagesToFalPrompt 函数结束 ===
236
 
237
 
238
- // POST /v1/chat/completions endpoint (保持不变)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
239
  app.post('/v1/chat/completions', async (req, res) => {
240
  const { model, messages, stream = false, reasoning = false, ...restOpenAIParams } = req.body;
241
 
242
- console.log(`Received chat completion request for model: ${model}, stream: ${stream}`);
 
243
 
244
  if (!FAL_SUPPORTED_MODELS.includes(model)) {
245
- console.warn(`Warning: Requested model '${model}' is not in the explicitly supported list.`);
246
  }
247
  if (!model || !messages || !Array.isArray(messages) || messages.length === 0) {
248
- console.error("Invalid request parameters:", { model, messages: Array.isArray(messages) ? messages.length : typeof messages });
249
  return res.status(400).json({ error: 'Missing or invalid parameters: model and messages array are required.' });
250
  }
251
 
252
  try {
253
- // *** 使用更新后的转换函数 ***
254
  const { prompt, system_prompt } = convertMessagesToFalPrompt(messages);
255
-
256
  const falInput = {
257
  model: model,
258
  prompt: prompt,
259
  ...(system_prompt && { system_prompt: system_prompt }),
260
- reasoning: !!reasoning,
261
  };
262
- console.log("Fal Input:", JSON.stringify(falInput, null, 2));
263
- console.log("Forwarding request to fal-ai with system-priority + separator + recency input:");
264
- console.log("System Prompt Length:", system_prompt?.length || 0);
265
- console.log("Prompt Length:", prompt?.length || 0);
266
- // 调试时取消注释可以查看具体内容
267
- console.log("--- System Prompt Start ---");
268
- console.log(system_prompt);
269
- console.log("--- System Prompt End ---");
270
- console.log("--- Prompt Start ---");
271
- console.log(prompt);
272
- console.log("--- Prompt End ---");
273
-
274
-
275
- // --- 流式/非流式处理逻辑 (保持不变) ---
276
  if (stream) {
277
- // ... 流式代码 ...
278
  res.setHeader('Content-Type', 'text/event-stream; charset=utf-8');
279
  res.setHeader('Cache-Control', 'no-cache');
280
  res.setHeader('Connection', 'keep-alive');
281
- res.setHeader('Access-Control-Allow-Origin', '*');
282
  res.flushHeaders();
283
 
284
  let previousOutput = '';
285
-
286
- const falStream = await fal.stream("fal-ai/any-llm", { input: falInput });
287
 
288
  try {
 
 
 
 
289
  for await (const event of falStream) {
290
  const currentOutput = (event && typeof event.output === 'string') ? event.output : '';
291
  const isPartial = (event && typeof event.partial === 'boolean') ? event.partial : true;
292
  const errorInfo = (event && event.error) ? event.error : null;
293
 
294
- if (errorInfo) {
295
- console.error("Error received in fal stream event:", errorInfo);
296
- const errorChunk = { id: `chatcmpl-${Date.now()}-error`, object: "chat.completion.chunk", created: Math.floor(Date.now() / 1000), model: model, choices: [{ index: 0, delta: {}, finish_reason: "error", message: { role: 'assistant', content: `Fal Stream Error: ${JSON.stringify(errorInfo)}` } }] };
297
- res.write(`data: ${JSON.stringify(errorChunk)}\n\n`);
298
- break;
 
 
 
 
 
 
 
299
  }
300
 
 
301
  let deltaContent = '';
302
  if (currentOutput.startsWith(previousOutput)) {
303
  deltaContent = currentOutput.substring(previousOutput.length);
304
  } else if (currentOutput.length > 0) {
305
- console.warn("Fal stream output mismatch detected. Sending full current output as delta.", { previousLength: previousOutput.length, currentLength: currentOutput.length });
306
- deltaContent = currentOutput;
307
- previousOutput = '';
308
  }
309
  previousOutput = currentOutput;
310
 
311
- if (deltaContent || !isPartial) {
312
- const openAIChunk = { id: `chatcmpl-${Date.now()}`, object: "chat.completion.chunk", created: Math.floor(Date.now() / 1000), model: model, choices: [{ index: 0, delta: { content: deltaContent }, finish_reason: isPartial === false ? "stop" : null }] };
313
- res.write(`data: ${JSON.stringify(openAIChunk)}\n\n`);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
314
  }
315
- }
316
- res.write(`data: [DONE]\n\n`);
317
- res.end();
318
- console.log("Stream finished.");
 
 
 
 
 
 
319
 
320
  } catch (streamError) {
321
- console.error('Error during fal stream processing loop:', streamError);
322
- try {
323
- const errorDetails = (streamError instanceof Error) ? streamError.message : JSON.stringify(streamError);
324
- res.write(`data: ${JSON.stringify({ error: { message: "Stream processing error", type: "proxy_error", details: errorDetails } })}\n\n`);
325
- res.write(`data: [DONE]\n\n`);
326
- res.end();
327
- } catch (finalError) {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
328
  console.error('Error sending stream error message to client:', finalError);
 
329
  if (!res.writableEnded) { res.end(); }
330
- }
331
  }
 
332
  } else {
333
- // --- 非流式处理 (保持不变) ---
334
- console.log("Executing non-stream request...");
335
- const result = await fal.subscribe("fal-ai/any-llm", { input: falInput, logs: true });
336
- console.log("Received non-stream result from fal-ai:", JSON.stringify(result, null, 2));
337
-
338
- if (result && result.error) {
339
- console.error("Fal-ai returned an error in non-stream mode:", result.error);
340
- return res.status(500).json({ object: "error", message: `Fal-ai error: ${JSON.stringify(result.error)}`, type: "fal_ai_error", param: null, code: null });
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
341
  }
342
-
343
- const openAIResponse = {
344
- id: `chatcmpl-${result.requestId || Date.now()}`, object: "chat.completion", created: Math.floor(Date.now() / 1000), model: model,
345
- choices: [{ index: 0, message: { role: "assistant", content: result.output || "" }, finish_reason: "stop" }],
346
- usage: { prompt_tokens: null, completion_tokens: null, total_tokens: null }, system_fingerprint: null,
347
- ...(result.reasoning && { fal_reasoning: result.reasoning }),
348
- };
349
- res.json(openAIResponse);
350
- console.log("Returned non-stream response.");
351
  }
352
 
353
  } catch (error) {
354
- console.error('Unhandled error in /v1/chat/completions:', error);
 
355
  if (!res.headersSent) {
356
  const errorMessage = (error instanceof Error) ? error.message : JSON.stringify(error);
357
- res.status(500).json({ error: 'Internal Server Error in Proxy', details: errorMessage });
358
- } else if (!res.writableEnded) {
359
- console.error("Headers already sent, ending response.");
360
- res.end();
 
361
  }
362
  }
363
  });
364
 
365
- // 启动服务器 (更新启动信息)
366
  app.listen(PORT, () => {
367
- console.log(`===================================================`);
368
- console.log(` Fal OpenAI Proxy Server (System Top + Separator + Recency)`);
369
- console.log(` Listening on port: ${PORT}`);
370
- console.log(` Using Limits: System Prompt=${SYSTEM_PROMPT_LIMIT}, Prompt=${PROMPT_LIMIT}`);
371
- console.log(` Fal AI Key Loaded: ${FAL_KEY ? 'Yes' : 'No'}`);
372
- console.log(` API Key Auth Enabled: ${API_KEY ? 'Yes' : 'No'}`);
373
- console.log(` Chat Completions Endpoint: POST http://localhost:${PORT}/v1/chat/completions`);
374
- console.log(` Models Endpoint: GET http://localhost:${PORT}/v1/models`);
375
- console.log(`===================================================`);
 
 
 
 
 
376
  });
377
 
378
- // 根路径响应 (更新信息)
379
// Root endpoint: plain-text liveness/info message.
const rootHandler = (_req, res) => {
  res.send('Fal OpenAI Proxy (System Top + Separator + Recency Strategy) is running.');
};
app.get('/', rootHandler);
 
1
  import express from 'express';
2
  import { fal } from '@fal-ai/client';
3
 
4
+ // --- Key Management Setup ---
5
+ // Read comma-separated keys from the SINGLE environment variable FAL_KEY
6
+ const FAL_KEY_STRING = process.env.FAL_KEY;
7
+ const API_KEY = process.env.API_KEY; // Custom API Key for proxy auth
8
 
9
+ if (!FAL_KEY_STRING) {
10
  console.error("Error: FAL_KEY environment variable is not set.");
11
+ console.error("Ensure FAL_KEY contains a comma-separated list of your Fal AI keys.");
12
+ process.exit(1);
13
+ }
14
+
15
+ // Parse the comma-separated keys from FAL_KEY_STRING
16
+ const falKeys = FAL_KEY_STRING.split(',')
17
+ .map(key => key.trim()) // Remove leading/trailing whitespace
18
+ .filter(key => key.length > 0); // Remove any empty strings resulting from extra commas
19
+
20
+ if (falKeys.length === 0) {
21
+ console.error("Error: No valid FAL keys found in the FAL_KEY environment variable after parsing.");
22
+ console.error("Ensure FAL_KEY is a comma-separated list, e.g., 'key1,key2,key3'.");
23
  process.exit(1);
24
  }
25
 
 
28
  process.exit(1);
29
  }
30
 
31
+ let currentKeyIndex = 0;
32
+ const invalidKeys = new Set(); // Keep track of keys that failed
33
+
34
+ console.log(`Loaded ${falKeys.length} Fal AI Key(s) from the FAL_KEY environment variable.`);
35
+
36
+ // Function to get the next valid key in a round-robin fashion
37
/**
 * Round-robin selector over the parsed Fal AI keys.
 *
 * Scans from the shared `currentKeyIndex` cursor, skipping any key present in
 * the shared `invalidKeys` set, and advances the cursor on every key it
 * inspects (so successive calls rotate through the list).
 *
 * @returns {{key: string, index: number} | null} the next usable key and its
 *   position in `falKeys`, or null when every key is marked invalid.
 */
function getNextValidKey() {
  // Fast path: nothing usable remains.
  if (invalidKeys.size >= falKeys.length) {
    console.error("All Fal AI keys are marked as invalid.");
    return null; // No valid keys left
  }

  let attempts = 0;
  while (attempts < falKeys.length) {
    const keyIndex = currentKeyIndex % falKeys.length;
    const key = falKeys[keyIndex];

    // Move to the next index for the *next* call (advance even when the
    // current key turns out to be invalid, so the scan makes progress).
    currentKeyIndex = (keyIndex + 1) % falKeys.length;

    if (!invalidKeys.has(key)) {
      // Found a valid key
      console.log(`Using Fal Key index: ${keyIndex} (from FAL_KEY list)`);
      return { key, index: keyIndex };
    }

    attempts++;
    // Continue loop to check the next key
  }

  // Unreachable while the size check above holds, but kept as a safeguard.
  console.error("Could not find a valid Fal AI key after checking all.");
  return null;
}
66
+
67
+ // Function to check if an error is likely related to a bad key
68
+ // NOTE: This is a heuristic. You might need to adjust based on actual errors from Fal AI.
69
// Heuristic classifier: does this error look like a bad / blocked / exhausted
// Fal AI key (and therefore warrant rotating to another key)?
// NOTE: adjust the checks below if observed Fal AI errors differ.
function isKeyRelatedError(error) {
  const status = error?.status; // HTTP status, when the error carries one

  // Auth/permission status codes are treated as key problems outright.
  if (status === 401 || status === 403) {
    console.warn(`Detected potential key-related error (HTTP Status: ${status}).`);
    return true;
  }

  const message = error?.message?.toLowerCase() || '';
  const keyErrorPatterns = [
    'invalid api key',
    'authentication failed',
    'permission denied',
    'quota exceeded', // quota errors also rotate to the next key
    'forbidden',
    'unauthorized',
  ];
  if (keyErrorPatterns.some((pattern) => message.includes(pattern))) {
    console.warn(`Detected potential key-related error (message: ${message})`);
    return true;
  }

  // Anything else (network failure, bad input, internal Fal error) is not
  // attributed to the key.
  return false;
}
91
+ // --- End Key Management Setup ---
92
 
93
  const app = express();
94
  app.use(express.json({ limit: '50mb' }));
 
96
 
97
  const PORT = process.env.PORT || 3000;
98
 
99
+ // API Key 鉴权中间件 (unchanged)
100
  const apiKeyAuth = (req, res, next) => {
101
  const authHeader = req.headers['authorization'];
102
+
103
  if (!authHeader) {
104
  console.warn('Unauthorized: No Authorization header provided');
105
  return res.status(401).json({ error: 'Unauthorized: No API Key provided' });
106
  }
107
+
108
  const authParts = authHeader.split(' ');
109
  if (authParts.length !== 2 || authParts[0].toLowerCase() !== 'bearer') {
110
  console.warn('Unauthorized: Invalid Authorization header format');
111
  return res.status(401).json({ error: 'Unauthorized: Invalid Authorization header format' });
112
  }
113
+
114
  const providedKey = authParts[1];
115
  if (providedKey !== API_KEY) {
116
  console.warn('Unauthorized: Invalid API Key');
117
  return res.status(401).json({ error: 'Unauthorized: Invalid API Key' });
118
  }
119
+
120
  next();
121
  };
122
 
 
123
  app.use(['/v1/models', '/v1/chat/completions'], apiKeyAuth);
124
 
125
+ // === 全局定义限制 === (unchanged)
126
  const PROMPT_LIMIT = 4800;
127
  const SYSTEM_PROMPT_LIMIT = 4800;
128
  // === 限制定义结束 ===
129
 
130
+ // 定义 fal-ai/any-llm 支持的模型列表 (unchanged)
131
  const FAL_SUPPORTED_MODELS = [
132
  "anthropic/claude-3.7-sonnet",
133
  "anthropic/claude-3.5-sonnet",
 
148
  "meta-llama/llama-4-scout"
149
  ];
150
 
151
+ // Helper function getOwner (unchanged)
152
  const getOwner = (modelId) => {
153
  if (modelId && modelId.includes('/')) {
154
  return modelId.split('/')[0];
 
156
  return 'fal-ai';
157
  }
158
 
159
+ // GET /v1/models endpoint (unchanged)
160
  app.get('/v1/models', (req, res) => {
161
  console.log("Received request for GET /v1/models");
162
  try {
163
  const modelsData = FAL_SUPPORTED_MODELS.map(modelId => ({
164
+ id: modelId, object: "model", created: Math.floor(Date.now() / 1000), owned_by: getOwner(modelId) // Use current time for created
165
  }));
166
  res.json({ object: "list", data: modelsData });
167
  console.log("Successfully returned model list.");
 
171
  }
172
  });
173
 
174
+ // convertMessagesToFalPrompt 函数 (unchanged)
175
  function convertMessagesToFalPrompt(messages) {
176
+ // ... (keep existing conversion logic)
177
  let fixed_system_prompt_content = "";
178
  const conversation_message_blocks = [];
179
+ // console.log(`Original messages count: ${messages.length}`); // Less verbose logging
180
 
181
  // 1. 分离 System 消息,格式化 User/Assistant 消息
182
  for (const message of messages) {
 
203
  fixed_system_prompt_content = fixed_system_prompt_content.substring(0, SYSTEM_PROMPT_LIMIT);
204
  console.warn(`Combined system messages truncated from ${originalLength} to ${SYSTEM_PROMPT_LIMIT}`);
205
  }
 
206
  fixed_system_prompt_content = fixed_system_prompt_content.trim();
207
 
 
208
  // 3. 计算 system_prompt 中留给对话历史的剩余空间
 
 
209
  let space_occupied_by_fixed_system = 0;
210
  if (fixed_system_prompt_content.length > 0) {
 
 
211
  space_occupied_by_fixed_system = fixed_system_prompt_content.length + 4; // 预留 \n\n...\n\n 的长度
212
  }
213
  const remaining_system_limit = Math.max(0, SYSTEM_PROMPT_LIMIT - space_occupied_by_fixed_system);
214
+ // console.log(`Trimmed fixed system prompt length: ${fixed_system_prompt_content.length}. Approx remaining system history limit: ${remaining_system_limit}`);
 
215
 
216
  // 4. 反向填充 User/Assistant 对话历史
217
  const prompt_history_blocks = [];
 
221
  let promptFull = false;
222
  let systemHistoryFull = (remaining_system_limit <= 0);
223
 
224
+ // console.log(`Processing ${conversation_message_blocks.length} user/assistant messages for recency filling.`);
225
  for (let i = conversation_message_blocks.length - 1; i >= 0; i--) {
226
  const message_block = conversation_message_blocks[i];
227
  const block_length = message_block.length;
228
 
229
  if (promptFull && systemHistoryFull) {
230
+ // console.log(`Both prompt and system history slots full. Omitting older messages from index ${i}.`);
231
  break;
232
  }
233
 
 
234
  if (!promptFull) {
235
  if (current_prompt_length + block_length <= PROMPT_LIMIT) {
236
  prompt_history_blocks.unshift(message_block);
 
238
  continue;
239
  } else {
240
  promptFull = true;
241
+ // console.log(`Prompt limit (${PROMPT_LIMIT}) reached. Trying system history slot.`);
242
  }
243
  }
244
 
 
245
  if (!systemHistoryFull) {
246
  if (current_system_history_length + block_length <= remaining_system_limit) {
247
  system_prompt_history_blocks.unshift(message_block);
 
249
  continue;
250
  } else {
251
  systemHistoryFull = true;
252
+ // console.log(`System history limit (${remaining_system_limit}) reached.`);
253
  }
254
  }
255
  }
256
 
257
+ // 5. 组合最终的 prompt 和 system_prompt
258
  const system_prompt_history_content = system_prompt_history_blocks.join('').trim();
259
  const final_prompt = prompt_history_blocks.join('').trim();
 
 
260
  const SEPARATOR = "\n\n-------下面是比较早之前的对话内容-----\n\n";
 
261
  let final_system_prompt = "";
 
 
262
  const hasFixedSystem = fixed_system_prompt_content.length > 0;
263
  const hasSystemHistory = system_prompt_history_content.length > 0;
264
 
265
  if (hasFixedSystem && hasSystemHistory) {
 
266
  final_system_prompt = fixed_system_prompt_content + SEPARATOR + system_prompt_history_content;
267
+ // console.log("Combining fixed system prompt and history with separator.");
268
  } else if (hasFixedSystem) {
 
269
  final_system_prompt = fixed_system_prompt_content;
270
+ // console.log("Using only fixed system prompt.");
271
  } else if (hasSystemHistory) {
 
272
  final_system_prompt = system_prompt_history_content;
273
+ // console.log("Using only history in system prompt slot.");
274
  }
 
275
 
 
276
  const result = {
277
+ system_prompt: final_system_prompt,
278
+ prompt: final_prompt
279
  };
280
 
281
+ // console.log(`Final system_prompt length (Sys+Separator+Hist): ${result.system_prompt.length}`);
282
+ // console.log(`Final prompt length (Hist): ${result.prompt.length}`);
283
 
284
  return result;
285
  }
286
  // === convertMessagesToFalPrompt 函数结束 ===
287
 
288
 
289
+ // --- Helper function to make Fal AI request with retries ---
290
/**
 * Execute a "fal-ai/any-llm" request, rotating through the configured Fal
 * keys until one succeeds, a non-key-related error occurs, or every key has
 * been tried.
 *
 * Keys that fail with a key-related error (per isKeyRelatedError) are added
 * to the shared `invalidKeys` set so future requests skip them.
 *
 * @param {object} falInput - input object forwarded to fal.stream / fal.subscribe
 * @param {boolean} [stream=false] - when true, returns the initiated fal
 *   stream for the caller to iterate; when false, awaits fal.subscribe and
 *   returns its result.
 * @returns {Promise<object>} the stream handle (stream mode) or the subscribe result
 * @throws {Error} when no valid keys remain, when all attempted keys fail
 *   with key-related errors, or immediately on the first non-key-related error
 */
async function makeFalRequestWithRetry(falInput, stream = false) {
  let attempts = 0;
  const maxAttempts = falKeys.length; // Try each key at most once per request
  const attemptedKeysInThisRequest = new Set(); // Track keys tried for *this* specific request

  while (attempts < maxAttempts) {
    const keyInfo = getNextValidKey();

    if (!keyInfo) {
      // This happens if all keys are currently in the invalidKeys set
      throw new Error("No valid Fal AI keys available (all marked as invalid).");
    }

    // Avoid retrying the *exact same key* within the *same request attempt cycle*.
    // This guards against potential infinite loops if getNextValidKey had issues.
    if (attemptedKeysInThisRequest.has(keyInfo.key)) {
      console.warn(`Key at index ${keyInfo.index} already attempted for this request cycle. Skipping.`);
      // Don't increment attempts here, as we didn't actually *use* the key.
      // Let the loop continue to find the next *different* valid key.
      // If all keys are invalid, the check at the start of the loop handles it.
      continue;
    }
    attemptedKeysInThisRequest.add(keyInfo.key);
    attempts++; // Count this as a distinct attempt with a key

    try {
      console.log(`Attempt ${attempts}/${maxAttempts}: Trying Fal Key index ${keyInfo.index}...`);

      // *** CRITICAL: Reconfigure the global fal client with the selected key.
      // Concurrent requests share this client, so a parallel request could
      // observe the wrong credentials under load.
      console.warn("Concurrency Warning: Reconfiguring global fal client. Ensure sufficient instance isolation if under high load.");
      fal.config({ credentials: keyInfo.key });

      if (stream) {
        // Return the stream directly for the caller to handle
        const falStream = await fal.stream("fal-ai/any-llm", { input: falInput });
        console.log(`Successfully initiated stream with key index ${keyInfo.index}.`);
        return falStream; // Success, let the caller handle iteration
      } else {
        // For non-stream, wait for the result here
        console.log(`Executing non-stream request with key index ${keyInfo.index}...`);
        const result = await fal.subscribe("fal-ai/any-llm", { input: falInput, logs: true });
        console.log(`Successfully received non-stream result with key index ${keyInfo.index}.`);

        // Check for errors *within* the successful response structure
        if (result && result.error) {
          console.error(`Fal-ai returned an error in non-stream result (Key Index ${keyInfo.index}):`, result.error);
          // Treat this like a general Fal error, not necessarily a key error unless message indicates it.
          // Convert it to a standard Error object to be caught below.
          throw new Error(`Fal-ai error in result: ${JSON.stringify(result.error)}`);
        }
        return result; // Success
      }
    } catch (error) {
      console.error(`Error using Fal Key index ${keyInfo.index}:`, error.message || error);

      if (isKeyRelatedError(error)) {
        console.warn(`Marking Fal Key index ${keyInfo.index} as invalid due to error.`);
        invalidKeys.add(keyInfo.key);
        // Continue to the next iteration to try another key
      } else {
        // Not identified as a key-related error (e.g., network issue, bad input, internal Fal error).
        // Fail the request immediately, don't retry with other keys for this type of error.
        console.error("Error does not appear to be key-related. Failing request without further retries.");
        throw error; // Re-throw the original error to be caught by the main handler
      }
    }
  }

  // If the loop finishes, all keys were tried and marked invalid *within this request cycle*
  throw new Error(`Request failed after trying ${attempts} unique Fal key(s). All failed with key-related errors or were already marked invalid.`);
}
361
+
362
+
363
+ // POST /v1/chat/completions endpoint (Modified to use retry logic)
364
  app.post('/v1/chat/completions', async (req, res) => {
365
  const { model, messages, stream = false, reasoning = false, ...restOpenAIParams } = req.body;
366
 
367
+ // Basic logging for request entry
368
+ console.log(`--> POST /v1/chat/completions | Model: ${model} | Stream: ${stream}`);
369
 
370
  if (!FAL_SUPPORTED_MODELS.includes(model)) {
371
+ console.warn(`Warning: Requested model '${model}' is not in the explicitly supported list. Proxy will still attempt.`);
372
  }
373
  if (!model || !messages || !Array.isArray(messages) || messages.length === 0) {
374
+ console.error("Invalid request: Missing 'model' or 'messages' array.");
375
  return res.status(400).json({ error: 'Missing or invalid parameters: model and messages array are required.' });
376
  }
377
 
378
  try {
379
+ // --- Prepare Input ---
380
  const { prompt, system_prompt } = convertMessagesToFalPrompt(messages);
 
381
  const falInput = {
382
  model: model,
383
  prompt: prompt,
384
  ...(system_prompt && { system_prompt: system_prompt }),
385
+ reasoning: !!reasoning, // Ensure boolean
386
  };
387
+ // console.log("Fal Input:", JSON.stringify(falInput, null, 2)); // Verbose logging
388
+ console.log("Attempting Fal request with key rotation/retry...");
389
+
390
+ // --- Handle Stream vs Non-Stream ---
 
 
 
 
 
 
 
 
 
 
391
  if (stream) {
 
392
  res.setHeader('Content-Type', 'text/event-stream; charset=utf-8');
393
  res.setHeader('Cache-Control', 'no-cache');
394
  res.setHeader('Connection', 'keep-alive');
395
+ res.setHeader('Access-Control-Allow-Origin', '*'); // Consider restricting in production
396
  res.flushHeaders();
397
 
398
  let previousOutput = '';
399
+ let falStream;
 
400
 
401
  try {
402
+ // Initiate stream using the retry logic
403
+ falStream = await makeFalRequestWithRetry(falInput, true);
404
+
405
+ // Process the stream events
406
  for await (const event of falStream) {
407
  const currentOutput = (event && typeof event.output === 'string') ? event.output : '';
408
  const isPartial = (event && typeof event.partial === 'boolean') ? event.partial : true;
409
  const errorInfo = (event && event.error) ? event.error : null;
410
 
411
+ if (errorInfo) {
412
+ // Log error from within the stream, but continue processing if possible
413
+ console.error("Error received *within* fal stream event:", errorInfo);
414
+ // Send an error chunk to the client (optional, depends on desired behavior)
415
+ const errorChunk = { id: `chatcmpl-${Date.now()}-error`, object: "chat.completion.chunk", created: Math.floor(Date.now() / 1000), model: model, choices: [{ index: 0, delta: {}, finish_reason: "error", message: { role: 'assistant', content: `Fal Stream Event Error: ${JSON.stringify(errorInfo)}` } }] };
416
+ // Safety check before writing
417
+ if (!res.writableEnded) {
418
+ res.write(`data: ${JSON.stringify(errorChunk)}\n\n`);
419
+ } else {
420
+ console.warn("Stream already ended when trying to write stream event error.");
421
+ }
422
+ // Decide whether to break or continue based on error severity if needed
423
  }
424
 
425
+ // Calculate delta (same logic as before)
426
  let deltaContent = '';
427
  if (currentOutput.startsWith(previousOutput)) {
428
  deltaContent = currentOutput.substring(previousOutput.length);
429
  } else if (currentOutput.length > 0) {
430
+ // console.warn("Fal stream output mismatch. Sending full current output as delta."); // Less verbose
431
+ deltaContent = currentOutput;
432
+ previousOutput = ''; // Reset previous output on mismatch
433
  }
434
  previousOutput = currentOutput;
435
 
436
+ // Send OpenAI compatible chunk
437
+ if (deltaContent || !isPartial) { // Send even if delta is empty when finishing
438
+ const openAIChunk = {
439
+ id: `chatcmpl-${Date.now()}`, // Consider more unique ID if needed
440
+ object: "chat.completion.chunk",
441
+ created: Math.floor(Date.now() / 1000),
442
+ model: model, // Echo back the requested model
443
+ choices: [{
444
+ index: 0,
445
+ delta: { content: deltaContent },
446
+ finish_reason: isPartial === false ? "stop" : null
447
+ }]
448
+ };
449
+ // Safety check before writing
450
+ if (!res.writableEnded) {
451
+ res.write(`data: ${JSON.stringify(openAIChunk)}\n\n`);
452
+ } else {
453
+ console.warn("Stream already ended when trying to write data chunk.");
454
+ }
455
  }
456
+ } // End for-await loop
457
+
458
+ // Send the final [DONE] marker
459
+ if (!res.writableEnded) {
460
+ res.write(`data: [DONE]\n\n`);
461
+ res.end();
462
+ console.log("<-- Stream finished successfully.");
463
+ } else {
464
+ console.log("<-- Stream finished, but connection was already ended.");
465
+ }
466
 
467
  } catch (streamError) {
468
+ // Catches errors from makeFalRequestWithRetry OR the stream iteration itself
469
+ console.error('Error during stream request processing:', streamError.message || streamError);
470
+ try {
471
+ if (!res.headersSent) {
472
+ // Error likely occurred in makeFalRequestWithRetry before stream started
473
+ res.status(502).json({ // 502 Bad Gateway might be appropriate
474
+ error: 'Failed to initiate Fal stream',
475
+ details: streamError.message || 'Underlying Fal request failed or timed out.'
476
+ });
477
+ console.log("<-- Stream initiation failed response sent.");
478
+ } else if (!res.writableEnded) {
479
+ // Stream started but failed during processing
480
+ const errorDetails = (streamError instanceof Error) ? streamError.message : JSON.stringify(streamError);
481
+ // Send error details in the stream if possible
482
+ res.write(`data: ${JSON.stringify({ error: { message: "Stream processing error after initiation", type: "proxy_error", details: errorDetails } })}\n\n`);
483
+ res.write(`data: [DONE]\n\n`); // Still send DONE after error for client handling
484
+ res.end();
485
+ console.log("<-- Stream error sent, stream ended.");
486
+ } else {
487
+ console.log("<-- Stream error occurred, but connection already ended.");
488
+ }
489
+ } catch (finalError) {
490
  console.error('Error sending stream error message to client:', finalError);
491
+ // Ensure response is ended if possible
492
  if (!res.writableEnded) { res.end(); }
493
+ }
494
  }
495
+
496
  } else {
497
+ // --- Non-Stream ---
498
+ try {
499
+ // Get the result using the retry logic
500
+ const result = await makeFalRequestWithRetry(falInput, false);
501
+ // console.log("Received non-stream result via retry function:", JSON.stringify(result, null, 2)); // Verbose
502
+
503
+ // Construct OpenAI compatible response
504
+ const openAIResponse = {
505
+ id: `chatcmpl-${result.requestId || Date.now()}`,
506
+ object: "chat.completion",
507
+ created: Math.floor(Date.now() / 1000),
508
+ model: model, // Echo back requested model
509
+ choices: [{
510
+ index: 0,
511
+ message: {
512
+ role: "assistant",
513
+ content: result.output || "" // Ensure content is string
514
+ },
515
+ finish_reason: "stop" // Assume stop for non-stream success
516
+ }],
517
+ usage: { // Provide null usage as Fal doesn't return it
518
+ prompt_tokens: null,
519
+ completion_tokens: null,
520
+ total_tokens: null
521
+ },
522
+ system_fingerprint: null, // Fal doesn't provide this
523
+ ...(result.reasoning && { fal_reasoning: result.reasoning }), // Include Fal specific reasoning if present
524
+ };
525
+
526
+ res.json(openAIResponse);
527
+ console.log("<-- Non-stream response sent successfully.");
528
+
529
+ } catch (error) {
530
+ // Catches errors from makeFalRequestWithRetry (e.g., all keys failed or non-key error)
531
+ console.error('Error during non-stream request processing:', error.message || error);
532
+ if (!res.headersSent) {
533
+ const errorMessage = (error instanceof Error) ? error.message : JSON.stringify(error);
534
+ // Check if it was the "all keys failed" error
535
+ const finalMessage = errorMessage.includes("No valid Fal AI keys available") || errorMessage.includes("Request failed after trying")
536
+ ? `Fal request failed after trying all available keys: ${errorMessage}`
537
+ : `Internal Server Error processing Fal request: ${errorMessage}`;
538
+ // Use 502 Bad Gateway if it's likely an upstream (Fal) failure
539
+ res.status(502).json({ error: 'Fal Request Failed', details: finalMessage });
540
+ console.log("<-- Non-stream error response sent.");
541
+ } else {
542
+ // Should be rare for non-stream, but handle just in case
543
+ console.error("Headers already sent for non-stream error? This is unexpected.");
544
+ if (!res.writableEnded) { res.end(); }
545
+ }
546
  }
 
 
 
 
 
 
 
 
 
547
  }
548
 
549
  } catch (error) {
550
+ // Catch errors from parameter validation or prompt conversion *before* calling Fal
551
+ console.error('Unhandled error before initiating Fal request:', error.message || error);
552
  if (!res.headersSent) {
553
  const errorMessage = (error instanceof Error) ? error.message : JSON.stringify(error);
554
+ res.status(500).json({ error: 'Internal Server Error in Proxy Setup', details: errorMessage });
555
+ console.log("<-- Proxy setup error response sent.");
556
+ } else {
557
+ console.error("Headers already sent when catching setup error. Ending response.");
558
+ if (!res.writableEnded) { res.end(); }
559
  }
560
  }
561
  });
562
 
563
// Start the server and print a startup banner describing the proxy's
// configuration (key count, auth state, input limits, endpoints).
app.listen(PORT, () => {
  const divider = `---------------------------------------------------------------------`;
  // Assemble the banner line-by-line; emitting each via console.log keeps
  // the output identical to logging the lines individually.
  const bannerLines = [
    `=====================================================================`,
    ` Fal OpenAI Proxy Server (Multi-Key Rotation & Failover)`,
    divider,
    ` Listening on port : ${PORT}`,
    ` Reading Fal Keys from : FAL_KEY environment variable (comma-separated)`,
    ` Loaded Keys Count : ${falKeys.length}`,
    ` API Key Auth : ${API_KEY ? 'Enabled (using API_KEY env var)' : 'Disabled'}`,
    ` Input Limits : System Prompt=${SYSTEM_PROMPT_LIMIT}, Prompt=${PROMPT_LIMIT}`,
    ` Concurrency Warning : Global Fal client reconfigured per request.`,
    divider,
    ` Endpoints:`,
    ` POST http://localhost:${PORT}/v1/chat/completions`,
    ` GET http://localhost:${PORT}/v1/models`,
    `=====================================================================`,
  ];
  for (const line of bannerLines) {
    console.log(line);
  }
});
580
 
581
// Root-path health/info response: confirms the proxy is up and reports
// how many Fal keys were loaded from the FAL_KEY environment variable.
app.get('/', function rootHandler(req, res) {
  const statusMessage = `Fal OpenAI Proxy (Multi-Key Rotation from FAL_KEY) is running. Loaded ${falKeys.length} key(s).`;
  res.send(statusMessage);
});