Wauplin HF Staff commited on
Commit
db8d62b
·
verified ·
1 Parent(s): e121d54

Upload folder using huggingface_hub

Browse files
Files changed (4) hide show
  1. src/mcp.ts +1 -14
  2. src/routes/landingPageHtml.ts +27 -0
  3. src/routes/responses.ts +583 -462
  4. src/schemas.ts +11 -2
src/mcp.ts CHANGED
@@ -6,8 +6,6 @@ import { URL } from "url";
6
 
7
  import type { McpServerParams } from "./schemas";
8
  import { McpResultFormatter } from "./lib/McpResultFormatter";
9
- import { generateUniqueId } from "./lib/generateUniqueId";
10
- import type { ResponseOutputItem } from "openai/resources/responses/responses";
11
 
12
  export async function connectMcpServer(mcpServer: McpServerParams): Promise<Client> {
13
  const mcp = new Client({ name: "@huggingface/responses.js", version: packageVersion });
@@ -37,9 +35,8 @@ export async function connectMcpServer(mcpServer: McpServerParams): Promise<Clie
37
  export async function callMcpTool(
38
  mcpServer: McpServerParams,
39
  toolName: string,
40
- server_label: string,
41
  argumentsString: string
42
- ): Promise<ResponseOutputItem> {
43
  try {
44
  const client = await connectMcpServer(mcpServer);
45
  const toolArgs: Record<string, unknown> = argumentsString === "" ? {} : JSON.parse(argumentsString);
@@ -47,22 +44,12 @@ export async function callMcpTool(
47
  const toolResponse = await client.callTool({ name: toolName, arguments: toolArgs });
48
  const formattedResult = McpResultFormatter.format(toolResponse);
49
  return {
50
- type: "mcp_call",
51
- id: generateUniqueId("mcp_call"),
52
- name: toolName,
53
- server_label: server_label,
54
- arguments: argumentsString,
55
  output: formattedResult,
56
  };
57
  } catch (error) {
58
  const errorMessage =
59
  error instanceof Error ? error.message : typeof error === "string" ? error : JSON.stringify(error);
60
  return {
61
- type: "mcp_call",
62
- id: generateUniqueId("mcp_call"),
63
- name: toolName,
64
- server_label: server_label,
65
- arguments: argumentsString,
66
  error: errorMessage,
67
  };
68
  }
 
6
 
7
  import type { McpServerParams } from "./schemas";
8
  import { McpResultFormatter } from "./lib/McpResultFormatter";
 
 
9
 
10
  export async function connectMcpServer(mcpServer: McpServerParams): Promise<Client> {
11
  const mcp = new Client({ name: "@huggingface/responses.js", version: packageVersion });
 
35
  export async function callMcpTool(
36
  mcpServer: McpServerParams,
37
  toolName: string,
 
38
  argumentsString: string
39
+ ): Promise<{ error: string; output?: undefined } | { error?: undefined; output: string }> {
40
  try {
41
  const client = await connectMcpServer(mcpServer);
42
  const toolArgs: Record<string, unknown> = argumentsString === "" ? {} : JSON.parse(argumentsString);
 
44
  const toolResponse = await client.callTool({ name: toolName, arguments: toolArgs });
45
  const formattedResult = McpResultFormatter.format(toolResponse);
46
  return {
 
 
 
 
 
47
  output: formattedResult,
48
  };
49
  } catch (error) {
50
  const errorMessage =
51
  error instanceof Error ? error.message : typeof error === "string" ? error : JSON.stringify(error);
52
  return {
 
 
 
 
 
53
  error: errorMessage,
54
  };
55
  }
src/routes/landingPageHtml.ts CHANGED
@@ -498,6 +498,7 @@ export function getLandingPageHtml(req: Request, res: Response): void {
498
  <button class="examples-tab" type="button">Streaming</button>
499
  <button class="examples-tab" type="button">Function Calling</button>
500
  <button class="examples-tab" type="button">Structured Output</button>
 
501
  </div>
502
  <div class="example-panel active">
503
  <pre><button class="copy-btn" onclick="copyCode(this)">Copy</button><code class="language-python">from openai import OpenAI
@@ -657,6 +658,32 @@ response = client.responses.parse(
657
 
658
  print(response.output_parsed)</code></pre>
659
  </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
660
  </section>
661
  <footer class="more-info-footer">
662
  <div style="font-weight:600; color:var(--primary-dark); font-size:1.13em; margin-bottom:0.5em;">More Info</div>
 
498
  <button class="examples-tab" type="button">Streaming</button>
499
  <button class="examples-tab" type="button">Function Calling</button>
500
  <button class="examples-tab" type="button">Structured Output</button>
501
+ <button class="examples-tab" type="button">MCP</button>
502
  </div>
503
  <div class="example-panel active">
504
  <pre><button class="copy-btn" onclick="copyCode(this)">Copy</button><code class="language-python">from openai import OpenAI
 
658
 
659
  print(response.output_parsed)</code></pre>
660
  </div>
661
+ <div class="example-panel">
662
+ <pre><button class="copy-btn" onclick="copyCode(this)">Copy</button><code class="language-python">from openai import OpenAI
663
+ import os
664
+
665
+ client = OpenAI(
666
+ base_url="${baseUrl}",
667
+ api_key=os.getenv("HF_TOKEN"), # visit https://huggingface.co/settings/tokens
668
+ )
669
+
670
+ response = client.responses.create(
671
+ model="cerebras@meta-llama/Llama-3.3-70B-Instruct",
672
+ input="how does tiktoken work?",
673
+ tools=[
674
+ {
675
+ "type": "mcp",
676
+ "server_label": "gitmcp",
677
+ "server_url": "https://gitmcp.io/openai/tiktoken",
678
+ "allowed_tools": ["search_tiktoken_documentation", "fetch_tiktoken_documentation"],
679
+ "require_approval": "never",
680
+ },
681
+ ],
682
+ )
683
+
684
+ for output in response.output:
685
+ print(output)</code></pre>
686
+ </div>
687
  </section>
688
  <footer class="more-info-footer">
689
  <div style="font-weight:600; color:var(--primary-dark); font-size:1.13em; margin-bottom:0.5em;">More Info</div>
src/routes/responses.ts CHANGED
@@ -17,10 +17,7 @@ import type {
17
  ResponseFunctionToolCall,
18
  ResponseOutputItem,
19
  } from "openai/resources/responses/responses";
20
- import type {
21
- ChatCompletionInputTool,
22
- ChatCompletionStreamOutputUsage,
23
- } from "@huggingface/tasks/dist/commonjs/tasks/chat-completion/inference.js";
24
  import { callMcpTool, connectMcpServer } from "../mcp.js";
25
 
26
  class StreamingError extends Error {
@@ -30,12 +27,129 @@ class StreamingError extends Error {
30
  }
31
  }
32
 
 
 
 
33
  export const postCreateResponse = async (
34
  req: ValidatedRequest<CreateResponseParams>,
35
  res: ExpressResponse
36
  ): Promise<void> => {
37
- const apiKey = req.headers.authorization?.split(" ")[1];
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  if (!apiKey) {
40
  res.status(401).json({
41
  success: false,
@@ -44,11 +158,111 @@ export const postCreateResponse = async (
44
  return;
45
  }
46
 
47
- const client = new InferenceClient(apiKey);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  const messages: ChatCompletionInputMessage[] = req.body.instructions
49
  ? [{ role: "system", content: req.body.instructions }]
50
  : [];
51
-
52
  if (Array.isArray(req.body.input)) {
53
  messages.push(
54
  ...req.body.input
@@ -103,13 +317,20 @@ export const postCreateResponse = async (
103
  .filter((item) => item !== undefined),
104
  };
105
  case "mcp_list_tools": {
106
- // Hacky: will be dropped by filter
107
  return {
108
  role: "assistant",
109
  name: "mcp_list_tools",
110
  content: "",
111
  };
112
  }
 
 
 
 
 
 
 
113
  case "mcp_approval_request": {
114
  return {
115
  role: "assistant",
@@ -132,105 +353,7 @@ export const postCreateResponse = async (
132
  messages.push({ role: "user", content: req.body.input });
133
  }
134
 
135
- const output: ResponseOutputItem[] = [];
136
- let tools: ChatCompletionInputTool[] | undefined = [];
137
- const mcpToolsMapping: Record<string, McpServerParams> = {};
138
- if (req.body.tools) {
139
- await Promise.all(
140
- req.body.tools.map(async (tool) => {
141
- switch (tool.type) {
142
- case "function":
143
- tools?.push({
144
- type: tool.type,
145
- function: {
146
- name: tool.name,
147
- parameters: tool.parameters,
148
- description: tool.description,
149
- strict: tool.strict,
150
- },
151
- });
152
- break;
153
- case "mcp": {
154
- let mcpListTools: ResponseOutputItem.McpListTools | undefined;
155
-
156
- // If MCP list tools is already in the input, use it
157
- if (Array.isArray(req.body.input)) {
158
- for (const item of req.body.input) {
159
- if (item.type === "mcp_list_tools" && item.server_label === tool.server_label) {
160
- mcpListTools = item;
161
- console.debug(`Using MCP list tools from input for server '${tool.server_label}'`);
162
- break;
163
- }
164
- }
165
- }
166
- // Otherwise, list tools from MCP server
167
- if (!mcpListTools) {
168
- try {
169
- const mcp = await connectMcpServer(tool);
170
- console.debug("Listing MCP tools from server");
171
- const mcpTools = await mcp.listTools();
172
- console.debug(`Fetched ${mcpTools.tools.length} tools from MCP server '${tool.server_label}'`);
173
-
174
- // All tools are returned in Response object
175
- mcpListTools = {
176
- id: generateUniqueId("mcp_list_tools"),
177
- type: "mcp_list_tools",
178
- server_label: tool.server_label,
179
- tools: mcpTools.tools.map((mcpTool) => ({
180
- input_schema: mcpTool.inputSchema,
181
- name: mcpTool.name,
182
- annotations: mcpTool.annotations,
183
- description: mcpTool.description,
184
- })),
185
- };
186
- } catch (error) {
187
- console.error("Error listing tools from MCP server", error);
188
- mcpListTools = {
189
- id: generateUniqueId("mcp_list_tools"),
190
- type: "mcp_list_tools",
191
- server_label: tool.server_label,
192
- tools: [],
193
- error: `Failed to list tools from MCP server '${tool.server_label}': ${error instanceof Error ? error.message : "Unknown error"}`,
194
- };
195
- }
196
- output.push(mcpListTools);
197
- }
198
-
199
- // Only allowed tools are forwarded to the LLM
200
- const allowedTools = tool.allowed_tools
201
- ? Array.isArray(tool.allowed_tools)
202
- ? tool.allowed_tools
203
- : tool.allowed_tools.tool_names
204
- : [];
205
- if (mcpListTools?.tools) {
206
- for (const mcpTool of mcpListTools.tools) {
207
- if (allowedTools.length === 0 || allowedTools.includes(mcpTool.name)) {
208
- tools?.push({
209
- type: "function" as const,
210
- function: {
211
- name: mcpTool.name,
212
- parameters: mcpTool.input_schema,
213
- description: mcpTool.description ?? undefined,
214
- },
215
- });
216
- }
217
- mcpToolsMapping[mcpTool.name] = tool;
218
- }
219
- break;
220
- }
221
- }
222
- }
223
- })
224
- );
225
- }
226
-
227
- if (tools.length === 0) {
228
- tools = undefined;
229
- }
230
-
231
- const model = req.body.model.includes("@") ? req.body.model.split("@")[1] : req.body.model;
232
- const provider = req.body.model.includes("@") ? req.body.model.split("@")[0] : undefined;
233
-
234
  const payload: ChatCompletionInput = {
235
  // main params
236
  model: model,
@@ -269,391 +392,389 @@ export const postCreateResponse = async (
269
  top_p: req.body.top_p,
270
  };
271
 
272
- const responseObject: Omit<Response, "incomplete_details" | "output_text" | "parallel_tool_calls"> = {
273
- created_at: Math.floor(new Date().getTime() / 1000),
274
- error: null,
275
- id: generateUniqueId("resp"),
276
- instructions: req.body.instructions,
277
- max_output_tokens: req.body.max_output_tokens,
278
- metadata: req.body.metadata,
279
- model: req.body.model,
280
- object: "response",
281
- output,
282
- // parallel_tool_calls: req.body.parallel_tool_calls,
283
- status: "in_progress",
284
- text: req.body.text,
285
- tool_choice: req.body.tool_choice ?? "auto",
286
- tools: req.body.tools ?? [],
287
- temperature: req.body.temperature,
288
- top_p: req.body.top_p,
289
- usage: {
290
- input_tokens: 0,
291
- input_tokens_details: { cached_tokens: 0 },
292
- output_tokens: 0,
293
- output_tokens_details: { reasoning_tokens: 0 },
294
- total_tokens: 0,
295
- },
296
- };
297
 
298
- // MCP approval requests => do not call LLM at all
299
- if (Array.isArray(req.body.input)) {
300
- for (const item of req.body.input) {
301
- // Note: currently supporting only 1 mcp_approval_response per request
302
- if (item.type === "mcp_approval_response" && item.approve) {
303
- const approvalRequest = req.body.input.find(
304
- (i) => i.type === "mcp_approval_request" && i.id === item.approval_request_id
305
- ) as McpApprovalRequestParams | undefined;
306
- console.log("approvalRequest", approvalRequest);
307
- if (approvalRequest) {
308
- const toolParams = mcpToolsMapping[approvalRequest.name];
309
- responseObject.output.push(
310
- await callMcpTool(toolParams, approvalRequest.name, toolParams.server_label, approvalRequest.arguments)
311
- );
312
- responseObject.status = "completed";
313
- res.json(responseObject);
314
- return;
315
- } else {
316
- responseObject.status = "failed";
317
- const errorMessage = `MCP approval response for approval request '${item.approval_request_id}' not found`;
318
- console.error(errorMessage);
319
- responseObject.error = {
320
- code: "server_error",
321
- message: errorMessage,
322
- };
323
- res.json(responseObject);
324
- return;
325
- }
326
- }
327
- }
328
  }
 
329
 
330
- // Streaming mode
331
- if (req.body.stream) {
332
- res.setHeader("Content-Type", "text/event-stream");
333
- res.setHeader("Connection", "keep-alive");
334
- let sequenceNumber = 0;
 
 
 
335
 
336
- // Emit events in sequence
337
- const emitEvent = (event: ResponseStreamEvent) => {
338
- res.write(`data: ${JSON.stringify(event)}\n\n`);
 
 
 
339
  };
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
340
 
341
- try {
342
- // Response created event
343
- emitEvent({
344
- type: "response.created",
345
- response: responseObject as Response,
346
- sequence_number: sequenceNumber++,
347
- });
348
-
349
- // Response in progress event
350
- emitEvent({
351
- type: "response.in_progress",
352
- response: responseObject as Response,
353
- sequence_number: sequenceNumber++,
354
- });
355
-
356
- const stream = client.chatCompletionStream(payload);
357
- let usage: ChatCompletionStreamOutputUsage | undefined;
358
-
359
- for await (const chunk of stream) {
360
- if (chunk.usage) {
361
- usage = chunk.usage;
362
- }
363
-
364
- if (chunk.choices[0].delta.content) {
365
- if (responseObject.output.length === 0) {
366
- const outputObject: ResponseOutputMessage = {
367
- id: generateUniqueId("msg"),
368
- type: "message",
369
- role: "assistant",
370
- status: "in_progress",
371
- content: [],
372
- };
373
- responseObject.output = [outputObject];
374
-
375
- // Response output item added event
376
- emitEvent({
377
- type: "response.output_item.added",
378
- output_index: 0,
379
- item: outputObject,
380
- sequence_number: sequenceNumber++,
381
- });
382
- }
383
-
384
- const outputObject = responseObject.output.at(-1);
385
- if (!outputObject || outputObject.type !== "message") {
386
- throw new StreamingError("Not implemented: only single output item type is supported in streaming mode.");
387
- }
388
-
389
- if (outputObject.content.length === 0) {
390
- // Response content part added event
391
- const contentPart: ResponseContentPartAddedEvent["part"] = {
392
- type: "output_text",
393
- text: "",
394
- annotations: [],
395
- };
396
- outputObject.content.push(contentPart);
397
-
398
- emitEvent({
399
- type: "response.content_part.added",
400
- item_id: outputObject.id,
401
- output_index: 0,
402
- content_index: 0,
403
- part: contentPart,
404
- sequence_number: sequenceNumber++,
405
- });
406
- }
407
 
408
- const contentPart = outputObject.content.at(-1);
409
- if (!contentPart || contentPart.type !== "output_text") {
410
- throw new StreamingError("Not implemented: only output_text is supported in streaming mode.");
411
- }
412
 
413
- // Add text delta
414
- contentPart.text += chunk.choices[0].delta.content;
415
- emitEvent({
416
- type: "response.output_text.delta",
417
- item_id: outputObject.id,
418
- output_index: 0,
419
- content_index: 0,
420
- delta: chunk.choices[0].delta.content,
421
- sequence_number: sequenceNumber++,
422
- });
423
- } else if (chunk.choices[0].delta.tool_calls && chunk.choices[0].delta.tool_calls.length > 0) {
424
- if (chunk.choices[0].delta.tool_calls.length > 1) {
425
- throw new StreamingError("Not implemented: only single tool call is supported in streaming mode.");
426
- }
 
 
 
 
 
427
 
428
- if (responseObject.output.length === 0) {
429
- if (!chunk.choices[0].delta.tool_calls[0].function.name) {
430
- throw new StreamingError("Tool call function name is required.");
431
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
432
 
433
- const outputObject: ResponseFunctionToolCall = {
434
- type: "function_call",
435
- id: generateUniqueId("fc"),
436
- call_id: chunk.choices[0].delta.tool_calls[0].id,
437
- name: chunk.choices[0].delta.tool_calls[0].function.name,
438
- arguments: "",
439
- };
440
- responseObject.output = [outputObject];
441
-
442
- // Response output item added event
443
- emitEvent({
444
- type: "response.output_item.added",
445
- output_index: 0,
446
- item: outputObject,
447
- sequence_number: sequenceNumber++,
448
- });
449
- }
450
 
451
- const outputObject = responseObject.output.at(-1);
452
- if (!outputObject || !outputObject.id || outputObject.type !== "function_call") {
453
- throw new StreamingError("Not implemented: can only support single output item type in streaming mode.");
454
- }
 
 
 
 
 
 
 
 
 
 
455
 
456
- outputObject.arguments += chunk.choices[0].delta.tool_calls[0].function.arguments;
457
- emitEvent({
458
- type: "response.function_call_arguments.delta",
459
- item_id: outputObject.id,
460
- output_index: 0,
461
- delta: chunk.choices[0].delta.tool_calls[0].function.arguments,
462
- sequence_number: sequenceNumber++,
463
- });
464
  }
465
- }
466
 
467
- const lastOutputItem = responseObject.output.at(-1);
468
-
469
- if (lastOutputItem) {
470
- if (lastOutputItem?.type === "message") {
471
- const contentPart = lastOutputItem.content.at(-1);
472
- if (contentPart?.type === "output_text") {
473
- emitEvent({
474
- type: "response.output_text.done",
475
- item_id: lastOutputItem.id,
476
- output_index: responseObject.output.length - 1,
477
- content_index: lastOutputItem.content.length - 1,
478
- text: contentPart.text,
479
- sequence_number: sequenceNumber++,
480
- });
481
-
482
- emitEvent({
483
- type: "response.content_part.done",
484
- item_id: lastOutputItem.id,
485
- output_index: responseObject.output.length - 1,
486
- content_index: lastOutputItem.content.length - 1,
487
- part: contentPart,
488
- sequence_number: sequenceNumber++,
489
- });
490
- } else {
491
- throw new StreamingError("Not implemented: only output_text is supported in streaming mode.");
492
- }
493
 
494
- // Response output item done event
495
- lastOutputItem.status = "completed";
496
- emitEvent({
497
- type: "response.output_item.done",
498
- output_index: responseObject.output.length - 1,
499
- item: lastOutputItem,
500
- sequence_number: sequenceNumber++,
501
- });
502
- } else if (lastOutputItem?.type === "function_call") {
503
- if (!lastOutputItem.id) {
504
- throw new StreamingError("Function call id is required.");
505
- }
506
 
507
- emitEvent({
508
- type: "response.function_call_arguments.done",
509
- item_id: lastOutputItem.id,
510
- output_index: responseObject.output.length - 1,
511
- arguments: lastOutputItem.arguments,
512
- sequence_number: sequenceNumber++,
513
- });
 
 
 
 
 
 
 
 
514
 
515
- lastOutputItem.status = "completed";
516
- emitEvent({
517
- type: "response.output_item.done",
518
- output_index: responseObject.output.length - 1,
519
- item: lastOutputItem,
520
- sequence_number: sequenceNumber++,
521
- });
522
- } else {
523
- throw new StreamingError("Not implemented: only message output is supported in streaming mode.");
524
- }
525
- }
 
 
526
 
527
- // Response completed event
528
- responseObject.status = "completed";
529
- if (usage) {
530
- responseObject.usage = {
531
- input_tokens: usage.prompt_tokens,
532
- input_tokens_details: { cached_tokens: 0 },
533
- output_tokens: usage.completion_tokens,
534
- output_tokens_details: { reasoning_tokens: 0 },
535
- total_tokens: usage.total_tokens,
536
  };
 
 
537
  }
538
- emitEvent({
539
- type: "response.completed",
540
- response: responseObject as Response,
541
- sequence_number: sequenceNumber++,
542
- });
543
- } catch (streamError) {
544
- console.error("Error in streaming chat completion:", streamError);
545
-
546
- let message = "An error occurred while streaming from inference server.";
547
- if (streamError instanceof StreamingError) {
548
- message = streamError.message;
549
- } else if (
550
- typeof streamError === "object" &&
551
- streamError &&
552
- "message" in streamError &&
553
- typeof streamError.message === "string"
554
- ) {
555
- message = streamError.message;
556
- }
557
- responseObject.status = "failed";
558
- responseObject.error = {
559
- code: "server_error",
560
- message,
 
561
  };
562
- emitEvent({
563
- type: "response.failed",
564
- response: responseObject as Response,
565
- sequence_number: sequenceNumber++,
566
- });
 
 
 
 
 
 
 
 
 
 
 
 
 
567
  }
568
- res.end();
569
- return;
570
  }
 
571
 
572
- try {
573
- const chatCompletionResponse = await client.chatCompletion(payload);
 
 
 
 
 
 
 
 
 
 
574
 
575
- responseObject.status = "completed";
576
- for (const choice of chatCompletionResponse.choices) {
577
- if (choice.message.content) {
578
- responseObject.output.push({
579
- id: generateUniqueId("msg"),
580
- type: "message",
581
- role: "assistant",
582
- status: "completed",
583
- content: [
584
- {
585
- type: "output_text",
586
- text: choice.message.content,
587
- annotations: [],
588
- },
589
- ],
590
- });
591
- }
592
- if (choice.message.tool_calls) {
593
- for (const toolCall of choice.message.tool_calls) {
594
- if (toolCall.function.name in mcpToolsMapping) {
595
- const toolParams = mcpToolsMapping[toolCall.function.name];
596
-
597
- // Check if approval is required
598
- const approvalRequired =
599
- toolParams.require_approval === "always"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
600
  ? true
601
- : toolParams.require_approval === "never"
602
  ? false
603
- : toolParams.require_approval.always?.tool_names?.includes(toolCall.function.name)
604
- ? true
605
- : toolParams.require_approval.never?.tool_names?.includes(toolCall.function.name)
606
- ? false
607
- : true; // behavior is undefined in specs, let's default to
608
-
609
- if (approvalRequired) {
610
- // TODO: Implement approval logic
611
- console.log(`Requesting approval for MCP tool '${toolCall.function.name}'`);
612
- responseObject.output.push({
613
- type: "mcp_approval_request",
614
- id: generateUniqueId("mcp_approval_request"),
615
- name: toolCall.function.name,
616
- server_label: toolParams.server_label,
617
- arguments: toolCall.function.arguments,
618
- });
619
- } else {
620
- responseObject.output.push(
621
- await callMcpTool(
622
- toolParams,
623
- toolCall.function.name,
624
- toolParams.server_label,
625
- toolCall.function.arguments
626
- )
627
- );
628
- }
 
 
 
 
 
 
 
629
  } else {
630
- responseObject.output.push({
631
- type: "function_call",
632
- id: generateUniqueId("fc"),
633
- call_id: toolCall.id,
634
- name: toolCall.function.name,
635
- arguments: toolCall.function.arguments,
636
- status: "completed",
637
- });
638
  }
639
  }
640
  }
641
  }
642
-
643
- responseObject.usage = {
644
- input_tokens: chatCompletionResponse.usage.prompt_tokens,
645
- input_tokens_details: { cached_tokens: 0 },
646
- output_tokens: chatCompletionResponse.usage.completion_tokens,
647
- output_tokens_details: { reasoning_tokens: 0 },
648
- total_tokens: chatCompletionResponse.usage.total_tokens,
649
- };
650
-
651
- res.json(responseObject);
652
- } catch (error) {
653
- console.error(error);
654
- res.status(500).json({
655
- success: false,
656
- error: error instanceof Error ? error.message : "Unknown error",
657
- });
658
  }
659
- };
 
17
  ResponseFunctionToolCall,
18
  ResponseOutputItem,
19
  } from "openai/resources/responses/responses";
20
+ import type { ChatCompletionInputTool } from "@huggingface/tasks/dist/commonjs/tasks/chat-completion/inference.js";
 
 
 
21
  import { callMcpTool, connectMcpServer } from "../mcp.js";
22
 
23
  class StreamingError extends Error {
 
27
  }
28
  }
29
 
30
+ type IncompleteResponse = Omit<Response, "incomplete_details" | "output_text" | "parallel_tool_calls">;
31
+ const SEQUENCE_NUMBER_PLACEHOLDER = -1;
32
+
33
  export const postCreateResponse = async (
34
  req: ValidatedRequest<CreateResponseParams>,
35
  res: ExpressResponse
36
  ): Promise<void> => {
37
+ // To avoid duplicated code, we run all requests as stream.
38
+ const events = runCreateResponseStream(req, res);
39
+
40
+ // Then we return in the correct format depending on the user 'stream' flag.
41
+ if (req.body.stream) {
42
+ res.setHeader("Content-Type", "text/event-stream");
43
+ res.setHeader("Connection", "keep-alive");
44
+ console.debug("Stream request");
45
+ for await (const event of events) {
46
+ console.debug(`Event #${event.sequence_number}: ${event.type}`);
47
+ res.write(`data: ${JSON.stringify(event)}\n\n`);
48
+ }
49
+ res.end();
50
+ } else {
51
+ console.debug("Non-stream request");
52
+ for await (const event of events) {
53
+ if (event.type === "response.completed" || event.type === "response.failed") {
54
+ console.debug(event.type);
55
+ res.json(event.response);
56
+ }
57
+ }
58
+ }
59
+ };
60
 
61
+ /*
62
+ * Top-level stream.
63
+ *
64
+ * Handles response lifecycle + execute inner logic (MCP list tools, MCP tool calls, LLM call, etc.).
65
+ * Handles sequenceNumber by overwriting it in the events.
66
+ */
67
+ async function* runCreateResponseStream(
68
+ req: ValidatedRequest<CreateResponseParams>,
69
+ res: ExpressResponse
70
+ ): AsyncGenerator<ResponseStreamEvent> {
71
+ let sequenceNumber = 0;
72
+ // Prepare response object that will be iteratively populated
73
+ const responseObject: IncompleteResponse = {
74
+ created_at: Math.floor(new Date().getTime() / 1000),
75
+ error: null,
76
+ id: generateUniqueId("resp"),
77
+ instructions: req.body.instructions,
78
+ max_output_tokens: req.body.max_output_tokens,
79
+ metadata: req.body.metadata,
80
+ model: req.body.model,
81
+ object: "response",
82
+ output: [],
83
+ // parallel_tool_calls: req.body.parallel_tool_calls,
84
+ status: "in_progress",
85
+ text: req.body.text,
86
+ tool_choice: req.body.tool_choice ?? "auto",
87
+ tools: req.body.tools ?? [],
88
+ temperature: req.body.temperature,
89
+ top_p: req.body.top_p,
90
+ usage: {
91
+ input_tokens: 0,
92
+ input_tokens_details: { cached_tokens: 0 },
93
+ output_tokens: 0,
94
+ output_tokens_details: { reasoning_tokens: 0 },
95
+ total_tokens: 0,
96
+ },
97
+ };
98
+
99
+ // Response created event
100
+ yield {
101
+ type: "response.created",
102
+ response: responseObject as Response,
103
+ sequence_number: sequenceNumber++,
104
+ };
105
+
106
+ // Response in progress event
107
+ yield {
108
+ type: "response.in_progress",
109
+ response: responseObject as Response,
110
+ sequence_number: sequenceNumber++,
111
+ };
112
+
113
+ // Any events (LLM call, MCP call, list tools, etc.)
114
+ try {
115
+ for await (const event of innerRunStream(req, res, responseObject)) {
116
+ yield { ...event, sequence_number: sequenceNumber++ };
117
+ }
118
+ } catch (error) {
119
+ // Error event => stop
120
+ console.error("Error in stream:", error);
121
+ const message =
122
+ typeof error === "object" && error && "message" in error && typeof error.message === "string"
123
+ ? error.message
124
+ : "An error occurred in stream";
125
+ responseObject.status = "failed";
126
+ responseObject.error = {
127
+ code: "server_error",
128
+ message,
129
+ };
130
+ yield {
131
+ type: "response.failed",
132
+ response: responseObject as Response,
133
+ sequence_number: sequenceNumber++,
134
+ };
135
+ return;
136
+ }
137
+
138
+ // Response completed event
139
+ yield {
140
+ type: "response.completed",
141
+ response: responseObject as Response,
142
+ sequence_number: sequenceNumber++,
143
+ };
144
+ }
145
+
146
+ async function* innerRunStream(
147
+ req: ValidatedRequest<CreateResponseParams>,
148
+ res: ExpressResponse,
149
+ responseObject: IncompleteResponse
150
+ ): AsyncGenerator<ResponseStreamEvent> {
151
+ // Retrieve API key from headers
152
+ const apiKey = req.headers.authorization?.split(" ")[1];
153
  if (!apiKey) {
154
  res.status(401).json({
155
  success: false,
 
158
  return;
159
  }
160
 
161
+ // List MCP tools from server (if required) + prepare tools for the LLM
162
+ let tools: ChatCompletionInputTool[] | undefined = [];
163
+ const mcpToolsMapping: Record<string, McpServerParams> = {};
164
+ if (req.body.tools) {
165
+ for (const tool of req.body.tools) {
166
+ switch (tool.type) {
167
+ case "function":
168
+ tools?.push({
169
+ type: tool.type,
170
+ function: {
171
+ name: tool.name,
172
+ parameters: tool.parameters,
173
+ description: tool.description,
174
+ strict: tool.strict,
175
+ },
176
+ });
177
+ break;
178
+ case "mcp": {
179
+ let mcpListTools: ResponseOutputItem.McpListTools | undefined;
180
+
181
+ // If MCP list tools is already in the input, use it
182
+ if (Array.isArray(req.body.input)) {
183
+ for (const item of req.body.input) {
184
+ if (item.type === "mcp_list_tools" && item.server_label === tool.server_label) {
185
+ mcpListTools = item;
186
+ console.debug(`Using MCP list tools from input for server '${tool.server_label}'`);
187
+ break;
188
+ }
189
+ }
190
+ }
191
+ // Otherwise, list tools from MCP server
192
+ if (!mcpListTools) {
193
+ for await (const event of listMcpToolsStream(tool, responseObject)) {
194
+ yield event;
195
+ }
196
+ mcpListTools = responseObject.output.at(-1) as ResponseOutputItem.McpListTools;
197
+ }
198
+
199
+ // Only allowed tools are forwarded to the LLM
200
+ const allowedTools = tool.allowed_tools
201
+ ? Array.isArray(tool.allowed_tools)
202
+ ? tool.allowed_tools
203
+ : tool.allowed_tools.tool_names
204
+ : [];
205
+ if (mcpListTools?.tools) {
206
+ for (const mcpTool of mcpListTools.tools) {
207
+ if (allowedTools.length === 0 || allowedTools.includes(mcpTool.name)) {
208
+ tools?.push({
209
+ type: "function" as const,
210
+ function: {
211
+ name: mcpTool.name,
212
+ parameters: mcpTool.input_schema,
213
+ description: mcpTool.description ?? undefined,
214
+ },
215
+ });
216
+ }
217
+ mcpToolsMapping[mcpTool.name] = tool;
218
+ }
219
+ break;
220
+ }
221
+ }
222
+ }
223
+ }
224
+ }
225
+ if (tools.length === 0) {
226
+ tools = undefined;
227
+ }
228
+
229
+ // If MCP approval requests => execute them and return (no LLM call)
230
+ if (Array.isArray(req.body.input)) {
231
+ for (const item of req.body.input) {
232
+ // Note: currently supporting only 1 mcp_approval_response per request
233
+ let shouldStop = false;
234
+ if (item.type === "mcp_approval_response" && item.approve) {
235
+ const approvalRequest = req.body.input.find(
236
+ (i) => i.type === "mcp_approval_request" && i.id === item.approval_request_id
237
+ ) as McpApprovalRequestParams | undefined;
238
+ for await (const event of callApprovedMCPToolStream(
239
+ item.approval_request_id,
240
+ approvalRequest,
241
+ mcpToolsMapping,
242
+ responseObject
243
+ )) {
244
+ yield event;
245
+ }
246
+ shouldStop = true;
247
+ }
248
+ if (shouldStop) {
249
+ // stop if at least one approval request is processed
250
+ break;
251
+ }
252
+ }
253
+ }
254
+
255
+ // At this point, we have all tools and we know we want to call the LLM
256
+ // Let's prepare the payload and make the call!
257
+
258
+ // Resolve model and provider
259
+ const model = req.body.model.includes("@") ? req.body.model.split("@")[1] : req.body.model;
260
+ const provider = req.body.model.includes("@") ? req.body.model.split("@")[0] : undefined;
261
+
262
+ // Format input to Chat Completion format
263
  const messages: ChatCompletionInputMessage[] = req.body.instructions
264
  ? [{ role: "system", content: req.body.instructions }]
265
  : [];
 
266
  if (Array.isArray(req.body.input)) {
267
  messages.push(
268
  ...req.body.input
 
317
  .filter((item) => item !== undefined),
318
  };
319
  case "mcp_list_tools": {
320
+ // Hacky: will be dropped by filter since tools are passed as separate objects
321
  return {
322
  role: "assistant",
323
  name: "mcp_list_tools",
324
  content: "",
325
  };
326
  }
327
+ case "mcp_call": {
328
+ return {
329
+ role: "assistant",
330
+ name: "mcp_call",
331
+ content: `MCP call (${item.id}). Server: '${item.server_label}'. Tool: '${item.name}'. Arguments: '${item.arguments}'.`,
332
+ };
333
+ }
334
  case "mcp_approval_request": {
335
  return {
336
  role: "assistant",
 
353
  messages.push({ role: "user", content: req.body.input });
354
  }
355
 
356
+ // Prepare payload for the LLM
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
357
  const payload: ChatCompletionInput = {
358
  // main params
359
  model: model,
 
392
  top_p: req.body.top_p,
393
  };
394
 
395
+ // Call LLM
396
+ for await (const event of callLLMStream(apiKey, payload, responseObject, mcpToolsMapping)) {
397
+ yield event;
398
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
399
 
400
+ // Handle MCP tool calls if any
401
+ for await (const event of handleMCPToolCallsAfterLLM(responseObject, mcpToolsMapping)) {
402
+ yield event;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
403
  }
404
+ }
405
 
406
+ async function* listMcpToolsStream(
407
+ tool: McpServerParams,
408
+ responseObject: IncompleteResponse
409
+ ): AsyncGenerator<ResponseStreamEvent> {
410
+ yield {
411
+ type: "response.mcp_list_tools.in_progress",
412
+ sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
413
+ };
414
 
415
+ try {
416
+ const mcp = await connectMcpServer(tool);
417
+ const mcpTools = await mcp.listTools();
418
+ yield {
419
+ type: "response.mcp_list_tools.completed",
420
+ sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
421
  };
422
+ responseObject.output.push({
423
+ id: generateUniqueId("mcp_list_tools"),
424
+ type: "mcp_list_tools",
425
+ server_label: tool.server_label,
426
+ tools: mcpTools.tools.map((mcpTool) => ({
427
+ input_schema: mcpTool.inputSchema,
428
+ name: mcpTool.name,
429
+ annotations: mcpTool.annotations,
430
+ description: mcpTool.description,
431
+ })),
432
+ });
433
+ } catch (error) {
434
+ const errorMessage = `Failed to list tools from MCP server '${tool.server_label}': ${error instanceof Error ? error.message : "Unknown error"}`;
435
+ console.error(errorMessage);
436
+ yield {
437
+ type: "response.mcp_list_tools.failed",
438
+ sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
439
+ };
440
+ throw new Error(errorMessage);
441
+ }
442
+ }
443
 
444
+ /*
445
+ * Call LLM and stream the response.
446
+ */
447
+ async function* callLLMStream(
448
+ apiKey: string | undefined,
449
+ payload: ChatCompletionInput,
450
+ responseObject: IncompleteResponse,
451
+ mcpToolsMapping: Record<string, McpServerParams>
452
+ ): AsyncGenerator<ResponseStreamEvent> {
453
+ const stream = new InferenceClient(apiKey).chatCompletionStream(payload);
454
+
455
+ for await (const chunk of stream) {
456
+ if (chunk.usage) {
457
+ // Overwrite usage with the latest chunk's usage
458
+ responseObject.usage = {
459
+ input_tokens: chunk.usage.prompt_tokens,
460
+ input_tokens_details: { cached_tokens: 0 },
461
+ output_tokens: chunk.usage.completion_tokens,
462
+ output_tokens_details: { reasoning_tokens: 0 },
463
+ total_tokens: chunk.usage.total_tokens,
464
+ };
465
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
466
 
467
+ const delta = chunk.choices[0].delta;
468
+ if (delta.content) {
469
+ let currentOutputItem = responseObject.output.at(-1);
 
470
 
471
+ // If start of a new message, create it
472
+ if (currentOutputItem?.type !== "message" || currentOutputItem?.status !== "in_progress") {
473
+ const outputObject: ResponseOutputMessage = {
474
+ id: generateUniqueId("msg"),
475
+ type: "message",
476
+ role: "assistant",
477
+ status: "in_progress",
478
+ content: [],
479
+ };
480
+ responseObject.output.push(outputObject);
481
+
482
+ // Response output item added event
483
+ yield {
484
+ type: "response.output_item.added",
485
+ output_index: 0,
486
+ item: outputObject,
487
+ sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
488
+ };
489
+ }
490
 
491
+ // If start of a new content part, create it
492
+ currentOutputItem = responseObject.output.at(-1) as ResponseOutputMessage;
493
+ if (currentOutputItem.content.length === 0) {
494
+ // Response content part added event
495
+ const contentPart: ResponseContentPartAddedEvent["part"] = {
496
+ type: "output_text",
497
+ text: "",
498
+ annotations: [],
499
+ };
500
+ currentOutputItem.content.push(contentPart);
501
+
502
+ yield {
503
+ type: "response.content_part.added",
504
+ item_id: currentOutputItem.id,
505
+ output_index: responseObject.output.length - 1,
506
+ content_index: currentOutputItem.content.length - 1,
507
+ part: contentPart,
508
+ sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
509
+ };
510
+ }
511
 
512
+ const contentPart = currentOutputItem.content.at(-1);
513
+ if (!contentPart || contentPart.type !== "output_text") {
514
+ throw new StreamingError(
515
+ `Not implemented: only output_text is supported in response.output[].content[].type. Got ${contentPart?.type}`
516
+ );
517
+ }
 
 
 
 
 
 
 
 
 
 
 
518
 
519
+ // Add text delta
520
+ contentPart.text += delta.content;
521
+ yield {
522
+ type: "response.output_text.delta",
523
+ item_id: currentOutputItem.id,
524
+ output_index: responseObject.output.length - 1,
525
+ content_index: currentOutputItem.content.length - 1,
526
+ delta: delta.content,
527
+ sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
528
+ };
529
+ } else if (delta.tool_calls && delta.tool_calls.length > 0) {
530
+ if (delta.tool_calls.length > 1) {
531
+ throw new StreamingError("Not implemented: multiple tool calls are not supported.");
532
+ }
533
 
534
+ let currentOutputItem = responseObject.output.at(-1);
535
+ if (currentOutputItem?.type !== "mcp_call" && currentOutputItem?.type !== "function_call") {
536
+ if (!delta.tool_calls[0].function.name) {
537
+ throw new StreamingError("Tool call function name is required when starting a new tool call.");
 
 
 
 
538
  }
 
539
 
540
+ const newOutputObject: ResponseOutputItem.McpCall | ResponseFunctionToolCall =
541
+ delta.tool_calls[0].function.name in mcpToolsMapping
542
+ ? {
543
+ type: "mcp_call",
544
+ id: generateUniqueId("mcp_call"),
545
+ name: delta.tool_calls[0].function.name,
546
+ server_label: mcpToolsMapping[delta.tool_calls[0].function.name].server_label,
547
+ arguments: "",
548
+ }
549
+ : {
550
+ type: "function_call",
551
+ id: generateUniqueId("fc"),
552
+ call_id: delta.tool_calls[0].id,
553
+ name: delta.tool_calls[0].function.name,
554
+ arguments: "",
555
+ };
 
 
 
 
 
 
 
 
 
 
556
 
557
+ // Response output item added event
558
+ responseObject.output.push(newOutputObject);
559
+ yield {
560
+ type: "response.output_item.added",
561
+ output_index: responseObject.output.length - 1,
562
+ item: newOutputObject,
563
+ sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
564
+ };
565
+ }
 
 
 
566
 
567
+ // Current item is necessarily a tool call
568
+ currentOutputItem = responseObject.output.at(-1) as ResponseOutputItem.McpCall | ResponseFunctionToolCall;
569
+ currentOutputItem.arguments += delta.tool_calls[0].function.arguments;
570
+ yield {
571
+ type:
572
+ currentOutputItem.type === "mcp_call"
573
+ ? "response.mcp_call.arguments_delta"
574
+ : "response.function_call_arguments.delta",
575
+ item_id: currentOutputItem.id as string,
576
+ output_index: responseObject.output.length - 1,
577
+ delta: delta.tool_calls[0].function.arguments,
578
+ sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
579
+ };
580
+ }
581
+ }
582
 
583
+ const lastOutputItem = responseObject.output.at(-1);
584
+ if (lastOutputItem) {
585
+ if (lastOutputItem?.type === "message") {
586
+ const contentPart = lastOutputItem.content.at(-1);
587
+ if (contentPart?.type === "output_text") {
588
+ yield {
589
+ type: "response.output_text.done",
590
+ item_id: lastOutputItem.id,
591
+ output_index: responseObject.output.length - 1,
592
+ content_index: lastOutputItem.content.length - 1,
593
+ text: contentPart.text,
594
+ sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
595
+ };
596
 
597
+ yield {
598
+ type: "response.content_part.done",
599
+ item_id: lastOutputItem.id,
600
+ output_index: responseObject.output.length - 1,
601
+ content_index: lastOutputItem.content.length - 1,
602
+ part: contentPart,
603
+ sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
 
 
604
  };
605
+ } else {
606
+ throw new StreamingError("Not implemented: only output_text is supported in streaming mode.");
607
  }
608
+
609
+ // Response output item done event
610
+ lastOutputItem.status = "completed";
611
+ yield {
612
+ type: "response.output_item.done",
613
+ output_index: responseObject.output.length - 1,
614
+ item: lastOutputItem,
615
+ sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
616
+ };
617
+ } else if (lastOutputItem?.type === "function_call") {
618
+ yield {
619
+ type: "response.function_call_arguments.done",
620
+ item_id: lastOutputItem.id as string,
621
+ output_index: responseObject.output.length - 1,
622
+ arguments: lastOutputItem.arguments,
623
+ sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
624
+ };
625
+
626
+ lastOutputItem.status = "completed";
627
+ yield {
628
+ type: "response.output_item.done",
629
+ output_index: responseObject.output.length - 1,
630
+ item: lastOutputItem,
631
+ sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
632
  };
633
+ } else if (lastOutputItem?.type === "mcp_call") {
634
+ yield {
635
+ type: "response.mcp_call.arguments_done",
636
+ item_id: lastOutputItem.id as string,
637
+ output_index: responseObject.output.length - 1,
638
+ arguments: lastOutputItem.arguments,
639
+ sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
640
+ };
641
+ yield {
642
+ type: "response.output_item.done",
643
+ output_index: responseObject.output.length - 1,
644
+ item: lastOutputItem,
645
+ sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
646
+ };
647
+ } else {
648
+ throw new StreamingError(
649
+ `Not implemented: expected message, function_call, or mcp_call, got ${lastOutputItem?.type}`
650
+ );
651
  }
 
 
652
  }
653
+ }
654
 
655
+ /*
656
+ * Perform an approved MCP tool call and stream the response.
657
+ */
658
+ async function* callApprovedMCPToolStream(
659
+ approval_request_id: string,
660
+ approvalRequest: McpApprovalRequestParams | undefined,
661
+ mcpToolsMapping: Record<string, McpServerParams>,
662
+ responseObject: IncompleteResponse
663
+ ): AsyncGenerator<ResponseStreamEvent> {
664
+ if (!approvalRequest) {
665
+ throw new Error(`MCP approval request '${approval_request_id}' not found`);
666
+ }
667
 
668
+ const outputObject: ResponseOutputItem.McpCall = {
669
+ type: "mcp_call",
670
+ id: generateUniqueId("mcp_call"),
671
+ name: approvalRequest.name,
672
+ server_label: approvalRequest.server_label,
673
+ arguments: approvalRequest.arguments,
674
+ };
675
+ responseObject.output.push(outputObject);
676
+
677
+ // Response output item added event
678
+ yield {
679
+ type: "response.output_item.added",
680
+ output_index: responseObject.output.length - 1,
681
+ item: outputObject,
682
+ sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
683
+ };
684
+
685
+ yield {
686
+ type: "response.mcp_call.in_progress",
687
+ item_id: outputObject.id,
688
+ output_index: responseObject.output.length - 1,
689
+ sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
690
+ };
691
+
692
+ const toolParams = mcpToolsMapping[approvalRequest.name];
693
+ const toolResult = await callMcpTool(toolParams, approvalRequest.name, approvalRequest.arguments);
694
+
695
+ if (toolResult.error) {
696
+ outputObject.error = toolResult.error;
697
+ yield {
698
+ type: "response.mcp_call.failed",
699
+ sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
700
+ };
701
+ throw new Error(outputObject.error);
702
+ }
703
+
704
+ outputObject.output = toolResult.output;
705
+ yield {
706
+ type: "response.mcp_call.completed",
707
+ sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
708
+ };
709
+ yield {
710
+ type: "response.output_item.done",
711
+ output_index: responseObject.output.length - 1,
712
+ item: outputObject,
713
+ sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
714
+ };
715
+ }
716
+
717
+ async function* handleMCPToolCallsAfterLLM(
718
+ responseObject: IncompleteResponse,
719
+ mcpToolsMapping: Record<string, McpServerParams>
720
+ ): AsyncGenerator<ResponseStreamEvent> {
721
+ for (let output_index = 0; output_index < responseObject.output.length; output_index++) {
722
+ const outputItem = responseObject.output[output_index];
723
+ if (outputItem.type === "mcp_call") {
724
+ const toolCall = outputItem as ResponseOutputItem.McpCall;
725
+ const toolParams = mcpToolsMapping[toolCall.name];
726
+ if (toolParams) {
727
+ const approvalRequired =
728
+ toolParams.require_approval === "always"
729
+ ? true
730
+ : toolParams.require_approval === "never"
731
+ ? false
732
+ : toolParams.require_approval.always?.tool_names?.includes(toolCall.name)
733
  ? true
734
+ : toolParams.require_approval.never?.tool_names?.includes(toolCall.name)
735
  ? false
736
+ : true; // behavior is undefined in specs, let's default to
737
+
738
+ if (approvalRequired) {
739
+ const approvalRequest: ResponseOutputItem.McpApprovalRequest = {
740
+ type: "mcp_approval_request",
741
+ id: generateUniqueId("mcp_approval_request"),
742
+ name: toolCall.name,
743
+ server_label: toolParams.server_label,
744
+ arguments: toolCall.arguments,
745
+ };
746
+ responseObject.output.push(approvalRequest);
747
+ yield {
748
+ type: "response.output_item.added",
749
+ output_index: responseObject.output.length,
750
+ item: approvalRequest,
751
+ sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
752
+ };
753
+ } else {
754
+ responseObject.output.push;
755
+ yield {
756
+ type: "response.mcp_call.in_progress",
757
+ item_id: toolCall.id,
758
+ sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
759
+ output_index,
760
+ };
761
+ const toolResult = await callMcpTool(toolParams, toolCall.name, toolCall.arguments);
762
+ if (toolResult.error) {
763
+ toolCall.error = toolResult.error;
764
+ yield {
765
+ type: "response.mcp_call.failed",
766
+ sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
767
+ };
768
+ throw new Error(toolCall.error);
769
  } else {
770
+ toolCall.output = toolResult.output;
771
+ yield {
772
+ type: "response.mcp_call.completed",
773
+ sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
774
+ };
 
 
 
775
  }
776
  }
777
  }
778
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
779
  }
780
+ }
src/schemas.ts CHANGED
@@ -60,10 +60,17 @@ const mcpApprovalRequestParamsSchema = z.object({
60
  });
61
  const mcpApprovalResponseParamsSchema = z.object({
62
  type: z.literal("mcp_approval_response"),
63
- id: z.string().optional(),
64
  approval_request_id: z.string(),
65
  approve: z.boolean(),
66
- reason: z.string().optional(),
 
 
 
 
 
 
 
67
  });
68
 
69
  export const createResponseParamsSchema = z.object({
@@ -136,6 +143,7 @@ export const createResponseParamsSchema = z.object({
136
  }),
137
  mcpApprovalRequestParamsSchema,
138
  mcpApprovalResponseParamsSchema,
 
139
  ])
140
  ),
141
  ]),
@@ -215,3 +223,4 @@ export type CreateResponseParams = z.infer<typeof createResponseParamsSchema>;
215
  export type McpServerParams = z.infer<typeof mcpServerParamsSchema>;
216
  export type McpApprovalRequestParams = z.infer<typeof mcpApprovalRequestParamsSchema>;
217
  export type McpApprovalResponseParams = z.infer<typeof mcpApprovalResponseParamsSchema>;
 
 
60
  });
61
  const mcpApprovalResponseParamsSchema = z.object({
62
  type: z.literal("mcp_approval_response"),
63
+ id: z.string().nullable().default(null),
64
  approval_request_id: z.string(),
65
  approve: z.boolean(),
66
+ reason: z.string().nullable().default(null),
67
+ });
68
+ const mcpCallParamsSchema = z.object({
69
+ type: z.literal("mcp_call"),
70
+ id: z.string(),
71
+ name: z.string(),
72
+ server_label: z.string(),
73
+ arguments: z.string(),
74
  });
75
 
76
  export const createResponseParamsSchema = z.object({
 
143
  }),
144
  mcpApprovalRequestParamsSchema,
145
  mcpApprovalResponseParamsSchema,
146
+ mcpCallParamsSchema,
147
  ])
148
  ),
149
  ]),
 
223
  export type McpServerParams = z.infer<typeof mcpServerParamsSchema>;
224
  export type McpApprovalRequestParams = z.infer<typeof mcpApprovalRequestParamsSchema>;
225
  export type McpApprovalResponseParams = z.infer<typeof mcpApprovalResponseParamsSchema>;
226
+ export type McpCallParams = z.infer<typeof mcpCallParamsSchema>;