Wauplin (HF Staff) committed
Commit d4c3b65 · verified · 1 Parent(s): db8d62b

Upload folder using huggingface_hub

Files changed (3):
  1. README.md               +13 -0
  2. src/routes/responses.ts +248 -160
  3. src/schemas.ts            +1 -1
README.md CHANGED
@@ -11,6 +11,19 @@ app_port: 3000
 ---
 
 
+---
+title: Responses.js
+emoji: 😻
+colorFrom: red
+colorTo: red
+sdk: docker
+pinned: false
+license: mit
+short_description: Check out https://github.com/huggingface/responses.js
+app_port: 3000
+---
+
+
 # responses.js
 
 A lightweight Express.js server that implements OpenAI's Responses API, built on top of Chat Completions and powered by Hugging Face Inference Providers.
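
For orientation, the README's one-line description maps onto a standard OpenAI-client workflow. A minimal sketch of calling this server (assumptions: it listens locally on port 3000 per `app_port`, serves the OpenAI-compatible `/v1` base path, and the model name and token are illustrative):

import OpenAI from "openai";

// Point the official OpenAI SDK at the local responses.js server.
// Base URL, token, and model are assumptions for illustration.
const client = new OpenAI({
	baseURL: "http://localhost:3000/v1",
	apiKey: process.env.HF_TOKEN,
});

const response = await client.responses.create({
	model: "meta-llama/Llama-3.3-70B-Instruct",
	instructions: "You are a helpful assistant.",
	input: "Tell me a three sentence bedtime story about a unicorn.",
});

console.log(response.output_text);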
src/routes/responses.ts CHANGED
@@ -17,7 +17,10 @@ import type {
 	ResponseFunctionToolCall,
 	ResponseOutputItem,
 } from "openai/resources/responses/responses";
-import type { ChatCompletionInputTool } from "@huggingface/tasks/dist/commonjs/tasks/chat-completion/inference.js";
+import type {
+	ChatCompletionInputFunctionDefinition,
+	ChatCompletionInputTool,
+} from "@huggingface/tasks/dist/commonjs/tasks/chat-completion/inference.js";
 import { callMcpTool, connectMcpServer } from "../mcp.js";
 
 class StreamingError extends Error {
@@ -136,6 +139,7 @@ async function* runCreateResponseStream(
 	}
 
 	// Response completed event
+	responseObject.status = "completed";
 	yield {
 		type: "response.completed",
 		response: responseObject as Response,
@@ -226,34 +230,7 @@ async function* innerRunStream(
 		tools = undefined;
 	}
 
-	// If MCP approval requests => execute them and return (no LLM call)
-	if (Array.isArray(req.body.input)) {
-		for (const item of req.body.input) {
-			// Note: currently supporting only 1 mcp_approval_response per request
-			let shouldStop = false;
-			if (item.type === "mcp_approval_response" && item.approve) {
-				const approvalRequest = req.body.input.find(
-					(i) => i.type === "mcp_approval_request" && i.id === item.approval_request_id
-				) as McpApprovalRequestParams | undefined;
-				for await (const event of callApprovedMCPToolStream(
-					item.approval_request_id,
-					approvalRequest,
-					mcpToolsMapping,
-					responseObject
-				)) {
-					yield event;
-				}
-				shouldStop = true;
-			}
-			if (shouldStop) {
-				// stop if at least one approval request is processed
-				break;
-			}
-		}
-	}
-
-	// At this point, we have all tools and we know we want to call the LLM
-	// Let's prepare the payload and make the call!
+	// Prepare payload for the LLM
 
 	// Resolve model and provider
 	const model = req.body.model.includes("@") ? req.body.model.split("@")[1] : req.body.model;
@@ -356,9 +333,9 @@ async function* innerRunStream(
 	// Prepare payload for the LLM
 	const payload: ChatCompletionInput = {
 		// main params
-		model: model,
-		provider: provider,
-		messages: messages,
+		model,
+		provider,
+		messages,
 		stream: req.body.stream,
 		// options
 		max_tokens: req.body.max_output_tokens === null ? undefined : req.body.max_output_tokens,
@@ -392,21 +369,72 @@ async function* innerRunStream(
 		top_p: req.body.top_p,
 	};
 
-	// Call LLM
-	for await (const event of callLLMStream(apiKey, payload, responseObject, mcpToolsMapping)) {
-		yield event;
-	}
+	// If MCP approval requests => execute them and return (no LLM call)
+	if (Array.isArray(req.body.input)) {
+		for (const item of req.body.input) {
+			if (item.type === "mcp_approval_response" && item.approve) {
+				const approvalRequest = req.body.input.find(
+					(i) => i.type === "mcp_approval_request" && i.id === item.approval_request_id
+				) as McpApprovalRequestParams | undefined;
+				const mcpCallId = "mcp_" + item.approval_request_id.split("_")[1];
+				const mcpCall = req.body.input.find((i) => i.type === "mcp_call" && i.id === mcpCallId);
+				if (mcpCall) {
+					// MCP call for that approval request has already been made, so we can skip it
+					continue;
+				}
 
-	// Handle MCP tool calls if any
-	for await (const event of handleMCPToolCallsAfterLLM(responseObject, mcpToolsMapping)) {
-		yield event;
+				for await (const event of callApprovedMCPToolStream(
+					item.approval_request_id,
+					mcpCallId,
+					approvalRequest,
+					mcpToolsMapping,
+					responseObject,
+					payload
+				)) {
+					yield event;
+				}
+			}
+		}
 	}
+
+	// Call the LLM until no new message is added to the payload.
+	// New messages can be added if the LLM calls an MCP tool that is automatically run.
+	// A maximum number of iterations is set to avoid infinite loops.
+	let previousMessageCount: number;
+	let currentMessageCount = payload.messages.length;
+	const MAX_ITERATIONS = 5; // hard-coded
+	let iterations = 0;
+	do {
+		previousMessageCount = currentMessageCount;
+
+		for await (const event of handleOneTurnStream(apiKey, payload, responseObject, mcpToolsMapping)) {
+			yield event;
+		}
+
+		currentMessageCount = payload.messages.length;
+		iterations++;
+	} while (currentMessageCount > previousMessageCount && iterations < MAX_ITERATIONS);
 }
 
 async function* listMcpToolsStream(
 	tool: McpServerParams,
 	responseObject: IncompleteResponse
 ): AsyncGenerator<ResponseStreamEvent> {
+	const outputObject: ResponseOutputItem.McpListTools = {
+		id: generateUniqueId("mcpl"),
+		type: "mcp_list_tools",
+		server_label: tool.server_label,
+		tools: [],
+	};
+	responseObject.output.push(outputObject);
+
+	yield {
+		type: "response.output_item.added",
+		output_index: responseObject.output.length - 1,
+		item: outputObject,
+		sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
+	};
+
 	yield {
 		type: "response.mcp_list_tools.in_progress",
 		sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
@@ -419,17 +447,18 @@ async function* listMcpToolsStream(
 			type: "response.mcp_list_tools.completed",
 			sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
 		};
-		responseObject.output.push({
-			id: generateUniqueId("mcp_list_tools"),
-			type: "mcp_list_tools",
-			server_label: tool.server_label,
-			tools: mcpTools.tools.map((mcpTool) => ({
-				input_schema: mcpTool.inputSchema,
-				name: mcpTool.name,
-				annotations: mcpTool.annotations,
-				description: mcpTool.description,
-			})),
-		});
+		outputObject.tools = mcpTools.tools.map((mcpTool) => ({
+			input_schema: mcpTool.inputSchema,
+			name: mcpTool.name,
+			annotations: mcpTool.annotations,
+			description: mcpTool.description,
+		}));
+		yield {
+			type: "response.output_item.done",
+			output_index: responseObject.output.length - 1,
+			item: outputObject,
+			sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
+		};
 	} catch (error) {
 		const errorMessage = `Failed to list tools from MCP server '${tool.server_label}': ${error instanceof Error ? error.message : "Unknown error"}`;
 		console.error(errorMessage);
@@ -444,27 +473,31 @@ async function* listMcpToolsStream(
 /*
  * Call LLM and stream the response.
  */
-async function* callLLMStream(
+async function* handleOneTurnStream(
 	apiKey: string | undefined,
 	payload: ChatCompletionInput,
 	responseObject: IncompleteResponse,
 	mcpToolsMapping: Record<string, McpServerParams>
 ): AsyncGenerator<ResponseStreamEvent> {
 	const stream = new InferenceClient(apiKey).chatCompletionStream(payload);
+	let previousInputTokens = responseObject.usage?.input_tokens ?? 0;
+	let previousOutputTokens = responseObject.usage?.output_tokens ?? 0;
+	let previousTotalTokens = responseObject.usage?.total_tokens ?? 0;
 
 	for await (const chunk of stream) {
 		if (chunk.usage) {
 			// Overwrite usage with the latest chunk's usage
 			responseObject.usage = {
-				input_tokens: chunk.usage.prompt_tokens,
+				input_tokens: previousInputTokens + chunk.usage.prompt_tokens,
 				input_tokens_details: { cached_tokens: 0 },
-				output_tokens: chunk.usage.completion_tokens,
+				output_tokens: previousOutputTokens + chunk.usage.completion_tokens,
 				output_tokens_details: { reasoning_tokens: 0 },
-				total_tokens: chunk.usage.total_tokens,
+				total_tokens: previousTotalTokens + chunk.usage.total_tokens,
 			};
 		}
 
 		const delta = chunk.choices[0].delta;
+
 		if (delta.content) {
 			let currentOutputItem = responseObject.output.at(-1);
 
@@ -528,32 +561,45 @@ async function* callLLMStream(
 			};
 		} else if (delta.tool_calls && delta.tool_calls.length > 0) {
 			if (delta.tool_calls.length > 1) {
-				throw new StreamingError("Not implemented: multiple tool calls are not supported.");
+				console.log("Multiple tool calls are not supported. Only the first one will be processed.");
 			}
 
 			let currentOutputItem = responseObject.output.at(-1);
-			if (currentOutputItem?.type !== "mcp_call" && currentOutputItem?.type !== "function_call") {
-				if (!delta.tool_calls[0].function.name) {
-					throw new StreamingError("Tool call function name is required when starting a new tool call.");
-				}
-
-				const newOutputObject: ResponseOutputItem.McpCall | ResponseFunctionToolCall =
-					delta.tool_calls[0].function.name in mcpToolsMapping
-						? {
-								type: "mcp_call",
-								id: generateUniqueId("mcp_call"),
-								name: delta.tool_calls[0].function.name,
-								server_label: mcpToolsMapping[delta.tool_calls[0].function.name].server_label,
-								arguments: "",
-							}
-						: {
-								type: "function_call",
-								id: generateUniqueId("fc"),
-								call_id: delta.tool_calls[0].id,
-								name: delta.tool_calls[0].function.name,
-								arguments: "",
-							};
-
+			if (delta.tool_calls[0].function.name) {
+				const functionName = delta.tool_calls[0].function.name;
+				// Tool call with a name => new tool call
+				let newOutputObject:
+					| ResponseOutputItem.McpCall
+					| ResponseFunctionToolCall
+					| ResponseOutputItem.McpApprovalRequest;
+				if (functionName in mcpToolsMapping) {
+					if (requiresApproval(functionName, mcpToolsMapping)) {
+						newOutputObject = {
+							id: generateUniqueId("mcpr"),
+							type: "mcp_approval_request",
+							name: functionName,
+							server_label: mcpToolsMapping[functionName].server_label,
+							arguments: "",
+						};
+					} else {
+						newOutputObject = {
+							type: "mcp_call",
+							id: generateUniqueId("mcp"),
+							name: functionName,
+							server_label: mcpToolsMapping[functionName].server_label,
+							arguments: "",
+						};
+					}
+				} else {
+					newOutputObject = {
+						type: "function_call",
+						id: generateUniqueId("fc"),
+						call_id: delta.tool_calls[0].id,
+						name: functionName,
+						arguments: "",
+					};
+				}
+
 				// Response output item added event
 				responseObject.output.push(newOutputObject);
 				yield {
@@ -562,21 +608,36 @@ async function* callLLMStream(
 					item: newOutputObject,
 					sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
 				};
+				if (newOutputObject.type === "mcp_call") {
+					yield {
+						type: "response.mcp_call.in_progress",
+						sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
+						item_id: newOutputObject.id,
+						output_index: responseObject.output.length - 1,
+					};
+				}
 			}
 
-			// Current item is necessarily a tool call
-			currentOutputItem = responseObject.output.at(-1) as ResponseOutputItem.McpCall | ResponseFunctionToolCall;
-			currentOutputItem.arguments += delta.tool_calls[0].function.arguments;
-			yield {
-				type:
-					currentOutputItem.type === "mcp_call"
-						? "response.mcp_call.arguments_delta"
-						: "response.function_call_arguments.delta",
-				item_id: currentOutputItem.id as string,
-				output_index: responseObject.output.length - 1,
-				delta: delta.tool_calls[0].function.arguments,
-				sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
-			};
+			if (delta.tool_calls[0].function.arguments) {
+				// Current item is necessarily a tool call
+				currentOutputItem = responseObject.output.at(-1) as
+					| ResponseOutputItem.McpCall
+					| ResponseFunctionToolCall
+					| ResponseOutputItem.McpApprovalRequest;
+				currentOutputItem.arguments += delta.tool_calls[0].function.arguments;
+				if (currentOutputItem.type === "mcp_call" || currentOutputItem.type === "function_call") {
+					yield {
+						type:
+							currentOutputItem.type === "mcp_call"
								? ("response.mcp_call_arguments.delta" as "response.mcp_call.arguments_delta") // bug workaround (see https://github.com/openai/openai-node/issues/1562)
								: "response.function_call_arguments.delta",
+						item_id: currentOutputItem.id as string,
+						output_index: responseObject.output.length - 1,
+						delta: delta.tool_calls[0].function.arguments,
+						sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
+					};
+				}
+			}
 		}
 	}
 
@@ -632,12 +693,65 @@ async function* callLLMStream(
 		};
 	} else if (lastOutputItem?.type === "mcp_call") {
 		yield {
-			type: "response.mcp_call.arguments_done",
+			type: "response.mcp_call_arguments.done" as "response.mcp_call.arguments_done", // bug workaround (see https://github.com/openai/openai-node/issues/1562)
 			item_id: lastOutputItem.id as string,
 			output_index: responseObject.output.length - 1,
 			arguments: lastOutputItem.arguments,
 			sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
 		};
+
+		// Call MCP tool
+		const toolParams = mcpToolsMapping[lastOutputItem.name];
+		const toolResult = await callMcpTool(toolParams, lastOutputItem.name, lastOutputItem.arguments);
+		if (toolResult.error) {
+			lastOutputItem.error = toolResult.error;
+			yield {
+				type: "response.mcp_call.failed",
+				sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
+			};
+		} else {
+			lastOutputItem.output = toolResult.output;
+			yield {
+				type: "response.mcp_call.completed",
+				sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
+			};
+		}
+
+		yield {
+			type: "response.output_item.done",
+			output_index: responseObject.output.length - 1,
+			item: lastOutputItem,
+			sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
+		};
+
+		// Updating the payload for next LLM call
+		payload.messages.push(
+			{
+				role: "assistant",
+				tool_calls: [
+					{
+						id: lastOutputItem.id,
+						type: "function",
+						function: {
+							name: lastOutputItem.name,
+							arguments: lastOutputItem.arguments,
+							// Hacky: type is not correct in inference.js. Will fix it but in the meantime we need to cast it.
+							// TODO: fix it in the inference.js package. Should be "arguments" and not "parameters".
+						} as unknown as ChatCompletionInputFunctionDefinition,
+					},
+				],
+			},
+			{
+				role: "tool",
+				tool_call_id: lastOutputItem.id,
+				content: lastOutputItem.output
+					? lastOutputItem.output
+					: lastOutputItem.error
						? `Error: ${lastOutputItem.error}`
						: "",
+			}
+		);
+	} else if (lastOutputItem?.type === "mcp_approval_request") {
 		yield {
 			type: "response.output_item.done",
 			output_index: responseObject.output.length - 1,
@@ -657,9 +771,11 @@ async function* callLLMStream(
  */
 async function* callApprovedMCPToolStream(
 	approval_request_id: string,
+	mcpCallId: string,
 	approvalRequest: McpApprovalRequestParams | undefined,
 	mcpToolsMapping: Record<string, McpServerParams>,
-	responseObject: IncompleteResponse
+	responseObject: IncompleteResponse,
+	payload: ChatCompletionInput
 ): AsyncGenerator<ResponseStreamEvent> {
 	if (!approvalRequest) {
 		throw new Error(`MCP approval request '${approval_request_id}' not found`);
@@ -667,7 +783,7 @@ async function* callApprovedMCPToolStream(
 
 	const outputObject: ResponseOutputItem.McpCall = {
 		type: "mcp_call",
-		id: generateUniqueId("mcp_call"),
+		id: mcpCallId,
 		name: approvalRequest.name,
 		server_label: approvalRequest.server_label,
 		arguments: approvalRequest.arguments,
@@ -698,83 +814,55 @@ async function* callApprovedMCPToolStream(
 			type: "response.mcp_call.failed",
 			sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
 		};
-		throw new Error(outputObject.error);
+	} else {
+		outputObject.output = toolResult.output;
+		yield {
+			type: "response.mcp_call.completed",
+			sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
+		};
 	}
 
-	outputObject.output = toolResult.output;
-	yield {
-		type: "response.mcp_call.completed",
-		sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
-	};
 	yield {
 		type: "response.output_item.done",
 		output_index: responseObject.output.length - 1,
 		item: outputObject,
 		sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
 	};
-}
 
-async function* handleMCPToolCallsAfterLLM(
-	responseObject: IncompleteResponse,
-	mcpToolsMapping: Record<string, McpServerParams>
-): AsyncGenerator<ResponseStreamEvent> {
-	for (let output_index = 0; output_index < responseObject.output.length; output_index++) {
-		const outputItem = responseObject.output[output_index];
-		if (outputItem.type === "mcp_call") {
-			const toolCall = outputItem as ResponseOutputItem.McpCall;
-			const toolParams = mcpToolsMapping[toolCall.name];
-			if (toolParams) {
-				const approvalRequired =
-					toolParams.require_approval === "always"
-						? true
-						: toolParams.require_approval === "never"
							? false
							: toolParams.require_approval.always?.tool_names?.includes(toolCall.name)
								? true
								: toolParams.require_approval.never?.tool_names?.includes(toolCall.name)
									? false
									: true; // behavior is undefined in specs, let's default to
-
-				if (approvalRequired) {
-					const approvalRequest: ResponseOutputItem.McpApprovalRequest = {
-						type: "mcp_approval_request",
-						id: generateUniqueId("mcp_approval_request"),
-						name: toolCall.name,
-						server_label: toolParams.server_label,
-						arguments: toolCall.arguments,
-					};
-					responseObject.output.push(approvalRequest);
-					yield {
-						type: "response.output_item.added",
-						output_index: responseObject.output.length,
-						item: approvalRequest,
-						sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
-					};
-				} else {
-					responseObject.output.push;
-					yield {
-						type: "response.mcp_call.in_progress",
-						item_id: toolCall.id,
-						sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
-						output_index,
-					};
-					const toolResult = await callMcpTool(toolParams, toolCall.name, toolCall.arguments);
-					if (toolResult.error) {
-						toolCall.error = toolResult.error;
-						yield {
-							type: "response.mcp_call.failed",
-							sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
-						};
-						throw new Error(toolCall.error);
-					} else {
-						toolCall.output = toolResult.output;
-						yield {
-							type: "response.mcp_call.completed",
-							sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
-						};
-					}
-				}
-			}
-		}
-	}
+	// Updating the payload for next LLM call
+	payload.messages.push(
+		{
+			role: "assistant",
+			tool_calls: [
+				{
+					id: outputObject.id,
+					type: "function",
+					function: {
+						name: outputObject.name,
+						arguments: outputObject.arguments,
+						// Hacky: type is not correct in inference.js. Will fix it but in the meantime we need to cast it.
+						// TODO: fix it in the inference.js package. Should be "arguments" and not "parameters".
+					} as unknown as ChatCompletionInputFunctionDefinition,
+				},
+			],
+		},
+		{
+			role: "tool",
+			tool_call_id: outputObject.id,
+			content: outputObject.output ? outputObject.output : outputObject.error ? `Error: ${outputObject.error}` : "",
+		}
+	);
+}
+
+function requiresApproval(toolName: string, mcpToolsMapping: Record<string, McpServerParams>): boolean {
+	const toolParams = mcpToolsMapping[toolName];
+	return toolParams.require_approval === "always"
+		? true
+		: toolParams.require_approval === "never"
			? false
			: toolParams.require_approval.always?.tool_names?.includes(toolName)
				? true
				: toolParams.require_approval.never?.tool_names?.includes(toolName)
					? false
					: true; // behavior is undefined in specs, let's default to true
 }
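
The core behavioral change in this file: the old single-shot flow (callLLMStream followed by handleMCPToolCallsAfterLLM) becomes a multi-turn loop, where handleOneTurnStream executes non-approval MCP calls inline and appends both the assistant tool call and its result to payload.messages, so the next iteration re-prompts the model with the tool output. A stripped-down sketch of that control flow (hypothetical runTurn/Message names, not the repo's API):

type Message = { role: "assistant" | "tool" | "user"; content?: string };

// Hypothetical one-turn runner (stub): a real implementation would call the
// model once and, if it requested a tool, push the assistant tool call and
// the tool result onto `messages`.
async function runTurn(messages: Message[]): Promise<void> {
	void messages; // model call elided in this sketch
}

async function runUntilDone(messages: Message[]): Promise<void> {
	const MAX_ITERATIONS = 5; // mirrors the hard-coded cap in the diff
	let iterations = 0;
	let previousCount: number;
	do {
		previousCount = messages.length;
		await runTurn(messages); // may grow `messages` with tool traffic
		iterations++;
		// Terminate when a turn adds no messages (the model stopped calling
		// tools) or when the iteration cap is reached.
	} while (messages.length > previousCount && iterations < MAX_ITERATIONS);
}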
src/schemas.ts CHANGED
@@ -125,7 +125,7 @@ export const createResponseParamsSchema = z.object({
 			output: z.string(),
 			type: z.literal("function_call_output"),
 			id: z.string().optional(),
-			status: z.enum(["in_progress", "completed", "incomplete"]),
+			status: z.enum(["in_progress", "completed", "incomplete"]).optional(),
 		}),
 		z.object({
 			type: z.literal("mcp_list_tools"),
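
The schema relaxation matters for multi-turn replays: clients echo previous output items back as input, and a `function_call_output` item does not always carry a `status`. A quick zod sketch of the effect (the `call_id` sibling field is an assumption; the diff shows only part of the object):

import { z } from "zod";

const functionCallOutputSchema = z.object({
	call_id: z.string(), // assumed field, not shown in the diff
	output: z.string(),
	type: z.literal("function_call_output"),
	id: z.string().optional(),
	status: z.enum(["in_progress", "completed", "incomplete"]).optional(),
});

// Validates after this commit; rejected before, when `status` was required.
functionCallOutputSchema.parse({
	call_id: "call_123",
	output: '{"temperature": "22C"}',
	type: "function_call_output",
});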