Upload folder using huggingface_hub

- README.md +13 -0
- src/routes/responses.ts +248 -160
- src/schemas.ts +1 -1
README.md CHANGED
```diff
@@ -11,6 +11,19 @@ app_port: 3000
 ---
 
 
+---
+title: Responses.js
+emoji: 😻
+colorFrom: red
+colorTo: red
+sdk: docker
+pinned: false
+license: mit
+short_description: Check out https://github.com/huggingface/responses.js
+app_port: 3000
+---
+
+
 # responses.js
 
 A lightweight Express.js server that implements OpenAI's Responses API, built on top of Chat Completions and powered by Hugging Face Inference Providers.
```
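For context, the Space simply hosts the responses.js server described above, so any OpenAI-compatible client can talk to it once it is running. A minimal sketch (not part of the commit), assuming the server listens on the configured `app_port` 3000, that `HF_TOKEN` holds a valid Hugging Face token, and that the model name is only an example:

```ts
// Minimal sketch, not part of the commit: point the official `openai` client
// at a locally running responses.js server. Assumes port 3000 (app_port
// above); HF_TOKEN and the model name are placeholder assumptions.
import OpenAI from "openai";

const client = new OpenAI({
  baseURL: "http://localhost:3000/v1",
  apiKey: process.env.HF_TOKEN ?? "",
});

const response = await client.responses.create({
  model: "meta-llama/Llama-3.3-70B-Instruct",
  input: "Say hello from responses.js!",
});

console.log(response.output_text);
```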
src/routes/responses.ts CHANGED
```diff
@@ -17,7 +17,10 @@ import type {
 	ResponseFunctionToolCall,
 	ResponseOutputItem,
 } from "openai/resources/responses/responses";
-import type { … }
+import type {
+	ChatCompletionInputFunctionDefinition,
+	ChatCompletionInputTool,
+} from "@huggingface/tasks/dist/commonjs/tasks/chat-completion/inference.js";
 import { callMcpTool, connectMcpServer } from "../mcp.js";
 
 class StreamingError extends Error {
```
```diff
@@ -136,6 +139,7 @@ async function* runCreateResponseStream(
 	}
 
 	// Response completed event
+	responseObject.status = "completed";
 	yield {
 		type: "response.completed",
 		response: responseObject as Response,
```
```diff
@@ -226,34 +230,7 @@ async function* innerRunStream(
 		tools = undefined;
 	}
 
-	//
-	if (Array.isArray(req.body.input)) {
-		for (const item of req.body.input) {
-			// Note: currently supporting only 1 mcp_approval_response per request
-			let shouldStop = false;
-			if (item.type === "mcp_approval_response" && item.approve) {
-				const approvalRequest = req.body.input.find(
-					(i) => i.type === "mcp_approval_request" && i.id === item.approval_request_id
-				) as McpApprovalRequestParams | undefined;
-				for await (const event of callApprovedMCPToolStream(
-					item.approval_request_id,
-					approvalRequest,
-					mcpToolsMapping,
-					responseObject
-				)) {
-					yield event;
-				}
-				shouldStop = true;
-			}
-			if (shouldStop) {
-				// stop if at least one approval request is processed
-				break;
-			}
-		}
-	}
-
-	// At this point, we have all tools and we know we want to call the LLM
-	// Let's prepare the payload and make the call!
+	// Prepare payload for the LLM
 
 	// Resolve model and provider
 	const model = req.body.model.includes("@") ? req.body.model.split("@")[1] : req.body.model;
```
```diff
@@ -356,9 +333,9 @@ async function* innerRunStream(
 	// Prepare payload for the LLM
 	const payload: ChatCompletionInput = {
 		// main params
-		model
-		provider
-		messages
+		model,
+		provider,
+		messages,
 		stream: req.body.stream,
 		// options
 		max_tokens: req.body.max_output_tokens === null ? undefined : req.body.max_output_tokens,
```
```diff
@@ -392,21 +369,72 @@ async function* innerRunStream(
 		top_p: req.body.top_p,
 	};
 
-	//
-	// …
+	// If MCP approval requests => execute them and return (no LLM call)
+	if (Array.isArray(req.body.input)) {
+		for (const item of req.body.input) {
+			if (item.type === "mcp_approval_response" && item.approve) {
+				const approvalRequest = req.body.input.find(
+					(i) => i.type === "mcp_approval_request" && i.id === item.approval_request_id
+				) as McpApprovalRequestParams | undefined;
+				const mcpCallId = "mcp_" + item.approval_request_id.split("_")[1];
+				const mcpCall = req.body.input.find((i) => i.type === "mcp_call" && i.id === mcpCallId);
+				if (mcpCall) {
+					// MCP call for that approval request has already been made, so we can skip it
+					continue;
+				}
+
+				for await (const event of callApprovedMCPToolStream(
+					item.approval_request_id,
+					mcpCallId,
+					approvalRequest,
+					mcpToolsMapping,
+					responseObject,
+					payload
+				)) {
+					yield event;
+				}
+			}
+		}
 	}
+
+	// Call the LLM until no new message is added to the payload.
+	// New messages can be added if the LLM calls an MCP tool that is automatically run.
+	// A maximum number of iterations is set to avoid infinite loops.
+	let previousMessageCount: number;
+	let currentMessageCount = payload.messages.length;
+	const MAX_ITERATIONS = 5; // hard-coded
+	let iterations = 0;
+	do {
+		previousMessageCount = currentMessageCount;
+
+		for await (const event of handleOneTurnStream(apiKey, payload, responseObject, mcpToolsMapping)) {
+			yield event;
+		}
+
+		currentMessageCount = payload.messages.length;
+		iterations++;
+	} while (currentMessageCount > previousMessageCount && iterations < MAX_ITERATIONS);
 }
 
 async function* listMcpToolsStream(
 	tool: McpServerParams,
 	responseObject: IncompleteResponse
 ): AsyncGenerator<ResponseStreamEvent> {
+	const outputObject: ResponseOutputItem.McpListTools = {
+		id: generateUniqueId("mcpl"),
+		type: "mcp_list_tools",
+		server_label: tool.server_label,
+		tools: [],
+	};
+	responseObject.output.push(outputObject);
+
+	yield {
+		type: "response.output_item.added",
+		output_index: responseObject.output.length - 1,
+		item: outputObject,
+		sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
+	};
+
 	yield {
 		type: "response.mcp_list_tools.in_progress",
 		sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
```
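The do/while loop introduced above is the core behavioral change: the server re-invokes the LLM for another turn whenever the previous turn appended new messages to `payload.messages` (which only happens when an MCP tool was executed), with a hard cap of five iterations. The same control flow as a generic helper, to make the termination condition explicit; this is a hypothetical sketch, not code from the commit:

```ts
// Hypothetical helper, not in the commit: the generic shape of the loop above.
// Repeats an async-generator "turn" while some size counter keeps growing
// (here: payload.messages.length), with a hard iteration cap as a safety net.
async function* loopWhileGrowing<E>(
  turn: () => AsyncGenerator<E>,
  size: () => number,
  maxIterations = 5
): AsyncGenerator<E> {
  let previous: number;
  let current = size();
  let iterations = 0;
  do {
    previous = current;
    yield* turn(); // one LLM turn; may append tool-call + tool-result messages
    current = size();
    iterations++;
  } while (current > previous && iterations < maxIterations);
}
```

With such a helper, the loop body would reduce to `yield* loopWhileGrowing(() => handleOneTurnStream(apiKey, payload, responseObject, mcpToolsMapping), () => payload.messages.length)`.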
```diff
@@ -419,17 +447,18 @@ async function* listMcpToolsStream(
 			type: "response.mcp_list_tools.completed",
 			sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
 		};
-		// …
+		outputObject.tools = mcpTools.tools.map((mcpTool) => ({
+			input_schema: mcpTool.inputSchema,
+			name: mcpTool.name,
+			annotations: mcpTool.annotations,
+			description: mcpTool.description,
+		}));
+		yield {
+			type: "response.output_item.done",
+			output_index: responseObject.output.length - 1,
+			item: outputObject,
+			sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
+		};
 	} catch (error) {
 		const errorMessage = `Failed to list tools from MCP server '${tool.server_label}': ${error instanceof Error ? error.message : "Unknown error"}`;
 		console.error(errorMessage);
```
```diff
@@ -444,27 +473,31 @@
 /*
  * Call LLM and stream the response.
  */
-async function* callLLMStream(
+async function* handleOneTurnStream(
 	apiKey: string | undefined,
 	payload: ChatCompletionInput,
 	responseObject: IncompleteResponse,
 	mcpToolsMapping: Record<string, McpServerParams>
 ): AsyncGenerator<ResponseStreamEvent> {
 	const stream = new InferenceClient(apiKey).chatCompletionStream(payload);
+	let previousInputTokens = responseObject.usage?.input_tokens ?? 0;
+	let previousOutputTokens = responseObject.usage?.output_tokens ?? 0;
+	let previousTotalTokens = responseObject.usage?.total_tokens ?? 0;
 
 	for await (const chunk of stream) {
 		if (chunk.usage) {
 			// Overwrite usage with the latest chunk's usage
 			responseObject.usage = {
-				input_tokens: chunk.usage.prompt_tokens,
+				input_tokens: previousInputTokens + chunk.usage.prompt_tokens,
 				input_tokens_details: { cached_tokens: 0 },
-				output_tokens: chunk.usage.completion_tokens,
+				output_tokens: previousOutputTokens + chunk.usage.completion_tokens,
 				output_tokens_details: { reasoning_tokens: 0 },
-				total_tokens: chunk.usage.total_tokens,
+				total_tokens: previousTotalTokens + chunk.usage.total_tokens,
 			};
 		}
 
 		const delta = chunk.choices[0].delta;
+
 		if (delta.content) {
 			let currentOutputItem = responseObject.output.at(-1);
 
```
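Because one response can now span several chat-completion streams, `handleOneTurnStream` snapshots the usage counters at the start of each turn and adds every chunk's (stream-cumulative) totals on top of that snapshot, rather than plainly overwriting them. A toy illustration of the resulting accumulation, with invented numbers:

```ts
// Toy illustration, invented numbers: final usage is the sum of per-turn
// totals, because each turn adds its own counters to the running snapshot.
const turns = [
  { prompt_tokens: 120, completion_tokens: 30, total_tokens: 150 },
  { prompt_tokens: 180, completion_tokens: 25, total_tokens: 205 },
];
let usage = { input_tokens: 0, output_tokens: 0, total_tokens: 0 };
for (const turn of turns) {
  usage = {
    input_tokens: usage.input_tokens + turn.prompt_tokens,
    output_tokens: usage.output_tokens + turn.completion_tokens,
    total_tokens: usage.total_tokens + turn.total_tokens,
  };
}
// usage => { input_tokens: 300, output_tokens: 55, total_tokens: 355 }
```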
```diff
@@ -528,32 +561,45 @@ async function* callLLMStream(
 			};
 		} else if (delta.tool_calls && delta.tool_calls.length > 0) {
 			if (delta.tool_calls.length > 1) {
-				// …
+				console.log("Multiple tool calls are not supported. Only the first one will be processed.");
 			}
 
 			let currentOutputItem = responseObject.output.at(-1);
-			if (
-				// …
+			if (delta.tool_calls[0].function.name) {
+				const functionName = delta.tool_calls[0].function.name;
+				// Tool call with a name => new tool call
+				let newOutputObject:
+					| ResponseOutputItem.McpCall
+					| ResponseFunctionToolCall
+					| ResponseOutputItem.McpApprovalRequest;
+				if (functionName in mcpToolsMapping) {
+					if (requiresApproval(functionName, mcpToolsMapping)) {
+						newOutputObject = {
+							id: generateUniqueId("mcpr"),
+							type: "mcp_approval_request",
+							name: functionName,
+							server_label: mcpToolsMapping[functionName].server_label,
+							arguments: "",
+						};
+					} else {
+						newOutputObject = {
+							type: "mcp_call",
+							id: generateUniqueId("mcp"),
+							name: functionName,
+							server_label: mcpToolsMapping[functionName].server_label,
+							arguments: "",
+						};
+					}
+				} else {
+					newOutputObject = {
+						type: "function_call",
+						id: generateUniqueId("fc"),
+						call_id: delta.tool_calls[0].id,
+						name: functionName,
+						arguments: "",
+					};
 			}
 
-			const newOutputObject: ResponseOutputItem.McpCall | ResponseFunctionToolCall =
-				delta.tool_calls[0].function.name in mcpToolsMapping
-					? {
-							type: "mcp_call",
-							id: generateUniqueId("mcp_call"),
-							name: delta.tool_calls[0].function.name,
-							server_label: mcpToolsMapping[delta.tool_calls[0].function.name].server_label,
-							arguments: "",
-						}
-					: {
-							type: "function_call",
-							id: generateUniqueId("fc"),
-							call_id: delta.tool_calls[0].id,
-							name: delta.tool_calls[0].function.name,
-							arguments: "",
-						};
-
 			// Response output item added event
 			responseObject.output.push(newOutputObject);
 			yield {
```
```diff
@@ -562,21 +608,36 @@ async function* callLLMStream(
 				item: newOutputObject,
 				sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
 			};
+				if (newOutputObject.type === "mcp_call") {
+					yield {
+						type: "response.mcp_call.in_progress",
+						sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
+						item_id: newOutputObject.id,
+						output_index: responseObject.output.length - 1,
+					};
+				}
 			}
 
-			// …
+			if (delta.tool_calls[0].function.arguments) {
+				// Current item is necessarily a tool call
+				currentOutputItem = responseObject.output.at(-1) as
+					| ResponseOutputItem.McpCall
+					| ResponseFunctionToolCall
+					| ResponseOutputItem.McpApprovalRequest;
+				currentOutputItem.arguments += delta.tool_calls[0].function.arguments;
+				if (currentOutputItem.type === "mcp_call" || currentOutputItem.type === "function_call") {
+					yield {
+						type:
+							currentOutputItem.type === "mcp_call"
+								? ("response.mcp_call_arguments.delta" as "response.mcp_call.arguments_delta") // bug workaround (see https://github.com/openai/openai-node/issues/1562)
+								: "response.function_call_arguments.delta",
+						item_id: currentOutputItem.id as string,
+						output_index: responseObject.output.length - 1,
+						delta: delta.tool_calls[0].function.arguments,
+						sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
+					};
+				}
+			}
 		}
 	}
 
```
```diff
@@ -632,12 +693,65 @@ async function* callLLMStream(
 		};
 	} else if (lastOutputItem?.type === "mcp_call") {
 		yield {
-			type: "response.mcp_call.arguments_done",
+			type: "response.mcp_call_arguments.done" as "response.mcp_call.arguments_done", // bug workaround (see https://github.com/openai/openai-node/issues/1562)
 			item_id: lastOutputItem.id as string,
 			output_index: responseObject.output.length - 1,
 			arguments: lastOutputItem.arguments,
 			sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
 		};
+
+		// Call MCP tool
+		const toolParams = mcpToolsMapping[lastOutputItem.name];
+		const toolResult = await callMcpTool(toolParams, lastOutputItem.name, lastOutputItem.arguments);
+		if (toolResult.error) {
+			lastOutputItem.error = toolResult.error;
+			yield {
+				type: "response.mcp_call.failed",
+				sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
+			};
+		} else {
+			lastOutputItem.output = toolResult.output;
+			yield {
+				type: "response.mcp_call.completed",
+				sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
+			};
+		}
+
+		yield {
+			type: "response.output_item.done",
+			output_index: responseObject.output.length - 1,
+			item: lastOutputItem,
+			sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
+		};
+
+		// Updating the payload for next LLM call
+		payload.messages.push(
+			{
+				role: "assistant",
+				tool_calls: [
+					{
+						id: lastOutputItem.id,
+						type: "function",
+						function: {
+							name: lastOutputItem.name,
+							arguments: lastOutputItem.arguments,
+							// Hacky: type is not correct in inference.js. Will fix it but in the meantime we need to cast it.
+							// TODO: fix it in the inference.js package. Should be "arguments" and not "parameters".
+						} as unknown as ChatCompletionInputFunctionDefinition,
+					},
+				],
+			},
+			{
+				role: "tool",
+				tool_call_id: lastOutputItem.id,
+				content: lastOutputItem.output
+					? lastOutputItem.output
+					: lastOutputItem.error
+						? `Error: ${lastOutputItem.error}`
+						: "",
+			}
+		);
+	} else if (lastOutputItem?.type === "mcp_approval_request") {
 		yield {
 			type: "response.output_item.done",
 			output_index: responseObject.output.length - 1,
```
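After the MCP call above resolves, the result is fed back to the model using the standard chat-completions convention: an assistant message that replays the tool call, immediately followed by a tool message carrying the output (or the error). A condensed sketch of that pairing; the helper is hypothetical but mirrors the two messages pushed above:

```ts
// Hypothetical helper mirroring the two messages pushed above: the assistant
// message replays the tool call, the tool message carries its result (or the
// error text), so the next LLM turn sees the full exchange.
function mcpResultMessages(call: { id: string; name: string; arguments: string; output?: string; error?: string }) {
  return [
    {
      role: "assistant" as const,
      tool_calls: [{ id: call.id, type: "function" as const, function: { name: call.name, arguments: call.arguments } }],
    },
    {
      role: "tool" as const,
      tool_call_id: call.id,
      content: call.output ?? (call.error ? `Error: ${call.error}` : ""),
    },
  ];
}
```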
```diff
@@ -657,9 +771,11 @@ async function* callLLMStream(
  */
 async function* callApprovedMCPToolStream(
 	approval_request_id: string,
+	mcpCallId: string,
 	approvalRequest: McpApprovalRequestParams | undefined,
 	mcpToolsMapping: Record<string, McpServerParams>,
-	responseObject: IncompleteResponse
+	responseObject: IncompleteResponse,
+	payload: ChatCompletionInput
 ): AsyncGenerator<ResponseStreamEvent> {
 	if (!approvalRequest) {
 		throw new Error(`MCP approval request '${approval_request_id}' not found`);
```
```diff
@@ -667,7 +783,7 @@ async function* callApprovedMCPToolStream(
 
 	const outputObject: ResponseOutputItem.McpCall = {
 		type: "mcp_call",
-		id: …,
+		id: mcpCallId,
 		name: approvalRequest.name,
 		server_label: approvalRequest.server_label,
 		arguments: approvalRequest.arguments,
```
```diff
@@ -698,83 +814,55 @@ async function* callApprovedMCPToolStream(
 			type: "response.mcp_call.failed",
 			sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
 		};
-		// …
+	} else {
+		outputObject.output = toolResult.output;
+		yield {
+			type: "response.mcp_call.completed",
+			sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
+		};
 	}
 
-	outputObject.output = toolResult.output;
-	yield {
-		type: "response.mcp_call.completed",
-		sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
-	};
 	yield {
 		type: "response.output_item.done",
 		output_index: responseObject.output.length - 1,
 		item: outputObject,
 		sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
 	};
-}
 
-	// …
-	if (approvalRequired) {
-		const approvalRequest: ResponseOutputItem.McpApprovalRequest = {
-			type: "mcp_approval_request",
-			id: generateUniqueId("mcp_approval_request"),
-			name: toolCall.name,
-			server_label: toolParams.server_label,
-			arguments: toolCall.arguments,
-		};
-		responseObject.output.push(approvalRequest);
-		yield {
-			type: "response.output_item.added",
-			output_index: responseObject.output.length,
-			item: approvalRequest,
-			sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
-		};
-	} else {
-		responseObject.output.push;
-		yield {
-			type: "response.mcp_call.in_progress",
-			item_id: toolCall.id,
-			sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
-			output_index,
-		};
-		const toolResult = await callMcpTool(toolParams, toolCall.name, toolCall.arguments);
-		if (toolResult.error) {
-			toolCall.error = toolResult.error;
-			yield {
-				type: "response.mcp_call.failed",
-				sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
-			};
-			throw new Error(toolCall.error);
-		} else {
-			toolCall.output = toolResult.output;
-			yield {
-				type: "response.mcp_call.completed",
-				sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
-			};
-		}
-	}
-}
+	// Updating the payload for next LLM call
+	payload.messages.push(
+		{
+			role: "assistant",
+			tool_calls: [
+				{
+					id: outputObject.id,
+					type: "function",
+					function: {
+						name: outputObject.name,
+						arguments: outputObject.arguments,
+						// Hacky: type is not correct in inference.js. Will fix it but in the meantime we need to cast it.
+						// TODO: fix it in the inference.js package. Should be "arguments" and not "parameters".
+					} as unknown as ChatCompletionInputFunctionDefinition,
+				},
+			],
+		},
+		{
+			role: "tool",
+			tool_call_id: outputObject.id,
+			content: outputObject.output ? outputObject.output : outputObject.error ? `Error: ${outputObject.error}` : "",
 		}
-	// …
+	);
+}
+
+function requiresApproval(toolName: string, mcpToolsMapping: Record<string, McpServerParams>): boolean {
+	const toolParams = mcpToolsMapping[toolName];
+	return toolParams.require_approval === "always"
+		? true
+		: toolParams.require_approval === "never"
+			? false
+			: toolParams.require_approval.always?.tool_names?.includes(toolName)
+				? true
+				: toolParams.require_approval.never?.tool_names?.includes(toolName)
+					? false
+					: true; // behavior is undefined in specs, let's default to true
 }
```
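The nested ternary in `requiresApproval` packs four rules into a single expression. Written as explicit branches (equivalent logic, not part of the commit), the precedence is: a literal `"always"`/`"never"` wins, then the per-tool `always`/`never` name lists, and anything left unspecified defaults to requiring approval:

```ts
// Equivalent to requiresApproval above, with the precedence spelled out.
// Uses the repo's McpServerParams type for the mapping values.
function requiresApprovalExplicit(toolName: string, mcpToolsMapping: Record<string, McpServerParams>): boolean {
  const { require_approval } = mcpToolsMapping[toolName];
  if (require_approval === "always") return true;
  if (require_approval === "never") return false;
  if (require_approval.always?.tool_names?.includes(toolName)) return true;
  if (require_approval.never?.tool_names?.includes(toolName)) return false;
  return true; // undefined in the spec => require approval
}
```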
src/schemas.ts CHANGED
```diff
@@ -125,7 +125,7 @@ export const createResponseParamsSchema = z.object({
 			output: z.string(),
 			type: z.literal("function_call_output"),
 			id: z.string().optional(),
-			status: z.enum(["in_progress", "completed", "incomplete"]),
+			status: z.enum(["in_progress", "completed", "incomplete"]).optional(),
 		}),
 		z.object({
 			type: z.literal("mcp_list_tools"),
```
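The one-line schema change makes `status` optional on `function_call_output` input items, so clients can replay items from a previous response without attaching a status. A minimal illustration of what now parses:

```ts
// Minimal illustration: with .optional(), a function_call_output item parses
// even when the client omits the status field.
import { z } from "zod";

const functionCallOutputItem = z.object({
  output: z.string(),
  type: z.literal("function_call_output"),
  id: z.string().optional(),
  status: z.enum(["in_progress", "completed", "incomplete"]).optional(),
});

functionCallOutputItem.parse({ type: "function_call_output", output: "42" }); // ok now
```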