Spaces:

Wauplin
/

responses.js

Running

App Files Files Community

Wauplin HF Staff commited on 5 days ago

Commit

db8d62b

verified ·

1 Parent(s): e121d54

Upload folder using huggingface_hub

Browse files

Files changed (4) hide show

src/mcp.ts +1 -14
src/routes/landingPageHtml.ts +27 -0
src/routes/responses.ts +583 -462
src/schemas.ts +11 -2

src/mcp.ts CHANGED Viewed

@@ -6,8 +6,6 @@ import { URL } from "url";
 import type { McpServerParams } from "./schemas";
 import { McpResultFormatter } from "./lib/McpResultFormatter";
-import { generateUniqueId } from "./lib/generateUniqueId";
-import type { ResponseOutputItem } from "openai/resources/responses/responses";
 export async function connectMcpServer(mcpServer: McpServerParams): Promise<Client> {
 	const mcp = new Client({ name: "@huggingface/responses.js", version: packageVersion });
@@ -37,9 +35,8 @@ export async function connectMcpServer(mcpServer: McpServerParams): Promise<Clie
 export async function callMcpTool(
 	mcpServer: McpServerParams,
 	toolName: string,
-	server_label: string,
 	argumentsString: string
-): Promise<ResponseOutputItem> {
 	try {
 		const client = await connectMcpServer(mcpServer);
 		const toolArgs: Record<string, unknown> = argumentsString === "" ? {} : JSON.parse(argumentsString);
@@ -47,22 +44,12 @@ export async function callMcpTool(
 		const toolResponse = await client.callTool({ name: toolName, arguments: toolArgs });
 		const formattedResult = McpResultFormatter.format(toolResponse);
 		return {
-			type: "mcp_call",
-			id: generateUniqueId("mcp_call"),
-			name: toolName,
-			server_label: server_label,
-			arguments: argumentsString,
 			output: formattedResult,
 		};
 	} catch (error) {
 		const errorMessage =
 			error instanceof Error ? error.message : typeof error === "string" ? error : JSON.stringify(error);
 		return {
-			type: "mcp_call",
-			id: generateUniqueId("mcp_call"),
-			name: toolName,
-			server_label: server_label,
-			arguments: argumentsString,
 			error: errorMessage,
 		};
 	}

 import type { McpServerParams } from "./schemas";
 import { McpResultFormatter } from "./lib/McpResultFormatter";
 export async function connectMcpServer(mcpServer: McpServerParams): Promise<Client> {
 	const mcp = new Client({ name: "@huggingface/responses.js", version: packageVersion });
 export async function callMcpTool(
 	mcpServer: McpServerParams,
 	toolName: string,
 	argumentsString: string
+): Promise<{ error: string; output?: undefined } | { error?: undefined; output: string }> {
 	try {
 		const client = await connectMcpServer(mcpServer);
 		const toolArgs: Record<string, unknown> = argumentsString === "" ? {} : JSON.parse(argumentsString);
 		const toolResponse = await client.callTool({ name: toolName, arguments: toolArgs });
 		const formattedResult = McpResultFormatter.format(toolResponse);
 		return {
 			output: formattedResult,
 		};
 	} catch (error) {
 		const errorMessage =
 			error instanceof Error ? error.message : typeof error === "string" ? error : JSON.stringify(error);
 		return {
 			error: errorMessage,
 		};
 	}

src/routes/landingPageHtml.ts CHANGED Viewed

@@ -498,6 +498,7 @@ export function getLandingPageHtml(req: Request, res: Response): void {
         <button class="examples-tab" type="button">Streaming</button>
         <button class="examples-tab" type="button">Function Calling</button>
         <button class="examples-tab" type="button">Structured Output</button>
       </div>
       <div class="example-panel active">
         <pre><button class="copy-btn" onclick="copyCode(this)">Copy</button><code class="language-python">from openai import OpenAI
@@ -657,6 +658,32 @@ response = client.responses.parse(
 print(response.output_parsed)</code></pre>
       </div>
     </section>
     <footer class="more-info-footer">
       <div style="font-weight:600; color:var(--primary-dark); font-size:1.13em; margin-bottom:0.5em;">More Info</div>

         <button class="examples-tab" type="button">Streaming</button>
         <button class="examples-tab" type="button">Function Calling</button>
         <button class="examples-tab" type="button">Structured Output</button>
+        <button class="examples-tab" type="button">MCP</button>
       </div>
       <div class="example-panel active">
         <pre><button class="copy-btn" onclick="copyCode(this)">Copy</button><code class="language-python">from openai import OpenAI
 print(response.output_parsed)</code></pre>
       </div>
+      <div class="example-panel">
+        <pre><button class="copy-btn" onclick="copyCode(this)">Copy</button><code class="language-python">from openai import OpenAI
+import os
+client = OpenAI(
+    base_url="${baseUrl}",
+    api_key=os.getenv("HF_TOKEN"), # visit https://huggingface.co/settings/tokens
+)
+response = client.responses.create(
+    model="cerebras@meta-llama/Llama-3.3-70B-Instruct",
+    input="how does tiktoken work?",
+    tools=[
+        {
+            "type": "mcp",
+            "server_label": "gitmcp",
+            "server_url": "https://gitmcp.io/openai/tiktoken",
+            "allowed_tools": ["search_tiktoken_documentation", "fetch_tiktoken_documentation"],
+            "require_approval": "never",
+        },
+    ],
+)
+for output in response.output:
+    print(output)</code></pre>
+      </div>
     </section>
     <footer class="more-info-footer">
       <div style="font-weight:600; color:var(--primary-dark); font-size:1.13em; margin-bottom:0.5em;">More Info</div>

src/routes/responses.ts CHANGED Viewed

@@ -17,10 +17,7 @@ import type {
 	ResponseFunctionToolCall,
 	ResponseOutputItem,
 } from "openai/resources/responses/responses";
-import type {
-	ChatCompletionInputTool,
-	ChatCompletionStreamOutputUsage,
-} from "@huggingface/tasks/dist/commonjs/tasks/chat-completion/inference.js";
 import { callMcpTool, connectMcpServer } from "../mcp.js";
 class StreamingError extends Error {
@@ -30,12 +27,129 @@ class StreamingError extends Error {
 	}
 }
 export const postCreateResponse = async (
 	req: ValidatedRequest<CreateResponseParams>,
 	res: ExpressResponse
 ): Promise<void> => {
-	const apiKey = req.headers.authorization?.split(" ")[1];
 	if (!apiKey) {
 		res.status(401).json({
 			success: false,
@@ -44,11 +158,111 @@ export const postCreateResponse = async (
 		return;
 	}
-	const client = new InferenceClient(apiKey);
 	const messages: ChatCompletionInputMessage[] = req.body.instructions
 		? [{ role: "system", content: req.body.instructions }]
 		: [];
 	if (Array.isArray(req.body.input)) {
 		messages.push(
 			...req.body.input
@@ -103,13 +317,20 @@ export const postCreateResponse = async (
 												.filter((item) => item !== undefined),
 							};
 						case "mcp_list_tools": {
-							// Hacky: will be dropped by filter
 							return {
 								role: "assistant",
 								name: "mcp_list_tools",
 								content: "",
 							};
 						}
 						case "mcp_approval_request": {
 							return {
 								role: "assistant",
@@ -132,105 +353,7 @@ export const postCreateResponse = async (
 		messages.push({ role: "user", content: req.body.input });
 	}
-	const output: ResponseOutputItem[] = [];
-	let tools: ChatCompletionInputTool[] | undefined = [];
-	const mcpToolsMapping: Record<string, McpServerParams> = {};
-	if (req.body.tools) {
-		await Promise.all(
-			req.body.tools.map(async (tool) => {
-				switch (tool.type) {
-					case "function":
-						tools?.push({
-							type: tool.type,
-							function: {
-								name: tool.name,
-								parameters: tool.parameters,
-								description: tool.description,
-								strict: tool.strict,
-							},
-						});
-						break;
-					case "mcp": {
-						let mcpListTools: ResponseOutputItem.McpListTools | undefined;
-						// If MCP list tools is already in the input, use it
-						if (Array.isArray(req.body.input)) {
-							for (const item of req.body.input) {
-								if (item.type === "mcp_list_tools" && item.server_label === tool.server_label) {
-									mcpListTools = item;
-									console.debug(`Using MCP list tools from input for server '${tool.server_label}'`);
-									break;
-								}
-							}
-						}
-						// Otherwise, list tools from MCP server
-						if (!mcpListTools) {
-							try {
-								const mcp = await connectMcpServer(tool);
-								console.debug("Listing MCP tools from server");
-								const mcpTools = await mcp.listTools();
-								console.debug(`Fetched ${mcpTools.tools.length} tools from MCP server '${tool.server_label}'`);
-								// All tools are returned in Response object
-								mcpListTools = {
-									id: generateUniqueId("mcp_list_tools"),
-									type: "mcp_list_tools",
-									server_label: tool.server_label,
-									tools: mcpTools.tools.map((mcpTool) => ({
-										input_schema: mcpTool.inputSchema,
-										name: mcpTool.name,
-										annotations: mcpTool.annotations,
-										description: mcpTool.description,
-									})),
-								};
-							} catch (error) {
-								console.error("Error listing tools from MCP server", error);
-								mcpListTools = {
-									id: generateUniqueId("mcp_list_tools"),
-									type: "mcp_list_tools",
-									server_label: tool.server_label,
-									tools: [],
-									error: `Failed to list tools from MCP server '${tool.server_label}': ${error instanceof Error ? error.message : "Unknown error"}`,
-								};
-							}
-							output.push(mcpListTools);
-						}
-						// Only allowed tools are forwarded to the LLM
-						const allowedTools = tool.allowed_tools
-							? Array.isArray(tool.allowed_tools)
-								? tool.allowed_tools
-								: tool.allowed_tools.tool_names
-							: [];
-						if (mcpListTools?.tools) {
-							for (const mcpTool of mcpListTools.tools) {
-								if (allowedTools.length === 0 || allowedTools.includes(mcpTool.name)) {
-									tools?.push({
-										type: "function" as const,
-										function: {
-											name: mcpTool.name,
-											parameters: mcpTool.input_schema,
-											description: mcpTool.description ?? undefined,
-										},
-									});
-								}
-								mcpToolsMapping[mcpTool.name] = tool;
-							}
-							break;
-						}
-					}
-				}
-			})
-		);
-	}
-	if (tools.length === 0) {
-		tools = undefined;
-	}
-	const model = req.body.model.includes("@") ? req.body.model.split("@")[1] : req.body.model;
-	const provider = req.body.model.includes("@") ? req.body.model.split("@")[0] : undefined;
 	const payload: ChatCompletionInput = {
 		// main params
 		model: model,
@@ -269,391 +392,389 @@ export const postCreateResponse = async (
 		top_p: req.body.top_p,
 	};
-	const responseObject: Omit<Response, "incomplete_details" | "output_text" | "parallel_tool_calls"> = {
-		created_at: Math.floor(new Date().getTime() / 1000),
-		error: null,
-		id: generateUniqueId("resp"),
-		instructions: req.body.instructions,
-		max_output_tokens: req.body.max_output_tokens,
-		metadata: req.body.metadata,
-		model: req.body.model,
-		object: "response",
-		output,
-		// parallel_tool_calls: req.body.parallel_tool_calls,
-		status: "in_progress",
-		text: req.body.text,
-		tool_choice: req.body.tool_choice ?? "auto",
-		tools: req.body.tools ?? [],
-		temperature: req.body.temperature,
-		top_p: req.body.top_p,
-		usage: {
-			input_tokens: 0,
-			input_tokens_details: { cached_tokens: 0 },
-			output_tokens: 0,
-			output_tokens_details: { reasoning_tokens: 0 },
-			total_tokens: 0,
-		},
-	};
-	// MCP approval requests => do not call LLM at all
-	if (Array.isArray(req.body.input)) {
-		for (const item of req.body.input) {
-			// Note: currently supporting only 1 mcp_approval_response per request
-			if (item.type === "mcp_approval_response" && item.approve) {
-				const approvalRequest = req.body.input.find(
-					(i) => i.type === "mcp_approval_request" && i.id === item.approval_request_id
-				) as McpApprovalRequestParams | undefined;
-				console.log("approvalRequest", approvalRequest);
-				if (approvalRequest) {
-					const toolParams = mcpToolsMapping[approvalRequest.name];
-					responseObject.output.push(
-						await callMcpTool(toolParams, approvalRequest.name, toolParams.server_label, approvalRequest.arguments)
-					);
-					responseObject.status = "completed";
-					res.json(responseObject);
-					return;
-				} else {
-					responseObject.status = "failed";
-					const errorMessage = `MCP approval response for approval request '${item.approval_request_id}' not found`;
-					console.error(errorMessage);
-					responseObject.error = {
-						code: "server_error",
-						message: errorMessage,
-					};
-					res.json(responseObject);
-					return;
-				}
-			}
-		}
 	}
-	// Streaming mode
-	if (req.body.stream) {
-		res.setHeader("Content-Type", "text/event-stream");
-		res.setHeader("Connection", "keep-alive");
-		let sequenceNumber = 0;
-		// Emit events in sequence
-		const emitEvent = (event: ResponseStreamEvent) => {
-			res.write(`data: ${JSON.stringify(event)}\n\n`);
 		};
-		try {
-			// Response created event
-			emitEvent({
-				type: "response.created",
-				response: responseObject as Response,
-				sequence_number: sequenceNumber++,
-			});
-			// Response in progress event
-			emitEvent({
-				type: "response.in_progress",
-				response: responseObject as Response,
-				sequence_number: sequenceNumber++,
-			});
-			const stream = client.chatCompletionStream(payload);
-			let usage: ChatCompletionStreamOutputUsage | undefined;
-			for await (const chunk of stream) {
-				if (chunk.usage) {
-					usage = chunk.usage;
-				}
-				if (chunk.choices[0].delta.content) {
-					if (responseObject.output.length === 0) {
-						const outputObject: ResponseOutputMessage = {
-							id: generateUniqueId("msg"),
-							type: "message",
-							role: "assistant",
-							status: "in_progress",
-							content: [],
-						};
-						responseObject.output = [outputObject];
-						// Response output item added event
-						emitEvent({
-							type: "response.output_item.added",
-							output_index: 0,
-							item: outputObject,
-							sequence_number: sequenceNumber++,
-						});
-					}
-					const outputObject = responseObject.output.at(-1);
-					if (!outputObject || outputObject.type !== "message") {
-						throw new StreamingError("Not implemented: only single output item type is supported in streaming mode.");
-					}
-					if (outputObject.content.length === 0) {
-						// Response content part added event
-						const contentPart: ResponseContentPartAddedEvent["part"] = {
-							type: "output_text",
-							text: "",
-							annotations: [],
-						};
-						outputObject.content.push(contentPart);
-						emitEvent({
-							type: "response.content_part.added",
-							item_id: outputObject.id,
-							output_index: 0,
-							content_index: 0,
-							part: contentPart,
-							sequence_number: sequenceNumber++,
-						});
-					}
-					const contentPart = outputObject.content.at(-1);
-					if (!contentPart || contentPart.type !== "output_text") {
-						throw new StreamingError("Not implemented: only output_text is supported in streaming mode.");
-					}
-					// Add text delta
-					contentPart.text += chunk.choices[0].delta.content;
-					emitEvent({
-						type: "response.output_text.delta",
-						item_id: outputObject.id,
-						output_index: 0,
-						content_index: 0,
-						delta: chunk.choices[0].delta.content,
-						sequence_number: sequenceNumber++,
-					});
-				} else if (chunk.choices[0].delta.tool_calls && chunk.choices[0].delta.tool_calls.length > 0) {
-					if (chunk.choices[0].delta.tool_calls.length > 1) {
-						throw new StreamingError("Not implemented: only single tool call is supported in streaming mode.");
-					}
-					if (responseObject.output.length === 0) {
-						if (!chunk.choices[0].delta.tool_calls[0].function.name) {
-							throw new StreamingError("Tool call function name is required.");
-						}
-						const outputObject: ResponseFunctionToolCall = {
-							type: "function_call",
-							id: generateUniqueId("fc"),
-							call_id: chunk.choices[0].delta.tool_calls[0].id,
-							name: chunk.choices[0].delta.tool_calls[0].function.name,
-							arguments: "",
-						};
-						responseObject.output = [outputObject];
-						// Response output item added event
-						emitEvent({
-							type: "response.output_item.added",
-							output_index: 0,
-							item: outputObject,
-							sequence_number: sequenceNumber++,
-						});
-					}
-					const outputObject = responseObject.output.at(-1);
-					if (!outputObject || !outputObject.id || outputObject.type !== "function_call") {
-						throw new StreamingError("Not implemented: can only support single output item type in streaming mode.");
-					}
-					outputObject.arguments += chunk.choices[0].delta.tool_calls[0].function.arguments;
-					emitEvent({
-						type: "response.function_call_arguments.delta",
-						item_id: outputObject.id,
-						output_index: 0,
-						delta: chunk.choices[0].delta.tool_calls[0].function.arguments,
-						sequence_number: sequenceNumber++,
-					});
 				}
-			}
-			const lastOutputItem = responseObject.output.at(-1);
-			if (lastOutputItem) {
-				if (lastOutputItem?.type === "message") {
-					const contentPart = lastOutputItem.content.at(-1);
-					if (contentPart?.type === "output_text") {
-						emitEvent({
-							type: "response.output_text.done",
-							item_id: lastOutputItem.id,
-							output_index: responseObject.output.length - 1,
-							content_index: lastOutputItem.content.length - 1,
-							text: contentPart.text,
-							sequence_number: sequenceNumber++,
-						});
-						emitEvent({
-							type: "response.content_part.done",
-							item_id: lastOutputItem.id,
-							output_index: responseObject.output.length - 1,
-							content_index: lastOutputItem.content.length - 1,
-							part: contentPart,
-							sequence_number: sequenceNumber++,
-						});
-					} else {
-						throw new StreamingError("Not implemented: only output_text is supported in streaming mode.");
-					}
-					// Response output item done event
-					lastOutputItem.status = "completed";
-					emitEvent({
-						type: "response.output_item.done",
-						output_index: responseObject.output.length - 1,
-						item: lastOutputItem,
-						sequence_number: sequenceNumber++,
-					});
-				} else if (lastOutputItem?.type === "function_call") {
-					if (!lastOutputItem.id) {
-						throw new StreamingError("Function call id is required.");
-					}
-					emitEvent({
-						type: "response.function_call_arguments.done",
-						item_id: lastOutputItem.id,
-						output_index: responseObject.output.length - 1,
-						arguments: lastOutputItem.arguments,
-						sequence_number: sequenceNumber++,
-					});
-					lastOutputItem.status = "completed";
-					emitEvent({
-						type: "response.output_item.done",
-						output_index: responseObject.output.length - 1,
-						item: lastOutputItem,
-						sequence_number: sequenceNumber++,
-					});
-				} else {
-					throw new StreamingError("Not implemented: only message output is supported in streaming mode.");
-				}
-			}
-			// Response completed event
-			responseObject.status = "completed";
-			if (usage) {
-				responseObject.usage = {
-					input_tokens: usage.prompt_tokens,
-					input_tokens_details: { cached_tokens: 0 },
-					output_tokens: usage.completion_tokens,
-					output_tokens_details: { reasoning_tokens: 0 },
-					total_tokens: usage.total_tokens,
 				};
 			}
-			emitEvent({
-				type: "response.completed",
-				response: responseObject as Response,
-				sequence_number: sequenceNumber++,
-			});
-		} catch (streamError) {
-			console.error("Error in streaming chat completion:", streamError);
-			let message = "An error occurred while streaming from inference server.";
-			if (streamError instanceof StreamingError) {
-				message = streamError.message;
-			} else if (
-				typeof streamError === "object" &&
-				streamError &&
-				"message" in streamError &&
-				typeof streamError.message === "string"
-			) {
-				message = streamError.message;
-			}
-			responseObject.status = "failed";
-			responseObject.error = {
-				code: "server_error",
-				message,
 			};
-			emitEvent({
-				type: "response.failed",
-				response: responseObject as Response,
-				sequence_number: sequenceNumber++,
-			});
 		}
-		res.end();
-		return;
 	}
-	try {
-		const chatCompletionResponse = await client.chatCompletion(payload);
-		responseObject.status = "completed";
-		for (const choice of chatCompletionResponse.choices) {
-			if (choice.message.content) {
-				responseObject.output.push({
-					id: generateUniqueId("msg"),
-					type: "message",
-					role: "assistant",
-					status: "completed",
-					content: [
-						{
-							type: "output_text",
-							text: choice.message.content,
-							annotations: [],
-						},
-					],
-				});
-			}
-			if (choice.message.tool_calls) {
-				for (const toolCall of choice.message.tool_calls) {
-					if (toolCall.function.name in mcpToolsMapping) {
-						const toolParams = mcpToolsMapping[toolCall.function.name];
-						// Check if approval is required
-						const approvalRequired =
-							toolParams.require_approval === "always"
 								? true
-								: toolParams.require_approval === "never"
 									? false
-									: toolParams.require_approval.always?.tool_names?.includes(toolCall.function.name)
-										? true
-										: toolParams.require_approval.never?.tool_names?.includes(toolCall.function.name)
-											? false
-											: true; // behavior is undefined in specs, let's default to
-						if (approvalRequired) {
-							// TODO: Implement approval logic
-							console.log(`Requesting approval for MCP tool '${toolCall.function.name}'`);
-							responseObject.output.push({
-								type: "mcp_approval_request",
-								id: generateUniqueId("mcp_approval_request"),
-								name: toolCall.function.name,
-								server_label: toolParams.server_label,
-								arguments: toolCall.function.arguments,
-							});
-						} else {
-							responseObject.output.push(
-								await callMcpTool(
-									toolParams,
-									toolCall.function.name,
-									toolParams.server_label,
-									toolCall.function.arguments
-								)
-							);
-						}
 					} else {
-						responseObject.output.push({
-							type: "function_call",
-							id: generateUniqueId("fc"),
-							call_id: toolCall.id,
-							name: toolCall.function.name,
-							arguments: toolCall.function.arguments,
-							status: "completed",
-						});
 					}
 				}
 			}
 		}
-		responseObject.usage = {
-			input_tokens: chatCompletionResponse.usage.prompt_tokens,
-			input_tokens_details: { cached_tokens: 0 },
-			output_tokens: chatCompletionResponse.usage.completion_tokens,
-			output_tokens_details: { reasoning_tokens: 0 },
-			total_tokens: chatCompletionResponse.usage.total_tokens,
-		};
-		res.json(responseObject);
-	} catch (error) {
-		console.error(error);
-		res.status(500).json({
-			success: false,
-			error: error instanceof Error ? error.message : "Unknown error",
-		});
 	}
-};

 	ResponseFunctionToolCall,
 	ResponseOutputItem,
 } from "openai/resources/responses/responses";
+import type { ChatCompletionInputTool } from "@huggingface/tasks/dist/commonjs/tasks/chat-completion/inference.js";
 import { callMcpTool, connectMcpServer } from "../mcp.js";
 class StreamingError extends Error {
 	}
 }
+type IncompleteResponse = Omit<Response, "incomplete_details" | "output_text" | "parallel_tool_calls">;
+const SEQUENCE_NUMBER_PLACEHOLDER = -1;
 export const postCreateResponse = async (
 	req: ValidatedRequest<CreateResponseParams>,
 	res: ExpressResponse
 ): Promise<void> => {
+	// To avoid duplicated code, we run all requests as stream.
+	const events = runCreateResponseStream(req, res);
+	// Then we return in the correct format depending on the user 'stream' flag.
+	if (req.body.stream) {
+		res.setHeader("Content-Type", "text/event-stream");
+		res.setHeader("Connection", "keep-alive");
+		console.debug("Stream request");
+		for await (const event of events) {
+			console.debug(`Event #${event.sequence_number}: ${event.type}`);
+			res.write(`data: ${JSON.stringify(event)}\n\n`);
+		}
+		res.end();
+	} else {
+		console.debug("Non-stream request");
+		for await (const event of events) {
+			if (event.type === "response.completed" || event.type === "response.failed") {
+				console.debug(event.type);
+				res.json(event.response);
+			}
+		}
+	}
+};
+/*
+ * Top-level stream.
+ *
+ * Handles response lifecycle + execute inner logic (MCP list tools, MCP tool calls, LLM call, etc.).
+ * Handles sequenceNumber by overwriting it in the events.
+ */
+async function* runCreateResponseStream(
+	req: ValidatedRequest<CreateResponseParams>,
+	res: ExpressResponse
+): AsyncGenerator<ResponseStreamEvent> {
+	let sequenceNumber = 0;
+	// Prepare response object that will be iteratively populated
+	const responseObject: IncompleteResponse = {
+		created_at: Math.floor(new Date().getTime() / 1000),
+		error: null,
+		id: generateUniqueId("resp"),
+		instructions: req.body.instructions,
+		max_output_tokens: req.body.max_output_tokens,
+		metadata: req.body.metadata,
+		model: req.body.model,
+		object: "response",
+		output: [],
+		// parallel_tool_calls: req.body.parallel_tool_calls,
+		status: "in_progress",
+		text: req.body.text,
+		tool_choice: req.body.tool_choice ?? "auto",
+		tools: req.body.tools ?? [],
+		temperature: req.body.temperature,
+		top_p: req.body.top_p,
+		usage: {
+			input_tokens: 0,
+			input_tokens_details: { cached_tokens: 0 },
+			output_tokens: 0,
+			output_tokens_details: { reasoning_tokens: 0 },
+			total_tokens: 0,
+		},
+	};
+	// Response created event
+	yield {
+		type: "response.created",
+		response: responseObject as Response,
+		sequence_number: sequenceNumber++,
+	};
+	// Response in progress event
+	yield {
+		type: "response.in_progress",
+		response: responseObject as Response,
+		sequence_number: sequenceNumber++,
+	};
+	// Any events (LLM call, MCP call, list tools, etc.)
+	try {
+		for await (const event of innerRunStream(req, res, responseObject)) {
+			yield { ...event, sequence_number: sequenceNumber++ };
+		}
+	} catch (error) {
+		// Error event => stop
+		console.error("Error in stream:", error);
+		const message =
+			typeof error === "object" && error && "message" in error && typeof error.message === "string"
+				? error.message
+				: "An error occurred in stream";
+		responseObject.status = "failed";
+		responseObject.error = {
+			code: "server_error",
+			message,
+		};
+		yield {
+			type: "response.failed",
+			response: responseObject as Response,
+			sequence_number: sequenceNumber++,
+		};
+		return;
+	}
+	// Response completed event
+	yield {
+		type: "response.completed",
+		response: responseObject as Response,
+		sequence_number: sequenceNumber++,
+	};
+}
+async function* innerRunStream(
+	req: ValidatedRequest<CreateResponseParams>,
+	res: ExpressResponse,
+	responseObject: IncompleteResponse
+): AsyncGenerator<ResponseStreamEvent> {
+	// Retrieve API key from headers
+	const apiKey = req.headers.authorization?.split(" ")[1];
 	if (!apiKey) {
 		res.status(401).json({
 			success: false,
 		return;
 	}
+	// List MCP tools from server (if required) + prepare tools for the LLM
+	let tools: ChatCompletionInputTool[] | undefined = [];
+	const mcpToolsMapping: Record<string, McpServerParams> = {};
+	if (req.body.tools) {
+		for (const tool of req.body.tools) {
+			switch (tool.type) {
+				case "function":
+					tools?.push({
+						type: tool.type,
+						function: {
+							name: tool.name,
+							parameters: tool.parameters,
+							description: tool.description,
+							strict: tool.strict,
+						},
+					});
+					break;
+				case "mcp": {
+					let mcpListTools: ResponseOutputItem.McpListTools | undefined;
+					// If MCP list tools is already in the input, use it
+					if (Array.isArray(req.body.input)) {
+						for (const item of req.body.input) {
+							if (item.type === "mcp_list_tools" && item.server_label === tool.server_label) {
+								mcpListTools = item;
+								console.debug(`Using MCP list tools from input for server '${tool.server_label}'`);
+								break;
+							}
+						}
+					}
+					// Otherwise, list tools from MCP server
+					if (!mcpListTools) {
+						for await (const event of listMcpToolsStream(tool, responseObject)) {
+							yield event;
+						}
+						mcpListTools = responseObject.output.at(-1) as ResponseOutputItem.McpListTools;
+					}
+					// Only allowed tools are forwarded to the LLM
+					const allowedTools = tool.allowed_tools
+						? Array.isArray(tool.allowed_tools)
+							? tool.allowed_tools
+							: tool.allowed_tools.tool_names
+						: [];
+					if (mcpListTools?.tools) {
+						for (const mcpTool of mcpListTools.tools) {
+							if (allowedTools.length === 0 || allowedTools.includes(mcpTool.name)) {
+								tools?.push({
+									type: "function" as const,
+									function: {
+										name: mcpTool.name,
+										parameters: mcpTool.input_schema,
+										description: mcpTool.description ?? undefined,
+									},
+								});
+							}
+							mcpToolsMapping[mcpTool.name] = tool;
+						}
+						break;
+					}
+				}
+			}
+		}
+	}
+	if (tools.length === 0) {
+		tools = undefined;
+	}
+	// If MCP approval requests => execute them and return (no LLM call)
+	if (Array.isArray(req.body.input)) {
+		for (const item of req.body.input) {
+			// Note: currently supporting only 1 mcp_approval_response per request
+			let shouldStop = false;
+			if (item.type === "mcp_approval_response" && item.approve) {
+				const approvalRequest = req.body.input.find(
+					(i) => i.type === "mcp_approval_request" && i.id === item.approval_request_id
+				) as McpApprovalRequestParams | undefined;
+				for await (const event of callApprovedMCPToolStream(
+					item.approval_request_id,
+					approvalRequest,
+					mcpToolsMapping,
+					responseObject
+				)) {
+					yield event;
+				}
+				shouldStop = true;
+			}
+			if (shouldStop) {
+				// stop if at least one approval request is processed
+				break;
+			}
+		}
+	}
+	// At this point, we have all tools and we know we want to call the LLM
+	// Let's prepare the payload and make the call!
+	// Resolve model and provider
+	const model = req.body.model.includes("@") ? req.body.model.split("@")[1] : req.body.model;
+	const provider = req.body.model.includes("@") ? req.body.model.split("@")[0] : undefined;
+	// Format input to Chat Completion format
 	const messages: ChatCompletionInputMessage[] = req.body.instructions
 		? [{ role: "system", content: req.body.instructions }]
 		: [];
 	if (Array.isArray(req.body.input)) {
 		messages.push(
 			...req.body.input
 												.filter((item) => item !== undefined),
 							};
 						case "mcp_list_tools": {
+							// Hacky: will be dropped by filter since tools are passed as separate objects
 							return {
 								role: "assistant",
 								name: "mcp_list_tools",
 								content: "",
 							};
 						}
+						case "mcp_call": {
+							return {
+								role: "assistant",
+								name: "mcp_call",
+								content: `MCP call (${item.id}). Server: '${item.server_label}'. Tool: '${item.name}'. Arguments: '${item.arguments}'.`,
+							};
+						}
 						case "mcp_approval_request": {
 							return {
 								role: "assistant",
 		messages.push({ role: "user", content: req.body.input });
 	}
+	// Prepare payload for the LLM
 	const payload: ChatCompletionInput = {
 		// main params
 		model: model,
 		top_p: req.body.top_p,
 	};
+	// Call LLM
+	for await (const event of callLLMStream(apiKey, payload, responseObject, mcpToolsMapping)) {
+		yield event;
+	}
+	// Handle MCP tool calls if any
+	for await (const event of handleMCPToolCallsAfterLLM(responseObject, mcpToolsMapping)) {
+		yield event;
 	}
+}
+async function* listMcpToolsStream(
+	tool: McpServerParams,
+	responseObject: IncompleteResponse
+): AsyncGenerator<ResponseStreamEvent> {
+	yield {
+		type: "response.mcp_list_tools.in_progress",
+		sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
+	};
+	try {
+		const mcp = await connectMcpServer(tool);
+		const mcpTools = await mcp.listTools();
+		yield {
+			type: "response.mcp_list_tools.completed",
+			sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
 		};
+		responseObject.output.push({
+			id: generateUniqueId("mcp_list_tools"),
+			type: "mcp_list_tools",
+			server_label: tool.server_label,
+			tools: mcpTools.tools.map((mcpTool) => ({
+				input_schema: mcpTool.inputSchema,
+				name: mcpTool.name,
+				annotations: mcpTool.annotations,
+				description: mcpTool.description,
+			})),
+		});
+	} catch (error) {
+		const errorMessage = `Failed to list tools from MCP server '${tool.server_label}': ${error instanceof Error ? error.message : "Unknown error"}`;
+		console.error(errorMessage);
+		yield {
+			type: "response.mcp_list_tools.failed",
+			sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
+		};
+		throw new Error(errorMessage);
+	}
+}
+/*
+ * Call LLM and stream the response.
+ */
+async function* callLLMStream(
+	apiKey: string | undefined,
+	payload: ChatCompletionInput,
+	responseObject: IncompleteResponse,
+	mcpToolsMapping: Record<string, McpServerParams>
+): AsyncGenerator<ResponseStreamEvent> {
+	const stream = new InferenceClient(apiKey).chatCompletionStream(payload);
+	for await (const chunk of stream) {
+		if (chunk.usage) {
+			// Overwrite usage with the latest chunk's usage
+			responseObject.usage = {
+				input_tokens: chunk.usage.prompt_tokens,
+				input_tokens_details: { cached_tokens: 0 },
+				output_tokens: chunk.usage.completion_tokens,
+				output_tokens_details: { reasoning_tokens: 0 },
+				total_tokens: chunk.usage.total_tokens,
+			};
+		}
+		const delta = chunk.choices[0].delta;
+		if (delta.content) {
+			let currentOutputItem = responseObject.output.at(-1);
+			// If start of a new message, create it
+			if (currentOutputItem?.type !== "message" || currentOutputItem?.status !== "in_progress") {
+				const outputObject: ResponseOutputMessage = {
+					id: generateUniqueId("msg"),
+					type: "message",
+					role: "assistant",
+					status: "in_progress",
+					content: [],
+				};
+				responseObject.output.push(outputObject);
+				// Response output item added event
+				yield {
+					type: "response.output_item.added",
+					output_index: 0,
+					item: outputObject,
+					sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
+				};
+			}
+			// If start of a new content part, create it
+			currentOutputItem = responseObject.output.at(-1) as ResponseOutputMessage;
+			if (currentOutputItem.content.length === 0) {
+				// Response content part added event
+				const contentPart: ResponseContentPartAddedEvent["part"] = {
+					type: "output_text",
+					text: "",
+					annotations: [],
+				};
+				currentOutputItem.content.push(contentPart);
+				yield {
+					type: "response.content_part.added",
+					item_id: currentOutputItem.id,
+					output_index: responseObject.output.length - 1,
+					content_index: currentOutputItem.content.length - 1,
+					part: contentPart,
+					sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
+				};
+			}
+			const contentPart = currentOutputItem.content.at(-1);
+			if (!contentPart || contentPart.type !== "output_text") {
+				throw new StreamingError(
+					`Not implemented: only output_text is supported in response.output[].content[].type. Got ${contentPart?.type}`
+				);
+			}
+			// Add text delta
+			contentPart.text += delta.content;
+			yield {
+				type: "response.output_text.delta",
+				item_id: currentOutputItem.id,
+				output_index: responseObject.output.length - 1,
+				content_index: currentOutputItem.content.length - 1,
+				delta: delta.content,
+				sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
+			};
+		} else if (delta.tool_calls && delta.tool_calls.length > 0) {
+			if (delta.tool_calls.length > 1) {
+				throw new StreamingError("Not implemented: multiple tool calls are not supported.");
+			}
+			let currentOutputItem = responseObject.output.at(-1);
+			if (currentOutputItem?.type !== "mcp_call" && currentOutputItem?.type !== "function_call") {
+				if (!delta.tool_calls[0].function.name) {
+					throw new StreamingError("Tool call function name is required when starting a new tool call.");
 				}
+				const newOutputObject: ResponseOutputItem.McpCall | ResponseFunctionToolCall =
+					delta.tool_calls[0].function.name in mcpToolsMapping
+						? {
+								type: "mcp_call",
+								id: generateUniqueId("mcp_call"),
+								name: delta.tool_calls[0].function.name,
+								server_label: mcpToolsMapping[delta.tool_calls[0].function.name].server_label,
+								arguments: "",
+							}
+						: {
+								type: "function_call",
+								id: generateUniqueId("fc"),
+								call_id: delta.tool_calls[0].id,
+								name: delta.tool_calls[0].function.name,
+								arguments: "",
+							};
+				// Response output item added event
+				responseObject.output.push(newOutputObject);
+				yield {
+					type: "response.output_item.added",
+					output_index: responseObject.output.length - 1,
+					item: newOutputObject,
+					sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
+				};
+			}
+			// Current item is necessarily a tool call
+			currentOutputItem = responseObject.output.at(-1) as ResponseOutputItem.McpCall | ResponseFunctionToolCall;
+			currentOutputItem.arguments += delta.tool_calls[0].function.arguments;
+			yield {
+				type:
+					currentOutputItem.type === "mcp_call"
+						? "response.mcp_call.arguments_delta"
+						: "response.function_call_arguments.delta",
+				item_id: currentOutputItem.id as string,
+				output_index: responseObject.output.length - 1,
+				delta: delta.tool_calls[0].function.arguments,
+				sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
+			};
+		}
+	}
+	const lastOutputItem = responseObject.output.at(-1);
+	if (lastOutputItem) {
+		if (lastOutputItem?.type === "message") {
+			const contentPart = lastOutputItem.content.at(-1);
+			if (contentPart?.type === "output_text") {
+				yield {
+					type: "response.output_text.done",
+					item_id: lastOutputItem.id,
+					output_index: responseObject.output.length - 1,
+					content_index: lastOutputItem.content.length - 1,
+					text: contentPart.text,
+					sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
+				};
+				yield {
+					type: "response.content_part.done",
+					item_id: lastOutputItem.id,
+					output_index: responseObject.output.length - 1,
+					content_index: lastOutputItem.content.length - 1,
+					part: contentPart,
+					sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
 				};
+			} else {
+				throw new StreamingError("Not implemented: only output_text is supported in streaming mode.");
 			}
+			// Response output item done event
+			lastOutputItem.status = "completed";
+			yield {
+				type: "response.output_item.done",
+				output_index: responseObject.output.length - 1,
+				item: lastOutputItem,
+				sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
+			};
+		} else if (lastOutputItem?.type === "function_call") {
+			yield {
+				type: "response.function_call_arguments.done",
+				item_id: lastOutputItem.id as string,
+				output_index: responseObject.output.length - 1,
+				arguments: lastOutputItem.arguments,
+				sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
+			};
+			lastOutputItem.status = "completed";
+			yield {
+				type: "response.output_item.done",
+				output_index: responseObject.output.length - 1,
+				item: lastOutputItem,
+				sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
 			};
+		} else if (lastOutputItem?.type === "mcp_call") {
+			yield {
+				type: "response.mcp_call.arguments_done",
+				item_id: lastOutputItem.id as string,
+				output_index: responseObject.output.length - 1,
+				arguments: lastOutputItem.arguments,
+				sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
+			};
+			yield {
+				type: "response.output_item.done",
+				output_index: responseObject.output.length - 1,
+				item: lastOutputItem,
+				sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
+			};
+		} else {
+			throw new StreamingError(
+				`Not implemented: expected message, function_call, or mcp_call, got ${lastOutputItem?.type}`
+			);
 		}
 	}
+}
+/*
+ * Perform an approved MCP tool call and stream the response.
+ */
+async function* callApprovedMCPToolStream(
+	approval_request_id: string,
+	approvalRequest: McpApprovalRequestParams | undefined,
+	mcpToolsMapping: Record<string, McpServerParams>,
+	responseObject: IncompleteResponse
+): AsyncGenerator<ResponseStreamEvent> {
+	if (!approvalRequest) {
+		throw new Error(`MCP approval request '${approval_request_id}' not found`);
+	}
+	const outputObject: ResponseOutputItem.McpCall = {
+		type: "mcp_call",
+		id: generateUniqueId("mcp_call"),
+		name: approvalRequest.name,
+		server_label: approvalRequest.server_label,
+		arguments: approvalRequest.arguments,
+	};
+	responseObject.output.push(outputObject);
+	// Response output item added event
+	yield {
+		type: "response.output_item.added",
+		output_index: responseObject.output.length - 1,
+		item: outputObject,
+		sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
+	};
+	yield {
+		type: "response.mcp_call.in_progress",
+		item_id: outputObject.id,
+		output_index: responseObject.output.length - 1,
+		sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
+	};
+	const toolParams = mcpToolsMapping[approvalRequest.name];
+	const toolResult = await callMcpTool(toolParams, approvalRequest.name, approvalRequest.arguments);
+	if (toolResult.error) {
+		outputObject.error = toolResult.error;
+		yield {
+			type: "response.mcp_call.failed",
+			sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
+		};
+		throw new Error(outputObject.error);
+	}
+	outputObject.output = toolResult.output;
+	yield {
+		type: "response.mcp_call.completed",
+		sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
+	};
+	yield {
+		type: "response.output_item.done",
+		output_index: responseObject.output.length - 1,
+		item: outputObject,
+		sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
+	};
+}
+async function* handleMCPToolCallsAfterLLM(
+	responseObject: IncompleteResponse,
+	mcpToolsMapping: Record<string, McpServerParams>
+): AsyncGenerator<ResponseStreamEvent> {
+	for (let output_index = 0; output_index < responseObject.output.length; output_index++) {
+		const outputItem = responseObject.output[output_index];
+		if (outputItem.type === "mcp_call") {
+			const toolCall = outputItem as ResponseOutputItem.McpCall;
+			const toolParams = mcpToolsMapping[toolCall.name];
+			if (toolParams) {
+				const approvalRequired =
+					toolParams.require_approval === "always"
+						? true
+						: toolParams.require_approval === "never"
+							? false
+							: toolParams.require_approval.always?.tool_names?.includes(toolCall.name)
 								? true
+								: toolParams.require_approval.never?.tool_names?.includes(toolCall.name)
 									? false
+									: true; // behavior is undefined in specs, let's default to
+				if (approvalRequired) {
+					const approvalRequest: ResponseOutputItem.McpApprovalRequest = {
+						type: "mcp_approval_request",
+						id: generateUniqueId("mcp_approval_request"),
+						name: toolCall.name,
+						server_label: toolParams.server_label,
+						arguments: toolCall.arguments,
+					};
+					responseObject.output.push(approvalRequest);
+					yield {
+						type: "response.output_item.added",
+						output_index: responseObject.output.length,
+						item: approvalRequest,
+						sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
+					};
+				} else {
+					responseObject.output.push;
+					yield {
+						type: "response.mcp_call.in_progress",
+						item_id: toolCall.id,
+						sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
+						output_index,
+					};
+					const toolResult = await callMcpTool(toolParams, toolCall.name, toolCall.arguments);
+					if (toolResult.error) {
+						toolCall.error = toolResult.error;
+						yield {
+							type: "response.mcp_call.failed",
+							sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
+						};
+						throw new Error(toolCall.error);
 					} else {
+						toolCall.output = toolResult.output;
+						yield {
+							type: "response.mcp_call.completed",
+							sequence_number: SEQUENCE_NUMBER_PLACEHOLDER,
+						};
 					}
 				}
 			}
 		}
 	}
+}

src/schemas.ts CHANGED Viewed

@@ -60,10 +60,17 @@ const mcpApprovalRequestParamsSchema = z.object({
 });
 const mcpApprovalResponseParamsSchema = z.object({
 	type: z.literal("mcp_approval_response"),
-	id: z.string().optional(),
 	approval_request_id: z.string(),
 	approve: z.boolean(),
-	reason: z.string().optional(),
 });
 export const createResponseParamsSchema = z.object({
@@ -136,6 +143,7 @@ export const createResponseParamsSchema = z.object({
 				}),
 				mcpApprovalRequestParamsSchema,
 				mcpApprovalResponseParamsSchema,
 			])
 		),
 	]),
@@ -215,3 +223,4 @@ export type CreateResponseParams = z.infer<typeof createResponseParamsSchema>;
 export type McpServerParams = z.infer<typeof mcpServerParamsSchema>;
 export type McpApprovalRequestParams = z.infer<typeof mcpApprovalRequestParamsSchema>;
 export type McpApprovalResponseParams = z.infer<typeof mcpApprovalResponseParamsSchema>;

 });
 const mcpApprovalResponseParamsSchema = z.object({
 	type: z.literal("mcp_approval_response"),
+	id: z.string().nullable().default(null),
 	approval_request_id: z.string(),
 	approve: z.boolean(),
+	reason: z.string().nullable().default(null),
+});
+const mcpCallParamsSchema = z.object({
+	type: z.literal("mcp_call"),
+	id: z.string(),
+	name: z.string(),
+	server_label: z.string(),
+	arguments: z.string(),
 });
 export const createResponseParamsSchema = z.object({
 				}),
 				mcpApprovalRequestParamsSchema,
 				mcpApprovalResponseParamsSchema,
+				mcpCallParamsSchema,
 			])
 		),
 	]),
 export type McpServerParams = z.infer<typeof mcpServerParamsSchema>;
 export type McpApprovalRequestParams = z.infer<typeof mcpApprovalRequestParamsSchema>;
 export type McpApprovalResponseParams = z.infer<typeof mcpApprovalResponseParamsSchema>;
+export type McpCallParams = z.infer<typeof mcpCallParamsSchema>;