Wauplin HF Staff commited on
Commit
3d97d52
·
verified ·
1 Parent(s): fc023ef

Upload folder using huggingface_hub

Browse files
Dockerfile ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# ---- Build Stage ----
FROM node:18-alpine AS builder

# Install pnpm as root (corepack needs write access to its shim directory)
RUN corepack enable && corepack prepare pnpm@9.7.0 --activate

# Drop privileges for the rest of the build
USER node
ENV HOME=/home/node \
    PATH=/home/node/.local/bin:$PATH

WORKDIR $HOME/app

# Install dependencies and build
COPY --chown=node package.json pnpm-lock.yaml* ./
COPY --chown=node tsconfig.json ./
COPY --chown=node src ./src
RUN pnpm install --frozen-lockfile
RUN pnpm run build
# Fix: dropped the former `RUN chown -R node:node $HOME/app` — every file above
# is already copied with `--chown=node`, the command runs as the unprivileged
# `node` user anyway (so it could not change ownership), and it only added a
# duplicate filesystem layer.

# ---- Production Stage ----
FROM node:18-alpine AS runner

# No need to install pnpm here; run as the non-root node user for security
USER node
ENV HOME=/home/node \
    PATH=/home/node/.local/bin:$PATH

# Create app directory
WORKDIR $HOME/app

# Copy only necessary files from builder
# NOTE(review): node_modules copied as-is still contains devDependencies;
# consider `pnpm prune --prod` in the builder to slim the runtime image.
COPY --chown=node --from=builder /home/node/app/dist ./dist
COPY --chown=node --from=builder /home/node/app/package.json ./
COPY --chown=node --from=builder /home/node/app/node_modules ./node_modules

EXPOSE 3000

CMD ["node", "dist/index.js"]
README.md CHANGED
@@ -1,12 +1,182 @@
1
  ---
2
  title: Responses.js
3
- emoji: 🦀
4
  colorFrom: red
5
- colorTo: yellow
6
- sdk: gradio
7
- sdk_version: 5.35.0
8
- app_file: app.py
9
  pinned: false
 
 
 
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  title: Responses.js
3
+ emoji: 😻
4
  colorFrom: red
5
+ colorTo: red
6
+ sdk: docker
 
 
7
  pinned: false
8
+ license: mit
9
+ short_description: Check out https://github.com/huggingface/responses.js
10
+ app_port: 3000
11
  ---
12
 
13
+
14
+ # responses.js
15
+
16
+ A lightweight Express.js server that implements OpenAI's Responses API, built on top of Chat Completions and powered by Hugging Face Inference Providers.
17
+
18
+ ## ✨ Features
19
+
20
+ - **ResponsesAPI**: Partial implementation of [OpenAI's Responses API](https://platform.openai.com/docs/api-reference/responses), on top of Chat Completion API
21
+ - **Inference Providers**: Powered by Hugging Face Inference Providers
22
+ - **Streaming Support**: Support for streamed responses
23
+ - **Structured Output**: Support for structured data responses (e.g. jsonschema)
24
+ - **Function Calling**: Tool and function calling capabilities
25
+ - **Multi-modal Input**: Text and image input support
26
+ - **Demo UI**: Interactive web interface for testing
27
+
28
+ Not implemented: remote function calling, MCP server, file upload, stateful API, etc.
29
+
30
+ ## 🚀 Quick Start
31
+
32
+ ### Prerequisites
33
+
34
+ - Node.js (v18 or higher)
35
+ - pnpm (recommended) or npm
36
+ - an Hugging Face token with inference permissions. Create one from your [user settings](https://huggingface.co/settings/tokens).
37
+
38
+ ### Installation & Setup
39
+
40
+ ```bash
41
+ # Clone the repository
42
+ git clone https://github.com/huggingface/responses.js.git
43
+ cd responses.js
44
+
45
+ # Install dependencies
46
+ pnpm install
47
+
48
+ # Start the development server
49
+ pnpm dev
50
+ ```
51
+
52
+ The server will be available at `http://localhost:3000`.
53
+
54
+ ### Running Examples
55
+
56
+ Explore the various capabilities with our example scripts located in the [./examples](./examples) folder:
57
+
58
+ ```bash
59
+ # Basic text input
60
+ pnpm run example text
61
+
62
+ # Multi-turn conversations
63
+ pnpm run example multi_turn
64
+
65
+ # Text + image input
66
+ pnpm run example image
67
+
68
+ # Streaming responses
69
+ pnpm run example streaming
70
+
71
+ # Structured output
72
+ pnpm run example structured_output
73
+ pnpm run example structured_output_streaming
74
+
75
+ # Function calling
76
+ pnpm run example function
77
+ pnpm run example function_streaming
78
+ ```
79
+
80
+ ### Interactive Demo UI
81
+
82
+ Experience the API through our interactive web interface, adapted from the [openai-responses-starter-app](https://github.com/openai/openai-responses-starter-app).
83
+
84
+ [![Demo Video](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/responses.js/demo_mini.png)](https://youtu.be/F-tAUnW-nd0)
85
+
86
+
87
+ #### Setup
88
+
89
+ 1. Create a configuration file:
90
+
91
+ ```bash
92
+ # Create demo/.env
93
+ cat > demo/.env << EOF
94
+ MODEL="cohere@CohereLabs/c4ai-command-a-03-2025"
95
+ OPENAI_BASE_URL=http://localhost:3000/v1
96
+ OPENAI_API_KEY=${HF_TOKEN:-<your-huggingface-token>}
97
+ EOF
98
+ ```
99
+
100
+ 2. Install demo dependencies:
101
+
102
+ ```bash
103
+ pnpm demo:install
104
+ ```
105
+
106
+ 3. Launch the demo:
107
+
108
+ ```bash
109
+ pnpm demo:dev
110
+ ```
111
+
112
+ The demo will be available at `http://localhost:3001`.
113
+
114
+ ## 🐳 Running with Docker
115
+
116
+ You can run the server in a production-ready container using Docker.
117
+
118
+ ### Build the Docker image
119
+
120
+ ```bash
121
+ docker build -t responses.js .
122
+ ```
123
+
124
+ ### Run the server
125
+
126
+ ```bash
127
+ docker run -p 3000:3000 responses.js
128
+ ```
129
+
130
+ The server will be available at `http://localhost:3000`.
131
+
132
+ ## 📁 Project Structure
133
+
134
+ ```
135
+ responses.js/
136
+ ├── demo/ # Interactive chat UI demo
137
+ ├── examples/ # Example scripts using openai-node client
138
+ ├── src/
139
+ │ ├── index.ts # Application entry point
140
+ │ ├── server.ts # Express app configuration and route definitions
141
+ │ ├── routes/ # API route implementations
142
+ │ ├── middleware/ # Middleware (validation, logging, etc.)
143
+ │ └── schemas/ # Zod validation schemas
144
+ ├── scripts/ # Utility and build scripts
145
+ ├── package.json # Package configuration and dependencies
146
+ └── README.md # This file
147
+ ```
148
+
149
+ ## 🛣️ Done / TODOs
150
+
151
+ > **Note**: This project is in active development. The roadmap below represents our current priorities and may evolve. Do not take anything for granted.
152
+
153
+ - [x] OpenAI types integration for consistent output
154
+ - [x] Streaming mode support
155
+ - [x] Structured output capabilities
156
+ - [x] Function calling implementation
157
+ - [x] Repository migration to dedicated responses.js repo
158
+ - [x] Basic development tooling setup
159
+ - [x] Demo application with comprehensive instructions
160
+ - [x] Multi-turn conversation fixes for text messages + tool calls
161
+ - [x] Correctly return "usage" field
162
+ - [x] MCP support (non-streaming)
163
+ - [ ] MCP support (streaming)
164
+ - [ ] Tools execution (web search, file search, image generation, code interpreter)
165
+ - [ ] Background mode support
166
+ - [ ] Additional API routes (GET, DELETE, CANCEL, LIST responses)
167
+ - [ ] Reasoning capabilities
168
+
169
+ ## 🤝 Contributing
170
+
171
+ We welcome contributions! Please feel free to submit issues, feature requests, or pull requests.
172
+
173
+ ## 📄 License
174
+
175
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
176
+
177
+ ## 🙏 Acknowledgments
178
+
179
+ - Based on OpenAI's [Responses API specification](https://platform.openai.com/docs/api-reference/responses)
180
+ - Built on top of [OpenAI's nodejs client](https://github.com/openai/openai-node)
181
+ - Demo UI adapted from [openai-responses-starter-app](https://github.com/openai/openai-responses-starter-app)
182
+ - Built on top of [Hugging Face Inference Providers](https://huggingface.co/docs/inference-providers/index)
package.json ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "@huggingface/responses.js",
3
+ "packageManager": "[email protected]",
4
+ "version": "0.1.0",
5
+ "type": "module",
6
+ "description": "Server for handling AI responses",
7
+ "repository": "https://github.com/huggingface/huggingface.js.git",
8
+ "publishConfig": {
9
+ "access": "public"
10
+ },
11
+ "main": "./dist/index.js",
12
+ "module": "./dist/index.mjs",
13
+ "types": "./dist/index.d.ts",
14
+ "exports": {
15
+ ".": {
16
+ "types": "./dist/index.d.ts",
17
+ "require": "./dist/index.js",
18
+ "import": "./dist/index.mjs"
19
+ }
20
+ },
21
+ "engines": {
22
+ "node": ">=18"
23
+ },
24
+ "source": "index.ts",
25
+ "scripts": {
26
+ "build": "tsup src/*.ts --format cjs,esm --clean && tsc --emitDeclarationOnly --declaration",
27
+ "check": "tsc",
28
+ "dev": "tsx watch src/index.ts",
29
+ "format": "prettier --write .",
30
+ "format:check": "prettier --check .",
31
+ "lint": "eslint --quiet --fix --ext .cjs,.ts .",
32
+ "lint:check": "eslint --ext .cjs,.ts .",
33
+ "prepublishOnly": "pnpm run build",
34
+ "prepare": "pnpm run build",
35
+ "start": "node dist/index.js",
36
+ "example": "node examples/_run.js",
37
+ "demo:build": "cd demo && npm run build",
38
+ "demo:dev": "cd demo && npm run dev",
39
+ "demo:install": "cd demo && npm install",
40
+ "demo:lint": "cd demo && npm run lint",
41
+ "demo:format": "cd demo && npm run format",
42
+ "demo:start": "cd demo && npm run start",
43
+ "deploy:spaces": "./push_to_space.sh"
44
+ },
45
+ "files": [
46
+ "src",
47
+ "dist",
48
+ "tsconfig.json"
49
+ ],
50
+ "keywords": [
51
+ "huggingface",
52
+ "ai",
53
+ "llm",
54
+ "responses-api",
55
+ "server"
56
+ ],
57
+ "author": "Hugging Face",
58
+ "license": "MIT",
59
+ "dependencies": {
60
+ "@huggingface/inference": "^4.3.1",
61
+ "@huggingface/tasks": "^0.19.22",
62
+ "@modelcontextprotocol/sdk": "^1.15.0",
63
+ "express": "^4.21.2",
64
+ "openai": "^5.8.2",
65
+ "zod": "^3.25.71"
66
+ },
67
+ "devDependencies": {
68
+ "@eslint/js": "^9.30.1",
69
+ "@types/express": "^4.17.23",
70
+ "@typescript-eslint/eslint-plugin": "^8.35.1",
71
+ "@typescript-eslint/parser": "^8.35.1",
72
+ "eslint": "^9.30.1",
73
+ "eslint-config-prettier": "^10.1.5",
74
+ "eslint-plugin-prettier": "^5.5.1",
75
+ "prettier": "^3.6.2",
76
+ "tsup": "^8.5.0",
77
+ "tsx": "^4.20.3",
78
+ "typescript": "^5.8.3"
79
+ }
80
+ }
pnpm-lock.yaml ADDED
The diff for this file is too large to render. See raw diff
 
src/index.ts ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { createApp } from "./server.js";
2
+
3
+ const app = createApp();
4
+ const port = process.env.PORT || 3000;
5
+
6
+ // Start server
7
+ app.listen(port, () => {
8
+ console.log(`🚀 Server started at ${new Date().toISOString()}`);
9
+ console.log(`🌐 Server is running on http://localhost:${port}`);
10
+ console.log("─".repeat(60));
11
+ });
12
+
13
+ // Graceful shutdown logging
14
+ process.on("SIGINT", () => {
15
+ console.log("─".repeat(60));
16
+ console.log(`🛑 Server shutting down at ${new Date().toISOString()}`);
17
+ process.exit(0);
18
+ });
19
+
20
+ process.on("SIGTERM", () => {
21
+ console.log("─".repeat(60));
22
+ console.log(`🛑 Server shutting down at ${new Date().toISOString()}`);
23
+ process.exit(0);
24
+ });
25
+
26
+ export default app;
src/lib/McpResultFormatter.ts ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * Vendored from `@huggingface/mcp-client`
3
+ *
4
+ * https://github.com/huggingface/huggingface.js/blob/main/packages/mcp-client/src/ResultFormatter.ts
5
+ */
6
+
7
+ import type {
8
+ TextResourceContents,
9
+ BlobResourceContents,
10
+ CompatibilityCallToolResult,
11
+ } from "@modelcontextprotocol/sdk/types";
12
+
13
+ /**
14
+ * A utility class for formatting CallToolResult contents into human-readable text.
15
+ * Processes different content types, extracts text, and summarizes binary data.
16
+ */
17
+ export class McpResultFormatter {
18
+ /**
19
+ * Formats a CallToolResult's contents into a single string.
20
+ * - Text content is included directly
21
+ * - Binary content (images, audio, blobs) is summarized
22
+ *
23
+ * @param result The CallToolResult to format
24
+ * @returns A human-readable string representation of the result contents
25
+ */
26
+ static format(result: CompatibilityCallToolResult): string {
27
+ if (!result.content || !Array.isArray(result.content) || result.content.length === 0) {
28
+ return "[No content]";
29
+ }
30
+
31
+ const formattedParts: string[] = [];
32
+
33
+ for (const item of result.content) {
34
+ switch (item.type) {
35
+ case "text":
36
+ // Extract text content directly
37
+ formattedParts.push(item.text);
38
+ break;
39
+
40
+ case "image": {
41
+ // Summarize image content
42
+ const imageSize = this.getBase64Size(item.data);
43
+ formattedParts.push(
44
+ `[Binary Content: Image ${item.mimeType}, ${imageSize} bytes]\nThe task is complete and the content accessible to the User`
45
+ );
46
+ break;
47
+ }
48
+
49
+ case "audio": {
50
+ // Summarize audio content
51
+ const audioSize = this.getBase64Size(item.data);
52
+ formattedParts.push(
53
+ `[Binary Content: Audio ${item.mimeType}, ${audioSize} bytes]\nThe task is complete and the content accessible to the User`
54
+ );
55
+ break;
56
+ }
57
+
58
+ case "resource":
59
+ // Handle embedded resources - explicitly type the resource
60
+ if ("text" in item.resource) {
61
+ // It's a text resource with a text property
62
+ const textResource = item.resource as TextResourceContents;
63
+ formattedParts.push(textResource.text);
64
+ } else if ("blob" in item.resource) {
65
+ // It's a binary resource with a blob property
66
+ const blobResource = item.resource as BlobResourceContents;
67
+ const blobSize = this.getBase64Size(blobResource.blob);
68
+ const uri = blobResource.uri ? ` (${blobResource.uri})` : "";
69
+ const mimeType = blobResource.mimeType ? blobResource.mimeType : "unknown type";
70
+ formattedParts.push(
71
+ `[Binary Content${uri}: ${mimeType}, ${blobSize} bytes]\nThe task is complete and the content accessible to the User`
72
+ );
73
+ }
74
+ break;
75
+ }
76
+ }
77
+
78
+ return formattedParts.join("\n");
79
+ }
80
+
81
+ /**
82
+ * Calculates the approximate size in bytes of base64-encoded data
83
+ */
84
+ private static getBase64Size(base64: string): number {
85
+ // Remove base64 header if present (e.g., data:image/png;base64,)
86
+ const cleanBase64 = base64.includes(",") ? base64.split(",")[1] : base64;
87
+
88
+ // Calculate size: Base64 encodes 3 bytes into 4 characters
89
+ const padding = cleanBase64.endsWith("==") ? 2 : cleanBase64.endsWith("=") ? 1 : 0;
90
+ return Math.floor((cleanBase64.length * 3) / 4 - padding);
91
+ }
92
+ }
src/lib/generateUniqueId.ts ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * AI-generated file using Cursor + Claude 4
3
+ *
4
+ * Generate a unique ID for the response
5
+ */
6
+ import { randomBytes } from "crypto";
7
+
8
+ export function generateUniqueId(prefix?: string): string {
9
+ const id = randomBytes(24).toString("hex");
10
+ return prefix ? `${prefix}_${id}` : id;
11
+ }
src/mcp.ts ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { Client } from "@modelcontextprotocol/sdk/client/index.js";
2
+ import { SSEClientTransport } from "@modelcontextprotocol/sdk/client/sse.js";
3
+ import { StreamableHTTPClientTransport } from "@modelcontextprotocol/sdk/client/streamableHttp.js";
4
+ import { version as packageVersion } from "../package.json";
5
+ import { URL } from "url";
6
+
7
+ import type { McpServerParams } from "./schemas";
8
+ import { McpResultFormatter } from "./lib/McpResultFormatter";
9
+ import { generateUniqueId } from "./lib/generateUniqueId";
10
+ import type { ResponseOutputItem } from "openai/resources/responses/responses";
11
+
12
+ export async function connectMcpServer(mcpServer: McpServerParams): Promise<Client> {
13
+ const mcp = new Client({ name: "@huggingface/responses.js", version: packageVersion });
14
+
15
+ // Try to connect with http first, if that fails, try sse
16
+ const url = new URL(mcpServer.server_url);
17
+ const options = {
18
+ requestInit: mcpServer.headers
19
+ ? {
20
+ headers: mcpServer.headers,
21
+ }
22
+ : undefined,
23
+ };
24
+ try {
25
+ const transport = new StreamableHTTPClientTransport(url, options);
26
+ await mcp.connect(transport);
27
+ } catch {
28
+ const transport = new SSEClientTransport(url, options);
29
+ await mcp.connect(transport);
30
+ }
31
+
32
+ console.log("Connected to MCP server", mcpServer.server_url);
33
+
34
+ return mcp;
35
+ }
36
+
37
+ export async function callMcpTool(
38
+ mcpServer: McpServerParams,
39
+ toolName: string,
40
+ server_label: string,
41
+ argumentsString: string
42
+ ): Promise<ResponseOutputItem> {
43
+ try {
44
+ const client = await connectMcpServer(mcpServer);
45
+ const toolArgs: Record<string, unknown> = argumentsString === "" ? {} : JSON.parse(argumentsString);
46
+ console.log(`Calling MCP tool '${toolName}'`);
47
+ const toolResponse = await client.callTool({ name: toolName, arguments: toolArgs });
48
+ const formattedResult = McpResultFormatter.format(toolResponse);
49
+ return {
50
+ type: "mcp_call",
51
+ id: generateUniqueId("mcp_call"),
52
+ name: toolName,
53
+ server_label: server_label,
54
+ arguments: argumentsString,
55
+ output: formattedResult,
56
+ };
57
+ } catch (error) {
58
+ const errorMessage =
59
+ error instanceof Error ? error.message : typeof error === "string" ? error : JSON.stringify(error);
60
+ return {
61
+ type: "mcp_call",
62
+ id: generateUniqueId("mcp_call"),
63
+ name: toolName,
64
+ server_label: server_label,
65
+ arguments: argumentsString,
66
+ error: errorMessage,
67
+ };
68
+ }
69
+ }
src/middleware/logging.ts ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * AI-generated file using Cursor + Claude 4
3
+ *
4
+ * Middleware to log all HTTP requests with duration, status code, method, and route
5
+ */
6
+ import { type Request, type Response, type NextFunction } from "express";
7
+
8
+ interface LogContext {
9
+ timestamp: string;
10
+ method: string;
11
+ url: string;
12
+ statusCode?: number;
13
+ duration?: number;
14
+ }
15
+
16
+ function formatLogMessage(context: LogContext): string {
17
+ const { timestamp, method, url, statusCode, duration } = context;
18
+
19
+ if (statusCode === undefined) {
20
+ return `[${timestamp}] 📥 ${method} ${url}`;
21
+ }
22
+
23
+ const statusEmoji =
24
+ statusCode >= 200 && statusCode < 300
25
+ ? "✅"
26
+ : statusCode >= 400 && statusCode < 500
27
+ ? "⚠️"
28
+ : statusCode >= 500
29
+ ? "❌"
30
+ : "ℹ️";
31
+ return `[${timestamp}] ${statusEmoji} ${statusCode} ${method} ${url} (${duration}ms)`;
32
+ }
33
+
34
+ /**
35
+ * Middleware to log all HTTP requests with duration, status code, method, and route
36
+ */
37
+ export function requestLogger() {
38
+ return (req: Request, res: Response, next: NextFunction): void => {
39
+ const startTime = Date.now();
40
+ const { method, url } = req;
41
+
42
+ // Log incoming request
43
+ console.log(
44
+ formatLogMessage({
45
+ timestamp: new Date().toISOString(),
46
+ method,
47
+ url,
48
+ })
49
+ );
50
+
51
+ // Listen for when the response finishes
52
+ res.on("finish", () => {
53
+ const duration = Date.now() - startTime;
54
+
55
+ console.log(
56
+ formatLogMessage({
57
+ timestamp: new Date().toISOString(),
58
+ method,
59
+ url,
60
+ statusCode: res.statusCode,
61
+ duration,
62
+ })
63
+ );
64
+ });
65
+
66
+ next();
67
+ };
68
+ }
src/middleware/validation.ts ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * AI-generated file using Cursor + Claude 4
3
+ */
4
+
5
+ import { type Request, type Response, type NextFunction } from "express";
6
+ import { z } from "zod";
7
+
8
+ /**
9
+ * Middleware to validate request body against a Zod schema
10
+ * @param schema - Zod schema to validate against
11
+ * @returns Express middleware function
12
+ */
13
+ export function validateBody<T extends z.ZodTypeAny>(schema: T) {
14
+ return (req: Request, res: Response, next: NextFunction): void => {
15
+ try {
16
+ const validatedBody = schema.parse(req.body);
17
+ req.body = validatedBody;
18
+ next();
19
+ } catch (error) {
20
+ if (error instanceof z.ZodError) {
21
+ console.log(req.body);
22
+ res.status(400).json({
23
+ success: false,
24
+ error: error.errors,
25
+ details: error.errors,
26
+ });
27
+ } else {
28
+ res.status(500).json({
29
+ success: false,
30
+ error: "Internal server error",
31
+ });
32
+ }
33
+ }
34
+ };
35
+ }
36
+
37
/**
 * Type helper to create a properly typed request with validated body.
 *
 * Use together with `validateBody`: after that middleware has run,
 * `req.body` is known to match the schema's output type `T`.
 */
export interface ValidatedRequest<T> extends Request {
	body: T;
}
src/routes/index.ts ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ export { postCreateResponse } from "./responses.js";
2
+ export { getLandingPageHtml } from "./landingPageHtml.js";
src/routes/landingPageHtml.ts ADDED
@@ -0,0 +1,567 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import type { Request, Response } from "express";
2
+
3
+ export function getLandingPageHtml(req: Request, res: Response): void {
4
+ const baseUrl = `${req.protocol}://${req.get("host")}/v1`;
5
+ res.setHeader("Content-Type", "text/html; charset=utf-8");
6
+ res.send(`
7
+ <!DOCTYPE html>
8
+ <html lang="en">
9
+ <head>
10
+ <meta charset="UTF-8">
11
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
12
+ <title>responses.js – OpenAI-compatible Responses API</title>
13
+ <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700&display=swap" rel="stylesheet">
14
+ <style>
15
+ :root {
16
+ --primary: #2563eb;
17
+ --primary-dark: #1e40af;
18
+ --accent: #fbbf24;
19
+ --bg: #f8fafc;
20
+ --card-bg: #fff;
21
+ --border: #e5e7eb;
22
+ --text: #1e293b;
23
+ --muted: #64748b;
24
+ --radius: 14px;
25
+ --shadow: 0 4px 24px #0002;
26
+ }
27
+ html, body { height: 100%; }
28
+ body {
29
+ font-family: 'Inter', Arial, sans-serif;
30
+ background: var(--bg);
31
+ color: var(--text);
32
+ margin: 0;
33
+ min-height: 100vh;
34
+ display: flex;
35
+ flex-direction: column;
36
+ }
37
+ .sticky-header {
38
+ position: sticky;
39
+ top: 0;
40
+ z-index: 100;
41
+ background: linear-gradient(90deg, var(--primary) 0%, #60a5fa 100%);
42
+ color: #fff;
43
+ box-shadow: 0 2px 12px #0001;
44
+ }
45
+ .header-inner {
46
+ max-width: 1100px;
47
+ margin: 0 auto;
48
+ display: flex;
49
+ align-items: center;
50
+ justify-content: space-between;
51
+ padding: 1.5rem 1.5rem 1.2rem 1.5rem;
52
+ }
53
+ .header-title {
54
+ display: flex;
55
+ align-items: center;
56
+ gap: 0.8rem;
57
+ }
58
+ .header-title svg {
59
+ height: 2.2rem;
60
+ width: 2.2rem;
61
+ display: block;
62
+ }
63
+ .header-title h1 {
64
+ font-size: 2.1rem;
65
+ font-weight: 700;
66
+ margin: 0;
67
+ letter-spacing: -1px;
68
+ }
69
+ .github-btn {
70
+ background: #fff2;
71
+ color: #fff;
72
+ border: 1.5px solid #fff4;
73
+ border-radius: 8px;
74
+ padding: 0.6em 1.3em;
75
+ font-weight: 600;
76
+ font-size: 1.05em;
77
+ text-decoration: none;
78
+ display: flex;
79
+ align-items: center;
80
+ gap: 0.5em;
81
+ transition: background 0.2s, color 0.2s;
82
+ }
83
+ .github-btn:hover {
84
+ background: #fff;
85
+ color: var(--primary-dark);
86
+ }
87
+ main {
88
+ flex: 1;
89
+ max-width: 900px;
90
+ margin: 0 auto;
91
+ padding: 2.5rem 1.2rem 1.5rem 1.2rem;
92
+ display: flex;
93
+ flex-direction: column;
94
+ gap: 2.5rem;
95
+ }
96
+ .hero {
97
+ background: linear-gradient(120deg, #dbeafe 0%, #f0fdf4 100%);
98
+ border-radius: var(--radius);
99
+ box-shadow: var(--shadow);
100
+ padding: 2.5rem 2rem 2rem 2rem;
101
+ display: flex;
102
+ flex-direction: column;
103
+ align-items: center;
104
+ text-align: center;
105
+ position: relative;
106
+ overflow: hidden;
107
+ }
108
+ .hero h2 {
109
+ font-size: 2rem;
110
+ font-weight: 700;
111
+ margin: 0 0 0.7rem 0;
112
+ color: var(--primary-dark);
113
+ }
114
+ .hero p {
115
+ font-size: 1.18rem;
116
+ color: var(--muted);
117
+ margin: 0 0 1.5rem 0;
118
+ }
119
+ .api-endpoint-box {
120
+ background: #fff;
121
+ border: 2px solid var(--primary);
122
+ border-radius: 12px;
123
+ padding: 1.3rem 1.2rem 1.3rem 1.2rem;
124
+ margin: 1.5rem 0 1.5rem 0;
125
+ text-align: center;
126
+ font-size: 1.18rem;
127
+ box-shadow: 0 2px 8px #174ea610;
128
+ position: relative;
129
+ display: flex;
130
+ flex-direction: column;
131
+ align-items: center;
132
+ gap: 0.5em;
133
+ }
134
+ .api-endpoint-url {
135
+ display: inline-block;
136
+ background: #f1f5f9;
137
+ color: var(--primary-dark);
138
+ font-family: 'Fira Mono', 'Consolas', monospace;
139
+ font-size: 1.15em;
140
+ padding: 0.3em 0.7em;
141
+ border-radius: 6px;
142
+ border: 1px solid #cbd5e1;
143
+ margin: 0.5em 0 0.5em 0;
144
+ word-break: break-all;
145
+ }
146
+ .copy-endpoint-btn {
147
+ position: absolute;
148
+ top: 16px;
149
+ right: 16px;
150
+ background: var(--primary);
151
+ color: #fff;
152
+ border: none;
153
+ border-radius: 4px;
154
+ padding: 0.3em 1em;
155
+ font-size: 1em;
156
+ cursor: pointer;
157
+ opacity: 0.85;
158
+ transition: background 0.2s, opacity 0.2s;
159
+ z-index: 2;
160
+ }
161
+ .copy-endpoint-btn:hover { background: var(--primary-dark); opacity: 1; }
162
+ .copy-endpoint-btn.copied { background: #388e3c; color: #fff; opacity: 1; }
163
+ .cta {
164
+ margin: 1.5rem auto 0 auto;
165
+ background: var(--primary);
166
+ color: #fff;
167
+ text-decoration: none;
168
+ font-weight: bold;
169
+ padding: 1rem 2.5rem;
170
+ border-radius: 8px;
171
+ font-size: 1.2rem;
172
+ transition: background 0.2s;
173
+ box-shadow: 0 2px 8px #2563eb20;
174
+ display: inline-block;
175
+ }
176
+ .cta:hover { background: var(--primary-dark); }
177
+ .features {
178
+ display: grid;
179
+ grid-template-columns: repeat(2, 1fr); /* 2 columns for 2x2 grid */
180
+ gap: 1.5rem;
181
+ margin: 2rem 0 0 0;
182
+ }
183
+ .feature-card {
184
+ background: var(--card-bg);
185
+ border-radius: var(--radius);
186
+ box-shadow: 0 1px 6px #0001;
187
+ padding: 1.2rem 1.3rem;
188
+ border: 1.5px solid var(--border);
189
+ display: flex;
190
+ flex-direction: column;
191
+ align-items: flex-start;
192
+ gap: 0.5em;
193
+ min-height: 120px;
194
+ position: relative;
195
+ transition: box-shadow 0.2s, border 0.2s;
196
+ }
197
+ .feature-card:hover {
198
+ box-shadow: 0 4px 16px #2563eb22;
199
+ border: 1.5px solid var(--primary);
200
+ }
201
+ .feature-card b {
202
+ font-size: 1.08em;
203
+ color: var(--primary-dark);
204
+ }
205
+ .examples-section {
206
+ margin-top: 2.5rem;
207
+ }
208
+ .examples-tabs {
209
+ display: flex;
210
+ gap: 0.5em;
211
+ margin-bottom: 1.2em;
212
+ border-bottom: 2px solid #e5e7eb;
213
+ }
214
+ .examples-tab {
215
+ background: none;
216
+ border: none;
217
+ font-size: 1.08em;
218
+ font-weight: 600;
219
+ color: var(--muted);
220
+ padding: 0.7em 1.2em 0.5em 1.2em;
221
+ cursor: pointer;
222
+ border-radius: 8px 8px 0 0;
223
+ transition: color 0.2s, background 0.2s;
224
+ }
225
+ .examples-tab.active {
226
+ color: var(--primary-dark);
227
+ background: #fff;
228
+ border-bottom: 2px solid var(--primary);
229
+ }
230
+ .example-panel { display: none; }
231
+ .example-panel.active { display: block; }
232
+ pre {
233
+ background: #f4f4f8;
234
+ border-radius: 8px;
235
+ padding: 1.1rem 1rem 1.1rem 1rem;
236
+ overflow-x: auto;
237
+ font-size: 0.98rem;
238
+ position: relative;
239
+ margin: 0.5em 0 0.5em 0;
240
+ }
241
+ code {
242
+ font-family: 'Fira Mono', 'Consolas', monospace;
243
+ font-size: 1em;
244
+ background: none;
245
+ color: #222;
246
+ }
247
+ .copy-btn {
248
+ position: absolute;
249
+ top: 10px;
250
+ right: 10px;
251
+ background: #e0e4ea;
252
+ border: none;
253
+ border-radius: 4px;
254
+ padding: 0.2em 0.7em;
255
+ font-size: 0.95em;
256
+ color: var(--primary-dark);
257
+ cursor: pointer;
258
+ opacity: 0.7;
259
+ transition: opacity 0.2s, background 0.2s;
260
+ z-index: 2;
261
+ }
262
+ .copy-btn:hover { opacity: 1; background: #c9d3e6; }
263
+ .copy-btn.copied { color: #388e3c; background: #d0f5dd; opacity: 1; }
264
+ .more-info-footer {
265
+ background: #f1f5f9;
266
+ border-top: 1.5px solid #e5e7eb;
267
+ margin-top: 3rem;
268
+ padding: 2rem 1rem 1.5rem 1rem;
269
+ border-radius: 0 0 var(--radius) var(--radius);
270
+ text-align: center;
271
+ color: var(--muted);
272
+ font-size: 1.08em;
273
+ }
274
+ .more-info-footer ul {
275
+ list-style: none;
276
+ padding: 0;
277
+ margin: 0.5em 0 0 0;
278
+ display: flex;
279
+ flex-wrap: wrap;
280
+ gap: 1.5em;
281
+ justify-content: center;
282
+ }
283
+ .more-info-footer a {
284
+ color: var(--primary-dark);
285
+ text-decoration: none;
286
+ font-weight: 500;
287
+ transition: color 0.2s;
288
+ }
289
+ .more-info-footer a:hover { color: var(--primary); }
290
+ @media (max-width: 700px) {
291
+ .header-inner { flex-direction: column; align-items: flex-start; gap: 1.2em; }
292
+ .header-title h1 { font-size: 1.5rem; }
293
+ main { padding: 1.2rem; }
294
+ .hero { padding: 1.2rem 0.7rem 1.2rem 0.7rem; }
295
+ }
296
+ </style>
297
+ <!-- Prism.js for syntax highlighting -->
298
+ <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/[email protected]/themes/prism.min.css">
299
+ <script src="https://cdn.jsdelivr.net/npm/[email protected]/prism.min.js"></script>
300
+ <script src="https://cdn.jsdelivr.net/npm/[email protected]/components/prism-javascript.min.js"></script>
301
+ <script>
302
+ function copyCode(btn) {
303
+ const pre = btn.parentElement;
304
+ const code = pre.querySelector('code');
305
+ if (!code) return;
306
+ const text = code.innerText;
307
+ navigator.clipboard.writeText(text).then(() => {
308
+ btn.textContent = 'Copied!';
309
+ btn.classList.add('copied');
310
+ setTimeout(() => {
311
+ btn.textContent = 'Copy';
312
+ btn.classList.remove('copied');
313
+ }, 1200);
314
+ });
315
+ }
316
+ function copyEndpointUrl(btn) {
317
+ const url = document.getElementById('api-endpoint-url').innerText;
318
+ navigator.clipboard.writeText(url).then(() => {
319
+ btn.textContent = 'Copied!';
320
+ btn.classList.add('copied');
321
+ setTimeout(() => {
322
+ btn.textContent = 'Copy';
323
+ btn.classList.remove('copied');
324
+ }, 1200);
325
+ });
326
+ }
327
+ // Tabs for examples
328
+ function showExampleTab(idx) {
329
+ document.querySelectorAll('.examples-tab').forEach((tab, i) => {
330
+ tab.classList.toggle('active', i === idx);
331
+ });
332
+ document.querySelectorAll('.example-panel').forEach((panel, i) => {
333
+ panel.classList.toggle('active', i === idx);
334
+ });
335
+ }
336
+ window.addEventListener('DOMContentLoaded', function() {
337
+ showExampleTab(0);
338
+ document.querySelectorAll('.examples-tab').forEach((tab, i) => {
339
+ tab.addEventListener('click', () => showExampleTab(i));
340
+ });
341
+ });
342
+ </script>
343
+ </head>
344
+ <body>
345
+ <header class="sticky-header">
346
+ <div class="header-inner">
347
+ <div class="header-title">
348
+ <img src="https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo.svg" alt="Hugging Face Logo" style="height:4.0rem;width:4.0rem;display:block;"/>
349
+ <h1>responses.js</h1>
350
+ </div>
351
+ <a href="https://github.com/huggingface/responses.js" target="_blank" aria-label="GitHub Repository" class="github-btn">
352
+ <svg height="20" width="20" viewBox="0 0 16 16" fill="currentColor" style="display: block;"><path d="M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.19 0 .21.15.46.55.38A8.013 8.013 0 0 0 16 8c0-4.42-3.58-8-8-8z"/></svg>
353
+ GitHub
354
+ </a>
355
+ </div>
356
+ </header>
357
+ <main>
358
+ <section class="hero">
359
+ <h2>OpenAI-compatible Responses API</h2>
360
+ <p><b>responses.js</b> is an open-source, lightweight server implementing OpenAI's Responses API, built on top of Chat Completions and powered by Hugging Face Inference Providers.</p>
361
+ <div class="api-endpoint-box">
362
+ <button class="copy-endpoint-btn" onclick="copyEndpointUrl(this)">Copy</button>
363
+ <div><b>API Endpoint:</b></div>
364
+ <span class="api-endpoint-url" id="api-endpoint-url">${baseUrl}/responses</span>
365
+ <div style="font-size:0.98em; color:#333; margin-top:0.5em;">Get started by sending requests to this endpoint</div>
366
+ </div>
367
+ <a class="cta" href="https://github.com/huggingface/responses.js" target="_blank">View on GitHub</a>
368
+ </section>
369
+ <section>
370
+ <div class="features">
371
+ <div class="feature-card">
372
+ <b>OpenAI-compatible</b><br>Stateless implementation of the <a href="https://platform.openai.com/docs/api-reference/responses" target="_blank">Responses API</a>
373
+ </div>
374
+ <div class="feature-card">
375
+ <b>Inference Providers</b><br>Powered by Hugging Face Inference Providers
376
+ </div>
377
+ <div class="feature-card">
378
+ <b>Multi-modal</b><br>Text and image input support
379
+ </div>
380
+ <div class="feature-card">
381
+ <b>Streaming &amp; Structured Output</b><br>Supports streaming, JSON schema, and function calling
382
+ </div>
383
+ </div>
384
+ </section>
385
+ <section class="examples-section">
386
+ <h2 style="color:var(--primary-dark);margin-bottom:1.2em;">Examples</h2>
387
+ <div class="examples-tabs">
388
+ <button class="examples-tab active" type="button">Text</button>
389
+ <button class="examples-tab" type="button">Text + Image Input</button>
390
+ <button class="examples-tab" type="button">Multi-turn</button>
391
+ <button class="examples-tab" type="button">Streaming</button>
392
+ <button class="examples-tab" type="button">Function Calling</button>
393
+ <button class="examples-tab" type="button">Structured Output</button>
394
+ </div>
395
+ <div class="example-panel active">
396
+ <pre><button class="copy-btn" onclick="copyCode(this)">Copy</button><code class="language-js">import OpenAI from "openai";
397
+
398
+ const openai = new OpenAI({
399
+ baseURL: "http://localhost:3000/v1",
400
+ apiKey: "YOUR_API_KEY_HERE", // visit https://huggingface.co/settings/tokens
401
+ });
402
+
403
+ const response = await openai.responses.create({
404
+ model: "Qwen/Qwen2.5-VL-7B-Instruct",
405
+ instructions: "You are a helpful assistant.",
406
+ input: "Tell me a three sentence bedtime story about a unicorn.",
407
+ });
408
+
409
+ console.log(response);
410
+ console.log(response.output_text);</code></pre>
411
+ </div>
412
+ <div class="example-panel">
413
+ <pre><button class="copy-btn" onclick="copyCode(this)">Copy</button><code class="language-js">import OpenAI from "openai";
414
+
415
+ const openai = new OpenAI({
416
+ baseURL: "${baseUrl}",
417
+ apiKey: "YOUR_API_KEY_HERE", // visit https://huggingface.co/settings/tokens
418
+ });
419
+
420
+ const response = await openai.responses.create({
421
+ model: "Qwen/Qwen2.5-VL-7B-Instruct",
422
+ input: [
423
+ {
424
+ role: "user",
425
+ content: [
426
+ { type: "input_text", text: "what is in this image?" },
427
+ {
428
+ type: "input_image",
429
+ image_url: "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
430
+ }
431
+ ]
432
+ }
433
+ ]
434
+ });
435
+
436
+ console.log(response);
437
+ console.log(response.output_text);</code></pre>
438
+ </div>
439
+ <div class="example-panel">
440
+ <pre><button class="copy-btn" onclick="copyCode(this)">Copy</button><code class="language-js">import OpenAI from "openai";
441
+
442
+ const openai = new OpenAI({
443
+ baseURL: "http://localhost:3000/v1",
444
+ apiKey: "YOUR_API_KEY_HERE", // visit https://huggingface.co/settings/tokens
445
+ });
446
+ const response = await openai.responses.create({
447
+ model: "Qwen/Qwen2.5-VL-7B-Instruct",
448
+ input: [
449
+ {
450
+ role: "developer",
451
+ content: "Talk like a pirate.",
452
+ },
453
+ {
454
+ role: "user",
455
+ content: "Are semicolons optional in JavaScript?",
456
+ },
457
+ ],
458
+ });
459
+
460
+ console.log(response);
461
+ console.log(response.output_text);</code></pre>
462
+ </div>
463
+ <div class="example-panel">
464
+ <pre><button class="copy-btn" onclick="copyCode(this)">Copy</button><code class="language-js">import { OpenAI } from "openai";
465
+ const openai = new OpenAI({
466
+ baseURL: "http://localhost:3000/v1",
467
+ apiKey: "YOUR_API_KEY_HERE", // visit https://huggingface.co/settings/tokens
468
+ });
469
+
470
+ const stream = await openai.responses.create({
471
+ model: "hyperbolic@Qwen/Qwen2.5-VL-7B-Instruct",
472
+ input: [
473
+ {
474
+ role: "user",
475
+ content: "Say 'double bubble bath' ten times fast.",
476
+ },
477
+ ],
478
+ stream: true,
479
+ });
480
+
481
+ for await (const event of stream) {
482
+ console.log(event);
483
+ }</code></pre>
484
+ </div>
485
+ <div class="example-panel">
486
+ <pre><button class="copy-btn" onclick="copyCode(this)">Copy</button><code class="language-js">import OpenAI from "openai";
487
+
488
+ const openai = new OpenAI({
489
+ baseURL: "${baseUrl}",
490
+ apiKey: "YOUR_API_KEY_HERE", // visit https://huggingface.co/settings/tokens
491
+ });
492
+
493
+ const tools = [
494
+ {
495
+ type: "function",
496
+ name: "get_current_weather",
497
+ description: "Get the current weather in a given location",
498
+ parameters: {
499
+ type: "object",
500
+ properties: {
501
+ location: { type: "string", description: "The city and state, e.g. San Francisco, CA" },
502
+ unit: { type: "string", enum: ["celsius", "fahrenheit"] }
503
+ },
504
+ required: ["location", "unit"]
505
+ }
506
+ }
507
+ ];
508
+
509
+ const response = await openai.responses.create({
510
+ model: "cerebras@meta-llama/Llama-3.3-70B-Instruct",
511
+ tools: tools,
512
+ input: "What is the weather like in Boston today?",
513
+ tool_choice: "auto"
514
+ });
515
+
516
+ console.log(response);</code></pre>
517
+ </div>
518
+ <div class="example-panel">
519
+ <pre><button class="copy-btn" onclick="copyCode(this)">Copy</button><code class="language-js">import OpenAI from "openai";
520
+ import { zodTextFormat } from "openai/helpers/zod";
521
+ import { z } from "zod";
522
+
523
+ const openai = new OpenAI({
524
+ baseURL: "http://localhost:3000/v1",
525
+ apiKey: "YOUR_API_KEY_HERE", // visit https://huggingface.co/settings/tokens
526
+ });
527
+
528
+ const Step = z.object({
529
+ explanation: z.string(),
530
+ output: z.string(),
531
+ });
532
+
533
+ const MathReasoning = z.object({
534
+ steps: z.array(Step),
535
+ final_answer: z.string(),
536
+ });
537
+
538
+ const response = await openai.responses.parse({
539
+ model: "novita@meta-llama/Meta-Llama-3-70B-Instruct",
540
+ input: [
541
+ {
542
+ role: "system",
543
+ content: "You are a helpful math tutor. Guide the user through the solution step by step.",
544
+ },
545
+ { role: "user", content: "how can I solve 8x + 7 = -23" },
546
+ ],
547
+ text: {
548
+ format: zodTextFormat(MathReasoning, "math_reasoning"),
549
+ },
550
+ });
551
+
552
+ console.log(response.output_parsed);</code></pre>
553
+ </div>
554
+ </section>
555
+ <footer class="more-info-footer">
556
+ <div style="font-weight:600; color:var(--primary-dark); font-size:1.13em; margin-bottom:0.5em;">More Info</div>
557
+ <ul>
558
+ <li><a href="https://github.com/huggingface/responses.js" target="_blank">GitHub Repository</a></li>
559
+ <li><a href="https://platform.openai.com/docs/api-reference/responses" target="_blank">OpenAI Responses API Docs</a></li>
560
+ <li><a href="https://huggingface.co/docs/inference-providers/index" target="_blank">Hugging Face Inference Providers</a></li>
561
+ </ul>
562
+ </footer>
563
+ </main>
564
+ </body>
565
+ </html>
566
+ `);
567
+ }
src/routes/responses.ts ADDED
@@ -0,0 +1,659 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { type Response as ExpressResponse } from "express";
2
+ import { type ValidatedRequest } from "../middleware/validation.js";
3
+ import type { CreateResponseParams, McpServerParams, McpApprovalRequestParams } from "../schemas.js";
4
+ import { generateUniqueId } from "../lib/generateUniqueId.js";
5
+ import { InferenceClient } from "@huggingface/inference";
6
+ import type {
7
+ ChatCompletionInputMessage,
8
+ ChatCompletionInputMessageChunkType,
9
+ ChatCompletionInput,
10
+ } from "@huggingface/tasks";
11
+
12
+ import type {
13
+ Response,
14
+ ResponseStreamEvent,
15
+ ResponseContentPartAddedEvent,
16
+ ResponseOutputMessage,
17
+ ResponseFunctionToolCall,
18
+ ResponseOutputItem,
19
+ } from "openai/resources/responses/responses";
20
+ import type {
21
+ ChatCompletionInputTool,
22
+ ChatCompletionStreamOutputUsage,
23
+ } from "@huggingface/tasks/dist/commonjs/tasks/chat-completion/inference.js";
24
+ import { callMcpTool, connectMcpServer } from "../mcp.js";
25
+
26
+ class StreamingError extends Error {
27
+ constructor(message: string) {
28
+ super(message);
29
+ this.name = "StreamingError";
30
+ }
31
+ }
32
+
33
/**
 * POST handler implementing OpenAI's Responses API on top of Chat Completions.
 *
 * High-level flow:
 *   1. Extract the caller's bearer token and build an `InferenceClient` with it.
 *   2. Flatten the Responses-style `input` into Chat Completions `messages`.
 *   3. Resolve tools: plain function tools plus tools listed from MCP servers.
 *   4. Short-circuit: if the input contains an approved MCP tool call, execute
 *      it directly and return — the LLM is not called at all.
 *   5. Otherwise call the LLM, either in streaming mode (SSE events) or
 *      one-shot, and translate the Chat Completions output back into
 *      Response output items.
 *
 * The route is stateless: conversation history must be replayed by the client
 * in `input` on every request.
 */
export const postCreateResponse = async (
	req: ValidatedRequest<CreateResponseParams>,
	res: ExpressResponse
): Promise<void> => {
	// Assumes an "Authorization: Bearer <token>" header; the token is forwarded
	// as-is to Hugging Face Inference. TODO confirm scheme is always "Bearer".
	const apiKey = req.headers.authorization?.split(" ")[1];

	if (!apiKey) {
		res.status(401).json({
			success: false,
			error: "Unauthorized",
		});
		return;
	}

	const client = new InferenceClient(apiKey);
	// Optional `instructions` becomes the system message.
	const messages: ChatCompletionInputMessage[] = req.body.instructions
		? [{ role: "system", content: req.body.instructions }]
		: [];

	// Flatten the Responses-style `input` items into chat messages. Several
	// item types have no chat-completion equivalent and are encoded as
	// assistant messages with a descriptive `name` (see "hacky" notes below).
	if (Array.isArray(req.body.input)) {
		messages.push(
			...req.body.input
				.map((item) => {
					switch (item.type) {
						case "function_call":
							return {
								// hacky but best fit for now: encode the call id/name in `name`
								role: "assistant",
								name: `function_call ${item.name} ${item.call_id}`,
								content: item.arguments,
							};
						case "function_call_output":
							return {
								// hacky but best fit for now
								role: "assistant",
								name: `function_call_output ${item.call_id}`,
								content: item.output,
							};
						case "message":
							return {
								role: item.role,
								content:
									typeof item.content === "string"
										? item.content
										: item.content
												.map((content) => {
													switch (content.type) {
														case "input_image":
															return {
																type: "image_url" as ChatCompletionInputMessageChunkType,
																image_url: {
																	url: content.image_url,
																},
															};
														case "output_text":
															// Empty text chunks are dropped (mapped to undefined).
															return content.text
																? {
																		type: "text" as ChatCompletionInputMessageChunkType,
																		text: content.text,
																	}
																: undefined;
														case "refusal":
															// Refusals are not forwarded to the LLM.
															return undefined;
														case "input_text":
															return {
																type: "text" as ChatCompletionInputMessageChunkType,
																text: content.text,
															};
													}
												})
												.filter((item) => item !== undefined),
							};
						case "mcp_list_tools": {
							// Hacky: empty content, will be dropped by the filter below.
							// The actual tool list is consumed in the tools section.
							return {
								role: "assistant",
								name: "mcp_list_tools",
								content: "",
							};
						}
						case "mcp_approval_request": {
							return {
								role: "assistant",
								name: "mcp_approval_request",
								content: `MCP approval request (${item.id}). Server: '${item.server_label}'. Tool: '${item.name}'. Arguments: '${item.arguments}'.`,
							};
						}
						case "mcp_approval_response": {
							return {
								role: "assistant",
								name: "mcp_approval_response",
								content: `MCP approval response (${item.id}). Approved: ${item.approve}. Reason: ${item.reason}.`,
							};
						}
					}
				})
				// Drop messages whose content is empty (e.g. the mcp_list_tools
				// placeholder above, or a message whose chunks all filtered out).
				.filter((message) => message.content?.length !== 0)
		);
	} else {
		// Plain string input => single user message.
		messages.push({ role: "user", content: req.body.input });
	}

	// Output items accumulated for the final Response object (mcp_list_tools
	// items may be pushed here before the LLM is called).
	const output: ResponseOutputItem[] = [];
	let tools: ChatCompletionInputTool[] | undefined = [];
	// Maps each MCP tool name to the server declaration it came from, so tool
	// calls and approval responses can be routed back to the right server.
	const mcpToolsMapping: Record<string, McpServerParams> = {};
	if (req.body.tools) {
		await Promise.all(
			req.body.tools.map(async (tool) => {
				switch (tool.type) {
					case "function":
						tools?.push({
							type: tool.type,
							function: {
								name: tool.name,
								parameters: tool.parameters,
								description: tool.description,
								strict: tool.strict,
							},
						});
						break;
					case "mcp": {
						let mcpListTools: ResponseOutputItem.McpListTools | undefined;

						// If MCP list tools is already in the input, use it (avoids a
						// round-trip to the MCP server on follow-up requests).
						if (Array.isArray(req.body.input)) {
							for (const item of req.body.input) {
								if (item.type === "mcp_list_tools" && item.server_label === tool.server_label) {
									mcpListTools = item;
									console.debug(`Using MCP list tools from input for server '${tool.server_label}'`);
									break;
								}
							}
						}
						// Otherwise, list tools from MCP server
						if (!mcpListTools) {
							try {
								const mcp = await connectMcpServer(tool);
								console.debug("Listing MCP tools from server");
								const mcpTools = await mcp.listTools();
								console.debug(`Fetched ${mcpTools.tools.length} tools from MCP server '${tool.server_label}'`);

								// All tools are returned in the Response object
								mcpListTools = {
									id: generateUniqueId("mcp_list_tools"),
									type: "mcp_list_tools",
									server_label: tool.server_label,
									tools: mcpTools.tools.map((mcpTool) => ({
										input_schema: mcpTool.inputSchema,
										name: mcpTool.name,
										annotations: mcpTool.annotations,
										description: mcpTool.description,
									})),
								};
							} catch (error) {
								// Listing failure is non-fatal: an empty mcp_list_tools item
								// carrying the error message is returned to the client.
								console.error("Error listing tools from MCP server", error);
								mcpListTools = {
									id: generateUniqueId("mcp_list_tools"),
									type: "mcp_list_tools",
									server_label: tool.server_label,
									tools: [],
									error: `Failed to list tools from MCP server '${tool.server_label}': ${error instanceof Error ? error.message : "Unknown error"}`,
								};
							}
							// Only freshly-fetched lists are echoed in the output;
							// lists replayed from `input` are not duplicated.
							output.push(mcpListTools);
						}

						// Only allowed tools are forwarded to the LLM. An empty
						// allow-list means "no restriction" (all tools forwarded).
						const allowedTools = tool.allowed_tools
							? Array.isArray(tool.allowed_tools)
								? tool.allowed_tools
								: tool.allowed_tools.tool_names
							: [];
						if (mcpListTools?.tools) {
							for (const mcpTool of mcpListTools.tools) {
								if (allowedTools.length === 0 || allowedTools.includes(mcpTool.name)) {
									tools?.push({
										type: "function" as const,
										function: {
											name: mcpTool.name,
											parameters: mcpTool.input_schema,
											description: mcpTool.description ?? undefined,
										},
									});
								}
								// Mapped even when filtered out, so approval responses for
								// non-forwarded tools can still be routed.
								mcpToolsMapping[mcpTool.name] = tool;
							}
							// NOTE(review): this `break` sits inside the `if` — when
							// `mcpListTools?.tools` is falsy, execution falls out of the
							// case block instead; harmless while "mcp" is the last case.
							break;
						}
					}
				}
			})
		);
	}

	// Chat Completions expects `tools: undefined`, not an empty array.
	if (tools.length === 0) {
		tools = undefined;
	}

	// Model may be prefixed with a provider: "<provider>@<model>".
	const model = req.body.model.includes("@") ? req.body.model.split("@")[1] : req.body.model;
	const provider = req.body.model.includes("@") ? req.body.model.split("@")[0] : undefined;

	// Translate the Responses-API request into a Chat Completions payload.
	const payload: ChatCompletionInput = {
		// main params
		model: model,
		provider: provider,
		messages: messages,
		stream: req.body.stream,
		// options
		max_tokens: req.body.max_output_tokens === null ? undefined : req.body.max_output_tokens,
		response_format: req.body.text?.format
			? {
					type: req.body.text.format.type,
					json_schema:
						req.body.text.format.type === "json_schema"
							? {
									description: req.body.text.format.description,
									name: req.body.text.format.name,
									schema: req.body.text.format.schema,
									strict: req.body.text.format.strict,
								}
							: undefined,
				}
			: undefined,
		temperature: req.body.temperature,
		tool_choice:
			typeof req.body.tool_choice === "string"
				? req.body.tool_choice
				: req.body.tool_choice
					? {
							type: "function",
							function: {
								name: req.body.tool_choice.name,
							},
						}
					: undefined,
		tools,
		top_p: req.body.top_p,
	};

	// Skeleton of the Response returned to the client; mutated in place as
	// output items / usage / status become known.
	const responseObject: Omit<Response, "incomplete_details" | "output_text" | "parallel_tool_calls"> = {
		created_at: Math.floor(new Date().getTime() / 1000),
		error: null,
		id: generateUniqueId("resp"),
		instructions: req.body.instructions,
		max_output_tokens: req.body.max_output_tokens,
		metadata: req.body.metadata,
		model: req.body.model,
		object: "response",
		output,
		// parallel_tool_calls: req.body.parallel_tool_calls,
		status: "in_progress",
		text: req.body.text,
		tool_choice: req.body.tool_choice ?? "auto",
		tools: req.body.tools ?? [],
		temperature: req.body.temperature,
		top_p: req.body.top_p,
		usage: {
			input_tokens: 0,
			input_tokens_details: { cached_tokens: 0 },
			output_tokens: 0,
			output_tokens_details: { reasoning_tokens: 0 },
			total_tokens: 0,
		},
	};

	// MCP approval requests => do not call LLM at all. The approved tool call
	// is executed directly and the response returned immediately.
	if (Array.isArray(req.body.input)) {
		for (const item of req.body.input) {
			// Note: currently supporting only 1 mcp_approval_response per request
			if (item.type === "mcp_approval_response" && item.approve) {
				const approvalRequest = req.body.input.find(
					(i) => i.type === "mcp_approval_request" && i.id === item.approval_request_id
				) as McpApprovalRequestParams | undefined;
				console.log("approvalRequest", approvalRequest);
				if (approvalRequest) {
					// NOTE(review): assumes the approved tool still exists in
					// mcpToolsMapping (i.e. its MCP server was re-declared in
					// `tools` on this request) — TODO confirm.
					const toolParams = mcpToolsMapping[approvalRequest.name];
					responseObject.output.push(
						await callMcpTool(toolParams, approvalRequest.name, toolParams.server_label, approvalRequest.arguments)
					);
					responseObject.status = "completed";
					res.json(responseObject);
					return;
				} else {
					responseObject.status = "failed";
					const errorMessage = `MCP approval response for approval request '${item.approval_request_id}' not found`;
					console.error(errorMessage);
					responseObject.error = {
						code: "server_error",
						message: errorMessage,
					};
					res.json(responseObject);
					return;
				}
			}
		}
	}

	// Streaming mode: emit Responses-API SSE events translated from the
	// Chat Completions delta stream. Only a single output item (one text
	// message OR one function call) is supported per streamed response.
	if (req.body.stream) {
		res.setHeader("Content-Type", "text/event-stream");
		res.setHeader("Connection", "keep-alive");
		let sequenceNumber = 0;

		// Emit events in sequence (SSE "data:" frames).
		const emitEvent = (event: ResponseStreamEvent) => {
			res.write(`data: ${JSON.stringify(event)}\n\n`);
		};

		try {
			// Response created event
			emitEvent({
				type: "response.created",
				response: responseObject as Response,
				sequence_number: sequenceNumber++,
			});

			// Response in progress event
			emitEvent({
				type: "response.in_progress",
				response: responseObject as Response,
				sequence_number: sequenceNumber++,
			});

			const stream = client.chatCompletionStream(payload);
			// Usage is only reported on (typically the last) chunks that carry it.
			let usage: ChatCompletionStreamOutputUsage | undefined;

			for await (const chunk of stream) {
				if (chunk.usage) {
					usage = chunk.usage;
				}

				if (chunk.choices[0].delta.content) {
					// NOTE(review): `output.length === 0` is used as "first delta"
					// marker, but `output` may already contain mcp_list_tools items
					// pushed above — that case would hit the StreamingError below.
					// TODO confirm streaming + MCP tools is intended to be supported.
					if (responseObject.output.length === 0) {
						const outputObject: ResponseOutputMessage = {
							id: generateUniqueId("msg"),
							type: "message",
							role: "assistant",
							status: "in_progress",
							content: [],
						};
						responseObject.output = [outputObject];

						// Response output item added event
						emitEvent({
							type: "response.output_item.added",
							output_index: 0,
							item: outputObject,
							sequence_number: sequenceNumber++,
						});
					}

					const outputObject = responseObject.output.at(-1);
					if (!outputObject || outputObject.type !== "message") {
						throw new StreamingError("Not implemented: only single output item type is supported in streaming mode.");
					}

					if (outputObject.content.length === 0) {
						// Response content part added event
						const contentPart: ResponseContentPartAddedEvent["part"] = {
							type: "output_text",
							text: "",
							annotations: [],
						};
						outputObject.content.push(contentPart);

						emitEvent({
							type: "response.content_part.added",
							item_id: outputObject.id,
							output_index: 0,
							content_index: 0,
							part: contentPart,
							sequence_number: sequenceNumber++,
						});
					}

					const contentPart = outputObject.content.at(-1);
					if (!contentPart || contentPart.type !== "output_text") {
						throw new StreamingError("Not implemented: only output_text is supported in streaming mode.");
					}

					// Add text delta (accumulated locally so the final "done" event
					// can carry the full text).
					contentPart.text += chunk.choices[0].delta.content;
					emitEvent({
						type: "response.output_text.delta",
						item_id: outputObject.id,
						output_index: 0,
						content_index: 0,
						delta: chunk.choices[0].delta.content,
						sequence_number: sequenceNumber++,
					});
				} else if (chunk.choices[0].delta.tool_calls && chunk.choices[0].delta.tool_calls.length > 0) {
					if (chunk.choices[0].delta.tool_calls.length > 1) {
						throw new StreamingError("Not implemented: only single tool call is supported in streaming mode.");
					}

					if (responseObject.output.length === 0) {
						// First tool-call chunk must carry the function name.
						if (!chunk.choices[0].delta.tool_calls[0].function.name) {
							throw new StreamingError("Tool call function name is required.");
						}

						const outputObject: ResponseFunctionToolCall = {
							type: "function_call",
							id: generateUniqueId("fc"),
							call_id: chunk.choices[0].delta.tool_calls[0].id,
							name: chunk.choices[0].delta.tool_calls[0].function.name,
							arguments: "",
						};
						responseObject.output = [outputObject];

						// Response output item added event
						emitEvent({
							type: "response.output_item.added",
							output_index: 0,
							item: outputObject,
							sequence_number: sequenceNumber++,
						});
					}

					const outputObject = responseObject.output.at(-1);
					if (!outputObject || !outputObject.id || outputObject.type !== "function_call") {
						throw new StreamingError("Not implemented: can only support single output item type in streaming mode.");
					}

					// Accumulate the JSON arguments string across chunks.
					outputObject.arguments += chunk.choices[0].delta.tool_calls[0].function.arguments;
					emitEvent({
						type: "response.function_call_arguments.delta",
						item_id: outputObject.id,
						output_index: 0,
						delta: chunk.choices[0].delta.tool_calls[0].function.arguments,
						sequence_number: sequenceNumber++,
					});
				}
			}

			// Stream exhausted: close out the single in-progress output item.
			const lastOutputItem = responseObject.output.at(-1);

			if (lastOutputItem) {
				if (lastOutputItem?.type === "message") {
					const contentPart = lastOutputItem.content.at(-1);
					if (contentPart?.type === "output_text") {
						emitEvent({
							type: "response.output_text.done",
							item_id: lastOutputItem.id,
							output_index: responseObject.output.length - 1,
							content_index: lastOutputItem.content.length - 1,
							text: contentPart.text,
							sequence_number: sequenceNumber++,
						});

						emitEvent({
							type: "response.content_part.done",
							item_id: lastOutputItem.id,
							output_index: responseObject.output.length - 1,
							content_index: lastOutputItem.content.length - 1,
							part: contentPart,
							sequence_number: sequenceNumber++,
						});
					} else {
						throw new StreamingError("Not implemented: only output_text is supported in streaming mode.");
					}

					// Response output item done event
					lastOutputItem.status = "completed";
					emitEvent({
						type: "response.output_item.done",
						output_index: responseObject.output.length - 1,
						item: lastOutputItem,
						sequence_number: sequenceNumber++,
					});
				} else if (lastOutputItem?.type === "function_call") {
					if (!lastOutputItem.id) {
						throw new StreamingError("Function call id is required.");
					}

					emitEvent({
						type: "response.function_call_arguments.done",
						item_id: lastOutputItem.id,
						output_index: responseObject.output.length - 1,
						arguments: lastOutputItem.arguments,
						sequence_number: sequenceNumber++,
					});

					lastOutputItem.status = "completed";
					emitEvent({
						type: "response.output_item.done",
						output_index: responseObject.output.length - 1,
						item: lastOutputItem,
						sequence_number: sequenceNumber++,
					});
				} else {
					throw new StreamingError("Not implemented: only message output is supported in streaming mode.");
				}
			}

			// Response completed event (with real usage numbers when provided).
			responseObject.status = "completed";
			if (usage) {
				responseObject.usage = {
					input_tokens: usage.prompt_tokens,
					input_tokens_details: { cached_tokens: 0 },
					output_tokens: usage.completion_tokens,
					output_tokens_details: { reasoning_tokens: 0 },
					total_tokens: usage.total_tokens,
				};
			}
			emitEvent({
				type: "response.completed",
				response: responseObject as Response,
				sequence_number: sequenceNumber++,
			});
		} catch (streamError) {
			// Errors after the SSE stream has started cannot change the HTTP
			// status; they are surfaced as a "response.failed" event instead.
			console.error("Error in streaming chat completion:", streamError);

			let message = "An error occurred while streaming from inference server.";
			if (streamError instanceof StreamingError) {
				message = streamError.message;
			} else if (
				typeof streamError === "object" &&
				streamError &&
				"message" in streamError &&
				typeof streamError.message === "string"
			) {
				message = streamError.message;
			}
			responseObject.status = "failed";
			responseObject.error = {
				code: "server_error",
				message,
			};
			emitEvent({
				type: "response.failed",
				response: responseObject as Response,
				sequence_number: sequenceNumber++,
			});
		}
		res.end();
		return;
	}

	// Non-streaming mode: one chat completion call, then translate every
	// choice's content / tool calls into Response output items.
	try {
		const chatCompletionResponse = await client.chatCompletion(payload);

		responseObject.status = "completed";
		for (const choice of chatCompletionResponse.choices) {
			if (choice.message.content) {
				responseObject.output.push({
					id: generateUniqueId("msg"),
					type: "message",
					role: "assistant",
					status: "completed",
					content: [
						{
							type: "output_text",
							text: choice.message.content,
							annotations: [],
						},
					],
				});
			}
			if (choice.message.tool_calls) {
				for (const toolCall of choice.message.tool_calls) {
					if (toolCall.function.name in mcpToolsMapping) {
						// MCP tool call: either execute directly or emit an approval
						// request, depending on the server's require_approval policy.
						const toolParams = mcpToolsMapping[toolCall.function.name];

						// Check if approval is required
						const approvalRequired =
							toolParams.require_approval === "always"
								? true
								: toolParams.require_approval === "never"
									? false
									: toolParams.require_approval.always?.tool_names?.includes(toolCall.function.name)
										? true
										: toolParams.require_approval.never?.tool_names?.includes(toolCall.function.name)
											? false
											: true; // behavior is undefined in specs, let's default to requiring approval

						if (approvalRequired) {
							console.log(`Requesting approval for MCP tool '${toolCall.function.name}'`);
							responseObject.output.push({
								type: "mcp_approval_request",
								id: generateUniqueId("mcp_approval_request"),
								name: toolCall.function.name,
								server_label: toolParams.server_label,
								arguments: toolCall.function.arguments,
							});
						} else {
							responseObject.output.push(
								await callMcpTool(
									toolParams,
									toolCall.function.name,
									toolParams.server_label,
									toolCall.function.arguments
								)
							);
						}
					} else {
						// Plain function tool call: returned to the client to execute.
						responseObject.output.push({
							type: "function_call",
							id: generateUniqueId("fc"),
							call_id: toolCall.id,
							name: toolCall.function.name,
							arguments: toolCall.function.arguments,
							status: "completed",
						});
					}
				}
			}
		}

		responseObject.usage = {
			input_tokens: chatCompletionResponse.usage.prompt_tokens,
			input_tokens_details: { cached_tokens: 0 },
			output_tokens: chatCompletionResponse.usage.completion_tokens,
			output_tokens_details: { reasoning_tokens: 0 },
			total_tokens: chatCompletionResponse.usage.total_tokens,
		};

		res.json(responseObject);
	} catch (error) {
		// Non-streaming failures can still use a proper HTTP error status.
		console.error(error);
		res.status(500).json({
			success: false,
			error: error instanceof Error ? error.message : "Unknown error",
		});
	}
};
src/schemas.ts ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { z } from "zod";
2
+
3
+ /**
4
+ * https://platform.openai.com/docs/api-reference/responses/create
5
+ * commented out properties are not supported by the server
6
+ */
7
+
8
+ const inputContentSchema = z.array(
9
+ z.union([
10
+ z.object({
11
+ type: z.literal("input_text"),
12
+ text: z.string(),
13
+ }),
14
+ z.object({
15
+ type: z.literal("input_image"),
16
+ // file_id: z.string().nullable().default(null),
17
+ image_url: z.string(),
18
+ // detail: z.enum(["auto", "low", "high"]).default("auto"),
19
+ }),
20
+ // z.object({
21
+ // type: z.literal("input_file"),
22
+ // file_data: z.string().nullable().default(null),
23
+ // file_id: z.string().nullable().default(null),
24
+ // filename: z.string().nullable().default(null),
25
+ // }),
26
+ ])
27
+ );
28
+
29
+ const mcpServerParamsSchema = z.object({
30
+ server_label: z.string(),
31
+ server_url: z.string(),
32
+ type: z.literal("mcp"),
33
+ allowed_tools: z
34
+ .union([
35
+ z.array(z.string()),
36
+ z.object({
37
+ tool_names: z.array(z.string()),
38
+ }),
39
+ ])
40
+ .nullable()
41
+ .default(null),
42
+ headers: z.record(z.string()).nullable().default(null),
43
+ require_approval: z
44
+ .union([
45
+ z.enum(["always", "never"]),
46
+ z.object({
47
+ always: z.object({ tool_names: z.array(z.string()).optional() }).optional(),
48
+ never: z.object({ tool_names: z.array(z.string()).optional() }).optional(),
49
+ }),
50
+ ])
51
+ .default("always"),
52
+ });
53
+
54
+ const mcpApprovalRequestParamsSchema = z.object({
55
+ type: z.literal("mcp_approval_request"),
56
+ id: z.string(),
57
+ server_label: z.string(),
58
+ name: z.string(),
59
+ arguments: z.string(),
60
+ });
61
+ const mcpApprovalResponseParamsSchema = z.object({
62
+ type: z.literal("mcp_approval_response"),
63
+ id: z.string().optional(),
64
+ approval_request_id: z.string(),
65
+ approve: z.boolean(),
66
+ reason: z.string().optional(),
67
+ });
68
+
69
+ export const createResponseParamsSchema = z.object({
70
+ // background: z.boolean().default(false),
71
+ // include:
72
+ input: z.union([
73
+ z.string(),
74
+ z.array(
75
+ z.union([
76
+ z.object({
77
+ content: z.union([z.string(), inputContentSchema]),
78
+ role: z.enum(["user", "assistant", "system", "developer"]),
79
+ type: z.enum(["message"]).default("message"),
80
+ }),
81
+ z.object({
82
+ role: z.enum(["user", "system", "developer"]),
83
+ status: z.enum(["in_progress", "completed", "incomplete"]).nullable().default(null),
84
+ content: inputContentSchema,
85
+ type: z.enum(["message"]).default("message"),
86
+ }),
87
+ z.object({
88
+ id: z.string().optional(),
89
+ role: z.enum(["assistant"]),
90
+ status: z.enum(["in_progress", "completed", "incomplete"]).optional(),
91
+ type: z.enum(["message"]).default("message"),
92
+ content: z.array(
93
+ z.union([
94
+ z.object({
95
+ type: z.literal("output_text"),
96
+ text: z.string(),
97
+ annotations: z.array(z.object({})).optional(), // TODO: incomplete
98
+ logprobs: z.array(z.object({})).optional(), // TODO: incomplete
99
+ }),
100
+ z.object({
101
+ type: z.literal("refusal"),
102
+ refusal: z.string(),
103
+ }),
104
+ // TODO: much more objects: File search tool call, Computer tool call, Computer tool call output, Web search tool call, Function tool call, Function tool call output, Reasoning, Image generation call, Code interpreter tool call, Local shell call, Local shell call output, MCP list tools, MCP approval request, MCP approval response, MCP tool call
105
+ ])
106
+ ),
107
+ }),
108
+ z.object({
109
+ type: z.literal("function_call"),
110
+ id: z.string().optional(),
111
+ call_id: z.string(),
112
+ name: z.string(),
113
+ arguments: z.string(),
114
+ status: z.enum(["in_progress", "completed", "incomplete"]).optional(),
115
+ }),
116
+ z.object({
117
+ call_id: z.string(),
118
+ output: z.string(),
119
+ type: z.literal("function_call_output"),
120
+ id: z.string().optional(),
121
+ status: z.enum(["in_progress", "completed", "incomplete"]),
122
+ }),
123
+ z.object({
124
+ type: z.literal("mcp_list_tools"),
125
+ id: z.string(),
126
+ server_label: z.string(),
127
+ tools: z.array(
128
+ z.object({
129
+ name: z.string(),
130
+ input_schema: z.record(z.any()),
131
+ description: z.string().nullable().optional(),
132
+ annotations: z.object({}).optional(),
133
+ })
134
+ ),
135
+ error: z.string().nullable().optional(),
136
+ }),
137
+ mcpApprovalRequestParamsSchema,
138
+ mcpApprovalResponseParamsSchema,
139
+ ])
140
+ ),
141
+ ]),
142
+ instructions: z.string().nullable().default(null),
143
+ max_output_tokens: z.number().int().min(0).nullable().default(null),
144
+ // max_tool_calls: z.number().min(0).nullable().default(null),
145
+ metadata: z
146
+ .record(z.string().max(64), z.string().max(512))
147
+ .refine((val) => Object.keys(val).length <= 16, {
148
+ message: "Must have at most 16 items",
149
+ })
150
+ .nullable()
151
+ .default(null),
152
+ model: z.string(),
153
+ // parallel_tool_calls: z.boolean().default(true), // TODO: how to handle this if chat completion doesn't?
154
+ // previous_response_id: z.string().nullable().default(null),
155
+ // reasoning: z.object({
156
+ // effort: z.enum(["low", "medium", "high"]).default("medium"),
157
+ // summary: z.enum(["auto", "concise", "detailed"]).nullable().default(null),
158
+ // }),
159
+ // store: z.boolean().default(true),
160
+ stream: z.boolean().default(false),
161
+ temperature: z.number().min(0).max(2).default(1),
162
+ text: z
163
+ .object({
164
+ format: z.union([
165
+ z.object({
166
+ type: z.literal("text"),
167
+ }),
168
+ z.object({
169
+ type: z.literal("json_object"),
170
+ }),
171
+ z.object({
172
+ type: z.literal("json_schema"),
173
+ name: z
174
+ .string()
175
+ .max(64, "Must be at most 64 characters")
176
+ .regex(/^[a-zA-Z0-9_-]+$/, "Only letters, numbers, underscores, and dashes are allowed"),
177
+ description: z.string().optional(),
178
+ schema: z.record(z.any()),
179
+ strict: z.boolean().default(false),
180
+ }),
181
+ ]),
182
+ })
183
+ .optional(),
184
+ tool_choice: z
185
+ .union([
186
+ z.enum(["auto", "none", "required"]),
187
+ z.object({
188
+ type: z.enum(["function"]),
189
+ name: z.string(),
190
+ }),
191
+ // TODO: also hosted tool and MCP tool
192
+ ])
193
+ .optional(),
194
+ tools: z
195
+ .array(
196
+ z.union([
197
+ z.object({
198
+ name: z.string(),
199
+ parameters: z.record(z.any()),
200
+ strict: z.boolean().default(true),
201
+ type: z.enum(["function"]),
202
+ description: z.string().optional(),
203
+ }),
204
+ mcpServerParamsSchema,
205
+ ])
206
+ )
207
+ .optional(),
208
+ // top_logprobs: z.number().min(0).max(20).nullable().default(null),
209
+ top_p: z.number().min(0).max(1).default(1),
210
+ // truncation: z.enum(["auto", "disabled"]).default("disabled"),
211
+ // user
212
+ });
213
+
214
+ export type CreateResponseParams = z.infer<typeof createResponseParamsSchema>;
215
+ export type McpServerParams = z.infer<typeof mcpServerParamsSchema>;
216
+ export type McpApprovalRequestParams = z.infer<typeof mcpApprovalRequestParamsSchema>;
217
+ export type McpApprovalResponseParams = z.infer<typeof mcpApprovalResponseParamsSchema>;
src/server.ts ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import express, { type Express } from "express";
2
+ import { createResponseParamsSchema } from "./schemas.js";
3
+ import { validateBody } from "./middleware/validation.js";
4
+ import { requestLogger } from "./middleware/logging.js";
5
+ import { getLandingPageHtml, postCreateResponse } from "./routes/index.js";
6
+
7
+ export const createApp = (): Express => {
8
+ const app: Express = express();
9
+
10
+ // Middleware
11
+ app.use(requestLogger());
12
+ app.use(express.json());
13
+
14
+ // Routes
15
+ app.get("/", getLandingPageHtml);
16
+
17
+ app.post("/v1/responses", validateBody(createResponseParamsSchema), postCreateResponse);
18
+
19
+ return app;
20
+ };
tsconfig.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "compilerOptions": {
3
+ "allowSyntheticDefaultImports": true,
4
+ "lib": ["ES2022", "DOM"],
5
+ "module": "CommonJS",
6
+ "moduleResolution": "node",
7
+ "target": "ES2022",
8
+ "forceConsistentCasingInFileNames": true,
9
+ "strict": true,
10
+ "noImplicitAny": true,
11
+ "strictNullChecks": true,
12
+ "skipLibCheck": true,
13
+ "noImplicitOverride": true,
14
+ "outDir": "./dist",
15
+ "declaration": true,
16
+ "declarationMap": true,
17
+ "resolveJsonModule": true
18
+ },
19
+ "include": ["src", "test"],
20
+ "exclude": ["dist"]
21
+ }