Wauplin HF Staff commited on
Commit
3d97d52
·
verified ·
1 Parent(s): fc023ef

Upload folder using huggingface_hub

Browse files
Dockerfile ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# ---- Build Stage ----
FROM node:18-alpine AS builder

# Install pnpm as root (corepack needs write access to its shim directory)
RUN corepack enable && corepack prepare pnpm@9.7.0 --activate

# Drop privileges for the rest of the build
USER node
ENV HOME=/home/node \
    PATH=/home/node/.local/bin:$PATH

WORKDIR $HOME/app

# Install dependencies and build
COPY --chown=node package.json pnpm-lock.yaml* ./
COPY --chown=node tsconfig.json ./
COPY --chown=node src ./src
RUN pnpm install --frozen-lockfile
RUN pnpm run build
# Fix: dropped the former `RUN chown -R node:node $HOME/app` — every file above
# is already copied with `--chown=node`, the command runs as the unprivileged
# `node` user anyway (so it could not change ownership), and it only added a
# duplicate filesystem layer.

# ---- Production Stage ----
FROM node:18-alpine AS runner

# No need to install pnpm here; run as the non-root node user for security
USER node
ENV HOME=/home/node \
    PATH=/home/node/.local/bin:$PATH

# Create app directory
WORKDIR $HOME/app

# Copy only necessary files from builder
# NOTE(review): node_modules copied as-is still contains devDependencies;
# consider `pnpm prune --prod` in the builder to slim the runtime image.
COPY --chown=node --from=builder /home/node/app/dist ./dist
COPY --chown=node --from=builder /home/node/app/package.json ./
COPY --chown=node --from=builder /home/node/app/node_modules ./node_modules

EXPOSE 3000

CMD ["node", "dist/index.js"]
README.md CHANGED
@@ -1,12 +1,182 @@
1
  ---
2
  title: Responses.js
3
- emoji: 🦀
4
  colorFrom: red
5
- colorTo: yellow
6
- sdk: gradio
7
- sdk_version: 5.35.0
8
- app_file: app.py
9
  pinned: false
 
 
 
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  title: Responses.js
3
+ emoji: 😻
4
  colorFrom: red
5
+ colorTo: red
6
+ sdk: docker
 
 
7
  pinned: false
8
+ license: mit
9
+ short_description: Check out https://github.com/huggingface/responses.js
10
+ app_port: 3000
11
  ---
12
 
13
+
14
+ # responses.js
15
+
16
+ A lightweight Express.js server that implements OpenAI's Responses API, built on top of Chat Completions and powered by Hugging Face Inference Providers.
17
+
18
+ ## ✨ Features
19
+
20
+ - **ResponsesAPI**: Partial implementation of [OpenAI's Responses API](https://platform.openai.com/docs/api-reference/responses), on top of Chat Completion API
21
+ - **Inference Providers**: Powered by Hugging Face Inference Providers
22
+ - **Streaming Support**: Support for streamed responses
23
+ - **Structured Output**: Support for structured data responses (e.g. jsonschema)
24
+ - **Function Calling**: Tool and function calling capabilities
25
+ - **Multi-modal Input**: Text and image input support
26
+ - **Demo UI**: Interactive web interface for testing
27
+
28
+ Not implemented: remote function calling, MCP server, file upload, stateful API, etc.
29
+
30
+ ## 🚀 Quick Start
31
+
32
+ ### Prerequisites
33
+
34
+ - Node.js (v18 or higher)
35
+ - pnpm (recommended) or npm
36
+ - an Hugging Face token with inference permissions. Create one from your [user settings](https://huggingface.co/settings/tokens).
37
+
38
+ ### Installation & Setup
39
+
40
+ ```bash
41
+ # Clone the repository
42
+ git clone https://github.com/huggingface/responses.js.git
43
+ cd responses.js
44
+
45
+ # Install dependencies
46
+ pnpm install
47
+
48
+ # Start the development server
49
+ pnpm dev
50
+ ```
51
+
52
+ The server will be available at `http://localhost:3000`.
53
+
54
+ ### Running Examples
55
+
56
+ Explore the various capabilities with our example scripts located in the [./examples](./examples) folder:
57
+
58
+ ```bash
59
+ # Basic text input
60
+ pnpm run example text
61
+
62
+ # Multi-turn conversations
63
+ pnpm run example multi_turn
64
+
65
+ # Text + image input
66
+ pnpm run example image
67
+
68
+ # Streaming responses
69
+ pnpm run example streaming
70
+
71
+ # Structured output
72
+ pnpm run example structured_output
73
+ pnpm run example structured_output_streaming
74
+
75
+ # Function calling
76
+ pnpm run example function
77
+ pnpm run example function_streaming
78
+ ```
79
+
80
+ ### Interactive Demo UI
81
+
82
+ Experience the API through our interactive web interface, adapted from the [openai-responses-starter-app](https://github.com/openai/openai-responses-starter-app).
83
+
84
+ [![Demo Video](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/responses.js/demo_mini.png)](https://youtu.be/F-tAUnW-nd0)
85
+
86
+
87
+ #### Setup
88
+
89
+ 1. Create a configuration file:
90
+
91
+ ```bash
92
+ # Create demo/.env
93
+ cat > demo/.env << EOF
94
+ MODEL="cohere@CohereLabs/c4ai-command-a-03-2025"
95
+ OPENAI_BASE_URL=http://localhost:3000/v1
96
+ OPENAI_API_KEY=${HF_TOKEN:-<your-huggingface-token>}
97
+ EOF
98
+ ```
99
+
100
+ 2. Install demo dependencies:
101
+
102
+ ```bash
103
+ pnpm demo:install
104
+ ```
105
+
106
+ 3. Launch the demo:
107
+
108
+ ```bash
109
+ pnpm demo:dev
110
+ ```
111
+
112
+ The demo will be available at `http://localhost:3001`.
113
+
114
+ ## 🐳 Running with Docker
115
+
116
+ You can run the server in a production-ready container using Docker.
117
+
118
+ ### Build the Docker image
119
+
120
+ ```bash
121
+ docker build -t responses.js .
122
+ ```
123
+
124
+ ### Run the server
125
+
126
+ ```bash
127
+ docker run -p 3000:3000 responses.js
128
+ ```
129
+
130
+ The server will be available at `http://localhost:3000`.
131
+
132
+ ## 📁 Project Structure
133
+
134
+ ```
135
+ responses.js/
136
+ ├── demo/ # Interactive chat UI demo
137
+ ├── examples/ # Example scripts using openai-node client
138
+ ├── src/
139
+ │ ├── index.ts # Application entry point
140
+ │ ├── server.ts # Express app configuration and route definitions
141
+ │ ├── routes/ # API route implementations
142
+ │ ├── middleware/ # Middleware (validation, logging, etc.)
143
+ │ └── schemas/ # Zod validation schemas
144
+ ├── scripts/ # Utility and build scripts
145
+ ├── package.json # Package configuration and dependencies
146
+ └── README.md # This file
147
+ ```
148
+
149
+ ## 🛣️ Done / TODOs
150
+
151
+ > **Note**: This project is in active development. The roadmap below represents our current priorities and may evolve. Do not take anything for granted.
152
+
153
+ - [x] OpenAI types integration for consistent output
154
+ - [x] Streaming mode support
155
+ - [x] Structured output capabilities
156
+ - [x] Function calling implementation
157
+ - [x] Repository migration to dedicated responses.js repo
158
+ - [x] Basic development tooling setup
159
+ - [x] Demo application with comprehensive instructions
160
+ - [x] Multi-turn conversation fixes for text messages + tool calls
161
+ - [x] Correctly return "usage" field
162
+ - [x] MCP support (non-streaming)
163
+ - [ ] MCP support (streaming)
164
+ - [ ] Tools execution (web search, file search, image generation, code interpreter)
165
+ - [ ] Background mode support
166
+ - [ ] Additional API routes (GET, DELETE, CANCEL, LIST responses)
167
+ - [ ] Reasoning capabilities
168
+
169
+ ## 🤝 Contributing
170
+
171
+ We welcome contributions! Please feel free to submit issues, feature requests, or pull requests.
172
+
173
+ ## 📄 License
174
+
175
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
176
+
177
+ ## 🙏 Acknowledgments
178
+
179
+ - Based on OpenAI's [Responses API specification](https://platform.openai.com/docs/api-reference/responses)
180
+ - Built on top of [OpenAI's nodejs client](https://github.com/openai/openai-node)
181
+ - Demo UI adapted from [openai-responses-starter-app](https://github.com/openai/openai-responses-starter-app)
182
+ - Built on top of [Hugging Face Inference Providers](https://huggingface.co/docs/inference-providers/index)
package.json ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "@huggingface/responses.js",
3
+ "packageManager": "[email protected]",
4
+ "version": "0.1.0",
5
+ "type": "module",
6
+ "description": "Server for handling AI responses",
7
+ "repository": "https://github.com/huggingface/huggingface.js.git",
8
+ "publishConfig": {
9
+ "access": "public"
10
+ },
11
+ "main": "./dist/index.js",
12
+ "module": "./dist/index.mjs",
13
+ "types": "./dist/index.d.ts",
14
+ "exports": {
15
+ ".": {
16
+ "types": "./dist/index.d.ts",
17
+ "require": "./dist/index.js",
18
+ "import": "./dist/index.mjs"
19
+ }
20
+ },
21
+ "engines": {
22
+ "node": ">=18"
23
+ },
24
+ "source": "index.ts",
25
+ "scripts": {
26
+ "build": "tsup src/*.ts --format cjs,esm --clean && tsc --emitDeclarationOnly --declaration",
27
+ "check": "tsc",
28
+ "dev": "tsx watch src/index.ts",
29
+ "format": "prettier --write .",
30
+ "format:check": "prettier --check .",
31
+ "lint": "eslint --quiet --fix --ext .cjs,.ts .",
32
+ "lint:check": "eslint --ext .cjs,.ts .",
33
+ "prepublishOnly": "pnpm run build",
34
+ "prepare": "pnpm run build",
35
+ "start": "node dist/index.js",
36
+ "example": "node examples/_run.js",
37
+ "demo:build": "cd demo && npm run build",
38
+ "demo:dev": "cd demo && npm run dev",
39
+ "demo:install": "cd demo && npm install",
40
+ "demo:lint": "cd demo && npm run lint",
41
+ "demo:format": "cd demo && npm run format",
42
+ "demo:start": "cd demo && npm run start",
43
+ "deploy:spaces": "./push_to_space.sh"
44
+ },
45
+ "files": [
46
+ "src",
47
+ "dist",
48
+ "tsconfig.json"
49
+ ],
50
+ "keywords": [
51
+ "huggingface",
52
+ "ai",
53
+ "llm",
54
+ "responses-api",
55
+ "server"
56
+ ],
57
+ "author": "Hugging Face",
58
+ "license": "MIT",
59
+ "dependencies": {
60
+ "@huggingface/inference": "^4.3.1",
61
+ "@huggingface/tasks": "^0.19.22",
62
+ "@modelcontextprotocol/sdk": "^1.15.0",
63
+ "express": "^4.21.2",
64
+ "openai": "^5.8.2",
65
+ "zod": "^3.25.71"
66
+ },
67
+ "devDependencies": {
68
+ "@eslint/js": "^9.30.1",
69
+ "@types/express": "^4.17.23",
70
+ "@typescript-eslint/eslint-plugin": "^8.35.1",
71
+ "@typescript-eslint/parser": "^8.35.1",
72
+ "eslint": "^9.30.1",
73
+ "eslint-config-prettier": "^10.1.5",
74
+ "eslint-plugin-prettier": "^5.5.1",
75
+ "prettier": "^3.6.2",
76
+ "tsup": "^8.5.0",
77
+ "tsx": "^4.20.3",
78
+ "typescript": "^5.8.3"
79
+ }
80
+ }
pnpm-lock.yaml ADDED
The diff for this file is too large to render. See raw diff
 
src/index.ts ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { createApp } from "./server.js";
2
+
3
+ const app = createApp();
4
+ const port = process.env.PORT || 3000;
5
+
6
+ // Start server
7
+ app.listen(port, () => {
8
+ console.log(`🚀 Server started at ${new Date().toISOString()}`);
9
+ console.log(`🌐 Server is running on http://localhost:${port}`);
10
+ console.log("─".repeat(60));
11
+ });
12
+
13
+ // Graceful shutdown logging
14
+ process.on("SIGINT", () => {
15
+ console.log("─".repeat(60));
16
+ console.log(`🛑 Server shutting down at ${new Date().toISOString()}`);
17
+ process.exit(0);
18
+ });
19
+
20
+ process.on("SIGTERM", () => {
21
+ console.log("─".repeat(60));
22
+ console.log(`🛑 Server shutting down at ${new Date().toISOString()}`);
23
+ process.exit(0);
24
+ });
25
+
26
+ export default app;
src/lib/McpResultFormatter.ts ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * Vendored from `@huggingface/mcp-client`
3
+ *
4
+ * https://github.com/huggingface/huggingface.js/blob/main/packages/mcp-client/src/ResultFormatter.ts
5
+ */
6
+
7
+ import type {
8
+ TextResourceContents,
9
+ BlobResourceContents,
10
+ CompatibilityCallToolResult,
11
+ } from "@modelcontextprotocol/sdk/types";
12
+
13
+ /**
14
+ * A utility class for formatting CallToolResult contents into human-readable text.
15
+ * Processes different content types, extracts text, and summarizes binary data.
16
+ */
17
+ export class McpResultFormatter {
18
+ /**
19
+ * Formats a CallToolResult's contents into a single string.
20
+ * - Text content is included directly
21
+ * - Binary content (images, audio, blobs) is summarized
22
+ *
23
+ * @param result The CallToolResult to format
24
+ * @returns A human-readable string representation of the result contents
25
+ */
26
+ static format(result: CompatibilityCallToolResult): string {
27
+ if (!result.content || !Array.isArray(result.content) || result.content.length === 0) {
28
+ return "[No content]";
29
+ }
30
+
31
+ const formattedParts: string[] = [];
32
+
33
+ for (const item of result.content) {
34
+ switch (item.type) {
35
+ case "text":
36
+ // Extract text content directly
37
+ formattedParts.push(item.text);
38
+ break;
39
+
40
+ case "image": {
41
+ // Summarize image content
42
+ const imageSize = this.getBase64Size(item.data);
43
+ formattedParts.push(
44
+ `[Binary Content: Image ${item.mimeType}, ${imageSize} bytes]\nThe task is complete and the content accessible to the User`
45
+ );
46
+ break;
47
+ }
48
+
49
+ case "audio": {
50
+ // Summarize audio content
51
+ const audioSize = this.getBase64Size(item.data);
52
+ formattedParts.push(
53
+ `[Binary Content: Audio ${item.mimeType}, ${audioSize} bytes]\nThe task is complete and the content accessible to the User`
54
+ );
55
+ break;
56
+ }
57
+
58
+ case "resource":
59
+ // Handle embedded resources - explicitly type the resource
60
+ if ("text" in item.resource) {
61
+ // It's a text resource with a text property
62
+ const textResource = item.resource as TextResourceContents;
63
+ formattedParts.push(textResource.text);
64
+ } else if ("blob" in item.resource) {
65
+ // It's a binary resource with a blob property
66
+ const blobResource = item.resource as BlobResourceContents;
67
+ const blobSize = this.getBase64Size(blobResource.blob);
68
+ const uri = blobResource.uri ? ` (${blobResource.uri})` : "";
69
+ const mimeType = blobResource.mimeType ? blobResource.mimeType : "unknown type";
70
+ formattedParts.push(
71
+ `[Binary Content${uri}: ${mimeType}, ${blobSize} bytes]\nThe task is complete and the content accessible to the User`
72
+ );
73
+ }
74
+ break;
75
+ }
76
+ }
77
+
78
+ return formattedParts.join("\n");
79
+ }
80
+
81
+ /**
82
+ * Calculates the approximate size in bytes of base64-encoded data
83
+ */
84
+ private static getBase64Size(base64: string): number {
85
+ // Remove base64 header if present (e.g., data:image/png;base64,)
86
+ const cleanBase64 = base64.includes(",") ? base64.split(",")[1] : base64;
87
+
88
+ // Calculate size: Base64 encodes 3 bytes into 4 characters
89
+ const padding = cleanBase64.endsWith("==") ? 2 : cleanBase64.endsWith("=") ? 1 : 0;
90
+ return Math.floor((cleanBase64.length * 3) / 4 - padding);
91
+ }
92
+ }
src/lib/generateUniqueId.ts ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * AI-generated file using Cursor + Claude 4
3
+ *
4
+ * Generate a unique ID for the response
5
+ */
6
+ import { randomBytes } from "crypto";
7
+
8
+ export function generateUniqueId(prefix?: string): string {
9
+ const id = randomBytes(24).toString("hex");
10
+ return prefix ? `${prefix}_${id}` : id;
11
+ }
src/mcp.ts ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { Client } from "@modelcontextprotocol/sdk/client/index.js";
2
+ import { SSEClientTransport } from "@modelcontextprotocol/sdk/client/sse.js";
3
+ import { StreamableHTTPClientTransport } from "@modelcontextprotocol/sdk/client/streamableHttp.js";
4
+ import { version as packageVersion } from "../package.json";
5
+ import { URL } from "url";
6
+
7
+ import type { McpServerParams } from "./schemas";
8
+ import { McpResultFormatter } from "./lib/McpResultFormatter";
9
+ import { generateUniqueId } from "./lib/generateUniqueId";
10
+ import type { ResponseOutputItem } from "openai/resources/responses/responses";
11
+
12
+ export async function connectMcpServer(mcpServer: McpServerParams): Promise<Client> {
13
+ const mcp = new Client({ name: "@huggingface/responses.js", version: packageVersion });
14
+
15
+ // Try to connect with http first, if that fails, try sse
16
+ const url = new URL(mcpServer.server_url);
17
+ const options = {
18
+ requestInit: mcpServer.headers
19
+ ? {
20
+ headers: mcpServer.headers,
21
+ }
22
+ : undefined,
23
+ };
24
+ try {
25
+ const transport = new StreamableHTTPClientTransport(url, options);
26
+ await mcp.connect(transport);
27
+ } catch {
28
+ const transport = new SSEClientTransport(url, options);
29
+ await mcp.connect(transport);
30
+ }
31
+
32
+ console.log("Connected to MCP server", mcpServer.server_url);
33
+
34
+ return mcp;
35
+ }
36
+
37
+ export async function callMcpTool(
38
+ mcpServer: McpServerParams,
39
+ toolName: string,
40
+ server_label: string,
41
+ argumentsString: string
42
+ ): Promise<ResponseOutputItem> {
43
+ try {
44
+ const client = await connectMcpServer(mcpServer);
45
+ const toolArgs: Record<string, unknown> = argumentsString === "" ? {} : JSON.parse(argumentsString);
46
+ console.log(`Calling MCP tool '${toolName}'`);
47
+ const toolResponse = await client.callTool({ name: toolName, arguments: toolArgs });
48
+ const formattedResult = McpResultFormatter.format(toolResponse);
49
+ return {
50
+ type: "mcp_call",
51
+ id: generateUniqueId("mcp_call"),
52
+ name: toolName,
53
+ server_label: server_label,
54
+ arguments: argumentsString,
55
+ output: formattedResult,
56
+ };
57
+ } catch (error) {
58
+ const errorMessage =
59
+ error instanceof Error ? error.message : typeof error === "string" ? error : JSON.stringify(error);
60
+ return {
61
+ type: "mcp_call",
62
+ id: generateUniqueId("mcp_call"),
63
+ name: toolName,
64
+ server_label: server_label,
65
+ arguments: argumentsString,
66
+ error: errorMessage,
67
+ };
68
+ }
69
+ }
src/middleware/logging.ts ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * AI-generated file using Cursor + Claude 4
3
+ *
4
+ * Middleware to log all HTTP requests with duration, status code, method, and route
5
+ */
6
+ import { type Request, type Response, type NextFunction } from "express";
7
+
8
+ interface LogContext {
9
+ timestamp: string;
10
+ method: string;
11
+ url: string;
12
+ statusCode?: number;
13
+ duration?: number;
14
+ }
15
+
16
+ function formatLogMessage(context: LogContext): string {
17
+ const { timestamp, method, url, statusCode, duration } = context;
18
+
19
+ if (statusCode === undefined) {
20
+ return `[${timestamp}] 📥 ${method} ${url}`;
21
+ }
22
+
23
+ const statusEmoji =
24
+ statusCode >= 200 && statusCode < 300
25
+ ? "✅"
26
+ : statusCode >= 400 && statusCode < 500
27
+ ? "⚠️"
28
+ : statusCode >= 500
29
+ ? "❌"
30
+ : "ℹ️";
31
+ return `[${timestamp}] ${statusEmoji} ${statusCode} ${method} ${url} (${duration}ms)`;
32
+ }
33
+
34
+ /**
35
+ * Middleware to log all HTTP requests with duration, status code, method, and route
36
+ */
37
+ export function requestLogger() {
38
+ return (req: Request, res: Response, next: NextFunction): void => {
39
+ const startTime = Date.now();
40
+ const { method, url } = req;
41
+
42
+ // Log incoming request
43
+ console.log(
44
+ formatLogMessage({
45
+ timestamp: new Date().toISOString(),
46
+ method,
47
+ url,
48
+ })
49
+ );
50
+
51
+ // Listen for when the response finishes
52
+ res.on("finish", () => {
53
+ const duration = Date.now() - startTime;
54
+
55
+ console.log(
56
+ formatLogMessage({
57
+ timestamp: new Date().toISOString(),
58
+ method,
59
+ url,
60
+ statusCode: res.statusCode,
61
+ duration,
62
+ })
63
+ );
64
+ });
65
+
66
+ next();
67
+ };
68
+ }
src/middleware/validation.ts ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * AI-generated file using Cursor + Claude 4
3
+ */
4
+
5
+ import { type Request, type Response, type NextFunction } from "express";
6
+ import { z } from "zod";
7
+
8
+ /**
9
+ * Middleware to validate request body against a Zod schema
10
+ * @param schema - Zod schema to validate against
11
+ * @returns Express middleware function
12
+ */
13
+ export function validateBody<T extends z.ZodTypeAny>(schema: T) {
14
+ return (req: Request, res: Response, next: NextFunction): void => {
15
+ try {
16
+ const validatedBody = schema.parse(req.body);
17
+ req.body = validatedBody;
18
+ next();
19
+ } catch (error) {
20
+ if (error instanceof z.ZodError) {
21
+ console.log(req.body);
22
+ res.status(400).json({
23
+ success: false,
24
+ error: error.errors,
25
+ details: error.errors,
26
+ });
27
+ } else {
28
+ res.status(500).json({
29
+ success: false,
30
+ error: "Internal server error",
31
+ });
32
+ }
33
+ }
34
+ };
35
+ }
36
+
37
/**
 * Type helper to create a properly typed request with validated body.
 *
 * Use together with `validateBody`: after that middleware has run,
 * `req.body` is known to match the schema's output type `T`.
 */
export interface ValidatedRequest<T> extends Request {
	body: T;
}
src/routes/index.ts ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ export { postCreateResponse } from "./responses.js";
2
+ export { getLandingPageHtml } from "./landingPageHtml.js";
src/routes/landingPageHtml.ts ADDED
@@ -0,0 +1,567 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import type { Request, Response } from "express";
2
+
3
+ export function getLandingPageHtml(req: Request, res: Response): void {
4
+ const baseUrl = `${req.protocol}://${req.get("host")}/v1`;
5
+ res.setHeader("Content-Type", "text/html; charset=utf-8");
6
+ res.send(`
7
+ <!DOCTYPE html>
8
+ <html lang="en">
9
+ <head>
10
+ <meta charset="UTF-8">
11
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
12
+ <title>responses.js – OpenAI-compatible Responses API</title>
13
+ <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700&display=swap" rel="stylesheet">
14
+ <style>
15
+ :root {
16
+ --primary: #2563eb;
17
+ --primary-dark: #1e40af;
18
+ --accent: #fbbf24;
19
+ --bg: #f8fafc;
20
+ --card-bg: #fff;
21
+ --border: #e5e7eb;
22
+ --text: #1e293b;
23
+ --muted: #64748b;
24
+ --radius: 14px;
25
+ --shadow: 0 4px 24px #0002;
26
+ }
27
+ html, body { height: 100%; }
28
+ body {
29
+ font-family: 'Inter', Arial, sans-serif;
30
+ background: var(--bg);
31
+ color: var(--text);
32
+ margin: 0;
33
+ min-height: 100vh;
34
+ display: flex;
35
+ flex-direction: column;
36
+ }
37
+ .sticky-header {
38
+ position: sticky;
39
+ top: 0;
40
+ z-index: 100;
41
+ background: linear-gradient(90deg, var(--primary) 0%, #60a5fa 100%);
42
+ color: #fff;
43
+ box-shadow: 0 2px 12px #0001;
44
+ }
45
+ .header-inner {
46
+ max-width: 1100px;
47
+ margin: 0 auto;
48
+ display: flex;
49
+ align-items: center;
50
+ justify-content: space-between;
51
+ padding: 1.5rem 1.5rem 1.2rem 1.5rem;
52
+ }
53
+ .header-title {
54
+ display: flex;
55
+ align-items: center;
56
+ gap: 0.8rem;
57
+ }
58
+ .header-title svg {
59
+ height: 2.2rem;
60
+ width: 2.2rem;
61
+ display: block;
62
+ }
63
+ .header-title h1 {
64
+ font-size: 2.1rem;
65
+ font-weight: 700;
66
+ margin: 0;
67
+ letter-spacing: -1px;
68
+ }
69
+ .github-btn {
70
+ background: #fff2;
71
+ color: #fff;
72
+ border: 1.5px solid #fff4;
73
+ border-radius: 8px;
74
+ padding: 0.6em 1.3em;
75
+ font-weight: 600;
76
+ font-size: 1.05em;
77
+ text-decoration: none;
78
+ display: flex;
79
+ align-items: center;
80
+ gap: 0.5em;
81
+ transition: background 0.2s, color 0.2s;
82
+ }
83
+ .github-btn:hover {
84
+ background: #fff;
85
+ color: var(--primary-dark);
86
+ }
87
+ main {
88
+ flex: 1;
89
+ max-width: 900px;
90
+ margin: 0 auto;
91
+ padding: 2.5rem 1.2rem 1.5rem 1.2rem;
92
+ display: flex;
93
+ flex-direction: column;
94
+ gap: 2.5rem;
95
+ }
96
+ .hero {
97
+ background: linear-gradient(120deg, #dbeafe 0%, #f0fdf4 100%);
98
+ border-radius: var(--radius);
99
+ box-shadow: var(--shadow);
100
+ padding: 2.5rem 2rem 2rem 2rem;
101
+ display: flex;
102
+ flex-direction: column;
103
+ align-items: center;
104
+ text-align: center;
105
+ position: relative;
106
+ overflow: hidden;
107
+ }
108
+ .hero h2 {
109
+ font-size: 2rem;
110
+ font-weight: 700;
111
+ margin: 0 0 0.7rem 0;
112
+ color: var(--primary-dark);
113
+ }
114
+ .hero p {
115
+ font-size: 1.18rem;
116
+ color: var(--muted);
117
+ margin: 0 0 1.5rem 0;
118
+ }
119
+ .api-endpoint-box {
120
+ background: #fff;
121
+ border: 2px solid var(--primary);
122
+ border-radius: 12px;
123
+ padding: 1.3rem 1.2rem 1.3rem 1.2rem;
124
+ margin: 1.5rem 0 1.5rem 0;
125
+ text-align: center;
126
+ font-size: 1.18rem;
127
+ box-shadow: 0 2px 8px #174ea610;
128
+ position: relative;
129
+ display: flex;
130
+ flex-direction: column;
131
+ align-items: center;
132
+ gap: 0.5em;
133
+ }
134
+ .api-endpoint-url {
135
+ display: inline-block;
136
+ background: #f1f5f9;
137
+ color: var(--primary-dark);
138
+ font-family: 'Fira Mono', 'Consolas', monospace;
139
+ font-size: 1.15em;
140
+ padding: 0.3em 0.7em;
141
+ border-radius: 6px;
142
+ border: 1px solid #cbd5e1;
143
+ margin: 0.5em 0 0.5em 0;
144
+ word-break: break-all;
145
+ }
146
+ .copy-endpoint-btn {
147
+ position: absolute;
148
+ top: 16px;
149
+ right: 16px;
150
+ background: var(--primary);
151
+ color: #fff;
152
+ border: none;
153
+ border-radius: 4px;
154
+ padding: 0.3em 1em;
155
+ font-size: 1em;
156
+ cursor: pointer;
157
+ opacity: 0.85;
158
+ transition: background 0.2s, opacity 0.2s;
159
+ z-index: 2;
160
+ }
161
+ .copy-endpoint-btn:hover { background: var(--primary-dark); opacity: 1; }
162
+ .copy-endpoint-btn.copied { background: #388e3c; color: #fff; opacity: 1; }
163
+ .cta {
164
+ margin: 1.5rem auto 0 auto;
165
+ background: var(--primary);
166
+ color: #fff;
167
+ text-decoration: none;
168
+ font-weight: bold;
169
+ padding: 1rem 2.5rem;
170
+ border-radius: 8px;
171
+ font-size: 1.2rem;
172
+ transition: background 0.2s;
173
+ box-shadow: 0 2px 8px #2563eb20;
174
+ display: inline-block;
175
+ }
176
+ .cta:hover { background: var(--primary-dark); }
177
+ .features {
178
+ display: grid;
179
+ grid-template-columns: repeat(2, 1fr); /* 2 columns for 2x2 grid */
180
+ gap: 1.5rem;
181
+ margin: 2rem 0 0 0;
182
+ }
183
+ .feature-card {
184
+ background: var(--card-bg);
185
+ border-radius: var(--radius);
186
+ box-shadow: 0 1px 6px #0001;
187
+ padding: 1.2rem 1.3rem;
188
+ border: 1.5px solid var(--border);
189
+ display: flex;
190
+ flex-direction: column;
191
+ align-items: flex-start;
192
+ gap: 0.5em;
193
+ min-height: 120px;
194
+ position: relative;
195
+ transition: box-shadow 0.2s, border 0.2s;
196
+ }
197
+ .feature-card:hover {
198
+ box-shadow: 0 4px 16px #2563eb22;
199
+ border: 1.5px solid var(--primary);
200
+ }
201
+ .feature-card b {
202
+ font-size: 1.08em;
203
+ color: var(--primary-dark);
204
+ }
205
+ .examples-section {
206
+ margin-top: 2.5rem;
207
+ }
208
+ .examples-tabs {
209
+ display: flex;
210
+ gap: 0.5em;
211
+ margin-bottom: 1.2em;
212
+ border-bottom: 2px solid #e5e7eb;
213
+ }
214
+ .examples-tab {
215
+ background: none;
216
+ border: none;
217
+ font-size: 1.08em;
218
+ font-weight: 600;
219
+ color: var(--muted);
220
+ padding: 0.7em 1.2em 0.5em 1.2em;
221
+ cursor: pointer;
222
+ border-radius: 8px 8px 0 0;
223
+ transition: color 0.2s, background 0.2s;
224
+ }
225
+ .examples-tab.active {
226
+ color: var(--primary-dark);
227
+ background: #fff;
228
+ border-bottom: 2px solid var(--primary);
229
+ }
230
+ .example-panel { display: none; }
231
+ .example-panel.active { display: block; }
232
+ pre {
233
+ background: #f4f4f8;
234
+ border-radius: 8px;
235
+ padding: 1.1rem 1rem 1.1rem 1rem;
236
+ overflow-x: auto;
237
+ font-size: 0.98rem;
238
+ position: relative;
239
+ margin: 0.5em 0 0.5em 0;
240
+ }
241
+ code {
242
+ font-family: 'Fira Mono', 'Consolas', monospace;
243
+ font-size: 1em;
244
+ background: none;
245
+ color: #222;
246
+ }
247
+ .copy-btn {
248
+ position: absolute;
249
+ top: 10px;
250
+ right: 10px;
251
+ background: #e0e4ea;
252
+ border: none;
253
+ border-radius: 4px;
254
+ padding: 0.2em 0.7em;
255
+ font-size: 0.95em;
256
+ color: var(--primary-dark);
257
+ cursor: pointer;
258
+ opacity: 0.7;
259
+ transition: opacity 0.2s, background 0.2s;
260
+ z-index: 2;
261
+ }
262
+ .copy-btn:hover { opacity: 1; background: #c9d3e6; }
263
+ .copy-btn.copied { color: #388e3c; background: #d0f5dd; opacity: 1; }
264
+ .more-info-footer {
265
+ background: #f1f5f9;
266
+ border-top: 1.5px solid #e5e7eb;
267
+ margin-top: 3rem;
268
+ padding: 2rem 1rem 1.5rem 1rem;
269
+ border-radius: 0 0 var(--radius) var(--radius);
270
+ text-align: center;
271
+ color: var(--muted);
272
+ font-size: 1.08em;
273
+ }
274
+ .more-info-footer ul {
275
+ list-style: none;
276
+ padding: 0;
277
+ margin: 0.5em 0 0 0;
278
+ display: flex;
279
+ flex-wrap: wrap;
280
+ gap: 1.5em;
281
+ justify-content: center;
282
+ }
283
+ .more-info-footer a {
284
+ color: var(--primary-dark);
285
+ text-decoration: none;
286
+ font-weight: 500;
287
+ transition: color 0.2s;
288
+ }
289
+ .more-info-footer a:hover { color: var(--primary); }
290
+ @media (max-width: 700px) {
291
+ .header-inner { flex-direction: column; align-items: flex-start; gap: 1.2em; }
292
+ .header-title h1 { font-size: 1.5rem; }
293
+ main { padding: 1.2rem; }
294
+ .hero { padding: 1.2rem 0.7rem 1.2rem 0.7rem; }
295
+ }
296
+ </style>
297
+ <!-- Prism.js for syntax highlighting -->
298
+ <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/[email protected]/themes/prism.min.css">
299
+ <script src="https://cdn.jsdelivr.net/npm/[email protected]/prism.min.js"></script>
300
+ <script src="https://cdn.jsdelivr.net/npm/[email protected]/components/prism-javascript.min.js"></script>
301
+ <script>
302
+ function copyCode(btn) {
303
+ const pre = btn.parentElement;
304
+ const code = pre.querySelector('code');
305
+ if (!code) return;
306
+ const text = code.innerText;
307
+ navigator.clipboard.writeText(text).then(() => {
308
+ btn.textContent = 'Copied!';
309
+ btn.classList.add('copied');
310
+ setTimeout(() => {
311
+ btn.textContent = 'Copy';
312
+ btn.classList.remove('copied');
313
+ }, 1200);
314
+ });
315
+ }
316
+ function copyEndpointUrl(btn) {
317
+ const url = document.getElementById('api-endpoint-url').innerText;
318
+ navigator.clipboard.writeText(url).then(() => {
319
+ btn.textContent = 'Copied!';
320
+ btn.classList.add('copied');
321
+ setTimeout(() => {
322
+ btn.textContent = 'Copy';
323
+ btn.classList.remove('copied');
324
+ }, 1200);
325
+ });
326
+ }
327
+ // Tabs for examples
328
+ function showExampleTab(idx) {
329
+ document.querySelectorAll('.examples-tab').forEach((tab, i) => {
330
+ tab.classList.toggle('active', i === idx);
331
+ });
332
+ document.querySelectorAll('.example-panel').forEach((panel, i) => {
333
+ panel.classList.toggle('active', i === idx);
334
+ });
335
+ }
336
+ window.addEventListener('DOMContentLoaded', function() {
337
+ showExampleTab(0);
338
+ document.querySelectorAll('.examples-tab').forEach((tab, i) => {
339
+ tab.addEventListener('click', () => showExampleTab(i));
340
+ });
341
+ });
342
+ </script>
343
+ </head>
344
+ <body>
345
+ <header class="sticky-header">
346
+ <div class="header-inner">
347
+ <div class="header-title">
348
+ <img src="https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo.svg" alt="Hugging Face Logo" style="height:4.0rem;width:4.0rem;display:block;"/>
349
+ <h1>responses.js</h1>
350
+ </div>
351
+ <a href="https://github.com/huggingface/responses.js" target="_blank" aria-label="GitHub Repository" class="github-btn">
352
+ <svg height="20" width="20" viewBox="0 0 16 16" fill="currentColor" style="display: block;"><path d="M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.19 0 .21.15.46.55.38A8.013 8.013 0 0 0 16 8c0-4.42-3.58-8-8-8z"/></svg>
353
+ GitHub
354
+ </a>
355
+ </div>
356
+ </header>
357
+ <main>
358
+ <section class="hero">
359
+ <h2>OpenAI-compatible Responses API</h2>
360
+ <p><b>responses.js</b> is an open-source, lightweight server implementing OpenAI's Responses API, built on top of Chat Completions and powered by Hugging Face Inference Providers.</p>
361
+ <div class="api-endpoint-box">
362
+ <button class="copy-endpoint-btn" onclick="copyEndpointUrl(this)">Copy</button>
363
+ <div><b>API Endpoint:</b></div>
364
+ <span class="api-endpoint-url" id="api-endpoint-url">${baseUrl}/responses</span>
365
+ <div style="font-size:0.98em; color:#333; margin-top:0.5em;">Get started by sending requests to this endpoint</div>
366
+ </div>
367
+ <a class="cta" href="https://github.com/huggingface/responses.js" target="_blank">View on GitHub</a>
368
+ </section>
369
+ <section>
370
+ <div class="features">
371
+ <div class="feature-card">
372
+ <b>OpenAI-compatible</b><br>Stateless implementation of the <a href="https://platform.openai.com/docs/api-reference/responses" target="_blank">Responses API</a>
373
+ </div>
374
+ <div class="feature-card">
375
+ <b>Inference Providers</b><br>Powered by Hugging Face Inference Providers
376
+ </div>
377
+ <div class="feature-card">
378
+ <b>Multi-modal</b><br>Text and image input support
379
+ </div>
380
+ <div class="feature-card">
381
+ <b>Streaming &amp; Structured Output</b><br>Supports streaming, JSON schema, and function calling
382
+ </div>
383
+ </div>
384
+ </section>
385
+ <section class="examples-section">
386
+ <h2 style="color:var(--primary-dark);margin-bottom:1.2em;">Examples</h2>
387
+ <div class="examples-tabs">
388
+ <button class="examples-tab active" type="button">Text</button>
389
+ <button class="examples-tab" type="button">Text + Image Input</button>
390
+ <button class="examples-tab" type="button">Multi-turn</button>
391
+ <button class="examples-tab" type="button">Streaming</button>
392
+ <button class="examples-tab" type="button">Function Calling</button>
393
+ <button class="examples-tab" type="button">Structured Output</button>
394
+ </div>
395
+ <div class="example-panel active">
396
+ <pre><button class="copy-btn" onclick="copyCode(this)">Copy</button><code class="language-js">import OpenAI from "openai";
397
+
398
+ const openai = new OpenAI({
399
+ baseURL: "http://localhost:3000/v1",
400
+ apiKey: "YOUR_API_KEY_HERE", // visit https://huggingface.co/settings/tokens
401
+ });
402
+
403
+ const response = await openai.responses.create({
404
+ model: "Qwen/Qwen2.5-VL-7B-Instruct",
405
+ instructions: "You are a helpful assistant.",
406
+ input: "Tell me a three sentence bedtime story about a unicorn.",
407
+ });
408
+
409
+ console.log(response);
410
+ console.log(response.output_text);</code></pre>
411
+ </div>
412
+ <div class="example-panel">
413
+ <pre><button class="copy-btn" onclick="copyCode(this)">Copy</button><code class="language-js">import OpenAI from "openai";
414
+
415
+ const openai = new OpenAI({
416
+ baseURL: "${baseUrl}",
417
+ apiKey: "YOUR_API_KEY_HERE", // visit https://huggingface.co/settings/tokens
418
+ });
419
+
420
+ const response = await openai.responses.create({
421
+ model: "Qwen/Qwen2.5-VL-7B-Instruct",
422
+ input: [
423
+ {
424
+ role: "user",
425
+ content: [
426
+ { type: "input_text", text: "what is in this image?" },
427
+ {
428
+ type: "input_image",
429
+ image_url: "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
430
+ }
431
+ ]
432
+ }
433
+ ]
434
+ });
435
+
436
+ console.log(response);
437
+ console.log(response.output_text);</code></pre>
438
+ </div>
439
+ <div class="example-panel">
440
+ <pre><button class="copy-btn" onclick="copyCode(this)">Copy</button><code class="language-js">import OpenAI from "openai";
441
+
442
+ const openai = new OpenAI({
443
+ baseURL: "http://localhost:3000/v1",
444
+ apiKey: "YOUR_API_KEY_HERE", // visit https://huggingface.co/settings/tokens
445
+ });
446
+ const response = await openai.responses.create({
447
+ model: "Qwen/Qwen2.5-VL-7B-Instruct",
448
+ input: [
449
+ {
450
+ role: "developer",
451
+ content: "Talk like a pirate.",
452
+ },
453
+ {
454
+ role: "user",
455
+ content: "Are semicolons optional in JavaScript?",
456
+ },
457
+ ],
458
+ });
459
+
460
+ console.log(response);
461
+ console.log(response.output_text);</code></pre>
462
+ </div>
463
+ <div class="example-panel">
464
+ <pre><button class="copy-btn" onclick="copyCode(this)">Copy</button><code class="language-js">import { OpenAI } from "openai";
465
+ const openai = new OpenAI({
466
+ baseURL: "http://localhost:3000/v1",
467
+ apiKey: "YOUR_API_KEY_HERE", // visit https://huggingface.co/settings/tokens
468
+ });
469
+
470
+ const stream = await openai.responses.create({
471
+ model: "hyperbolic@Qwen/Qwen2.5-VL-7B-Instruct",
472
+ input: [
473
+ {
474
+ role: "user",
475
+ content: "Say 'double bubble bath' ten times fast.",
476
+ },
477
+ ],
478
+ stream: true,
479
+ });
480
+
481
+ for await (const event of stream) {
482
+ console.log(event);
483
+ }</code></pre>
484
+ </div>
485
+ <div class="example-panel">
486
+ <pre><button class="copy-btn" onclick="copyCode(this)">Copy</button><code class="language-js">import OpenAI from "openai";
487
+
488
+ const openai = new OpenAI({
489
+ baseURL: "${baseUrl}",
490
+ apiKey: "YOUR_API_KEY_HERE", // visit https://huggingface.co/settings/tokens
491
+ });
492
+
493
+ const tools = [
494
+ {
495
+ type: "function",
496
+ name: "get_current_weather",
497
+ description: "Get the current weather in a given location",
498
+ parameters: {
499
+ type: "object",
500
+ properties: {
501
+ location: { type: "string", description: "The city and state, e.g. San Francisco, CA" },
502
+ unit: { type: "string", enum: ["celsius", "fahrenheit"] }
503
+ },
504
+ required: ["location", "unit"]
505
+ }
506
+ }
507
+ ];
508
+
509
+ const response = await openai.responses.create({
510
+ model: "cerebras@meta-llama/Llama-3.3-70B-Instruct",
511
+ tools: tools,
512
+ input: "What is the weather like in Boston today?",
513
+ tool_choice: "auto"
514
+ });
515
+
516
+ console.log(response);</code></pre>
517
+ </div>
518
+ <div class="example-panel">
519
+ <pre><button class="copy-btn" onclick="copyCode(this)">Copy</button><code class="language-js">import OpenAI from "openai";
520
+ import { zodTextFormat } from "openai/helpers/zod";
521
+ import { z } from "zod";
522
+
523
+ const openai = new OpenAI({
524
+ baseURL: "http://localhost:3000/v1",
525
+ apiKey: "YOUR_API_KEY_HERE", // visit https://huggingface.co/settings/tokens
526
+ });
527
+
528
+ const Step = z.object({
529
+ explanation: z.string(),
530
+ output: z.string(),
531
+ });
532
+
533
+ const MathReasoning = z.object({
534
+ steps: z.array(Step),
535
+ final_answer: z.string(),
536
+ });
537
+
538
+ const response = await openai.responses.parse({
539
+ model: "novita@meta-llama/Meta-Llama-3-70B-Instruct",
540
+ input: [
541
+ {
542
+ role: "system",
543
+ content: "You are a helpful math tutor. Guide the user through the solution step by step.",
544
+ },
545
+ { role: "user", content: "how can I solve 8x + 7 = -23" },
546
+ ],
547
+ text: {
548
+ format: zodTextFormat(MathReasoning, "math_reasoning"),
549
+ },
550
+ });
551
+
552
+ console.log(response.output_parsed);</code></pre>
553
+ </div>
554
+ </section>
555
+ <footer class="more-info-footer">
556
+ <div style="font-weight:600; color:var(--primary-dark); font-size:1.13em; margin-bottom:0.5em;">More Info</div>
557
+ <ul>
558
+ <li><a href="https://github.com/huggingface/responses.js" target="_blank">GitHub Repository</a></li>
559
+ <li><a href="https://platform.openai.com/docs/api-reference/responses" target="_blank">OpenAI Responses API Docs</a></li>
560
+ <li><a href="https://huggingface.co/docs/inference-providers/index" target="_blank">Hugging Face Inference Providers</a></li>
561
+ </ul>
562
+ </footer>
563
+ </main>
564
+ </body>
565
+ </html>
566
+ `);
567
+ }
src/routes/responses.ts ADDED
@@ -0,0 +1,659 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { type Response as ExpressResponse } from "express";
2
+ import { type ValidatedRequest } from "../middleware/validation.js";
3
+ import type { CreateResponseParams, McpServerParams, McpApprovalRequestParams } from "../schemas.js";
4
+ import { generateUniqueId } from "../lib/generateUniqueId.js";
5
+ import { InferenceClient } from "@huggingface/inference";
6
+ import type {
7
+ ChatCompletionInputMessage,
8
+ ChatCompletionInputMessageChunkType,
9
+ ChatCompletionInput,
10
+ } from "@huggingface/tasks";
11
+
12
+ import type {
13
+ Response,
14
+ ResponseStreamEvent,
15
+ ResponseContentPartAddedEvent,
16
+ ResponseOutputMessage,
17
+ ResponseFunctionToolCall,
18
+ ResponseOutputItem,
19
+ } from "openai/resources/responses/responses";
20
+ import type {
21
+ ChatCompletionInputTool,
22
+ ChatCompletionStreamOutputUsage,
23
+ } from "@huggingface/tasks/dist/commonjs/tasks/chat-completion/inference.js";
24
+ import { callMcpTool, connectMcpServer } from "../mcp.js";
25
+
26
+ class StreamingError extends Error {
27
+ constructor(message: string) {
28
+ super(message);
29
+ this.name = "StreamingError";
30
+ }
31
+ }
32
+
33
/**
 * POST handler implementing OpenAI's Responses API on top of Chat Completions.
 *
 * High-level flow:
 *   1. Extract the caller's bearer token and build an `InferenceClient` with it.
 *   2. Flatten the Responses-style `input` into Chat Completions `messages`.
 *   3. Resolve tools: plain function tools plus tools listed from MCP servers.
 *   4. Short-circuit: if the input contains an approved MCP tool call, execute
 *      it directly and return — the LLM is not called at all.
 *   5. Otherwise call the LLM, either in streaming mode (SSE events) or
 *      one-shot, and translate the Chat Completions output back into
 *      Response output items.
 *
 * The route is stateless: conversation history must be replayed by the client
 * in `input` on every request.
 */
export const postCreateResponse = async (
	req: ValidatedRequest<CreateResponseParams>,
	res: ExpressResponse
): Promise<void> => {
	// Assumes an "Authorization: Bearer <token>" header; the token is forwarded
	// as-is to Hugging Face Inference. TODO confirm scheme is always "Bearer".
	const apiKey = req.headers.authorization?.split(" ")[1];

	if (!apiKey) {
		res.status(401).json({
			success: false,
			error: "Unauthorized",
		});
		return;
	}

	const client = new InferenceClient(apiKey);
	// Optional `instructions` becomes the system message.
	const messages: ChatCompletionInputMessage[] = req.body.instructions
		? [{ role: "system", content: req.body.instructions }]
		: [];

	// Flatten the Responses-style `input` items into chat messages. Several
	// item types have no chat-completion equivalent and are encoded as
	// assistant messages with a descriptive `name` (see "hacky" notes below).
	if (Array.isArray(req.body.input)) {
		messages.push(
			...req.body.input
				.map((item) => {
					switch (item.type) {
						case "function_call":
							return {
								// hacky but best fit for now: encode the call id/name in `name`
								role: "assistant",
								name: `function_call ${item.name} ${item.call_id}`,
								content: item.arguments,
							};
						case "function_call_output":
							return {
								// hacky but best fit for now
								role: "assistant",
								name: `function_call_output ${item.call_id}`,
								content: item.output,
							};
						case "message":
							return {
								role: item.role,
								content:
									typeof item.content === "string"
										? item.content
										: item.content
												.map((content) => {
													switch (content.type) {
														case "input_image":
															return {
																type: "image_url" as ChatCompletionInputMessageChunkType,
																image_url: {
																	url: content.image_url,
																},
															};
														case "output_text":
															// Empty text chunks are dropped (mapped to undefined).
															return content.text
																? {
																		type: "text" as ChatCompletionInputMessageChunkType,
																		text: content.text,
																	}
																: undefined;
														case "refusal":
															// Refusals are not forwarded to the LLM.
															return undefined;
														case "input_text":
															return {
																type: "text" as ChatCompletionInputMessageChunkType,
																text: content.text,
															};
													}
												})
												.filter((item) => item !== undefined),
							};
						case "mcp_list_tools": {
							// Hacky: empty content, will be dropped by the filter below.
							// The actual tool list is consumed in the tools section.
							return {
								role: "assistant",
								name: "mcp_list_tools",
								content: "",
							};
						}
						case "mcp_approval_request": {
							return {
								role: "assistant",
								name: "mcp_approval_request",
								content: `MCP approval request (${item.id}). Server: '${item.server_label}'. Tool: '${item.name}'. Arguments: '${item.arguments}'.`,
							};
						}
						case "mcp_approval_response": {
							return {
								role: "assistant",
								name: "mcp_approval_response",
								content: `MCP approval response (${item.id}). Approved: ${item.approve}. Reason: ${item.reason}.`,
							};
						}
					}
				})
				// Drop messages whose content is empty (e.g. the mcp_list_tools
				// placeholder above, or a message whose chunks all filtered out).
				.filter((message) => message.content?.length !== 0)
		);
	} else {
		// Plain string input => single user message.
		messages.push({ role: "user", content: req.body.input });
	}

	// Output items accumulated for the final Response object (mcp_list_tools
	// items may be pushed here before the LLM is called).
	const output: ResponseOutputItem[] = [];
	let tools: ChatCompletionInputTool[] | undefined = [];
	// Maps each MCP tool name to the server declaration it came from, so tool
	// calls and approval responses can be routed back to the right server.
	const mcpToolsMapping: Record<string, McpServerParams> = {};
	if (req.body.tools) {
		await Promise.all(
			req.body.tools.map(async (tool) => {
				switch (tool.type) {
					case "function":
						tools?.push({
							type: tool.type,
							function: {
								name: tool.name,
								parameters: tool.parameters,
								description: tool.description,
								strict: tool.strict,
							},
						});
						break;
					case "mcp": {
						let mcpListTools: ResponseOutputItem.McpListTools | undefined;

						// If MCP list tools is already in the input, use it (avoids a
						// round-trip to the MCP server on follow-up requests).
						if (Array.isArray(req.body.input)) {
							for (const item of req.body.input) {
								if (item.type === "mcp_list_tools" && item.server_label === tool.server_label) {
									mcpListTools = item;
									console.debug(`Using MCP list tools from input for server '${tool.server_label}'`);
									break;
								}
							}
						}
						// Otherwise, list tools from MCP server
						if (!mcpListTools) {
							try {
								const mcp = await connectMcpServer(tool);
								console.debug("Listing MCP tools from server");
								const mcpTools = await mcp.listTools();
								console.debug(`Fetched ${mcpTools.tools.length} tools from MCP server '${tool.server_label}'`);

								// All tools are returned in the Response object
								mcpListTools = {
									id: generateUniqueId("mcp_list_tools"),
									type: "mcp_list_tools",
									server_label: tool.server_label,
									tools: mcpTools.tools.map((mcpTool) => ({
										input_schema: mcpTool.inputSchema,
										name: mcpTool.name,
										annotations: mcpTool.annotations,
										description: mcpTool.description,
									})),
								};
							} catch (error) {
								// Listing failure is non-fatal: an empty mcp_list_tools item
								// carrying the error message is returned to the client.
								console.error("Error listing tools from MCP server", error);
								mcpListTools = {
									id: generateUniqueId("mcp_list_tools"),
									type: "mcp_list_tools",
									server_label: tool.server_label,
									tools: [],
									error: `Failed to list tools from MCP server '${tool.server_label}': ${error instanceof Error ? error.message : "Unknown error"}`,
								};
							}
							// Only freshly-fetched lists are echoed in the output;
							// lists replayed from `input` are not duplicated.
							output.push(mcpListTools);
						}

						// Only allowed tools are forwarded to the LLM. An empty
						// allow-list means "no restriction" (all tools forwarded).
						const allowedTools = tool.allowed_tools
							? Array.isArray(tool.allowed_tools)
								? tool.allowed_tools
								: tool.allowed_tools.tool_names
							: [];
						if (mcpListTools?.tools) {
							for (const mcpTool of mcpListTools.tools) {
								if (allowedTools.length === 0 || allowedTools.includes(mcpTool.name)) {
									tools?.push({
										type: "function" as const,
										function: {
											name: mcpTool.name,
											parameters: mcpTool.input_schema,
											description: mcpTool.description ?? undefined,
										},
									});
								}
								// Mapped even when filtered out, so approval responses for
								// non-forwarded tools can still be routed.
								mcpToolsMapping[mcpTool.name] = tool;
							}
							// NOTE(review): this `break` sits inside the `if` — when
							// `mcpListTools?.tools` is falsy, execution falls out of the
							// case block instead; harmless while "mcp" is the last case.
							break;
						}
					}
				}
			})
		);
	}

	// Chat Completions expects `tools: undefined`, not an empty array.
	if (tools.length === 0) {
		tools = undefined;
	}

	// Model may be prefixed with a provider: "<provider>@<model>".
	const model = req.body.model.includes("@") ? req.body.model.split("@")[1] : req.body.model;
	const provider = req.body.model.includes("@") ? req.body.model.split("@")[0] : undefined;

	// Translate the Responses-API request into a Chat Completions payload.
	const payload: ChatCompletionInput = {
		// main params
		model: model,
		provider: provider,
		messages: messages,
		stream: req.body.stream,
		// options
		max_tokens: req.body.max_output_tokens === null ? undefined : req.body.max_output_tokens,
		response_format: req.body.text?.format
			? {
					type: req.body.text.format.type,
					json_schema:
						req.body.text.format.type === "json_schema"
							? {
									description: req.body.text.format.description,
									name: req.body.text.format.name,
									schema: req.body.text.format.schema,
									strict: req.body.text.format.strict,
								}
							: undefined,
				}
			: undefined,
		temperature: req.body.temperature,
		tool_choice:
			typeof req.body.tool_choice === "string"
				? req.body.tool_choice
				: req.body.tool_choice
					? {
							type: "function",
							function: {
								name: req.body.tool_choice.name,
							},
						}
					: undefined,
		tools,
		top_p: req.body.top_p,
	};

	// Skeleton of the Response returned to the client; mutated in place as
	// output items / usage / status become known.
	const responseObject: Omit<Response, "incomplete_details" | "output_text" | "parallel_tool_calls"> = {
		created_at: Math.floor(new Date().getTime() / 1000),
		error: null,
		id: generateUniqueId("resp"),
		instructions: req.body.instructions,
		max_output_tokens: req.body.max_output_tokens,
		metadata: req.body.metadata,
		model: req.body.model,
		object: "response",
		output,
		// parallel_tool_calls: req.body.parallel_tool_calls,
		status: "in_progress",
		text: req.body.text,
		tool_choice: req.body.tool_choice ?? "auto",
		tools: req.body.tools ?? [],
		temperature: req.body.temperature,
		top_p: req.body.top_p,
		usage: {
			input_tokens: 0,
			input_tokens_details: { cached_tokens: 0 },
			output_tokens: 0,
			output_tokens_details: { reasoning_tokens: 0 },
			total_tokens: 0,
		},
	};

	// MCP approval requests => do not call LLM at all. The approved tool call
	// is executed directly and the response returned immediately.
	if (Array.isArray(req.body.input)) {
		for (const item of req.body.input) {
			// Note: currently supporting only 1 mcp_approval_response per request
			if (item.type === "mcp_approval_response" && item.approve) {
				const approvalRequest = req.body.input.find(
					(i) => i.type === "mcp_approval_request" && i.id === item.approval_request_id
				) as McpApprovalRequestParams | undefined;
				console.log("approvalRequest", approvalRequest);
				if (approvalRequest) {
					// NOTE(review): assumes the approved tool still exists in
					// mcpToolsMapping (i.e. its MCP server was re-declared in
					// `tools` on this request) — TODO confirm.
					const toolParams = mcpToolsMapping[approvalRequest.name];
					responseObject.output.push(
						await callMcpTool(toolParams, approvalRequest.name, toolParams.server_label, approvalRequest.arguments)
					);
					responseObject.status = "completed";
					res.json(responseObject);
					return;
				} else {
					responseObject.status = "failed";
					const errorMessage = `MCP approval response for approval request '${item.approval_request_id}' not found`;
					console.error(errorMessage);
					responseObject.error = {
						code: "server_error",
						message: errorMessage,
					};
					res.json(responseObject);
					return;
				}
			}
		}
	}

	// Streaming mode: emit Responses-API SSE events translated from the
	// Chat Completions delta stream. Only a single output item (one text
	// message OR one function call) is supported per streamed response.
	if (req.body.stream) {
		res.setHeader("Content-Type", "text/event-stream");
		res.setHeader("Connection", "keep-alive");
		let sequenceNumber = 0;

		// Emit events in sequence (SSE "data:" frames).
		const emitEvent = (event: ResponseStreamEvent) => {
			res.write(`data: ${JSON.stringify(event)}\n\n`);
		};

		try {
			// Response created event
			emitEvent({
				type: "response.created",
				response: responseObject as Response,
				sequence_number: sequenceNumber++,
			});

			// Response in progress event
			emitEvent({
				type: "response.in_progress",
				response: responseObject as Response,
				sequence_number: sequenceNumber++,
			});

			const stream = client.chatCompletionStream(payload);
			// Usage is only reported on (typically the last) chunks that carry it.
			let usage: ChatCompletionStreamOutputUsage | undefined;

			for await (const chunk of stream) {
				if (chunk.usage) {
					usage = chunk.usage;
				}

				if (chunk.choices[0].delta.content) {
					// NOTE(review): `output.length === 0` is used as "first delta"
					// marker, but `output` may already contain mcp_list_tools items
					// pushed above — that case would hit the StreamingError below.
					// TODO confirm streaming + MCP tools is intended to be supported.
					if (responseObject.output.length === 0) {
						const outputObject: ResponseOutputMessage = {
							id: generateUniqueId("msg"),
							type: "message",
							role: "assistant",
							status: "in_progress",
							content: [],
						};
						responseObject.output = [outputObject];

						// Response output item added event
						emitEvent({
							type: "response.output_item.added",
							output_index: 0,
							item: outputObject,
							sequence_number: sequenceNumber++,
						});
					}

					const outputObject = responseObject.output.at(-1);
					if (!outputObject || outputObject.type !== "message") {
						throw new StreamingError("Not implemented: only single output item type is supported in streaming mode.");
					}

					if (outputObject.content.length === 0) {
						// Response content part added event
						const contentPart: ResponseContentPartAddedEvent["part"] = {
							type: "output_text",
							text: "",
							annotations: [],
						};
						outputObject.content.push(contentPart);

						emitEvent({
							type: "response.content_part.added",
							item_id: outputObject.id,
							output_index: 0,
							content_index: 0,
							part: contentPart,
							sequence_number: sequenceNumber++,
						});
					}

					const contentPart = outputObject.content.at(-1);
					if (!contentPart || contentPart.type !== "output_text") {
						throw new StreamingError("Not implemented: only output_text is supported in streaming mode.");
					}

					// Add text delta (accumulated locally so the final "done" event
					// can carry the full text).
					contentPart.text += chunk.choices[0].delta.content;
					emitEvent({
						type: "response.output_text.delta",
						item_id: outputObject.id,
						output_index: 0,
						content_index: 0,
						delta: chunk.choices[0].delta.content,
						sequence_number: sequenceNumber++,
					});
				} else if (chunk.choices[0].delta.tool_calls && chunk.choices[0].delta.tool_calls.length > 0) {
					if (chunk.choices[0].delta.tool_calls.length > 1) {
						throw new StreamingError("Not implemented: only single tool call is supported in streaming mode.");
					}

					if (responseObject.output.length === 0) {
						// First tool-call chunk must carry the function name.
						if (!chunk.choices[0].delta.tool_calls[0].function.name) {
							throw new StreamingError("Tool call function name is required.");
						}

						const outputObject: ResponseFunctionToolCall = {
							type: "function_call",
							id: generateUniqueId("fc"),
							call_id: chunk.choices[0].delta.tool_calls[0].id,
							name: chunk.choices[0].delta.tool_calls[0].function.name,
							arguments: "",
						};
						responseObject.output = [outputObject];

						// Response output item added event
						emitEvent({
							type: "response.output_item.added",
							output_index: 0,
							item: outputObject,
							sequence_number: sequenceNumber++,
						});
					}

					const outputObject = responseObject.output.at(-1);
					if (!outputObject || !outputObject.id || outputObject.type !== "function_call") {
						throw new StreamingError("Not implemented: can only support single output item type in streaming mode.");
					}

					// Accumulate the JSON arguments string across chunks.
					outputObject.arguments += chunk.choices[0].delta.tool_calls[0].function.arguments;
					emitEvent({
						type: "response.function_call_arguments.delta",
						item_id: outputObject.id,
						output_index: 0,
						delta: chunk.choices[0].delta.tool_calls[0].function.arguments,
						sequence_number: sequenceNumber++,
					});
				}
			}

			// Stream exhausted: close out the single in-progress output item.
			const lastOutputItem = responseObject.output.at(-1);

			if (lastOutputItem) {
				if (lastOutputItem?.type === "message") {
					const contentPart = lastOutputItem.content.at(-1);
					if (contentPart?.type === "output_text") {
						emitEvent({
							type: "response.output_text.done",
							item_id: lastOutputItem.id,
							output_index: responseObject.output.length - 1,
							content_index: lastOutputItem.content.length - 1,
							text: contentPart.text,
							sequence_number: sequenceNumber++,
						});

						emitEvent({
							type: "response.content_part.done",
							item_id: lastOutputItem.id,
							output_index: responseObject.output.length - 1,
							content_index: lastOutputItem.content.length - 1,
							part: contentPart,
							sequence_number: sequenceNumber++,
						});
					} else {
						throw new StreamingError("Not implemented: only output_text is supported in streaming mode.");
					}

					// Response output item done event
					lastOutputItem.status = "completed";
					emitEvent({
						type: "response.output_item.done",
						output_index: responseObject.output.length - 1,
						item: lastOutputItem,
						sequence_number: sequenceNumber++,
					});
				} else if (lastOutputItem?.type === "function_call") {
					if (!lastOutputItem.id) {
						throw new StreamingError("Function call id is required.");
					}

					emitEvent({
						type: "response.function_call_arguments.done",
						item_id: lastOutputItem.id,
						output_index: responseObject.output.length - 1,
						arguments: lastOutputItem.arguments,
						sequence_number: sequenceNumber++,
					});

					lastOutputItem.status = "completed";
					emitEvent({
						type: "response.output_item.done",
						output_index: responseObject.output.length - 1,
						item: lastOutputItem,
						sequence_number: sequenceNumber++,
					});
				} else {
					throw new StreamingError("Not implemented: only message output is supported in streaming mode.");
				}
			}

			// Response completed event (with real usage numbers when provided).
			responseObject.status = "completed";
			if (usage) {
				responseObject.usage = {
					input_tokens: usage.prompt_tokens,
					input_tokens_details: { cached_tokens: 0 },
					output_tokens: usage.completion_tokens,
					output_tokens_details: { reasoning_tokens: 0 },
					total_tokens: usage.total_tokens,
				};
			}
			emitEvent({
				type: "response.completed",
				response: responseObject as Response,
				sequence_number: sequenceNumber++,
			});
		} catch (streamError) {
			// Errors after the SSE stream has started cannot change the HTTP
			// status; they are surfaced as a "response.failed" event instead.
			console.error("Error in streaming chat completion:", streamError);

			let message = "An error occurred while streaming from inference server.";
			if (streamError instanceof StreamingError) {
				message = streamError.message;
			} else if (
				typeof streamError === "object" &&
				streamError &&
				"message" in streamError &&
				typeof streamError.message === "string"
			) {
				message = streamError.message;
			}
			responseObject.status = "failed";
			responseObject.error = {
				code: "server_error",
				message,
			};
			emitEvent({
				type: "response.failed",
				response: responseObject as Response,
				sequence_number: sequenceNumber++,
			});
		}
		res.end();
		return;
	}

	// Non-streaming mode: one chat completion call, then translate every
	// choice's content / tool calls into Response output items.
	try {
		const chatCompletionResponse = await client.chatCompletion(payload);

		responseObject.status = "completed";
		for (const choice of chatCompletionResponse.choices) {
			if (choice.message.content) {
				responseObject.output.push({
					id: generateUniqueId("msg"),
					type: "message",
					role: "assistant",
					status: "completed",
					content: [
						{
							type: "output_text",
							text: choice.message.content,
							annotations: [],
						},
					],
				});
			}
			if (choice.message.tool_calls) {
				for (const toolCall of choice.message.tool_calls) {
					if (toolCall.function.name in mcpToolsMapping) {
						// MCP tool call: either execute directly or emit an approval
						// request, depending on the server's require_approval policy.
						const toolParams = mcpToolsMapping[toolCall.function.name];

						// Check if approval is required
						const approvalRequired =
							toolParams.require_approval === "always"
								? true
								: toolParams.require_approval === "never"
									? false
									: toolParams.require_approval.always?.tool_names?.includes(toolCall.function.name)
										? true
										: toolParams.require_approval.never?.tool_names?.includes(toolCall.function.name)
											? false
											: true; // behavior is undefined in specs, let's default to requiring approval

						if (approvalRequired) {
							console.log(`Requesting approval for MCP tool '${toolCall.function.name}'`);
							responseObject.output.push({
								type: "mcp_approval_request",
								id: generateUniqueId("mcp_approval_request"),
								name: toolCall.function.name,
								server_label: toolParams.server_label,
								arguments: toolCall.function.arguments,
							});
						} else {
							responseObject.output.push(
								await callMcpTool(
									toolParams,
									toolCall.function.name,
									toolParams.server_label,
									toolCall.function.arguments
								)
							);
						}
					} else {
						// Plain function tool call: returned to the client to execute.
						responseObject.output.push({
							type: "function_call",
							id: generateUniqueId("fc"),
							call_id: toolCall.id,
							name: toolCall.function.name,
							arguments: toolCall.function.arguments,
							status: "completed",
						});
					}
				}
			}
		}

		responseObject.usage = {
			input_tokens: chatCompletionResponse.usage.prompt_tokens,
			input_tokens_details: { cached_tokens: 0 },
			output_tokens: chatCompletionResponse.usage.completion_tokens,
			output_tokens_details: { reasoning_tokens: 0 },
			total_tokens: chatCompletionResponse.usage.total_tokens,
		};

		res.json(responseObject);
	} catch (error) {
		// Non-streaming failures can still use a proper HTTP error status.
		console.error(error);
		res.status(500).json({
			success: false,
			error: error instanceof Error ? error.message : "Unknown error",
		});
	}
};
src/schemas.ts ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { z } from "zod";
2
+
3
+ /**
4
+ * https://platform.openai.com/docs/api-reference/responses/create
5
+ * commented out properties are not supported by the server
6
+ */
7
+
8
+ const inputContentSchema = z.array(
9
+ z.union([
10
+ z.object({
11
+ type: z.literal("input_text"),
12
+ text: z.string(),
13
+ }),
14
+ z.object({
15
+ type: z.literal("input_image"),
16
+ // file_id: z.string().nullable().default(null),
17
+ image_url: z.string(),
18
+ // detail: z.enum(["auto", "low", "high"]).default("auto"),
19
+ }),
20
+ // z.object({
21
+ // type: z.literal("input_file"),
22
+ // file_data: z.string().nullable().default(null),
23
+ // file_id: z.string().nullable().default(null),
24
+ // filename: z.string().nullable().default(null),
25
+ // }),
26
+ ])
27
+ );
28
+
29
+ const mcpServerParamsSchema = z.object({
30
+ server_label: z.string(),
31
+ server_url: z.string(),
32
+ type: z.literal("mcp"),
33
+ allowed_tools: z
34
+ .union([
35
+ z.array(z.string()),
36
+ z.object({
37
+ tool_names: z.array(z.string()),
38
+ }),
39
+ ])
40
+ .nullable()
41
+ .default(null),
42
+ headers: z.record(z.string()).nullable().default(null),
43
+ require_approval: z
44
+ .union([
45
+ z.enum(["always", "never"]),
46
+ z.object({
47
+ always: z.object({ tool_names: z.array(z.string()).optional() }).optional(),
48
+ never: z.object({ tool_names: z.array(z.string()).optional() }).optional(),
49
+ }),
50
+ ])
51
+ .default("always"),
52
+ });
53
+
54
+ const mcpApprovalRequestParamsSchema = z.object({
55
+ type: z.literal("mcp_approval_request"),
56
+ id: z.string(),
57
+ server_label: z.string(),
58
+ name: z.string(),
59
+ arguments: z.string(),
60
+ });
61
+ const mcpApprovalResponseParamsSchema = z.object({
62
+ type: z.literal("mcp_approval_response"),
63
+ id: z.string().optional(),
64
+ approval_request_id: z.string(),
65
+ approve: z.boolean(),
66
+ reason: z.string().optional(),
67
+ });
68
+
69
+ export const createResponseParamsSchema = z.object({
70
+ // background: z.boolean().default(false),
71
+ // include:
72
+ input: z.union([
73
+ z.string(),
74
+ z.array(
75
+ z.union([
76
+ z.object({
77
+ content: z.union([z.string(), inputContentSchema]),
78
+ role: z.enum(["user", "assistant", "system", "developer"]),
79
+ type: z.enum(["message"]).default("message"),
80
+ }),
81
+ z.object({
82
+ role: z.enum(["user", "system", "developer"]),
83
+ status: z.enum(["in_progress", "completed", "incomplete"]).nullable().default(null),
84
+ content: inputContentSchema,
85
+ type: z.enum(["message"]).default("message"),
86
+ }),
87
+ z.object({
88
+ id: z.string().optional(),
89
+ role: z.enum(["assistant"]),
90
+ status: z.enum(["in_progress", "completed", "incomplete"]).optional(),
91
+ type: z.enum(["message"]).default("message"),
92
+ content: z.array(
93
+ z.union([
94
+ z.object({
95
+ type: z.literal("output_text"),
96
+ text: z.string(),
97
+ annotations: z.array(z.object({})).optional(), // TODO: incomplete
98
+ logprobs: z.array(z.object({})).optional(), // TODO: incomplete
99
+ }),
100
+ z.object({
101
+ type: z.literal("refusal"),
102
+ refusal: z.string(),
103
+ }),
104
+ // TODO: much more objects: File search tool call, Computer tool call, Computer tool call output, Web search tool call, Function tool call, Function tool call output, Reasoning, Image generation call, Code interpreter tool call, Local shell call, Local shell call output, MCP list tools, MCP approval request, MCP approval response, MCP tool call
105
+ ])
106
+ ),
107
+ }),
108
+ z.object({
109
+ type: z.literal("function_call"),
110
+ id: z.string().optional(),
111
+ call_id: z.string(),
112
+ name: z.string(),
113
+ arguments: z.string(),
114
+ status: z.enum(["in_progress", "completed", "incomplete"]).optional(),
115
+ }),
116
+ z.object({
117
+ call_id: z.string(),
118
+ output: z.string(),
119
+ type: z.literal("function_call_output"),
120
+ id: z.string().optional(),
121
+ status: z.enum(["in_progress", "completed", "incomplete"]),
122
+ }),
123
+ z.object({
124
+ type: z.literal("mcp_list_tools"),
125
+ id: z.string(),
126
+ server_label: z.string(),
127
+ tools: z.array(
128
+ z.object({
129
+ name: z.string(),
130
+ input_schema: z.record(z.any()),
131
+ description: z.string().nullable().optional(),
132
+ annotations: z.object({}).optional(),
133
+ })
134
+ ),
135
+ error: z.string().nullable().optional(),
136
+ }),
137
+ mcpApprovalRequestParamsSchema,
138
+ mcpApprovalResponseParamsSchema,
139
+ ])
140
+ ),
141
+ ]),
142
+ instructions: z.string().nullable().default(null),
143
+ max_output_tokens: z.number().int().min(0).nullable().default(null),
144
+ // max_tool_calls: z.number().min(0).nullable().default(null),
145
+ metadata: z
146
+ .record(z.string().max(64), z.string().max(512))
147
+ .refine((val) => Object.keys(val).length <= 16, {
148
+ message: "Must have at most 16 items",
149
+ })
150
+ .nullable()
151
+ .default(null),
152
+ model: z.string(),
153
+ // parallel_tool_calls: z.boolean().default(true), // TODO: how to handle this if chat completion doesn't?
154
+ // previous_response_id: z.string().nullable().default(null),
155
+ // reasoning: z.object({
156
+ // effort: z.enum(["low", "medium", "high"]).default("medium"),
157
+ // summary: z.enum(["auto", "concise", "detailed"]).nullable().default(null),
158
+ // }),
159
+ // store: z.boolean().default(true),
160
+ stream: z.boolean().default(false),
161
+ temperature: z.number().min(0).max(2).default(1),
162
+ text: z
163
+ .object({
164
+ format: z.union([
165
+ z.object({
166
+ type: z.literal("text"),
167
+ }),
168
+ z.object({
169
+ type: z.literal("json_object"),
170
+ }),
171
+ z.object({
172
+ type: z.literal("json_schema"),
173
+ name: z
174
+ .string()
175
+ .max(64, "Must be at most 64 characters")
176
+ .regex(/^[a-zA-Z0-9_-]+$/, "Only letters, numbers, underscores, and dashes are allowed"),
177
+ description: z.string().optional(),
178
+ schema: z.record(z.any()),
179
+ strict: z.boolean().default(false),
180
+ }),
181
+ ]),
182
+ })
183
+ .optional(),
184
+ tool_choice: z
185
+ .union([
186
+ z.enum(["auto", "none", "required"]),
187
+ z.object({
188
+ type: z.enum(["function"]),
189
+ name: z.string(),
190
+ }),
191
+ // TODO: also hosted tool and MCP tool
192
+ ])
193
+ .optional(),
194
+ tools: z
195
+ .array(
196
+ z.union([
197
+ z.object({
198
+ name: z.string(),
199
+ parameters: z.record(z.any()),
200
+ strict: z.boolean().default(true),
201
+ type: z.enum(["function"]),
202
+ description: z.string().optional(),
203
+ }),
204
+ mcpServerParamsSchema,
205
+ ])
206
+ )
207
+ .optional(),
208
+ // top_logprobs: z.number().min(0).max(20).nullable().default(null),
209
+ top_p: z.number().min(0).max(1).default(1),
210
+ // truncation: z.enum(["auto", "disabled"]).default("disabled"),
211
+ // user
212
+ });
213
+
214
+ export type CreateResponseParams = z.infer<typeof createResponseParamsSchema>;
215
+ export type McpServerParams = z.infer<typeof mcpServerParamsSchema>;
216
+ export type McpApprovalRequestParams = z.infer<typeof mcpApprovalRequestParamsSchema>;
217
+ export type McpApprovalResponseParams = z.infer<typeof mcpApprovalResponseParamsSchema>;
src/server.ts ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import express, { type Express } from "express";
2
+ import { createResponseParamsSchema } from "./schemas.js";
3
+ import { validateBody } from "./middleware/validation.js";
4
+ import { requestLogger } from "./middleware/logging.js";
5
+ import { getLandingPageHtml, postCreateResponse } from "./routes/index.js";
6
+
7
+ export const createApp = (): Express => {
8
+ const app: Express = express();
9
+
10
+ // Middleware
11
+ app.use(requestLogger());
12
+ app.use(express.json());
13
+
14
+ // Routes
15
+ app.get("/", getLandingPageHtml);
16
+
17
+ app.post("/v1/responses", validateBody(createResponseParamsSchema), postCreateResponse);
18
+
19
+ return app;
20
+ };
tsconfig.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "compilerOptions": {
3
+ "allowSyntheticDefaultImports": true,
4
+ "lib": ["ES2022", "DOM"],
5
+ "module": "CommonJS",
6
+ "moduleResolution": "node",
7
+ "target": "ES2022",
8
+ "forceConsistentCasingInFileNames": true,
9
+ "strict": true,
10
+ "noImplicitAny": true,
11
+ "strictNullChecks": true,
12
+ "skipLibCheck": true,
13
+ "noImplicitOverride": true,
14
+ "outDir": "./dist",
15
+ "declaration": true,
16
+ "declarationMap": true,
17
+ "resolveJsonModule": true
18
+ },
19
+ "include": ["src", "test"],
20
+ "exclude": ["dist"]
21
+ }