import type { Request, Response } from "express";

/**
 * Serves the HTML landing page for the Responses API server.
 *
 * The advertised API base URL is derived from the incoming request: when the
 * request arrives via a Hugging Face Space (host ends in ".hf.space") the
 * scheme is forced to "https", since the Space proxy terminates TLS and
 * `req.protocol` would otherwise report "http". For any other host,
 * Express's `req.protocol` is used unchanged.
 *
 * NOTE(review): the response is sent as text/html but the template below
 * contains no markup in this view — confirm the HTML tags were not stripped
 * from the template at some point.
 *
 * @param req - incoming Express request; only the Host header and protocol are read
 * @param res - Express response; receives the rendered page
 */
export function getLandingPageHtml(req: Request, res: Response): void {
  const hostHeader = req.get("host");
  // Behind the HF Spaces TLS-terminating proxy, req.protocol reports "http";
  // force "https" there so the examples below show a usable endpoint.
  const scheme = hostHeader?.endsWith(".hf.space") ? "https" : req.protocol;
  const baseUrl = `${scheme}://${hostHeader}/v1`;

  res.setHeader("Content-Type", "text/html; charset=utf-8");
  res.send(` responses.js – OpenAI-compatible Responses API

OpenAI-compatible Responses API

responses.js is an open-source, lightweight server implementing OpenAI's Responses API, built on top of Chat Completions and powered by Hugging Face Inference Providers.

API Endpoint:
${baseUrl}
Get started by sending requests to this endpoint
View on GitHub
OpenAI-compatible
Stateless implementation of the Responses API
Inference Providers
Powered by Hugging Face Inference Providers
Multi-modal
Text and image input support
Streaming, & Structured Output
Supports streaming, JSON schema, and function calling

Examples

from openai import OpenAI
import os

client = OpenAI(
    base_url="${baseUrl}",
    api_key=os.getenv("HF_TOKEN"), # visit https://huggingface.co/settings/tokens
)

response = client.responses.create(
    model="Qwen/Qwen2.5-VL-7B-Instruct",
    instructions="You are a helpful assistant.",
    input="Tell me a three sentence bedtime story about a unicorn.",
)

print(response)
print(response.output_text)
from openai import OpenAI
import os

client = OpenAI(
    base_url="${baseUrl}",
    api_key=os.getenv("HF_TOKEN"), # visit https://huggingface.co/settings/tokens
)

response = client.responses.create(
    model="Qwen/Qwen2.5-VL-7B-Instruct",
    input=[
        {
            "role": "user",
            "content": [
                {"type": "input_text", "text": "what is in this image?"},
                {
                    "type": "input_image",
                    "image_url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg",
                },
            ],
        }
    ],
)

print(response)
print(response.output_text)
from openai import OpenAI
import os

client = OpenAI(
    base_url="${baseUrl}",
    api_key=os.getenv("HF_TOKEN"), # visit https://huggingface.co/settings/tokens
)

response = client.responses.create(
    model="Qwen/Qwen2.5-VL-7B-Instruct",
    input=[
        {
            "role": "developer",
            "content": "Talk like a pirate.",
        },
        {
            "role": "user",
            "content": "Are semicolons optional in JavaScript?",
        },
    ],
)

print(response)
print(response.output_text)
from openai import OpenAI
import os

client = OpenAI(
    base_url="${baseUrl}",
    api_key=os.getenv("HF_TOKEN"), # visit https://huggingface.co/settings/tokens
)

stream = client.responses.create(
    model="Qwen/Qwen2.5-VL-7B-Instruct",
    input=[
        {
            "role": "user",
            "content": "Say 'double bubble bath' ten times fast.",
        },
    ],
    stream=True,
)

for event in stream:
    print(event)
from openai import OpenAI
import os

client = OpenAI(
    base_url="${baseUrl}",
    api_key=os.getenv("HF_TOKEN"), # visit https://huggingface.co/settings/tokens
)

tools = [
    {
        "type": "function",
        "name": "get_current_weather",
        "description": "Get the current weather in a given location",
        "parameters": {
            "type": "object",
            "properties": {
                "location": {"type": "string", "description": "The city and state, e.g. San Francisco, CA"},
                "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
            },
            "required": ["location", "unit"],
        },
    }
]

response = client.responses.create(
    model="cerebras@meta-llama/Llama-3.3-70B-Instruct",
    tools=tools,
    input="What is the weather like in Boston today?",
    tool_choice="auto",
)

print(response)
from openai import OpenAI
from pydantic import BaseModel
import os

client = OpenAI(
    base_url="${baseUrl}",
    api_key=os.getenv("HF_TOKEN"), # visit https://huggingface.co/settings/tokens
)

class CalendarEvent(BaseModel):
    name: str
    date: str
    participants: list[str]

response = client.responses.parse(
    model="novita@meta-llama/Meta-Llama-3-70B-Instruct",
    input=[
        {"role": "system", "content": "Extract the event information."},
        {
            "role": "user",
            "content": "Alice and Bob are going to a science fair on Friday.",
        },
    ],
    text_format=CalendarEvent,
)

print(response.output_parsed)
from openai import OpenAI
import os

client = OpenAI(
    base_url="${baseUrl}",
    api_key=os.getenv("HF_TOKEN"), # visit https://huggingface.co/settings/tokens
)

response = client.responses.create(
    model="cerebras@meta-llama/Llama-3.3-70B-Instruct",
    input="how does tiktoken work?",
    tools=[
        {
            "type": "mcp",
            "server_label": "gitmcp",
            "server_url": "https://gitmcp.io/openai/tiktoken",
            "allowed_tools": ["search_tiktoken_documentation", "fetch_tiktoken_documentation"],
            "require_approval": "never",
        },
    ],
)

for output in response.output:
    print(output)
`);
}