import { serve } from "https://deno.land/std@0.208.0/http/server.ts";

/** Julep API base URL (fixed). */
const JULEP_API_BASE = "https://api.julep.ai/api";

/** Port the proxy listens on; also used in the startup log line. */
const PORT = 7860;

/** Hardcoded list of models exposed through `/v1/models` and accepted by `/v1/chat/completions`. */
const HARDCODED_MODELS = [
  'mistral-large-2411',
  'o1',
  'text-embedding-3-large',
  'vertex_ai/text-embedding-004',
  'claude-3.5-haiku',
  'cerebras/llama-4-scout-17b-16e-instruct',
  'llama-3.1-8b',
  'magnum-v4-72b',
  'voyage-multilingual-2',
  'claude-3-haiku',
  'gpt-4o',
  'BAAI/bge-m3',
  'openrouter/meta-llama/llama-4-maverick',
  'openrouter/meta-llama/llama-4-scout',
  'claude-3.5-sonnet',
  'hermes-3-llama-3.1-70b',
  'claude-3.5-sonnet-20240620',
  'qwen-2.5-72b-instruct',
  'l3.3-euryale-70b',
  'gpt-4o-mini',
  'cerebras/llama-3.3-70b',
  'o1-preview',
  'gemini-1.5-pro-latest',
  'l3.1-euryale-70b',
  'claude-3-sonnet',
  'Alibaba-NLP/gte-large-en-v1.5',
  'openrouter/meta-llama/llama-4-scout:free',
  'llama-3.1-70b',
  'eva-qwen-2.5-72b',
  'claude-3.5-sonnet-20241022',
  'gemini-2.0-flash',
  'deepseek-chat',
  'o1-mini',
  'eva-llama-3.33-70b',
  'gemini-2.5-pro-preview-03-25',
  'gemini-1.5-pro',
  'gpt-4-turbo',
  'openrouter/meta-llama/llama-4-maverick:free',
  'o3-mini',
  'claude-3.7-sonnet',
  'voyage-3',
  'cerebras/llama-3.1-8b',
  'claude-3-opus',
];

/**
 * Extracts the Julep API key from the request's `Authorization` header.
 *
 * @param req Incoming HTTP request.
 * @returns The token following `"Bearer "`, or `null` when the header is
 *   missing or does not use the Bearer scheme.
 */
function getJulepApiKey(req: Request): string | null {
  const authHeader = req.headers.get("Authorization");
  if (authHeader && authHeader.startsWith("Bearer ")) {
    return authHeader.substring(7); // Length of "Bearer "
  }
  return null;
}

/**
 * Converts a timestamp-like value into whole Unix seconds.
 *
 * Falls back to the current time when the value is missing or cannot be
 * parsed, so the `created` field never serializes as `null` (NaN).
 */
function toUnixSeconds(value: unknown): number {
  const millis =
    typeof value === "string" || typeof value === "number"
      ? new Date(value).getTime()
      : Number.NaN;
  return Math.floor((Number.isNaN(millis) ? Date.now() : millis) / 1000);
}

/**
 * Best-effort, fire-and-forget deletion of the temporary Julep session and
 * agent created for a single request. Failures are logged, never thrown.
 *
 * @param headers Headers (including Authorization) to send with the DELETE calls.
 * @param sessionId Session to delete, or `null` if none was created.
 * @param agentId Agent to delete, or `null` if none was created.
 */
function cleanupJulepResources(
  headers: Record<string, string>,
  sessionId: string | null,
  agentId: string | null,
): void {
  const fireDelete = (url: string): void => {
    fetch(url, { method: "DELETE", headers })
      // Drain the body so the underlying connection/resource is released.
      .then((res) => res.body?.cancel())
      .catch(console.error);
  };
  if (sessionId) {
    fireDelete(`${JULEP_API_BASE}/sessions/${sessionId}`);
  }
  if (agentId) {
    fireDelete(`${JULEP_API_BASE}/agents/${agentId}`);
  }
}

/**
 * Handles `GET /v1/models` by returning the hardcoded model list in the
 * OpenAI "list of models" shape.
 *
 * @param req Incoming request; must carry a Bearer token.
 * @returns 401 when the token is missing, otherwise a 200 JSON response.
 */
async function handleModels(req: Request): Promise<Response> {
  const julepApiKey = getJulepApiKey(req);
  if (!julepApiKey) {
    return new Response("Unauthorized: Missing or invalid Authorization header", { status: 401 });
  }

  // The models are hardcoded, so the current time stands in for the creation time.
  const created = Math.floor(Date.now() / 1000);

  const openaiModels = HARDCODED_MODELS.map((modelId) => ({
    id: modelId,
    object: "model",
    created,
    owned_by: "julep",
    permission: [
      {
        id: `modelperm-${modelId}`,
        object: "model_permission",
        created,
        allow_create_engine: false,
        allow_sampling: true,
        allow_logprobs: true,
        allow_search_indices: false,
        allow_view: true,
        allow_fine_tuning: false,
        organization: "*",
        group: null,
        is_blocking: false,
      },
    ],
    root: modelId,
    parent: null,
  }));

  return new Response(JSON.stringify({ data: openaiModels, object: "list" }), {
    headers: { "Content-Type": "application/json" },
    status: 200,
  });
}

/**
 * Builds a transform that rewrites Julep's streamed `data:` lines into
 * OpenAI `chat.completion.chunk` SSE events.
 *
 * Network chunks do not respect line boundaries, so input is buffered and only
 * complete lines are parsed; any trailing partial line is handled in `flush`.
 *
 * @param model Model id echoed back in every emitted chunk.
 */
function createOpenAISseTransform(model: string): TransformStream<string, string> {
  let buffer = "";

  const processLine = (
    rawLine: string,
    controller: TransformStreamDefaultController<string>,
  ): void => {
    const line = rawLine.endsWith("\r") ? rawLine.slice(0, -1) : rawLine;
    if (line.trim() === "") {
      return;
    }
    if (!line.startsWith("data:")) {
      // Pass through non-data lines (e.g. SSE comments) unchanged.
      controller.enqueue(`${line}\n`);
      return;
    }

    const payload = line.substring(5).trim();
    if (payload === "[DONE]") {
      // End-of-stream sentinel is not JSON; forward it as-is.
      controller.enqueue("data: [DONE]\n\n");
      return;
    }

    // deno-lint-ignore no-explicit-any
    let data: any;
    try {
      data = JSON.parse(payload);
    } catch (error) {
      // One malformed event should not tear down the whole stream.
      console.error("Skipping unparsable Julep stream event:", error);
      return;
    }

    // NOTE(review): field names below mirror what the original code read from
    // Julep's chunks; confirm against the Julep chat streaming schema.
    const openaiChunk = {
      id: data?.id,
      object: "chat.completion.chunk",
      created: toUnixSeconds(data?.created_at),
      model,
      // deno-lint-ignore no-explicit-any
      choices: (Array.isArray(data?.choices) ? data.choices : []).map((choice: any) => ({
        index: choice?.index,
        delta: {
          role: choice?.delta?.role,
          content: choice?.delta?.content,
          tool_calls: choice?.delta?.tool_calls
            ? toolCallDeltaToOpenAI(choice.delta.tool_calls)
            : undefined,
        },
        finish_reason: choice?.finish_reason,
      })),
    };
    controller.enqueue(`data: ${JSON.stringify(openaiChunk)}\n\n`);
  };

  return new TransformStream<string, string>({
    transform(chunk, controller) {
      buffer += chunk;
      const lines = buffer.split("\n");
      // The last element is an incomplete line (or "" if chunk ended on "\n").
      buffer = lines.pop() ?? "";
      for (const line of lines) {
        processLine(line, controller);
      }
    },
    flush(controller) {
      if (buffer.trim() !== "") {
        processLine(buffer, controller);
      }
      buffer = "";
    },
  });
}

/**
 * Wraps a byte stream so that `cleanup` runs exactly once when the stream
 * finishes, errors, or is cancelled by the consumer (e.g. client disconnect).
 */
function withCleanup(
  source: ReadableStream<Uint8Array>,
  cleanup: () => void,
): ReadableStream<Uint8Array> {
  const reader = source.getReader();
  let cleaned = false;
  const runCleanup = (): void => {
    if (!cleaned) {
      cleaned = true;
      cleanup();
    }
  };

  return new ReadableStream<Uint8Array>({
    async pull(controller) {
      try {
        const { done, value } = await reader.read();
        if (done) {
          runCleanup();
          controller.close();
          return;
        }
        controller.enqueue(value);
      } catch (error) {
        console.error("Stream aborted:", error);
        runCleanup();
        controller.error(error);
      }
    },
    async cancel(reason) {
      try {
        await reader.cancel(reason);
      } finally {
        runCleanup();
      }
    },
  });
}

/**
 * Handles `POST /v1/chat/completions`.
 *
 * For each request this creates a temporary Julep agent and session, forwards
 * the chat to the session, converts the answer (streaming or not) to the
 * OpenAI response shape, and then deletes the temporary agent and session.
 *
 * @param req Incoming request; must carry a Bearer token and a JSON body with
 *   `model` (one of `HARDCODED_MODELS`) and a non-empty `messages` array.
 * @returns 401/400 for auth or validation failures, the upstream status when a
 *   Julep call fails, 500 on unexpected errors, otherwise a 200 response.
 */
async function handleChatCompletions(req: Request): Promise<Response> {
  const julepApiKey = getJulepApiKey(req);
  if (!julepApiKey) {
    return new Response("Unauthorized: Missing or invalid Authorization header", { status: 401 });
  }

  const headers: Record<string, string> = {
    "Authorization": `Bearer ${julepApiKey}`,
    "Content-Type": "application/json",
  };

  let agentId: string | null = null; // Temporary agent created for this request
  let sessionId: string | null = null; // Temporary session created for this request

  try {
    let requestBody: unknown;
    try {
      requestBody = await req.json();
    } catch {
      // Malformed JSON is a client error, not a server error.
      return new Response("Invalid request body. 'model' and 'messages' are required.", { status: 400 });
    }
    if (typeof requestBody !== "object" || requestBody === null) {
      return new Response("Invalid request body. 'model' and 'messages' are required.", { status: 400 });
    }

    const { model, messages, stream, ...rest } = requestBody as Record<string, unknown>;

    if (!model || !messages || !Array.isArray(messages) || messages.length === 0) {
      return new Response("Invalid request body. 'model' and 'messages' are required.", { status: 400 });
    }

    // Only models from the hardcoded list are accepted.
    if (typeof model !== "string" || !HARDCODED_MODELS.includes(model)) {
      return new Response(`Invalid model: ${model}. Please use one of the available models.`, { status: 400 });
    }

    // Single source of truth for both the upstream request and the response branch.
    const wantsStream = stream === true;

    // 1. Create a new Agent for this request
    const createAgentResponse = await fetch(`${JULEP_API_BASE}/agents`, {
      method: "POST",
      headers,
      body: JSON.stringify({
        name: model,
        model: model,
        about: model,
        instructions: ["Follow user instructions carefully."],
      }),
    });

    if (!createAgentResponse.ok) {
      const errorText = await createAgentResponse.text();
      console.error(`Error creating Julep Agent: ${createAgentResponse.status} - ${errorText}`);
      return new Response(`Error creating Julep Agent: ${createAgentResponse.statusText}`, {
        status: createAgentResponse.status,
      });
    }

    const agentData = await createAgentResponse.json();
    agentId = agentData?.id ?? null;
    if (!agentId) {
      console.error("Julep Agent creation response did not include an id");
      return new Response("Error creating Julep Agent: missing agent id", { status: 502 });
    }

    // 2. Create a Session using the new Agent ID
    const createSessionResponse = await fetch(`${JULEP_API_BASE}/sessions`, {
      method: "POST",
      headers,
      body: JSON.stringify({ agent: agentId }),
    });

    if (!createSessionResponse.ok) {
      const errorText = await createSessionResponse.text();
      console.error(`Error creating Julep Session: ${createSessionResponse.status} - ${errorText}`);
      cleanupJulepResources(headers, null, agentId);
      return new Response(`Error creating Julep Session: ${createSessionResponse.statusText}`, {
        status: createSessionResponse.status,
      });
    }

    const sessionData = await createSessionResponse.json();
    sessionId = sessionData?.id ?? null;
    if (!sessionId) {
      console.error("Julep Session creation response did not include an id");
      cleanupJulepResources(headers, null, agentId);
      return new Response("Error creating Julep Session: missing session id", { status: 502 });
    }

    // 3. Perform Chat Completion
    const chatBody = {
      // Forward any other OpenAI parameters first so they cannot override the
      // normalized `messages` / `stream` fields below.
      ...rest,
      // deno-lint-ignore no-explicit-any
      messages: messages.map((msg: any) => ({
        role: msg?.role,
        // Non-string content (e.g. multi-part arrays) is serialized to a string.
        content: typeof msg?.content === "string" ? msg.content : JSON.stringify(msg?.content),
      })),
      stream: wantsStream,
    };

    const chatResponse = await fetch(`${JULEP_API_BASE}/sessions/${sessionId}/chat`, {
      method: "POST",
      headers,
      body: JSON.stringify(chatBody),
    });

    // 4. Handle Response and Clean Up
    if (!chatResponse.ok) {
      const errorText = await chatResponse.text();
      console.error(`Error during Julep Chat Completion: ${chatResponse.status} - ${errorText}`);
      cleanupJulepResources(headers, sessionId, agentId);
      return new Response(`Error during Julep Chat Completion: ${chatResponse.statusText} - ${errorText}`, {
        status: chatResponse.status,
      });
    }

    if (wantsStream) {
      if (!chatResponse.body) {
        cleanupJulepResources(headers, sessionId, agentId);
        return new Response("Error during Julep Chat Completion: empty response body", { status: 502 });
      }

      // Decode bytes -> rewrite SSE events -> re-encode to bytes, since a
      // Response body stream must yield byte chunks rather than strings.
      const sseBytes = chatResponse.body
        .pipeThrough(new TextDecoderStream())
        .pipeThrough(createOpenAISseTransform(model))
        .pipeThrough(new TextEncoderStream());

      // Capture ids in locals so the closure does not depend on later mutation.
      const streamSessionId = sessionId;
      const streamAgentId = agentId;
      const body = withCleanup(sseBytes, () => {
        cleanupJulepResources(headers, streamSessionId, streamAgentId);
      });

      return new Response(body, {
        headers: {
          "Content-Type": "text/event-stream",
          "Cache-Control": "no-cache",
          "Connection": "keep-alive",
        },
        status: 200,
      });
    }

    // Non-streaming response
    const julepChatData = await chatResponse.json();

    // NOTE(review): field names below mirror what the original code read from
    // Julep's chat response; confirm against the Julep chat schema.
    const openaiCompletion = {
      id: julepChatData?.id,
      object: "chat.completion",
      created: toUnixSeconds(julepChatData?.created_at),
      model: model,
      // deno-lint-ignore no-explicit-any
      choices: (Array.isArray(julepChatData?.choices) ? julepChatData.choices : []).map((choice: any) => ({
        index: choice?.index,
        message: {
          role: choice?.message?.role,
          content: choice?.message?.content,
          tool_calls: choice?.message?.tool_calls
            ? toolCallMessageToOpenAI(choice.message.tool_calls)
            : undefined,
        },
        finish_reason: choice?.finish_reason,
      })),
      usage: julepChatData?.usage
        ? {
          prompt_tokens: julepChatData.usage.prompt_tokens,
          completion_tokens: julepChatData.usage.completion_tokens,
          total_tokens: julepChatData.usage.total_tokens,
        }
        : undefined,
    };

    cleanupJulepResources(headers, sessionId, agentId);

    return new Response(JSON.stringify(openaiCompletion), {
      headers: { "Content-Type": "application/json" },
      status: 200,
    });
  } catch (error) {
    console.error("Error handling chat completions request:", error);
    // Clean up whatever was created before the failure.
    cleanupJulepResources(headers, sessionId, agentId);
    return new Response("Internal Server Error", { status: 500 });
  }
}

/**
 * Formats Julep streamed tool-call deltas as OpenAI tool-call objects.
 *
 * NOTE(review): assumes Julep's delta format carries `id` and a `function`
 * object like the non-streaming message format — adjust if it differs.
 */
// deno-lint-ignore no-explicit-any
function toolCallDeltaToOpenAI(julepToolCalls: any[]): any[] {
  return julepToolCalls.map((toolCall) => ({
    id: toolCall.id,
    type: "function",
    function: {
      name: toolCall.function?.name,
      arguments: toolCall.function?.arguments, // May arrive as partial chunks when streaming
    },
  }));
}

/** Formats Julep tool calls from a complete (non-streaming) message as OpenAI tool-call objects. */
// deno-lint-ignore no-explicit-any
function toolCallMessageToOpenAI(julepToolCalls: any[]): any[] {
  return julepToolCalls.map((toolCall) => ({
    id: toolCall.id,
    type: "function",
    function: {
      name: toolCall.function?.name,
      arguments: toolCall.function?.arguments,
    },
  }));
}

/**
 * Main request router: dispatches to the models or chat-completions handler
 * based on path and method, and answers 404 for everything else.
 */
async function handler(req: Request): Promise<Response> {
  const url = new URL(req.url);

  if (url.pathname === "/v1/models" && req.method === "GET") {
    return handleModels(req);
  }
  if (url.pathname === "/v1/chat/completions" && req.method === "POST") {
    return handleChatCompletions(req);
  }
  return new Response("Not Found", { status: 404 });
}

console.log(`HTTP server running on http://localhost:${PORT}`);
serve(handler, { port: PORT });