// NOTE: std@0.190.0 is an assumed pin; any std release that still exports
// serve() from http/server.ts works here.
import { serve } from "https://deno.land/std@0.190.0/http/server.ts";

// Julep API Base URL (fixed)
const JULEP_API_BASE = "https://api.julep.ai/api";

// Hardcoded list of models (Agent IDs in this context)
const HARDCODED_MODELS = [
  'mistral-large-2411', 'o1', 'text-embedding-3-large', 'vertex_ai/text-embedding-004',
  'claude-3.5-haiku', 'cerebras/llama-4-scout-17b-16e-instruct', 'llama-3.1-8b',
  'magnum-v4-72b', 'voyage-multilingual-2', 'claude-3-haiku', 'gpt-4o',
  'BAAI/bge-m3', 'openrouter/meta-llama/llama-4-maverick', 'openrouter/meta-llama/llama-4-scout',
  'claude-3.5-sonnet', 'hermes-3-llama-3.1-70b', 'claude-3.5-sonnet-20240620',
  'qwen-2.5-72b-instruct', 'l3.3-euryale-70b', 'gpt-4o-mini', 'cerebras/llama-3.3-70b',
  'o1-preview', 'gemini-1.5-pro-latest', 'l3.1-euryale-70b', 'claude-3-sonnet',
  'Alibaba-NLP/gte-large-en-v1.5', 'openrouter/meta-llama/llama-4-scout:free',
  'llama-3.1-70b', 'eva-qwen-2.5-72b', 'claude-3.5-sonnet-20241022', 'gemini-2.0-flash',
  'deepseek-chat', 'o1-mini', 'eva-llama-3.33-70b', 'gemini-2.5-pro-preview-03-25',
  'gemini-1.5-pro', 'gpt-4-turbo', 'openrouter/meta-llama/llama-4-maverick:free',
  'o3-mini', 'claude-3.7-sonnet', 'voyage-3', 'cerebras/llama-3.1-8b', 'claude-3-opus'
];

// Helper function to get Julep API Key from Authorization header
function getJulepApiKey(req: Request): string | null {
  const authHeader = req.headers.get("Authorization");
  if (authHeader && authHeader.startsWith("Bearer ")) {
    return authHeader.substring(7); // Extract the token after "Bearer "
  }
  return null;
}
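// For example, a request carrying "Authorization: Bearer jlp_abc123"
// (hypothetical key) yields "jlp_abc123"; any other header shape yields null.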

// OpenAI Models endpoint handler (hardcoded)
async function handleModels(req: Request): Promise<Response> {
  const julepApiKey = getJulepApiKey(req);
  if (!julepApiKey) {
    return new Response("Unauthorized: Missing or invalid Authorization header", { status: 401 });
  }

  // Format hardcoded models into OpenAI models format
  const openaiModels = HARDCODED_MODELS.map((modelId) => ({
    id: modelId,
    object: "model",
    created: Math.floor(Date.now() / 1000), // Use current time for creation
    owned_by: "julep", // Or "openai" if you prefer
    permission: [
      {
        id: `modelperm-${modelId}`,
        object: "model_permission",
        created: Math.floor(Date.now() / 1000),
        allow_create_engine: false,
        allow_sampling: true,
        allow_logprobs: true,
        allow_search_indices: false,
        allow_view: true,
        allow_fine_tuning: false,
        organization: "*",
        group: null,
        is_blocking: false,
      },
    ],
    root: modelId,
    parent: null,
  }));

  return new Response(JSON.stringify({ data: openaiModels, object: "list" }), {
    headers: { "Content-Type": "application/json" },
    status: 200,
  });
}

// OpenAI Chat Completions endpoint handler
async function handleChatCompletions(req: Request): Promise<Response> {
  const julepApiKey = getJulepApiKey(req);
  if (!julepApiKey) {
    return new Response("Unauthorized: Missing or invalid Authorization header", { status: 401 });
  }

  const headers = {
    "Authorization": `Bearer ${julepApiKey}`,
    "Content-Type": "application/json",
  };

  let agentId: string | null = null; // Variable to store the created agent ID
  let sessionId: string | null = null; // Variable to store the created session ID

  try {
    const requestBody = await req.json();
    const { model, messages, stream, ...rest } = requestBody;

    if (!model || !messages || !Array.isArray(messages) || messages.length === 0) {
      return new Response("Invalid request body. 'model' and 'messages' are required.", { status: 400 });
    }

    // Check if the requested model is in our hardcoded list
    if (!HARDCODED_MODELS.includes(model)) {
       return new Response(`Invalid model: ${model}. Please use one of the available models.`, { status: 400 });
    }

    // 1. Create a new Agent for this request
    const createAgentUrl = `${JULEP_API_BASE}/agents`;
    const createAgentBody = {
      name: model, // Set agent name to the model value
      model: model, // Use the requested OpenAI model as the Julep Agent's model
      about: model, // Set agent about to the model value
      instructions: ["Follow user instructions carefully."], // Keep some default instructions
    };

    const createAgentResponse = await fetch(createAgentUrl, {
      method: "POST",
      headers,
      body: JSON.stringify(createAgentBody),
    });

    if (!createAgentResponse.ok) {
      const errorText = await createAgentResponse.text();
      console.error(`Error creating Julep Agent: ${createAgentResponse.status} - ${errorText}`);
      return new Response(`Error creating Julep Agent: ${createAgentResponse.statusText}`, { status: createAgentResponse.status });
    }

    const agentData = await createAgentResponse.json();
    agentId = agentData.id; // Store the agent ID

    // 2. Create a Session using the new Agent ID
    const createSessionUrl = `${JULEP_API_BASE}/sessions`;
    const createSessionBody = {
      agent: agentId, // Use the newly created Agent ID
      // You can add other Session creation parameters here if needed
    };

    const createSessionResponse = await fetch(createSessionUrl, {
      method: "POST",
      headers,
      body: JSON.stringify(createSessionBody),
    });

    if (!createSessionResponse.ok) {
      const errorText = await createSessionResponse.text();
      console.error(`Error creating Julep Session: ${createSessionResponse.status} - ${errorText}`);
      // Attempt to clean up the temporary agent
      if (agentId) {
         fetch(`${JULEP_API_BASE}/agents/${agentId}`, { method: "DELETE", headers }).catch(console.error);
      }
      return new Response(`Error creating Julep Session: ${createSessionResponse.statusText}`, { status: createSessionResponse.status });
    }

    const sessionData = await createSessionResponse.json();
    sessionId = sessionData.id; // Store the session ID

    // 3. Perform Chat Completion
    const chatUrl = `${JULEP_API_BASE}/sessions/${sessionId}/chat`;
    const chatBody = {
      messages: messages.map((msg: any) => ({
        role: msg.role,
        content: typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content), // Handle potential object content
        // Map other relevant fields if necessary
      })),
      stream: stream === true,
      ...rest, // Forward any other parameters from the OpenAI request
    };

    const chatResponse = await fetch(chatUrl, {
      method: "POST",
      headers,
      body: JSON.stringify(chatBody),
    });

    // 4. Handle Response and Clean Up
    if (!chatResponse.ok) {
      // If the chat request itself fails, read the error body and then clean up
      const errorText = await chatResponse.text();
      console.error(`Error during Julep Chat Completion: ${chatResponse.status} - ${errorText}`);
      // Attempt to clean up the temporary agent and session
      if (sessionId) {
         fetch(`${JULEP_API_BASE}/sessions/${sessionId}`, { method: "DELETE", headers }).catch(console.error);
      }
      if (agentId) {
         fetch(`${JULEP_API_BASE}/agents/${agentId}`, { method: "DELETE", headers }).catch(console.error);
      }
      return new Response(`Error during Julep Chat Completion: ${chatResponse.statusText} - ${errorText}`, { status: chatResponse.status });
    }

    if (stream) {
      // Handle streaming response (Server-Sent Events)
      // Pipe the Julep response body directly to the client response body
      // and add cleanup to the end of the stream.
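      //
      // Assumed upstream chunk shape (per "data:" line), mirrored from the
      // fields read below:
      //   data: {"id":"...","created_at":"...","choices":[{"index":0,
      //          "delta":{"role":"assistant","content":"Hi"},"finish_reason":null}]}
      // which is rewritten into a standard OpenAI chunk:
      //   data: {"id":"...","object":"chat.completion.chunk","created":1700000000,
      //          "model":"gpt-4o","choices":[...]}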
      const readableStream = chatResponse.body!.pipeThrough(new TextDecoderStream()).pipeThrough(new TransformStream({
        transform(chunk, controller) {
          // Parse Julep streaming chunks and reformat them as OpenAI SSE.
          // NOTE: decoded chunks may not align with SSE event boundaries;
          // a production version should buffer partial lines.
          const lines = chunk.split('\n').filter((line: string) => line.trim() !== '');
          for (const line of lines) {
            if (line.startsWith('data:')) {
              const payload = line.substring(5).trim();
              if (payload === '[DONE]') { // forward a stream terminator unchanged
                controller.enqueue('data: [DONE]\n\n');
                continue;
              }
              let data: any;
              try {
                data = JSON.parse(payload);
              } catch {
                continue; // skip partial/malformed JSON instead of crashing the stream
              }
              // Format the Julep chunk data into OpenAI SSE format
              const openaiChunk = {
                id: data.id,
                object: "chat.completion.chunk",
                created: Math.floor(new Date(data.created_at).getTime() / 1000),
                model: model, // Use the requested model ID
                choices: (data.choices ?? []).map((choice: any) => ({
                  index: choice.index,
                  delta: {
                    role: choice.delta?.role,
                    content: choice.delta?.content,
                    tool_calls: choice.delta?.tool_calls ? toolCallDeltaToOpenAI(choice.delta.tool_calls) : undefined,
                  },
                  finish_reason: choice.finish_reason,
                })),
              };
              controller.enqueue(`data: ${JSON.stringify(openaiChunk)}\n\n`);
            } else {
               // Pass through non-data lines like comments or empty lines if needed
               controller.enqueue(`${line}\n`);
            }
          }
        },
      }));

      // Attach cleanup to the end of the stream
      // We need to duplicate the stream to be able to pipe it to the client response
      // AND to a WritableStream for cleanup.
      const [stream1, stream2] = readableStream.tee();

      const cleanupPromise = new Promise<void>((resolve, reject) => {
          stream2.pipeTo(new WritableStream({
              close: () => {
                  if (sessionId) {
                     fetch(`${JULEP_API_BASE}/sessions/${sessionId}`, { method: "DELETE", headers }).catch(console.error);
                  }
                  if (agentId) {
                     fetch(`${JULEP_API_BASE}/agents/${agentId}`, { method: "DELETE", headers }).catch(console.error);
                  }
                  resolve();
              },
              abort: (reason) => {
                  console.error("Stream aborted:", reason);
                   if (sessionId) {
                     fetch(`${JULEP_API_BASE}/sessions/${sessionId}`, { method: "DELETE", headers }).catch(console.error);
                  }
                  if (agentId) {
                     fetch(`${JULEP_API_BASE}/agents/${agentId}`, { method: "DELETE", headers }).catch(console.error);
                  }
                  reject(reason);
              }
          })).catch(reject);
      });

      // Swallow cleanup failures: an unhandled rejection from the cleanup
      // promise would otherwise bring down the Deno process.
      cleanupPromise.catch(console.error);

      // Return the response built on the first stream, re-encoded to bytes
      // (a Response body stream must yield Uint8Array chunks, not strings).
      return new Response(stream1.pipeThrough(new TextEncoderStream()), {
        headers: {
          "Content-Type": "text/event-stream",
          "Cache-Control": "no-cache",
          "Connection": "keep-alive",
        },
        status: 200,
      });

    } else {
      // Handle non-streaming response
      const julepChatData = await chatResponse.json();

      const openaiCompletion = {
        id: julepChatData.id,
        object: "chat.completion",
        created: Math.floor(new Date(julepChatData.created_at).getTime() / 1000),
        model: model, // Use the requested model ID
        choices: julepChatData.choices.map((choice: any) => ({
          index: choice.index,
          message: {
            role: choice.message.role,
            content: choice.message.content,
            tool_calls: choice.message.tool_calls ? toolCallMessageToOpenAI(choice.message.tool_calls) : undefined,
          },
          finish_reason: choice.finish_reason,
        })),
        usage: julepChatData.usage ? {
          prompt_tokens: julepChatData.usage.prompt_tokens,
          completion_tokens: julepChatData.usage.completion_tokens,
          total_tokens: julepChatData.usage.total_tokens,
        } : undefined,
      };

      // Attempt to clean up the temporary agent and session (fire and forget)
      if (sessionId) {
         fetch(`${JULEP_API_BASE}/sessions/${sessionId}`, { method: "DELETE", headers }).catch(console.error);
      }
      if (agentId) {
         fetch(`${JULEP_API_BASE}/agents/${agentId}`, { method: "DELETE", headers }).catch(console.error);
      }

      return new Response(JSON.stringify(openaiCompletion), {
        headers: { "Content-Type": "application/json" },
        status: 200,
      });
    }

  } catch (error) {
    console.error("Error handling chat completions request:", error);
    // Attempt to clean up any agent or session created before the error
    if (sessionId) {
      fetch(`${JULEP_API_BASE}/sessions/${sessionId}`, { method: "DELETE", headers }).catch(console.error);
    }
    if (agentId) {
      fetch(`${JULEP_API_BASE}/agents/${agentId}`, { method: "DELETE", headers }).catch(console.error);
    }
    return new Response("Internal Server Error", { status: 500 });
  }
}

// Helper to format Julep ToolCall delta to OpenAI format
function toolCallDeltaToOpenAI(julepToolCalls: any[]): any[] {
  return julepToolCalls.map(toolCall => {
    // Assuming Julep's delta format for tool_calls is similar to the message format
    // and contains function objects directly. Adjust if necessary.
    return {
      id: toolCall.id,
      type: "function",
      function: {
        name: toolCall.function?.name,
        arguments: toolCall.function?.arguments, // Arguments might be streamed as chunks
      },
    };
  });
}

// Helper to format Julep ToolCall message to OpenAI format
function toolCallMessageToOpenAI(julepToolCalls: any[]): any[] {
  return julepToolCalls.map(toolCall => {
    return {
      id: toolCall.id,
      type: "function",
      function: {
        name: toolCall.function?.name,
        arguments: toolCall.function?.arguments, // Arguments should be complete in non-streaming
      },
    };
  });
}

// Main request handler
async function handler(req: Request): Promise<Response> {
  const url = new URL(req.url);

  if (url.pathname === "/v1/models" && req.method === "GET") {
    return handleModels(req);
  } else if (url.pathname === "/v1/chat/completions" && req.method === "POST") {
    return handleChatCompletions(req);
  } else {
    return new Response("Not Found", { status: 404 });
  }
}

console.log("HTTP server running on http://localhost:7860");
serve(handler, { port: 7860 });
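
// Quick smoke test against the running proxy (substitute a real Julep key):
//
//   curl http://localhost:7860/v1/models \
//     -H "Authorization: Bearer $JULEP_API_KEY"
//
//   curl http://localhost:7860/v1/chat/completions \
//     -H "Authorization: Bearer $JULEP_API_KEY" \
//     -H "Content-Type: application/json" \
//     -d '{"model": "gpt-4o", "messages": [{"role": "user", "content": "Hello"}]}'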