File size: 1,180 Bytes
27a346a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 |
"""
utils.py
"""
# Standard imports
import os
from typing import List
# Third party imports
import numpy as np
from openai import OpenAI
# Module-level OpenAI client used by get_embeddings. The API key is read from
# the OPENAI_API_KEY environment variable (None is passed if it is unset).
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
# Maximum tokens for text-embedding-3-large.
# We don't have access to the tokenizer for text-embedding-3-large, so this
# limit is applied as a character count (assuming 1 character = 1 token).
MAX_TOKENS = 8191
def get_embeddings(
    texts: List[str], model: str = "text-embedding-3-large"
) -> np.ndarray:
    """
    Generate embeddings for a list of texts using OpenAI API synchronously.

    Args:
        texts: List of strings to embed. Each string is cut to its first
            MAX_TOKENS characters before being sent (character-based
            truncation, not tokenizer-based).
        model: OpenAI embedding model to use (default: text-embedding-3-large).

    Returns:
        A NumPy array built from the embeddings in the API response, one row
        per entry of ``response.data``. For an empty ``texts`` list, an empty
        array of shape (0, 0) is returned and no API call is made.

    Raises:
        Exception: If the OpenAI API call fails.
    """
    # Nothing to embed: return early instead of sending a request with an
    # empty input to the API.
    if not texts:
        return np.empty((0, 0))

    # Truncate texts to the max token limit (approximated in characters).
    truncated_texts = [text[:MAX_TOKENS] for text in texts]

    # Single synchronous request for the whole batch.
    response = client.embeddings.create(input=truncated_texts, model=model)

    # Stack the per-text embedding vectors into one array.
    return np.array([item.embedding for item in response.data])
|