embedding-model / app.py
snsynth's picture
remove hf ref
e4f0aa5
from fastapi import FastAPI
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModel
import torch
# ASGI application object that the route decorators in this module attach to.
# `title` and `version` are metadata published in the generated OpenAPI schema.
app = FastAPI(title="OpenAI-compatible Embedding API", version="1.0.0")
# Hugging Face Hub checkpoint identifier. It is also echoed back to clients as
# the "model" field of every embeddings response.
MODEL_NAME = "BAAI/bge-small-en-v1.5"

# Both objects are created once at import time and shared by all requests.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# `eval()` returns the module itself, so loading the weights and switching to
# inference mode can be chained into one expression.
model = AutoModel.from_pretrained(MODEL_NAME).eval()
# Request body accepted by POST /embeddings.
# NOTE: documented with `#` comments rather than a docstring on purpose:
# pydantic copies a model's docstring into its JSON schema description, which
# would alter the published API schema.
class EmbeddingRequest(BaseModel):
    # Texts to embed; the endpoint returns one embedding per entry, in order.
    input: list[str]
# Health-check endpoint: GET / answers with a fixed JSON payload and touches
# neither the tokenizer nor the model.
# NOTE: `#` comments are used instead of a docstring because FastAPI publishes
# a route's docstring as its OpenAPI description.
@app.get("/")
def root():
    status_payload = {"message": "API is working"}
    return status_payload
@app.post("/embeddings")
def create_embeddings(request: EmbeddingRequest):
    """Embed every input text and return an OpenAI-style embeddings response.

    Each text is represented by the model's final hidden state at sequence
    position 0 (the CLS position), L2-normalised along the feature dimension.

    Args:
        request: Parsed request body; ``request.input`` holds the texts.

    Returns:
        A dict with the keys ``object``, ``data``, ``model`` and ``usage``.
        ``data`` holds one ``{"object", "embedding", "index"}`` entry per
        input text, in input order. ``usage`` reports the number of tokens
        fed to the model; texts longer than the tokenizer's limit are counted
        at their truncated length.
    """
    texts = request.input
    if not texts:
        # Nothing to embed: answer with an empty result instead of handing the
        # tokenizer and the model a zero-length batch.
        return {
            "object": "list",
            "data": [],
            "model": MODEL_NAME,
            "usage": {"prompt_tokens": 0, "total_tokens": 0},
        }

    # Inference only, so skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        tokens = tokenizer(
            texts, return_tensors="pt", padding=True, truncation=True
        )
        output = model(**tokens)
        cls_embeddings = output.last_hidden_state[:, 0]
        norm_embeddings = torch.nn.functional.normalize(
            cls_embeddings, p=2, dim=1
        )

    # The attention mask is 1 for each real (non-padding) token, so its sum is
    # the token count of the whole batch. This replaces re-tokenizing every
    # text twice just to measure its length.
    token_count = int(tokens["attention_mask"].sum())

    # One bulk tensor -> nested-list conversion instead of one per row.
    data = [
        {"object": "embedding", "embedding": vector, "index": index}
        for index, vector in enumerate(norm_embeddings.tolist())
    ]

    return {
        "object": "list",
        "data": data,
        "model": MODEL_NAME,
        "usage": {
            # An embeddings call has no completion tokens, so both counters
            # carry the same value.
            "prompt_tokens": token_count,
            "total_tokens": token_count,
        },
    }