Spaces:
Sleeping
Sleeping
File size: 1,361 Bytes
8980288 9d07138 8980288 9d07138 8980288 46f6ce5 9d07138 8980288 9d07138 8980288 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 |
from fastapi import FastAPI
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModel
import torch
# Application object that the route decorators in this module register on;
# title and version are passed to FastAPI as app metadata.
app = FastAPI(title="OpenAI-compatible Embedding API", version="1.0.0")
# Checkpoint identifier resolved on the Hugging Face Hub; it is also echoed
# back in the "model" field of embedding responses.
MODEL_NAME = "BAAI/bge-small-en-v1.5"

# Tokenizer and encoder are loaded once at import time and shared by every
# request handled by this module.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Module.eval() returns the module itself, so loading and switching to
# inference mode can be chained in a single statement.
model = AutoModel.from_pretrained(MODEL_NAME).eval()
class EmbeddingRequest(BaseModel):
    """Request body for the ``/embeddings`` endpoint."""

    # Texts to embed; the response carries one embedding per entry.
    input: list[str]
@app.get("/")
def root():
    """Return a fixed status message confirming the API responds."""
    status_payload = {"message": "API is working"}
    return status_payload
@app.post("/embeddings")
def create_embeddings(request: EmbeddingRequest):
    """Embed each input string and return an OpenAI-style embeddings payload.

    The texts are tokenized as one batch (padded to the longest item, with
    truncation enabled), run through the model without gradient tracking,
    and each text is represented by the L2-normalized hidden state of its
    first token.

    Args:
        request: Request body carrying the texts to embed under ``input``.

    Returns:
        A dict with ``object`` ("list"), ``data`` (one
        ``{"object", "embedding", "index"}`` entry per input, in request
        order), ``model`` (the checkpoint name) and ``usage`` with
        ``prompt_tokens`` and ``total_tokens``.
    """
    texts = request.input

    if not texts:
        # Nothing to embed: skip the tokenizer/model call and report zero usage.
        return {
            "object": "list",
            "data": [],
            "model": MODEL_NAME,
            "usage": {"prompt_tokens": 0, "total_tokens": 0},
        }

    with torch.no_grad():
        tokens = tokenizer(texts, return_tensors="pt", padding=True, truncation=True)
        output = model(**tokens)
        # Hidden state at sequence position 0 of every batch item.
        cls_embeddings = output.last_hidden_state[:, 0]
        norm_embeddings = torch.nn.functional.normalize(cls_embeddings, p=2, dim=1)

    # Count tokens once from the batch that was actually fed to the model:
    # the attention mask is 1 for real tokens and 0 for padding. This avoids
    # re-tokenizing every input twice and keeps the figure consistent with
    # the truncation applied above.
    token_count = int(tokens["attention_mask"].sum().item())

    data = [
        {
            "object": "embedding",
            "embedding": vector,
            "index": index,
        }
        for index, vector in enumerate(norm_embeddings.tolist())
    ]

    return {
        "object": "list",
        "data": data,
        "model": MODEL_NAME,
        "usage": {
            "prompt_tokens": token_count,
            "total_tokens": token_count,
        },
    }