from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
import numpy as np
from onnxruntime import InferenceSession
from transformers import AutoTokenizer
import os

app = FastAPI()

# CORS setup: allow requests from any origin
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# Load the ONNX model and its matching tokenizer
session = InferenceSession("model.onnx")
tokenizer = AutoTokenizer.from_pretrained("Xenova/multi-qa-mpnet-base-dot-v1")

@app.post("/predict")
async def predict(query: str):
    # Tokenize the query (FastAPI exposes `query` as a query parameter)
    inputs = tokenizer(query, return_tensors="np")
    # Cast to int64, the tensor type the exported ONNX model expects
    inputs = {k: v.astype(np.int64) for k, v in inputs.items()}
    # Run inference and return the embedding for the single input sequence
    outputs = session.run(None, inputs)
    embedding = outputs[0][0].tolist()
    return {"embedding": embedding}