# FastAPI service exposing ParsBERT (Persian BERT) sentence-embedding vectors.
from fastapi import FastAPI
from pydantic import BaseModel
import tensorflow as tf
from transformers import AutoTokenizer, TFAutoModel
from bs4 import BeautifulSoup
import hazm
import time
model_name="HooshvareLab/bert-base-parsbert-uncased"
tokenizer=AutoTokenizer.from_pretrained(model_name)
model=TFAutoModel.from_pretrained(model_name)
sent_tokenizer=hazm.SentenceTokenizer()
normalizer=hazm.Normalizer()
app=FastAPI()
class Input(BaseModel):
texts: list
@app.post("/get_vectors")
def get_vecs(data: Input):
now=time.time()
texts=data.texts
texts=list(map(lambda x: BeautifulSoup(x).get_text(), texts))
texts=list(map(normalizer.normalize, texts))
tokens=tokenizer(texts, return_tensors="tf", padding=True, truncation=True, max_length=512)
outputs=model(**tokens)
sentence_embedding=tf.reduce_mean(outputs.last_hidden_state, axis=1)
vecs=sentence_embedding.numpy().tolist()
return {"vectors": vecs, "duration": time.time()-now} |