import time

import hazm
import tensorflow as tf
from bs4 import BeautifulSoup
from fastapi import FastAPI
from pydantic import BaseModel
from transformers import AutoTokenizer, TFAutoModel

# Load ParsBERT once at startup so requests don't pay the model-loading cost.
model_name = "HooshvareLab/bert-base-parsbert-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = TFAutoModel.from_pretrained(model_name)

# hazm tools for Persian text: sentence splitting and normalization.
sent_tokenizer = hazm.SentenceTokenizer()
normalizer = hazm.Normalizer()

app = FastAPI()


class Input(BaseModel):
    texts: list[str]


@app.post("/get_vectors")
def get_vecs(data: Input):
    now = time.time()

    # Strip HTML tags with an explicit parser, then normalize the Persian text.
    texts = [BeautifulSoup(t, "html.parser").get_text() for t in data.texts]
    texts = [normalizer.normalize(t) for t in texts]

    tokens = tokenizer(
        texts, return_tensors="tf", padding=True, truncation=True, max_length=512
    )
    outputs = model(**tokens)

    # Mean-pool token embeddings into one vector per text, masking out padding
    # positions so [PAD] tokens don't skew the average.
    mask = tf.cast(tokens["attention_mask"], tf.float32)[:, :, tf.newaxis]
    summed = tf.reduce_sum(outputs.last_hidden_state * mask, axis=1)
    counts = tf.reduce_sum(mask, axis=1)
    sentence_embedding = summed / counts
    vecs = sentence_embedding.numpy().tolist()

    return {"vectors": vecs, "duration": time.time() - now}