import time

from bs4 import BeautifulSoup
from fastapi import FastAPI
from pydantic import BaseModel
import hazm
import tensorflow as tf
from transformers import AutoTokenizer, TFAutoModel

# Load ParsBERT once at startup; loading per request would be far too slow.
model_name = "HooshvareLab/bert-base-parsbert-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = TFAutoModel.from_pretrained(model_name)
normalizer = hazm.Normalizer()

app = FastAPI()


class Input(BaseModel):
    texts: list[str]


@app.post("/get_vectors")
def get_vecs(data: Input):
    start = time.time()
    # Strip any HTML markup, then apply hazm's Persian normalization.
    texts = [BeautifulSoup(t, "html.parser").get_text() for t in data.texts]
    texts = [normalizer.normalize(t) for t in texts]
    tokens = tokenizer(
        texts, return_tensors="tf", padding=True, truncation=True, max_length=512
    )
    outputs = model(**tokens)
    # Mean-pool over real tokens only: weight by the attention mask so that
    # padding positions do not dilute the sentence embedding.
    mask = tf.cast(tf.expand_dims(tokens["attention_mask"], -1), tf.float32)
    summed = tf.reduce_sum(outputs.last_hidden_state * mask, axis=1)
    counts = tf.reduce_sum(mask, axis=1)
    sentence_embedding = summed / counts
    vecs = sentence_embedding.numpy().tolist()
    return {"vectors": vecs, "duration": time.time() - start}
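
# --- Example usage (a sketch, not part of the service itself) ---
# Assuming this file is saved as main.py, the server can be started with:
#
#   uvicorn main:app --host 127.0.0.1 --port 8000
#
# A minimal client call from a separate script might then look like this
# (the URL and port match the uvicorn command above):
#
#   import requests
#
#   resp = requests.post(
#       "http://127.0.0.1:8000/get_vectors",
#       json={"texts": ["<p>سلام دنیا</p>", "این یک آزمایش است."]},
#   )
#   body = resp.json()
#   print(len(body["vectors"]))      # one vector per input text
#   print(len(body["vectors"][0]))   # 768, the hidden size of ParsBERT base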