from fastapi import FastAPI
from pydantic import BaseModel
import tensorflow as tf
from transformers import AutoTokenizer, TFAutoModel
from bs4 import BeautifulSoup
import hazm
import time

# Load the ParsBERT tokenizer and TF model once at startup so every request reuses them
model_name = "HooshvareLab/bert-base-parsbert-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = TFAutoModel.from_pretrained(model_name)

# hazm normalizer standardizes Persian text (characters, spacing) before tokenization
normalizer = hazm.Normalizer()


app = FastAPI()


class Input(BaseModel):
    # Raw input texts; may contain HTML markup that is stripped below
    texts: list[str]

@app.post("/get_vectors")
def get_vecs(data: Input):
    start = time.time()

    # Strip HTML markup, then normalize the Persian text with hazm
    texts = [BeautifulSoup(t, "html.parser").get_text() for t in data.texts]
    texts = [normalizer.normalize(t) for t in texts]

    # Tokenize the whole batch: pad to the longest text, truncate at BERT's 512-token limit
    tokens = tokenizer(texts, return_tensors="tf", padding=True, truncation=True, max_length=512)
    outputs = model(**tokens)

    # Mean-pool the last hidden states over the token axis to get one vector per text
    # (note: this average also includes padding positions)
    sentence_embedding = tf.reduce_mean(outputs.last_hidden_state, axis=1)
    vecs = sentence_embedding.numpy().tolist()

    return {"vectors": vecs, "duration": time.time() - start}
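
# Since reduce_mean above averages over padding positions too, a mask-aware mean
# is a common alternative. A minimal sketch (not the original author's code),
# reusing `tokens` and `outputs` from the handler above:
#
#   mask = tf.cast(tokens["attention_mask"], tf.float32)[:, :, None]
#   summed = tf.reduce_sum(outputs.last_hidden_state * mask, axis=1)
#   sentence_embedding = summed / tf.reduce_sum(mask, axis=1)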
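
# A minimal usage sketch for this endpoint, assuming the app is served locally
# on port 8000 (hypothetical setup, e.g. `uvicorn main:app`):
#
#   import requests
#   resp = requests.post(
#       "http://localhost:8000/get_vectors",
#       json={"texts": ["<p>سلام دنیا</p>"]},
#   )
#   print(len(resp.json()["vectors"][0]))   # 768-dim ParsBERT embedding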