aidpc committed on
Commit
1e231f5
·
verified ·
1 Parent(s): 5b020f4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -4
app.py CHANGED
@@ -1,15 +1,34 @@
1
  from fastapi import FastAPI
2
  from pydantic import BaseModel
 
 
 
 
3
  import time
4
 
 
 
 
 
 
 
 
 
5
  app=FastAPI()
6
 
7
  class Input(BaseModel):
8
  texts: list
9
 
10
@app.post("/hello")
def say_hello(data: Input):
    """Return the length of each submitted text and the time spent handling the request.

    The response is ``{"data": [...], "duration": seconds}`` where ``data``
    holds ``len(item)`` for every item in ``data.texts``, in order.
    """
    started = time.time()
    lengths = [len(item) for item in data.texts]
    elapsed = time.time() - started
    return {"data": lengths, "duration": elapsed}
 
1
  from fastapi import FastAPI
2
  from pydantic import BaseModel
3
+ import tensorflow as tf
4
+ from transformers import AutoTokenizer, TFAutoModel
5
+ from bs4 import BeautifulSoup
6
+ import hazm
7
  import time
8
 
9
# Checkpoint identifier shared by the tokenizer and the model so the two
# always come from the same pretrained weights/vocabulary.
model_name = "HooshvareLab/bert-base-parsbert-uncased"

# Both objects are created once at import time and reused by every request.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = TFAutoModel.from_pretrained(model_name)

# hazm helpers: a sentence splitter and a text normalizer.
# NOTE(review): sent_tokenizer is not used by the handler visible here.
sent_tokenizer = hazm.SentenceTokenizer()
normalizer = hazm.Normalizer()
15
+
16
+
17
  app=FastAPI()
18
 
19
  class Input(BaseModel):
20
  texts: list
21
 
22
@app.post("/get_vectors")
def get_vecs(data: Input):
    """Embed each submitted text as one vector and report the handling time.

    Each item of ``data.texts`` is stripped of HTML markup, normalized with
    the module-level ``normalizer``, tokenized (truncated to 512 tokens) and
    passed through the module-level ``model``. The per-token hidden states are
    then mean-pooled into a single vector per text.

    Args:
        data: Request body; ``data.texts`` is the list of texts to embed.

    Returns:
        ``{"vectors": [...], "duration": seconds}`` with one vector (a list of
        floats) per input text, in input order. An empty ``texts`` list
        yields an empty ``vectors`` list.
    """
    now = time.time()

    # Nothing to embed: answer directly instead of handing an empty batch
    # to the tokenizer/model.
    if not data.texts:
        return {"vectors": [], "duration": time.time() - now}

    # Read from the request body. The previous code read the local name
    # `texts` before it was ever assigned, which raised UnboundLocalError
    # on every call.
    # The parser is named explicitly so the result does not depend on which
    # optional parsers happen to be installed (and no warning is emitted).
    texts = [BeautifulSoup(text, "html.parser").get_text() for text in data.texts]
    texts = [normalizer.normalize(text) for text in texts]

    tokens = tokenizer(texts, return_tensors="tf", padding=True, truncation=True, max_length=512)
    outputs = model(**tokens)

    # Mean-pool over real tokens only. With padding=True shorter texts are
    # padded to the longest one in the batch; averaging those padding
    # positions too would make a text's vector depend on its batch mates.
    # Assumes last_hidden_state is (batch, seq, hidden) and attention_mask is
    # (batch, seq), as documented by transformers.
    hidden = outputs.last_hidden_state
    mask = tf.cast(tf.expand_dims(tokens["attention_mask"], axis=-1), hidden.dtype)
    summed = tf.reduce_sum(hidden * mask, axis=1)
    # Guard against division by zero should a row ever have an all-zero mask.
    counts = tf.maximum(tf.reduce_sum(mask, axis=1), 1.0)
    sentence_embedding = summed / counts
    vecs = sentence_embedding.numpy().tolist()

    return {"vectors": vecs, "duration": time.time() - now}