joaocansi committed on
Commit
4da7379
·
1 Parent(s): 2b87773
Files changed (2) hide show
  1. app.py +15 -9
  2. embeddings.npy → x_train.npy +2 -2
app.py CHANGED
@@ -1,19 +1,25 @@
1
- import gradio as gr
2
-
3
- from sentence_transformers import SentenceTransformer
4
  from sklearn.ensemble import IsolationForest
5
  from tqdm import tqdm
 
 
 
6
  import numpy as np
7
 
8
- model = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2')
9
- embeddings = np.load("embeddings.npy")
 
10
 
11
- iso_forest = IsolationForest(contamination=0.1, random_state=42)
12
- iso_forest.fit(embeddings)
 
13
 
14
  def classify_email(text):
15
- emb = model.encode([text])
16
- pred = iso_forest.predict(emb)[0]
 
 
 
17
  return pred
18
 
19
  demo = gr.Interface(fn=classify_email, inputs="text", outputs="number")
 
1
+ from transformers import BertTokenizer, BertModel, AutoTokenizer, AutoModel
 
 
2
  from sklearn.ensemble import IsolationForest
3
  from tqdm import tqdm
4
+
5
+ import torch
6
+ import gradio as gr
7
  import numpy as np
8
 
9
+ tokenizer = AutoTokenizer.from_pretrained("neuralmind/bert-base-portuguese-cased")
10
+ model = AutoModel.from_pretrained("neuralmind/bert-base-portuguese-cased")
11
+ model.eval()
12
 
13
+ data = np.load("x_train.npy")
14
+ iso_forest = IsolationForest(contamination=0.1, random_state=42, n_jobs=-1, max_samples=256)
15
+ iso_forest.fit(data)
16
 
17
  def classify_email(text):
18
+ with torch.no_grad():
19
+ inputs = tokenizer(text, return_tensors='pt', truncation=True, padding=True, max_length=256)
20
+ outputs = model(**inputs)
21
+ cls_embedding = outputs.last_hidden_state[:, 0, :].cpu().numpy()
22
+ pred = iso_forest.predict(cls_embedding)[0]
23
  return pred
24
 
25
  demo = gr.Interface(fn=classify_email, inputs="text", outputs="number")
embeddings.npy → x_train.npy RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1971a59671a0c4546b2560c5832eba022664f1750f03589302bbd4a15d439138
3
- size 10752128
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e72b681a0cee50f5ad491a23399302dc384fc7cdbe637c26337257dc959c98c
3
+ size 11520128