joaocansi
committed on
Commit
·
4da7379
1
Parent(s):
2b87773
- app.py +15 -9
- embeddings.npy → x_train.npy +2 -2
app.py
CHANGED
@@ -1,19 +1,25 @@
|
|
1 |
-
import
|
2 |
-
|
3 |
-
from sentence_transformers import SentenceTransformer
|
4 |
from sklearn.ensemble import IsolationForest
|
5 |
from tqdm import tqdm
|
|
|
|
|
|
|
6 |
import numpy as np
|
7 |
|
8 |
-
|
9 |
-
|
|
|
10 |
|
11 |
-
|
12 |
-
iso_forest.
|
|
|
13 |
|
14 |
def classify_email(text):
|
15 |
-
|
16 |
-
|
|
|
|
|
|
|
17 |
return pred
|
18 |
|
19 |
demo = gr.Interface(fn=classify_email, inputs="text", outputs="number")
|
|
|
1 |
+
from transformers import BertTokenizer, BertModel, AutoTokenizer, AutoModel
|
|
|
|
|
2 |
from sklearn.ensemble import IsolationForest
|
3 |
from tqdm import tqdm
|
4 |
+
|
5 |
+
import torch
|
6 |
+
import gradio as gr
|
7 |
import numpy as np
|
8 |
|
9 |
+
tokenizer = AutoTokenizer.from_pretrained("neuralmind/bert-base-portuguese-cased")
|
10 |
+
model = AutoModel.from_pretrained("neuralmind/bert-base-portuguese-cased")
|
11 |
+
model.eval()
|
12 |
|
13 |
+
data = np.load("x_train.npy")
|
14 |
+
iso_forest = IsolationForest(contamination=0.1, random_state=42, n_jobs=-1, max_samples=256)
|
15 |
+
iso_forest.fit(data)
|
16 |
|
17 |
def classify_email(text):
|
18 |
+
with torch.no_grad():
|
19 |
+
inputs = tokenizer(text, return_tensors='pt', truncation=True, padding=True, max_length=256)
|
20 |
+
outputs = model(**inputs)
|
21 |
+
cls_embedding = outputs.last_hidden_state[:, 0, :].cpu().numpy()
|
22 |
+
pred = iso_forest.predict(cls_embedding)[0]
|
23 |
return pred
|
24 |
|
25 |
demo = gr.Interface(fn=classify_email, inputs="text", outputs="number")
|
embeddings.npy → x_train.npy
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1e72b681a0cee50f5ad491a23399302dc384fc7cdbe637c26337257dc959c98c
|
3 |
+
size 11520128
|