File size: 957 Bytes
357cf66
 
 
 
 
b9cf3e2
357cf66
 
 
 
 
 
 
 
 
b9cf3e2
357cf66
 
 
 
 
 
 
 
 
 
b9cf3e2
357cf66
b9cf3e2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
from transformers import BertTokenizer, BertModel, AutoTokenizer, AutoModel
from sklearn.ensemble import IsolationForest
from tqdm import tqdm

import torch
import gradio as gr
import numpy as np

# Tokenizer and encoder are both loaded from the same pretrained checkpoint
# so that token ids match the encoder's vocabulary.
tokenizer = AutoTokenizer.from_pretrained(
    "neuralmind/bert-base-portuguese-cased"
)
# .eval() switches the module to inference mode and returns the module
# itself, so `model` is still bound to the loaded encoder.
model = AutoModel.from_pretrained(
    "neuralmind/bert-base-portuguese-cased"
).eval()

# Reference feature matrix the outlier detector is fitted on.
# NOTE(review): presumably embeddings produced by the same encoder as above
# (one row per sample) -- confirm against whatever generated X_test.npy.
data = np.load("X_test.npy")

# fit() returns the fitted estimator, so chaining keeps `iso_forest` bound to
# the trained detector. The fixed random_state makes the fit reproducible.
iso_forest = IsolationForest(contamination=0.1, random_state=42).fit(data)

def test_email(text):
    """Label a piece of text as anomalous or normal.

    The text is tokenized (truncated to at most 256 tokens), run through the
    module-level encoder without gradient tracking, and the hidden state at
    sequence position 0 is scored by the module-level isolation forest.

    Args:
        text: The raw text to evaluate.

    Returns:
        "Anomaly detected" when the isolation forest predicts -1 for the
        embedding, otherwise "Normal".
    """
    encoded = tokenizer(
        text,
        return_tensors='pt',
        truncation=True,
        padding=True,
        max_length=256,
    )

    # Inference only: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        hidden_states = model(**encoded).last_hidden_state
        # Position 0 of every sequence; for BERT-style inputs this is the
        # [CLS] token, used here as the fixed-size text representation.
        features = hidden_states[:, 0, :].cpu().numpy()

    # predict() yields one label per row; only a single row was supplied.
    label = iso_forest.predict(features)[0]
    return "Anomaly detected" if label == -1 else "Normal"

# Minimal UI: one text input, one text output, wired to test_email.
demo = gr.Interface(
    fn=test_email,
    inputs="text",
    outputs="text",
)

# Launches the Gradio app as soon as this module is executed.
demo.launch()