autou / app.py
joaocansi
feat: add bert and isoforest
357cf66
raw
history blame
957 Bytes
from transformers import BertTokenizer, BertModel, AutoTokenizer, AutoModel
from sklearn.ensemble import IsolationForest
from tqdm import tqdm
import torch
import gradio as gr
import numpy as np
# Single checkpoint identifier so the tokenizer and encoder cannot drift apart.
_BERT_CHECKPOINT = "neuralmind/bert-base-portuguese-cased"

tokenizer = AutoTokenizer.from_pretrained(_BERT_CHECKPOINT)
model = AutoModel.from_pretrained(_BERT_CHECKPOINT)
model.eval()  # evaluation mode: inference only, no training-time behavior

# Feature matrix used to fit the anomaly detector at import time.
# NOTE(review): presumably precomputed embeddings from the same encoder, so
# they match what test_email feeds to predict() -- confirm against the
# script that produced X_test.npy.
data = np.load("X_test.npy")

# contamination=0.1 sets the expected outlier share; random_state pins the
# forest so repeated start-ups give the same model. fit() returns the fitted
# estimator itself, so construction and fitting are chained.
iso_forest = IsolationForest(contamination=0.1, random_state=42).fit(data)
def test_email(text):
    """Classify one e-mail text as anomalous or normal.

    The text is tokenized (truncated to at most 256 tokens), run through the
    module-level encoder, and the hidden state at sequence position 0 is
    scored by the module-level isolation forest.

    Args:
        text: Raw text to evaluate.

    Returns:
        "Anomaly detected" when the isolation forest predicts -1 for the
        embedding, otherwise "Normal".
    """
    encoded = tokenizer(
        text,
        return_tensors='pt',
        truncation=True,
        padding=True,
        max_length=256,
    )

    # Inference only: no gradients are needed for the forward pass.
    with torch.no_grad():
        hidden_states = model(**encoded).last_hidden_state

    # Keep only the vector at position 0 of each sequence (the [CLS] slot)
    # and hand it to scikit-learn as a NumPy array.
    first_token_vec = hidden_states[:, 0, :].cpu().numpy()

    # predict() yields one label per row; a single text gives a single row.
    label = iso_forest.predict(first_token_vec)[0]
    return "Anomaly detected" if label == -1 else "Normal"
# Web UI: one text input whose value is passed to test_email, and one text
# output showing the string it returns.
demo = gr.Interface(
    fn=test_email,
    inputs="text",
    outputs="text",
)
demo.launch()  # start serving the interface