from transformers import AutoTokenizer, AutoModel
from sklearn.ensemble import IsolationForest
import torch
import gradio as gr
import numpy as np

# Portuguese BERT (BERTimbau) is used as a frozen feature extractor.
tokenizer = AutoTokenizer.from_pretrained("neuralmind/bert-base-portuguese-cased")
model = AutoModel.from_pretrained("neuralmind/bert-base-portuguese-cased")
model.eval()

# x_train.npy is expected to hold the [CLS] embeddings of the training emails
# (one 768-dimensional row per email, the same features computed in classify_email below).
data = np.load("x_train.npy")
iso_forest = IsolationForest(contamination=0.1, random_state=42, n_jobs=-1, max_samples=256)
iso_forest.fit(data)

def classify_email(text):
    # Embed the email with BERT and score its [CLS] vector with the fitted IsolationForest.
    with torch.no_grad():
        inputs = tokenizer(text, return_tensors='pt', truncation=True, padding=True, max_length=256)
        outputs = model(**inputs)
        cls_embedding = outputs.last_hidden_state[:, 0, :].cpu().numpy()
    # predict() returns 1 for inliers (normal emails) and -1 for anomalies.
    pred = iso_forest.predict(cls_embedding)[0]
    return int(pred)

# Minimal UI: paste an email, get 1 (normal) or -1 (anomalous).
demo = gr.Interface(fn=classify_email, inputs="text", outputs="number")

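# Optional sanity check before launching the UI; the sample text below is
# illustrative only and is not taken from the training data.
sample = "Prezado cliente, clique no link para atualizar seus dados cadastrais."
print("Sample prediction:", classify_email(sample))
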
demo.launch()