autou / app.py
joaocansi
.
4da7379
raw
history blame
925 Bytes
from transformers import BertTokenizer, BertModel, AutoTokenizer, AutoModel
from sklearn.ensemble import IsolationForest
from tqdm import tqdm
import torch
import gradio as gr
import numpy as np
# Single source of truth for the checkpoint so the tokenizer and the encoder
# can never drift apart.
_CHECKPOINT = "neuralmind/bert-base-portuguese-cased"

tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
model = AutoModel.from_pretrained(_CHECKPOINT)
model.eval()  # evaluation mode: dropout layers are disabled for inference

# Training matrix for the anomaly detector, loaded from the working directory.
# NOTE(review): presumably one embedding row per training text, produced the
# same way as in classify_email -- confirm against the script that wrote it.
data = np.load("x_train.npy")

# The forest is fitted once at import time; every request reuses it.
iso_forest = IsolationForest(
    n_jobs=-1,
    max_samples=256,
    contamination=0.1,
    random_state=42,
)
iso_forest.fit(data)
def classify_email(text):
    """Embed *text* with the encoder and score it with the isolation forest.

    The text is tokenized (truncated to 256 tokens), run through the
    module-level ``model``, and the hidden state at the first token position
    (the ``[CLS]`` token for BERT-style tokenizers) is passed to the
    module-level ``iso_forest``.

    Args:
        text: The string to classify.

    Returns:
        The forest's prediction as a plain Python ``int``. Per scikit-learn's
        ``IsolationForest.predict`` contract this is ``1`` for an inlier and
        ``-1`` for an outlier.
    """
    inputs = tokenizer(
        text,
        return_tensors='pt',
        truncation=True,
        padding=True,
        max_length=256,
    )

    # Gradients are never needed for inference; skipping them saves memory.
    with torch.no_grad():
        outputs = model(**inputs)

    # Keep only position 0 of each sequence -> one vector per input.
    cls_embedding = outputs.last_hidden_state[:, 0, :].cpu().numpy()

    pred = iso_forest.predict(cls_embedding)[0]
    # predict() yields a NumPy integer scalar; convert it so the Gradio
    # "number" output receives the native int it is documented to expect.
    return int(pred)
# Minimal web UI: one free-text input, one numeric output showing the
# value returned by classify_email.
demo = gr.Interface(
    fn=classify_email,
    inputs="text",
    outputs="number",
)

# Start the Gradio server when the script is executed.
demo.launch()