autou / app.py
joaocansi
.
d4a9bd3
raw
history blame
898 Bytes
from transformers import BertTokenizer, BertModel, AutoTokenizer, AutoModel
from sklearn.ensemble import IsolationForest
from tqdm import tqdm
import torch
import gradio as gr
import numpy as np
# Hugging Face checkpoint shared by the tokenizer and the encoder below.
_CHECKPOINT = "neuralmind/bert-base-portuguese-cased"

# Both objects are created once at import time and reused by every request.
tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
# Module.eval() returns the module itself, so chaining keeps `model` bound to
# the encoder while switching it to evaluation mode.
model = AutoModel.from_pretrained(_CHECKPOINT).eval()

# Fit the outlier detector on the array stored in x_train.npy.
# NOTE(review): presumably these rows are embeddings produced by the same
# checkpoint as above -- confirm against the script that wrote the file.
data = np.load("x_train.npy")
# fit() returns the fitted estimator, so the result can be bound directly.
iso_forest = IsolationForest(contamination=0.25, random_state=42).fit(data)
def classify_email(text: str) -> int:
    """Classify one e-mail with the BERT encoder and the Isolation Forest.

    The text is tokenized (truncated to at most 256 tokens), run through the
    module-level ``model``, and the last-layer hidden state at sequence
    position 0 is handed to the module-level ``iso_forest``.

    Args:
        text: Raw e-mail text, as delivered by the Gradio "text" input.

    Returns:
        The label produced by ``iso_forest.predict`` for this text, as a
        built-in ``int`` (scikit-learn documents 1 for inliers and -1 for
        outliers).
    """
    # Inference only: no autograd bookkeeping is needed for the forward pass.
    with torch.no_grad():
        inputs = tokenizer(
            text,
            return_tensors='pt',
            truncation=True,
            padding=True,
            max_length=256,
        )
        outputs = model(**inputs)
        # Keep only sequence position 0 of the last hidden state, giving one
        # embedding row per input text, and move it to NumPy for scikit-learn.
        cls_embedding = outputs.last_hidden_state[:, 0, :].cpu().numpy()

    pred = iso_forest.predict(cls_embedding)[0]
    # predict() yields a NumPy integer; convert it so the Gradio "number"
    # output receives a plain, JSON-serializable Python int.
    return int(pred)
# Minimal Gradio UI: a text box feeds classify_email and the returned label is
# shown in a number field. The server is started as soon as this module runs.
demo = gr.Interface(
    fn=classify_email,
    inputs="text",
    outputs="number",
)
demo.launch()