joaocansi committed on
Commit
14d2797
·
1 Parent(s): 4105ca8

feat: add embeddings.npy file

Browse files
Files changed (2) hide show
  1. app.py +9 -14
  2. data.npy → embeddings.npy +2 -2
app.py CHANGED
@@ -1,25 +1,20 @@
1
- from transformers import BertTokenizer, BertModel, AutoTokenizer, AutoModel
 
 
2
  from sklearn.ensemble import IsolationForest
3
  from tqdm import tqdm
4
-
5
- import torch
6
- import gradio as gr
7
  import numpy as np
8
 
9
- tokenizer = AutoTokenizer.from_pretrained("neuralmind/bert-base-portuguese-cased")
10
- model = AutoModel.from_pretrained("neuralmind/bert-base-portuguese-cased")
11
- model.eval()
12
 
13
- data = np.load("data.npy")
14
  iso_forest = IsolationForest(contamination=0.1, random_state=42)
15
- iso_forest.fit(data)
16
 
17
  def classify_email(text):
18
- with torch.no_grad():
19
- inputs = tokenizer(text, return_tensors='pt', truncation=True, padding=True, max_length=256)
20
- outputs = model(**inputs)
21
- cls_embedding = outputs.last_hidden_state[:, 0, :].cpu().numpy()
22
- pred = iso_forest.predict(cls_embedding)[0]
23
  return pred
24
 
25
  demo = gr.Interface(fn=classify_email, inputs="text", outputs="number")
 
1
+ import gradio as gr
2
+
3
+ from sentence_transformers import SentenceTransformer
4
  from sklearn.ensemble import IsolationForest
5
  from tqdm import tqdm
6
+ import pandas as pd
 
 
7
  import numpy as np
8
 
9
+ model = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2')
10
+ embeddings = np.load("embeddings.npy")
 
11
 
 
12
  iso_forest = IsolationForest(contamination=0.1, random_state=42)
13
+ iso_forest.fit(embeddings)
14
 
15
  def classify_email(text):
16
+ emb = model.encode([text])
17
+ pred = iso_forest.predict(emb)[0]
 
 
 
18
  return pred
19
 
20
  demo = gr.Interface(fn=classify_email, inputs="text", outputs="number")
data.npy → embeddings.npy RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1e72b681a0cee50f5ad491a23399302dc384fc7cdbe637c26337257dc959c98c
3
- size 11520128
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1971a59671a0c4546b2560c5832eba022664f1750f03589302bbd4a15d439138
3
+ size 10752128