joaocansi
committed on
Commit
·
14d2797
1
Parent(s):
4105ca8
feat: add embeddings.npy file
Browse files
- app.py +9 -14
- data.npy → embeddings.npy +2 -2
app.py
CHANGED
@@ -1,25 +1,20 @@
|
|
1 |
-
|
|
|
|
|
2 |
from sklearn.ensemble import IsolationForest
|
3 |
from tqdm import tqdm
|
4 |
-
|
5 |
-
import torch
|
6 |
-
import gradio as gr
|
7 |
import numpy as np
|
8 |
|
9 |
-
|
10 |
-
|
11 |
-
model.eval()
|
12 |
|
13 |
-
data = np.load("data.npy")
|
14 |
iso_forest = IsolationForest(contamination=0.1, random_state=42)
|
15 |
-
iso_forest.fit(
|
16 |
|
17 |
def classify_email(text):
|
18 |
-
|
19 |
-
|
20 |
-
outputs = model(**inputs)
|
21 |
-
cls_embedding = outputs.last_hidden_state[:, 0, :].cpu().numpy()
|
22 |
-
pred = iso_forest.predict(cls_embedding)[0]
|
23 |
return pred
|
24 |
|
25 |
demo = gr.Interface(fn=classify_email, inputs="text", outputs="number")
|
|
|
1 |
+
import gradio as gr
|
2 |
+
|
3 |
+
from sentence_transformers import SentenceTransformer
|
4 |
from sklearn.ensemble import IsolationForest
|
5 |
from tqdm import tqdm
|
6 |
+
import pandas as pd
|
|
|
|
|
7 |
import numpy as np
|
8 |
|
9 |
+
model = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2')
|
10 |
+
embeddings = np.load("embeddings.npy")
|
|
|
11 |
|
|
|
12 |
iso_forest = IsolationForest(contamination=0.1, random_state=42)
|
13 |
+
iso_forest.fit(embeddings)
|
14 |
|
15 |
def classify_email(text):
|
16 |
+
emb = model.encode([text])
|
17 |
+
pred = iso_forest.predict(emb)[0]
|
|
|
|
|
|
|
18 |
return pred
|
19 |
|
20 |
demo = gr.Interface(fn=classify_email, inputs="text", outputs="number")
|
data.npy → embeddings.npy
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1971a59671a0c4546b2560c5832eba022664f1750f03589302bbd4a15d439138
|
3 |
+
size 10752128
|