Update test.py
test.py
CHANGED
@@ -1,63 +1,63 @@
 import torch
 from torch.nn.functional import softmax
 from load_model import load_model  # Import the load_model function
 import numpy as np
 from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
 import streamlit as st

 @st.cache_resource
 def get_model_and_tokenizer(model_name):
     return load_model(model_name)

 # Initialize default model (could be anything, or even load dynamically)
 default_model_name = "cahya/bert-base-indonesian-522M"
 tokenizer, model = load_model(default_model_name)

 # Prediction function
 def predict_hoax(title, content):
     if tokenizer is None or model is None:
         raise ValueError("Model and tokenizer must be loaded before prediction.")

     print(f"Using model: {model}")
     print(f"Using tokenizer: {tokenizer}")

     text = f"{title} [SEP] {content}"
     inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=256)
     with torch.no_grad():
         outputs = model(**inputs)
     probs = softmax(outputs.logits, dim=1)
     pred = torch.argmax(probs, dim=1).item()
     label = 'HOAX' if pred == 1 else 'NON-HOAX'
     return label

-
-
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model.to(device)

 # LIME prediction function
 def predict_proba_for_lime(texts):
     results = []
     for text in texts:
-        inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=256)
+        inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=256).to(device)
         with torch.no_grad():
             outputs = model(**inputs)
         probs = softmax(outputs.logits, dim=1).detach().cpu().numpy()
         results.append(probs[0])
     return np.array(results)

 def evaluate_model_performance(df, tokenizer, model):
     true_labels = []
     pred_labels = []

     for index, row in df.iterrows():
         true_label = row['Label']  # Use the 'Label' column as ground truth since there is no 'Final_Result' column
         pred_label = predict_hoax(row['Title'], row['Content'])

         true_labels.append(1 if true_label == 'HOAX' else 0)
         pred_labels.append(1 if pred_label == 'HOAX' else 0)

     accuracy = accuracy_score(true_labels, pred_labels)
     precision = precision_score(true_labels, pred_labels, average='binary')
     recall = recall_score(true_labels, pred_labels, average='binary')
     f1 = f1_score(true_labels, pred_labels, average='binary')

     return accuracy, precision, recall, f1
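For reference, predict_proba_for_lime returns an (n_texts, 2) NumPy array of class probabilities, which is the kind of classifier function LIME's text explainer expects. A minimal usage sketch, assuming the lime package is installed and that class index 1 corresponds to 'HOAX' as in predict_hoax; sample_text is a hypothetical input in the same "title [SEP] content" format:

from lime.lime_text import LimeTextExplainer

# Class order must match the model's outputs: index 0 = NON-HOAX, index 1 = HOAX
explainer = LimeTextExplainer(class_names=['NON-HOAX', 'HOAX'])

# Hypothetical example text in the "title [SEP] content" format used by predict_hoax
sample_text = "Some headline [SEP] Some article body"
explanation = explainer.explain_instance(
    sample_text,
    predict_proba_for_lime,  # classifier_fn: list of texts -> (n_texts, 2) probability array
    num_features=10,
)
print(explanation.as_list())  # top tokens with their contribution weights

Returning probabilities for both classes, rather than only the predicted label, is what lets LIME attribute per-token weights toward either outcome.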