Nakhwa committed on
Commit
48e9045
·
verified ·
1 Parent(s): 45b5854

Update test.py

Browse files
Files changed (1) hide show
  1. test.py +62 -62
test.py CHANGED
@@ -1,63 +1,63 @@
1
- import torch
2
- from torch.nn.functional import softmax
3
- from load_model import load_model # Import the load_model function
4
- import numpy as np
5
- from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
6
- import streamlit as st
7
-
8
- @st.cache_resource
9
- def get_model_and_tokenizer(model_name):
10
- return load_model(model_name)
11
-
12
- # Initialize default model (could be anything, or even load dynamically)
13
- default_model_name = "cahya/bert-base-indonesian-522M"
14
- tokenizer, model = load_model(default_model_name)
15
-
16
- # Prediction function
17
- def predict_hoax(title, content):
18
- if tokenizer is None or model is None:
19
- raise ValueError("Model and tokenizer must be loaded before prediction.")
20
-
21
- print(f"Using model: {model}")
22
- print(f"Using tokenizer: {tokenizer}")
23
-
24
- text = f"{title} [SEP] {content}"
25
- inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=256)
26
- with torch.no_grad():
27
- outputs = model(**inputs)
28
- probs = softmax(outputs.logits, dim=1)
29
- pred = torch.argmax(probs, dim=1).item()
30
- label = 'HOAX' if pred == 1 else 'NON-HOAX'
31
- return label
32
-
33
- # device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
34
- # model.to(device)
35
-
36
- # LIME prediction function
37
- def predict_proba_for_lime(texts):
38
- results = []
39
- for text in texts:
40
- inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=256)
41
- with torch.no_grad():
42
- outputs = model(**inputs)
43
- probs = softmax(outputs.logits, dim=1).detach().cpu().numpy()
44
- results.append(probs[0])
45
- return np.array(results)
46
-
47
- def evaluate_model_performance(df, tokenizer, model):
48
- true_labels = []
49
- pred_labels = []
50
-
51
- for index, row in df.iterrows():
52
- true_label = row['Label'] # Menggunakan 'Title' sebagai label sebenarnya karena tidak ada 'Final_Result'
53
- pred_label = predict_hoax(row['Title'], row['Content'])
54
-
55
- true_labels.append(1 if true_label == 'HOAX' else 0)
56
- pred_labels.append(1 if pred_label == 'HOAX' else 0)
57
-
58
- accuracy = accuracy_score(true_labels, pred_labels)
59
- precision = precision_score(true_labels, pred_labels, average='binary')
60
- recall = recall_score(true_labels, pred_labels, average='binary')
61
- f1 = f1_score(true_labels, pred_labels, average='binary')
62
-
63
  return accuracy, precision, recall, f1
 
1
+ import torch
2
+ from torch.nn.functional import softmax
3
+ from load_model import load_model # Import the load_model function
4
+ import numpy as np
5
+ from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
6
+ import streamlit as st
7
+
8
+ @st.cache_resource
9
+ def get_model_and_tokenizer(model_name):
10
+ return load_model(model_name)
11
+
12
+ # Initialize default model (could be anything, or even load dynamically)
13
+ default_model_name = "cahya/bert-base-indonesian-522M"
14
+ tokenizer, model = load_model(default_model_name)
15
+
16
+ # Prediction function
17
+ def predict_hoax(title, content):
18
+ if tokenizer is None or model is None:
19
+ raise ValueError("Model and tokenizer must be loaded before prediction.")
20
+
21
+ print(f"Using model: {model}")
22
+ print(f"Using tokenizer: {tokenizer}")
23
+
24
+ text = f"{title} [SEP] {content}"
25
+ inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=256)
26
+ with torch.no_grad():
27
+ outputs = model(**inputs)
28
+ probs = softmax(outputs.logits, dim=1)
29
+ pred = torch.argmax(probs, dim=1).item()
30
+ label = 'HOAX' if pred == 1 else 'NON-HOAX'
31
+ return label
32
+
33
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
34
+ model.to(device)
35
+
36
+ # LIME prediction function
37
+ def predict_proba_for_lime(texts):
38
+ results = []
39
+ for text in texts:
40
+ inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=256).to(device)
41
+ with torch.no_grad():
42
+ outputs = model(**inputs)
43
+ probs = softmax(outputs.logits, dim=1).detach().cpu().numpy()
44
+ results.append(probs[0])
45
+ return np.array(results)
46
+
47
+ def evaluate_model_performance(df, tokenizer, model):
48
+ true_labels = []
49
+ pred_labels = []
50
+
51
+ for index, row in df.iterrows():
52
+ true_label = row['Label'] # Menggunakan 'Title' sebagai label sebenarnya karena tidak ada 'Final_Result'
53
+ pred_label = predict_hoax(row['Title'], row['Content'])
54
+
55
+ true_labels.append(1 if true_label == 'HOAX' else 0)
56
+ pred_labels.append(1 if pred_label == 'HOAX' else 0)
57
+
58
+ accuracy = accuracy_score(true_labels, pred_labels)
59
+ precision = precision_score(true_labels, pred_labels, average='binary')
60
+ recall = recall_score(true_labels, pred_labels, average='binary')
61
+ f1 = f1_score(true_labels, pred_labels, average='binary')
62
+
63
  return accuracy, precision, recall, f1