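"""Cyberbullying detection for Hiligaynon-language tweets.

Classifies text as "Cyberbullying" or "Safe" using a Transformer model
(bert-base-uncased or GroNLP/hateBERT) combined with a Hiligaynon
trigger-word lexicon. Runs in batch mode over a CSV file, or as an
interactive prompt when no input file is given.
"""
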
import torch
import pandas as pd
import argparse
import re
import matplotlib.pyplot as plt
import seaborn as sns
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from sklearn.metrics import classification_report, confusion_matrix

# Define model names
bert_model_name = "bert-base-uncased"
hatebert_model_name = "GroNLP/hateBERT"
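
# Either name can be pointed at a fine-tuned checkpoint directory instead
# (hypothetical path, saved via model.save_pretrained(...)), e.g.:
#   bert_model_name = "./checkpoints/bert-cyberbullying"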

class CyberbullyingDetector:
    def __init__(self, model_type="bert"):
        if model_type == "bert":
            model_name = bert_model_name
        elif model_type == "hatebert":
            model_name = hatebert_model_name
        else:
            raise ValueError("Invalid model_type. Choose 'bert' or 'hatebert'.")

        # Note: both defaults are base checkpoints, so the sequence
        # classification head is freshly initialized; model confidences are
        # only meaningful once a fine-tuned checkpoint is loaded.
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForSequenceClassification.from_pretrained(model_name)

        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(self.device)
        self.model.eval()  # disable dropout for deterministic inference

        self.cyberbullying_threshold = 0.7  # minimum softmax confidence to flag class 1

        # Hiligaynon / Filipino insult lexicon; any match forces the
        # "Cyberbullying" label regardless of model confidence (see predict()).
        self.trigger_words = [
            'buang', 'pokpok', 'bogo', 'linte', 'tanga', 'diputa', 'yuta mo', 'gaga',
            'lagtok ka', 'addict', 'bogok', 'gago', 'law-ay', 'demonyo ka', 'animal ka', 'animal',
            'bilatibay', 'yudipota', 'pangit', 'tikalon', 'tinikal', 'hambog',
            'batinggilan', 'biga-on', 'bulay-ug', 'agi', 'agitot', 'alpot', 'hangag'
        ]

    def find_triggers(self, text):
        # Match whole words only, so short entries like 'agi' are not flagged
        # inside longer, harmless words (plain substring search would be).
        text_lower = text.lower()
        return [w for w in self.trigger_words if re.search(r"\b" + re.escape(w) + r"\b", text_lower)]

    def predict(self, text):
        """Classify one text; returns a dict with label, confidence, language, and triggers."""
        triggers = self.find_triggers(text)

        inputs = self.tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            max_length=128,
            padding=True
        ).to(self.device)

        with torch.no_grad():
            outputs = self.model(**inputs)

        logits = outputs.logits
        probs = torch.nn.functional.softmax(logits, dim=1)
        pred_class = torch.argmax(probs, dim=1).item()
        confidence = probs[0][pred_class].item()

        # Flag as cyberbullying when the model is confident (class 1 at or
        # above the threshold) OR any lexicon trigger word is present.
        if (pred_class == 1 and confidence >= self.cyberbullying_threshold) or triggers:
            label = "Cyberbullying"
            is_cyberbullying = True
        else:
            label = "Safe"
            is_cyberbullying = False

        return {
            "text": text,
            "label": label,
            "confidence": confidence,
            "language": "hil",  # ISO 639-3 code for Hiligaynon
            "triggers": triggers,
            "is_cyberbullying": is_cyberbullying
        }
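
# Example (hypothetical quick check; 'buang' is in the trigger lexicon, so the
# lexicon rule fires even if the model head is untrained):
#   detector = CyberbullyingDetector(model_type="hatebert")
#   out = detector.predict("buang ka gid")
#   print(out["label"], out["confidence"], out["triggers"])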

def save_confusion_matrix(y_true, y_pred, filename="confusion_matrix.png"):
    """Save a labeled confusion-matrix heatmap of true vs. predicted labels."""
    labels = sorted(set(y_true + y_pred))
    cm = confusion_matrix(y_true, y_pred, labels=labels)
    plt.figure(figsize=(6, 4))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=labels, yticklabels=labels)
    plt.title("Confusion Matrix")
    plt.xlabel("Predicted")
    plt.ylabel("True")
    plt.tight_layout()
    plt.savefig(filename)
    plt.close()
    print(f"πŸ“Έ Confusion matrix saved as {filename}")

def run_predictions(input_csv=None, output_csv=None, model_type="bert"):
    """Batch-predict over a CSV with a 'tweet_text' column (plus a
    'cyberbullying_type' column for evaluation), or run interactively."""
    detector = CyberbullyingDetector(model_type=model_type)

    if input_csv:
        df = pd.read_csv(input_csv)
        # Cast to str so NaN or numeric cells don't crash the tokenizer.
        results = [detector.predict(text) for text in df['tweet_text'].astype(str)]

        output_df = df.copy()
        output_df['predicted_label'] = [r['label'] for r in results]
        output_df['confidence'] = [r['confidence'] for r in results]
        output_df['language'] = [r['language'] for r in results]
        output_df['triggers'] = [', '.join(r['triggers']) for r in results]
        output_df['is_cyberbullying'] = [r['is_cyberbullying'] for r in results]

        # Ground truth: any non-empty 'cyberbullying_type' other than "none"
        # counts as Cyberbullying; missing or "none" counts as Safe.
        output_df['true_label'] = output_df['cyberbullying_type'].apply(
            lambda x: "Cyberbullying" if pd.notnull(x) and str(x).strip().lower() != "none" else "Safe"
        )

        if output_csv:
            output_df.to_csv(output_csv, index=False)
            print(f"\nβœ… Predictions saved to {output_csv}")

        print("\nπŸ“Œ Sample Predictions:")
        print(output_df[['tweet_text', 'predicted_label', 'confidence', 'triggers']].head(10).to_string(index=False))

        print("\nπŸ“Š Prediction Summary:")
        print(output_df['predicted_label'].value_counts())

        print("\nβœ… Ground Truth Summary:")
        print(output_df['true_label'].value_counts())

        accuracy = (output_df['predicted_label'] == output_df['true_label']).mean()
        print(f"\n🎯 Accuracy: {accuracy:.2%}")

        print("\n🧾 Classification Report:")
        print(classification_report(output_df['true_label'], output_df['predicted_label'], digits=2, zero_division=0))

        save_confusion_matrix(output_df['true_label'].tolist(), output_df['predicted_label'].tolist())

        return output_df
    else:
        # Inference mode
        print("\nπŸ” Type a sentence to analyze (or 'exit' to quit):")
        while True:
            text = input(">>> ").strip()
            if text.lower() in ("exit", "quit"):
                break
            if not text:
                continue
            result = detector.predict(text)
            print(result)

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description="Cyberbullying Detector")
    parser.add_argument('--input_file', type=str, help="Path to input CSV with 'tweet_text' column")
    parser.add_argument('--output_file', type=str, help="Path to save results CSV")
    parser.add_argument('--model', type=str, default='bert', choices=['bert', 'hatebert'], help="Model to use")

    args = parser.parse_args()

    if args.input_file:
        print(f"πŸ“₯ Running batch predictions from {args.input_file} using {args.model.upper()}...")
    else:
        print(f"πŸ§ͺ No input file. Running in interactive mode using {args.model.upper()}...")

    run_predictions(args.input_file, args.output_file, model_type=args.model)
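
# Example invocations (script and CSV names are illustrative):
#   python cyberbullying_detector.py --input_file tweets.csv --output_file results.csv --model hatebert
#   python cyberbullying_detector.py --model bert    # no input file -> interactive mode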