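"""Abuse Pattern Detector (Multi-Message): a Gradio app.

Scores up to three messages with a multi-label RoBERTa abuse-pattern classifier,
adjusts detection thresholds using a custom sentiment model, and reports
per-message and composite abuse-risk scores. Requires gradio, torch, numpy,
transformers, and the local motif_tagging module.
"""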
import gradio as gr
import torch
import numpy as np
from transformers import AutoModelForSequenceClassification, AutoTokenizer, RobertaForSequenceClassification, RobertaTokenizer
from motif_tagging import detect_motifs

# Load the binary sentiment model (supportive vs. undermining)
sentiment_model = AutoModelForSequenceClassification.from_pretrained("SamanthaStorm/tether-sentiment")
sentiment_tokenizer = AutoTokenizer.from_pretrained("SamanthaStorm/tether-sentiment")

# Load the multi-label abuse-pattern classifier
model_name = "SamanthaStorm/autotrain-c1un8-p8vzo"
model = RobertaForSequenceClassification.from_pretrained(model_name, trust_remote_code=True)
tokenizer = RobertaTokenizer.from_pretrained(model_name, trust_remote_code=True)

# Label set and scoring tables (values elided in this excerpt)
LABELS = [...]                 # all classifier output labels, in model order
THRESHOLDS = {...}             # per-label probability thresholds
PATTERN_LABELS = LABELS[:15]   # first 15 labels: abuse-pattern categories
DANGER_LABELS = LABELS[15:18]  # next 3 labels: danger-assessment flags
EXPLANATIONS = {...}           # human-readable explanation per label
PATTERN_WEIGHTS = {...}        # per-label weights used in calculate_abuse_level


def custom_sentiment(text):
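    """Run the sentiment model and return {'label': 'supportive' | 'undermining', 'score': probability}."""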
    inputs = sentiment_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = sentiment_model(**inputs)
        probs = torch.nn.functional.softmax(outputs.logits, dim=1)
        label_idx = torch.argmax(probs).item()
    label_map = {0: "supportive", 1: "undermining"}
    label = label_map[label_idx]
    score = probs[0][label_idx].item()
    return {"label": label, "score": score}


def calculate_abuse_level(scores, thresholds, motif_hits=None):
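    """Average the weighted scores of labels that exceed their thresholds (as a percentage);
    raise the result to at least 75 when any matched motif is a danger label."""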
    weighted_scores = []
    for label, score in zip(LABELS, scores):
        if score > thresholds[label]:
            weight = PATTERN_WEIGHTS.get(label, 1.0)
            weighted_scores.append(score * weight)
    base_score = round(np.mean(weighted_scores) * 100, 2) if weighted_scores else 0.0
    motif_hits = motif_hits or []
    if any(label in motif_hits for label in DANGER_LABELS):
        base_score = max(base_score, 75.0)
    return base_score


def interpret_abuse_level(score):
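    """Translate a numeric abuse score into a qualitative risk label."""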
    if score > 80:
        return "Extreme / High Risk"
    elif score > 60:
        return "Severe / Harmful Pattern Present"
    elif score > 40:
        return "Likely Abuse"
    elif score > 20:
        return "Mild Concern"
    return "Very Low / Likely Safe"


def analyze_single_message(text):
    """Score one message and return a formatted abuse-risk and sentiment summary."""
    if not text.strip():
        return "No input provided."
    sentiment = custom_sentiment(text)
    # Lower every threshold by 20% when the overall tone is undermining,
    # so borderline patterns are more likely to be flagged.
    thresholds = {k: v * 0.8 for k, v in THRESHOLDS.items()} if sentiment['label'] == "undermining" else THRESHOLDS.copy()
    motif_flags, matched_phrases = detect_motifs(text)
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
    # Multi-label output: apply a sigmoid to each logit independently.
    scores = torch.sigmoid(outputs.logits.squeeze(0)).numpy()
    abuse_score = calculate_abuse_level(scores, thresholds, [label for label, _ in matched_phrases])
    summary = interpret_abuse_level(abuse_score)
    return f"Abuse Risk Score: {abuse_score}% — {summary}\nSentiment: {sentiment['label']} ({sentiment['score']*100:.2f}%)"


def analyze_composite(msg1, msg2, msg3):
    """Score three messages individually and report the mean abuse-risk score."""
    results = [analyze_single_message(t) for t in [msg1, msg2, msg3]]
    # Parse the numeric score back out of each result string; empty inputs count as 0.
    composite_score = np.mean([
        float(line.split('%')[0].split()[-1]) if 'Abuse Risk Score:' in line else 0
        for line in results
    ])
    final_summary = interpret_abuse_level(composite_score)
    composite_result = f"\n\nComposite Abuse Risk Score: {composite_score:.2f}% — {final_summary}"
    return results[0], results[1], results[2], composite_result


iface = gr.Interface(
    fn=analyze_composite,
    inputs=[
        gr.Textbox(label="Message 1"),
        gr.Textbox(label="Message 2"),
        gr.Textbox(label="Message 3")
    ],
    outputs=[
        gr.Textbox(label="Message 1 Result"),
        gr.Textbox(label="Message 2 Result"),
        gr.Textbox(label="Message 3 Result"),
        gr.Textbox(label="Composite Score Summary")
    ],
    title="Abuse Pattern Detector (Multi-Message)",
    live=False,
    allow_flagging="manual"
)

if __name__ == "__main__":
    iface.launch()