SamanthaStorm committed
Commit ec5f81e · verified · 1 Parent(s): cbd8c88

Update app.py

Files changed (1)
  1. app.py +34 -24
app.py CHANGED
@@ -5,7 +5,7 @@ from transformers import AutoModelForSequenceClassification, AutoTokenizer
 from transformers import RobertaForSequenceClassification, RobertaTokenizer
 from motif_tagging import detect_motifs
 
-# Load sentiment model
+# custom fine-tuned sentiment model
 sentiment_model = AutoModelForSequenceClassification.from_pretrained("SamanthaStorm/tether-sentiment")
 sentiment_tokenizer = AutoTokenizer.from_pretrained("SamanthaStorm/tether-sentiment")
 
@@ -16,19 +16,15 @@ tokenizer = RobertaTokenizer.from_pretrained(model_name, trust_remote_code=True)
 
 LABELS = [
     "gaslighting", "mockery", "dismissiveness", "control", "guilt_tripping", "apology_baiting", "blame_shifting", "projection",
-    "contradictory_statements", "manipulation", "deflection", "insults", "obscure_formal", "recovery_phase", "non_abusive",
-    "suicidal_threat", "physical_threat", "extreme_control"
+    "contradictory_statements", "manipulation", "deflection", "insults", "obscure_formal", "recovery_phase"
 ]
 
 THRESHOLDS = {
     "gaslighting": 0.25, "mockery": 0.15, "dismissiveness": 0.45, "control": 0.43, "guilt_tripping": 0.15,
     "apology_baiting": 0.2, "blame_shifting": 0.23, "projection": 0.50, "contradictory_statements": 0.25,
-    "manipulation": 0.25, "deflection": 0.30, "insults": 0.34, "obscure_formal": 0.25, "recovery_phase": 0.25,
-    "non_abusive": 2.0, "suicidal_threat": 0.45, "physical_threat": 0.02, "extreme_control": 0.30
+    "manipulation": 0.25, "deflection": 0.30, "insults": 0.34, "obscure_formal": 0.25, "recovery_phase": 0.25
 }
 
-PATTERN_LABELS = LABELS[:15]
-
 EXPLANATIONS = {
     "gaslighting": "Gaslighting involves making someone question their own reality or perceptions...",
     "blame_shifting": "Blame-shifting is when one person redirects the responsibility...",
@@ -40,12 +36,13 @@ EXPLANATIONS = {
     "apology_baiting": "Apology-baiting manipulates victims into apologizing for abuser's behavior.",
     "deflection": "Deflection avoids accountability by redirecting blame.",
     "control": "Control restricts autonomy through manipulation or coercion.",
+    "guilt_tripping": "Guilt-tripping uses guilt to manipulate someone's actions.",
     "manipulation": "Manipulation deceives to influence or control outcomes.",
     "obscure_formal": "Obscure/formal language manipulates through confusion or superiority."
 }
 
 PATTERN_WEIGHTS = {
-    "gaslighting": 1.3, "control": 1.2, "dismissiveness": 0.8, "non_abusive": 0.0
+    "gaslighting": 1.3, "mockery": 1.2, "control": 1.2, "dismissiveness": 0.8
 }
 
 def custom_sentiment(text):
@@ -57,10 +54,11 @@ def custom_sentiment(text):
     label_map = {0: "supportive", 1: "undermining"}
     return {"label": label_map[label_idx], "score": probs[0][label_idx].item()}
 
-def calculate_abuse_level(scores, thresholds, motif_hits=None):
+def calculate_abuse_level(scores, thresholds, motif_hits=None, flag_multiplier=1.0):
     weighted_scores = [score * PATTERN_WEIGHTS.get(label, 1.0) for label, score in zip(LABELS, scores) if score > thresholds[label]]
     base_score = round(np.mean(weighted_scores) * 100, 2) if weighted_scores else 0.0
-    return base_score
+    base_score *= flag_multiplier
+    return min(base_score, 100.0)
 
 def interpret_abuse_level(score):
     if score > 80:
@@ -81,24 +79,36 @@ def analyze_single_message(text, thresholds, motif_flags):
     with torch.no_grad():
         outputs = model(**inputs)
     scores = torch.sigmoid(outputs.logits.squeeze(0)).numpy()
+    threshold_labels = [label for label, score in zip(LABELS, scores) if score > adjusted_thresholds[label]]
+    phrase_labels = [label for label, _ in matched_phrases]
+    pattern_labels_used = list(set(threshold_labels + phrase_labels))
     abuse_level = calculate_abuse_level(scores, adjusted_thresholds, motif_hits)
-    top_label = LABELS[np.argmax(scores)]
-    return abuse_level, top_label
+    top_patterns = sorted([(label, score) for label, score in zip(LABELS, scores)], key=lambda x: x[1], reverse=True)[:2]
+    return abuse_level, pattern_labels_used, top_patterns
 
 def analyze_composite(msg1, msg2, msg3, flags):
     thresholds = THRESHOLDS
-    messages = [m for m in [msg1, msg2, msg3] if m.strip()]
-    results = [analyze_single_message(m, thresholds, flags) for m in messages]
-    if not results:
+    messages = [msg1, msg2, msg3]
+    active_messages = [m for m in messages if m.strip()]
+    if not active_messages:
         return "Please enter at least one message."
-    abuse_scores = [score for score, _ in results]
-    labels = [label.replace("_", " ") for _, label in results]
-    composite = round(np.mean(abuse_scores), 2)
-    abuse_desc = interpret_abuse_level(composite)
-    top_labels = ", ".join(labels)
-    return f"The messages analyzed show signs of {top_labels}, indicating a {composite}% likelihood of abusive communication ({abuse_desc})."
-
-txt_inputs = [
+
+    flag_multiplier = 1 + (0.1 * len(flags))  # each checked flag increases weight by 10%
+    results = [analyze_single_message(m, thresholds, flags) for m in active_messages]
+    abuse_scores = [r[0] for r in results]
+    composite_score = round(sum(abuse_scores) / len(abuse_scores), 2)
+    label_sets = [label for result in results for label in result[1]]
+    label_counts = {label: label_sets.count(label) for label in set(label_sets)}
+    top_labels = sorted(label_counts.items(), key=lambda x: x[1], reverse=True)[:2]
+    top_explanations = [EXPLANATIONS.get(label, "") for label, _ in top_labels]
+
+    result = f"These messages show patterns of {', '.join(label for label, _ in top_labels)} and are estimated to be {composite_score}% likely abusive."
+    for expl in top_explanations:
+        if expl:
+            result += f"\n• {expl}"
+    return result
+
+textbox_inputs = [
     gr.Textbox(label="Message 1"),
     gr.Textbox(label="Message 2"),
     gr.Textbox(label="Message 3")
@@ -111,7 +121,7 @@ checkboxes = gr.CheckboxGroup(label="Contextual Flags", choices=[
 
 iface = gr.Interface(
     fn=analyze_composite,
-    inputs=txt_inputs + [checkboxes],
+    inputs=textbox_inputs + [checkboxes],
     outputs=gr.Textbox(label="Results"),
     title="Abuse Pattern Detector (Multi-Message)",
     allow_flagging="manual"
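The reworked calculate_abuse_level averges only the labels that clear their thresholds, weights them with PATTERN_WEIGHTS, then scales by flag_multiplier and caps the result at 100. A standalone sketch with toy numbers and an abbreviated label set; note that the committed analyze_composite computes a multiplier but does not yet pass it, so the default of 1.0 applies there:

import numpy as np

LABELS = ["gaslighting", "mockery", "dismissiveness"]  # abbreviated for illustration
THRESHOLDS = {"gaslighting": 0.25, "mockery": 0.15, "dismissiveness": 0.45}
PATTERN_WEIGHTS = {"gaslighting": 1.3, "mockery": 1.2, "dismissiveness": 0.8}

def calculate_abuse_level(scores, thresholds, motif_hits=None, flag_multiplier=1.0):
    # Keep only labels whose score clears their threshold, weighted by pattern importance.
    # (motif_hits is accepted but unused, as in the commit.)
    weighted = [s * PATTERN_WEIGHTS.get(l, 1.0)
                for l, s in zip(LABELS, scores) if s > thresholds[l]]
    base = round(np.mean(weighted) * 100, 2) if weighted else 0.0
    # Contextual flags scale the score up; the cap keeps it a valid percentage.
    return min(base * flag_multiplier, 100.0)

scores = np.array([0.60, 0.40, 0.10])  # toy sigmoid outputs
print(calculate_abuse_level(scores, THRESHOLDS))                       # ≈ 63.0
print(calculate_abuse_level(scores, THRESHOLDS, flag_multiplier=1.2))  # ≈ 75.6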
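analyze_single_message now returns the labels that fired (by threshold or matched phrase) along with the two highest-scoring patterns. Because the classifier is multi-label, the logits go through a sigmoid rather than a softmax. A numpy-only sketch of that selection step, with dummy logits standing in for the RoBERTa outputs:

import numpy as np

LABELS = ["gaslighting", "mockery", "dismissiveness"]  # abbreviated

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

logits = np.array([1.1, -0.3, 0.4])  # stand-in for model(**inputs).logits
scores = sigmoid(logits)             # independent per-label probabilities

# Same selection logic as the commit: keep the two highest-scoring labels.
top_patterns = sorted(zip(LABELS, scores), key=lambda x: x[1], reverse=True)[:2]
print(top_patterns)  # [('gaslighting', 0.750...), ('dismissiveness', 0.598...)]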
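Across messages, analyze_composite averages the per-message abuse levels and ranks labels by how many messages they fired in. The same aggregation run on two hypothetical per-message result tuples of the form (abuse_level, pattern_labels_used, top_patterns):

results = [
    (63.0, ["gaslighting", "control"], []),
    (41.5, ["gaslighting"], []),
]

abuse_scores = [r[0] for r in results]
composite_score = round(sum(abuse_scores) / len(abuse_scores), 2)  # 52.25

labels = [label for r in results for label in r[1]]
label_counts = {l: labels.count(l) for l in set(labels)}
top_labels = sorted(label_counts.items(), key=lambda x: x[1], reverse=True)[:2]
print(composite_score, top_labels)  # 52.25 [('gaslighting', 2), ('control', 1)]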
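The diff ends inside the gr.Interface(...) call. A hypothetical, self-contained mirror of the app's wiring, showing how such an interface is typically completed and served; the stub function, flag choices, and launch call are illustrative assumptions, not part of the commit:

import gradio as gr

def analyze_composite(msg1, msg2, msg3, flags):
    # Stub standing in for the real analysis function above.
    active = [m for m in (msg1, msg2, msg3) if m.strip()]
    return f"{len(active)} message(s), {len(flags)} flag(s)"

textbox_inputs = [gr.Textbox(label=f"Message {i}") for i in (1, 2, 3)]
checkboxes = gr.CheckboxGroup(label="Contextual Flags", choices=["flag_a", "flag_b"])

iface = gr.Interface(
    fn=analyze_composite,
    inputs=textbox_inputs + [checkboxes],
    outputs=gr.Textbox(label="Results"),
    title="Abuse Pattern Detector (Multi-Message)",
    allow_flagging="manual",
)

if __name__ == "__main__":
    iface.launch()  # serves the Gradio app locally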