SamanthaStorm committed
Commit cbd8c88 · verified · 1 Parent(s): e032990

Update app.py

Files changed (1):
  1. app.py +13 -35
app.py CHANGED
@@ -4,9 +4,8 @@ import numpy as np
 from transformers import AutoModelForSequenceClassification, AutoTokenizer
 from transformers import RobertaForSequenceClassification, RobertaTokenizer
 from motif_tagging import detect_motifs
-from abuse_type_mapping import determine_abuse_type
 
-# custom fine-tuned sentiment model
+# Load sentiment model
 sentiment_model = AutoModelForSequenceClassification.from_pretrained("SamanthaStorm/tether-sentiment")
 sentiment_tokenizer = AutoTokenizer.from_pretrained("SamanthaStorm/tether-sentiment")
 
@@ -29,7 +28,6 @@ THRESHOLDS = {
 }
 
 PATTERN_LABELS = LABELS[:15]
-DANGER_LABELS = LABELS[15:18]
 
 EXPLANATIONS = {
     "gaslighting": "Gaslighting involves making someone question their own reality or perceptions...",
@@ -42,23 +40,12 @@ EXPLANATIONS = {
     "apology_baiting": "Apology-baiting manipulates victims into apologizing for abuser's behavior.",
     "deflection": "Deflection avoids accountability by redirecting blame.",
     "control": "Control restricts autonomy through manipulation or coercion.",
-    "extreme_control": "Extreme control dominates decisions and behaviors entirely.",
-    "physical_threat": "Physical threats signal risk of bodily harm.",
-    "suicidal_threat": "Suicidal threats manipulate others using self-harm threats.",
-    "guilt_tripping": "Guilt-tripping uses guilt to manipulate someone’s actions.",
     "manipulation": "Manipulation deceives to influence or control outcomes.",
-    "non_abusive": "Non-abusive language is respectful and free of coercion.",
     "obscure_formal": "Obscure/formal language manipulates through confusion or superiority."
 }
 
 PATTERN_WEIGHTS = {
-    "physical_threat": 1.5,
-    "suicidal_threat": 1.4,
-    "extreme_control": 1.5,
-    "gaslighting": 1.3,
-    "control": 1.2,
-    "dismissiveness": 0.8,
-    "non_abusive": 0.0
+    "gaslighting": 1.3, "control": 1.2, "dismissiveness": 0.8, "non_abusive": 0.0
 }
 
 def custom_sentiment(text):
@@ -73,9 +60,6 @@ def custom_sentiment(text):
 def calculate_abuse_level(scores, thresholds, motif_hits=None):
     weighted_scores = [score * PATTERN_WEIGHTS.get(label, 1.0) for label, score in zip(LABELS, scores) if score > thresholds[label]]
     base_score = round(np.mean(weighted_scores) * 100, 2) if weighted_scores else 0.0
-    motif_hits = motif_hits or []
-    if any(label in motif_hits for label in {"physical_threat", "suicidal_threat", "extreme_control"}):
-        base_score = max(base_score, 75.0)
     return base_score
 
 def interpret_abuse_level(score):
@@ -97,28 +81,22 @@ def analyze_single_message(text, thresholds, motif_flags):
     with torch.no_grad():
         outputs = model(**inputs)
     scores = torch.sigmoid(outputs.logits.squeeze(0)).numpy()
-    threshold_labels = [label for label, score in zip(PATTERN_LABELS, scores[:15]) if score > adjusted_thresholds[label]]
-    phrase_labels = [label for label, _ in matched_phrases]
-    pattern_labels_used = list(set(threshold_labels + phrase_labels))
     abuse_level = calculate_abuse_level(scores, adjusted_thresholds, motif_hits)
-    abuse_description = interpret_abuse_level(abuse_level)
-    top_patterns = sorted([(label, score) for label, score in zip(PATTERN_LABELS, scores[:15]) if label != "non_abusive"], key=lambda x: x[1], reverse=True)[:2]
-    pattern_expl = "\n".join([f"• {label.replace('_', ' ').title()}: {EXPLANATIONS.get(label)}" for label, _ in top_patterns])
-    return abuse_level, abuse_description, pattern_expl
+    top_label = LABELS[np.argmax(scores)]
+    return abuse_level, top_label
 
 def analyze_composite(msg1, msg2, msg3, flags):
     thresholds = THRESHOLDS
-    results = [analyze_single_message(m, thresholds, flags) for m in [msg1, msg2, msg3] if m.strip()]
+    messages = [m for m in [msg1, msg2, msg3] if m.strip()]
+    results = [analyze_single_message(m, thresholds, flags) for m in messages]
     if not results:
         return "Please enter at least one message."
-    result_lines = []
-    total_score = 0
-    for i, (score, desc, patterns) in enumerate(results, 1):
-        total_score += score
-        result_lines.append(f"Message {i}: {score:.2f}% – {desc}\n{patterns}\n")
-    composite = round(total_score / len(results), 2)
-    result_lines.append(f"\nComposite Abuse Score: {composite}%")
-    return "\n\n".join(result_lines)
+    abuse_scores = [score for score, _ in results]
+    labels = [label.replace("_", " ") for _, label in results]
+    composite = round(np.mean(abuse_scores), 2)
+    abuse_desc = interpret_abuse_level(composite)
+    top_labels = ", ".join(labels)
+    return f"The messages analyzed show signs of {top_labels}, indicating a {composite}% likelihood of abusive communication ({abuse_desc})."
 
 txt_inputs = [
     gr.Textbox(label="Message 1"),
@@ -140,4 +118,4 @@ iface = gr.Interface(
 )
 
 if __name__ == "__main__":
-    iface.launch()
+    iface.launch()
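For reference, here is a minimal, self-contained sketch of the scoring rule this commit leaves in place: labels whose sigmoid scores clear their per-label thresholds are weighted via PATTERN_WEIGHTS (defaulting to 1.0) and averaged into a 0-100 score. The label subset, thresholds, and scores below are made up for illustration, and the motif_hits parameter is omitted since this commit removes its only use.

import numpy as np

# Illustrative subset; app.py defines the full label set and tuned thresholds.
LABELS = ["gaslighting", "control", "dismissiveness", "non_abusive"]
PATTERN_WEIGHTS = {"gaslighting": 1.3, "control": 1.2, "dismissiveness": 0.8, "non_abusive": 0.0}
THRESHOLDS = {"gaslighting": 0.15, "control": 0.20, "dismissiveness": 0.30, "non_abusive": 0.40}  # made-up values

def calculate_abuse_level(scores, thresholds):
    # Keep labels whose score clears its threshold, weight them, and average.
    weighted = [score * PATTERN_WEIGHTS.get(label, 1.0)
                for label, score in zip(LABELS, scores)
                if score > thresholds[label]]
    return round(np.mean(weighted) * 100, 2) if weighted else 0.0

scores = np.array([0.62, 0.48, 0.10, 0.05])  # fake sigmoid outputs
print(calculate_abuse_level(scores, THRESHOLDS))  # 0.62*1.3 and 0.48*1.2 pass -> 69.1

Note that with the motif override removed, nothing floors the score when a high-risk phrase is detected; the result is purely this weighted mean.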
 
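The commit also collapses the per-message result to the single highest-scoring label via np.argmax, replacing the old threshold and phrase-match label sets. A sketch of that step, again with an illustrative label list:

import numpy as np

LABELS = ["gaslighting", "control", "dismissiveness", "non_abusive"]  # illustrative subset

def top_label(scores):
    # Single highest-scoring label, as in the updated analyze_single_message.
    return LABELS[int(np.argmax(scores))]

print(top_label(np.array([0.62, 0.48, 0.10, 0.05])))  # -> gaslighting

One consequence worth noting: np.argmax runs over every label, including non_abusive, so a message scored as mostly non-abusive will surface that label rather than being filtered out as the old top_patterns logic did.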
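Finally, analyze_composite now averages the per-message scores and returns one summary sentence instead of a per-message breakdown. A runnable sketch of the new aggregation, using fake per-message results and a stand-in for interpret_abuse_level (whose real bands are defined elsewhere in app.py):

import numpy as np

def interpret_abuse_level(score):
    # Stand-in banding for illustration; the real bands live elsewhere in app.py.
    if score >= 75:
        return "high risk"
    if score >= 50:
        return "moderate risk"
    if score > 0:
        return "low risk"
    return "no abuse detected"

# Fake (abuse_level, top_label) pairs as returned by analyze_single_message.
results = [(69.1, "gaslighting"), (42.5, "guilt_tripping")]
abuse_scores = [score for score, _ in results]
labels = [label.replace("_", " ") for _, label in results]
composite = round(np.mean(abuse_scores), 2)
abuse_desc = interpret_abuse_level(composite)
print(f"The messages analyzed show signs of {', '.join(labels)}, "
      f"indicating a {composite}% likelihood of abusive communication ({abuse_desc}).")
# -> signs of gaslighting, guilt tripping, indicating a 55.8% likelihood (moderate risk)

Because the composite is a plain mean, a single severe message is diluted across milder ones; the removed per-message breakdown made such outliers visible, while the new one-line summary does not.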