SamanthaStorm committed
Commit 43095bd · verified · 1 Parent(s): 6476c8b

Update app.py

Files changed (1)
  1. app.py +60 -45
app.py CHANGED
@@ -1,28 +1,27 @@
 import gradio as gr
 import torch
-from transformers import RobertaForSequenceClassification, RobertaTokenizer, pipeline
+from transformers import RobertaForSequenceClassification, RobertaTokenizer
 import numpy as np
+from transformers import pipeline
 
-# Load sentiment model
+# Load sentiment analysis model
 sentiment_analyzer = pipeline("sentiment-analysis")
 
-# Load abuse pattern model
+# Load model and tokenizer
 model_name = "SamanthaStorm/abuse-pattern-detector-v2"
 model = RobertaForSequenceClassification.from_pretrained(model_name, trust_remote_code=True)
 tokenizer = RobertaTokenizer.from_pretrained(model_name, trust_remote_code=True)
 
-# Labels
+# Define labels (18 total)
 LABELS = [
     "gaslighting", "mockery", "dismissiveness", "control",
     "guilt_tripping", "apology_baiting", "blame_shifting", "projection",
     "contradictory_statements", "manipulation", "deflection", "insults",
-    "obscure_formal", "recovery_phase", "non_abusive",
-    "suicidal_threat", "physical_threat", "extreme_control"
+    "obscure_formal", "recovery_phase", "non_abusive", "suicidal_threat", "physical_threat",
+    "extreme_control"
 ]
-PATTERN_LABELS = LABELS[:15]
-DANGER_LABELS = LABELS[15:]
 
-# Thresholds
+# Custom thresholds for each label
 THRESHOLDS = {
     "gaslighting": 0.25,
     "mockery": 0.15,
@@ -44,7 +43,9 @@ THRESHOLDS = {
     "extreme_control": 0.36
 }
 
-# Explanations
+PATTERN_LABELS = LABELS[:15]
+DANGER_LABELS = LABELS[15:18]
+
 EXPLANATIONS = {
     "gaslighting": "Gaslighting involves making someone question their own reality or perceptions, often causing them to feel confused or insecure.",
     "blame_shifting": "Blame-shifting is when one person redirects the responsibility for an issue onto someone else, avoiding accountability.",
@@ -60,11 +61,20 @@ EXPLANATIONS = {
     "physical_threat": "Physical threats involve any indication or direct mention of harm to someone’s physical well-being, often used to intimidate or control.",
     "suicidal_threat": "Suicidal threats are statements made to manipulate or control someone by making them feel responsible for the abuser’s well-being.",
     "guilt_tripping": "Guilt-tripping involves making someone feel guilty or responsible for things they didn’t do, often to manipulate their behavior.",
+    "emotional_manipulation": "Emotional manipulation is using guilt, fear, or emotional dependency to control another person’s thoughts, feelings, or actions.",
     "manipulation": "Manipulation refers to using deceptive tactics to control or influence someone’s emotions, decisions, or behavior to serve the manipulator’s own interests.",
-    "non_abusive": "Non-abusive language is communication that is respectful, empathetic, and free of harmful behaviors or manipulation."
+    "non_abusive": "Non-abusive language is communication that is respectful, empathetic, and free of harmful behaviors or manipulation.",
+    "obscure_formal": "Obscure or overly formal language used manipulatively to create confusion, avoid responsibility, or assert superiority."
 }
 
-# Abuse level interpretation
+
+def calculate_abuse_level(scores, thresholds):
+    triggered_scores = [score for label, score in zip(LABELS, scores) if score > thresholds[label]]
+    if not triggered_scores:
+        return 0.0
+    return round(np.mean(triggered_scores) * 100, 2)
+
+
 def interpret_abuse_level(score):
     if score > 80:
         return "Extreme / High Risk"
@@ -77,60 +87,56 @@ def interpret_abuse_level(score):
     else:
         return "Very Low / Likely Safe"
 
-# Main analysis
-def analyze_messages(input_text, context_flags):
+
+def analyze_messages(input_text, risk_flags):
     input_text = input_text.strip()
     if not input_text:
         return "Please enter a message for analysis."
 
-    # Sentiment
     sentiment = sentiment_analyzer(input_text)[0]
     sentiment_label = sentiment['label']
     sentiment_score = sentiment['score']
 
-    # Adjust thresholds if negative tone
     adjusted_thresholds = THRESHOLDS.copy()
-    if sentiment_label.upper() == "NEGATIVE":
-        adjusted_thresholds = {k: v * 0.8 for k, v in THRESHOLDS.items()}
+    if sentiment_label == "NEGATIVE":
+        adjusted_thresholds = {key: val * 0.8 for key, val in THRESHOLDS.items()}
 
-    # Run model
     inputs = tokenizer(input_text, return_tensors="pt", truncation=True, padding=True)
     with torch.no_grad():
        outputs = model(**inputs)
     scores = torch.sigmoid(outputs.logits.squeeze(0)).numpy()
 
-    # Pattern & danger from model
     pattern_count = sum(score > adjusted_thresholds[label] for label, score in zip(PATTERN_LABELS, scores[:15]))
-    danger_flag_count = sum(score > adjusted_thresholds[label] for label, score in zip(DANGER_LABELS, scores[15:]))
+    danger_flag_count = sum(score > adjusted_thresholds[label] for label, score in zip(DANGER_LABELS, scores[15:18]))
 
-    # Add checkbox context flags
-    if context_flags and len(context_flags) >= 2:
+    contextual_flags = risk_flags if risk_flags else []
+    contextual_risk_score = len(contextual_flags)
+    if contextual_risk_score >= 2:
        danger_flag_count += 1
 
-    # Override if non-abusive
+    critical_flags = ["They've threatened harm", "They monitor/follow me", "I feel unsafe when alone with them"]
+    high_risk_context = any(flag in contextual_flags for flag in critical_flags)
+
     non_abusive_score = scores[LABELS.index('non_abusive')]
     if non_abusive_score > adjusted_thresholds['non_abusive']:
        return "This message is classified as non-abusive."
 
-    # Abuse score
-    triggered_scores = [score for label, score in zip(LABELS, scores) if score > adjusted_thresholds[label]]
-    abuse_level = round(np.mean(triggered_scores) * 100, 2) if triggered_scores else 0.0
+    abuse_level = calculate_abuse_level(scores, THRESHOLDS)
     abuse_description = interpret_abuse_level(abuse_level)
 
-    # Top patterns
-    scored_patterns = [(label, score) for label, score in zip(PATTERN_LABELS, scores[:15])]
-    top_patterns = sorted(scored_patterns, key=lambda x: x[1], reverse=True)[:2]
-    top_pattern_explanations = "\n".join(
-        [f"• {label.replace('_', ' ').title()}: {EXPLANATIONS.get(label, 'No explanation available.')}" for label, _ in top_patterns]
-    )
-
-    # Resources
     if danger_flag_count >= 2:
        resources = "Immediate assistance recommended. Please seek professional help or contact emergency services."
     else:
        resources = "For more information on abuse patterns, consider reaching out to support groups or professional counselors."
 
-    # Result
+    scored_patterns = [(label, score) for label, score in zip(PATTERN_LABELS, scores[:15])]
+    top_patterns = sorted(scored_patterns, key=lambda x: x[1], reverse=True)[:2]
+
+    top_pattern_explanations = "\n".join([
+        f"\u2022 {label.replace('_', ' ').title()}: {EXPLANATIONS.get(label, 'No explanation available.')}"
+        for label, _ in top_patterns
+    ])
+
     result = (
         f"Abuse Risk Score: {abuse_level}% – {abuse_description}\n\n"
         f"Most Likely Patterns:\n{top_pattern_explanations}\n\n"
@@ -141,21 +147,30 @@ def analyze_messages(input_text, context_flags):
         f"Resources: {resources} \n\n"
         f"Sentiment: {sentiment_label} (Confidence: {sentiment_score*100:.2f}%)"
     )
+
+    if contextual_flags:
+        result += "\n\n⚠️ You indicated the following:\n" + "\n".join([f"• {flag}" for flag in contextual_flags])
+    if high_risk_context:
+        result += "\n\n🚨 These responses suggest a high-risk situation. Consider seeking immediate help or safety planning resources."
+
     return result
 
-# Interface
+
 iface = gr.Interface(
     fn=analyze_messages,
     inputs=[
-        gr.Textbox(lines=10, placeholder="Enter message here...", label="input_text"),
-        gr.CheckboxGroup(
-            ["They've threatened harm", "They isolate me", "I've changed my behavior out of fear",
-             "They monitor/follow me", "I feel unsafe when alone with them"],
-            label="Do any of these apply to your situation?",
-            type="value"
-        )
+        gr.Textbox(lines=10, placeholder="Enter message here..."),
+        gr.CheckboxGroup(label="Do any of these apply to your situation?", choices=[
+            "They've threatened harm",
+            "They isolate me",
+            "I’ve changed my behavior out of fear",
+            "They monitor/follow me",
+            "I feel unsafe when alone with them"
+        ])
+    ],
+    outputs=[
+        gr.Textbox(label="Analysis Result"),
     ],
-    outputs=gr.Textbox(label="Analysis Result"),
     title="Abuse Pattern Detector"
 )
 
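The new calculate_abuse_level helper averages only the sigmoid scores that clear their per-label thresholds and scales the mean to a percentage. A minimal worked sketch of that rule, using only the two thresholds visible in this diff and made-up scores (the real file applies it across all 18 labels):

import numpy as np

# Illustration of the averaging rule in calculate_abuse_level: keep only scores
# above their per-label threshold, then scale the mean to 0-100 (0.0 if none trigger).
labels = ["gaslighting", "mockery"]                     # subset; thresholds taken from the diff
thresholds = {"gaslighting": 0.25, "mockery": 0.15}
scores = [0.60, 0.10]                                   # made-up sigmoid outputs

triggered = [s for lbl, s in zip(labels, scores) if s > thresholds[lbl]]
abuse_level = round(np.mean(triggered) * 100, 2) if triggered else 0.0
print(abuse_level)  # 60.0 — only the gaslighting score clears its threshold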
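For a quick local check of the new two-argument analyze_messages signature, a hedged smoke-test sketch: it assumes the committed file is importable as app (importing it loads both the abuse-pattern model and the default sentiment-analysis pipeline), and the message and flag selection below are invented for illustration.

import app  # the committed app.py; module import downloads/loads both models

sample_message = "After everything I do for you, you make me out to be the problem."  # invented example
selected_flags = [
    "They monitor/follow me",
    "I feel unsafe when alone with them",
]

# Two selected flags bump danger_flag_count, and both appear in critical_flags,
# so high_risk_context is True; unless the classifier marks the text non_abusive,
# the returned string lists the flags and ends with the safety-planning note.
print(app.analyze_messages(sample_message, selected_flags))

# The Gradio UI defined at module scope can be served locally with:
# app.iface.launch()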