SamanthaStorm committed on
Commit
cb6b46c
Β·
verified Β·
1 Parent(s): eb7b135

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +199 -222
app.py CHANGED
@@ -6,275 +6,252 @@ from transformers import RobertaForSequenceClassification, RobertaTokenizer
6
  from motif_tagging import detect_motifs
7
  import re
8
 
9
- # --- Sentiment Model: T5-based Emotion Classifier ---
10
- sentiment_tokenizer = AutoTokenizer.from_pretrained("mrm8488/t5-base-finetuned-emotion")
11
- sentiment_model = AutoModelForSeq2SeqLM.from_pretrained("mrm8488/t5-base-finetuned-emotion")
 
12
 
13
  EMOTION_TO_SENTIMENT = {
14
- "joy": "supportive",
15
- "love": "supportive",
16
- "surprise": "supportive",
17
- "neutral": "supportive",
18
- "sadness": "undermining",
19
- "anger": "undermining",
20
- "fear": "undermining",
21
- "disgust": "undermining",
22
- "shame": "undermining",
23
- "guilt": "undermining"
24
  }
25
 
26
- # --- Abuse Detection Model ---
27
- model_name = "SamanthaStorm/autotrain-jlpi4-mllvp"
 
28
  model = RobertaForSequenceClassification.from_pretrained(model_name, trust_remote_code=True)
29
  tokenizer = RobertaTokenizer.from_pretrained(model_name, trust_remote_code=True)
30
 
31
  LABELS = [
32
- "blame shifting", "contradictory statements", "control", "dismissiveness",
33
- "gaslighting", "guilt tripping", "insults", "obscure language",
34
- "projection", "recovery phase", "threat"
35
  ]
36
 
37
  THRESHOLDS = {
38
- "blame shifting": 0.3,
39
- "contradictory statements": 0.32,
40
- "control": 0.48,
41
- "dismissiveness": 0.45,
42
- "gaslighting": 0.30,
43
- "guilt tripping": 0.20,
44
- "insults": 0.34,
45
- "obscure language": 0.25,
46
- "projection": 0.35,
47
- "recovery phase": 0.25,
48
- "threat": 0.25
49
  }
50
 
51
  PATTERN_WEIGHTS = {
52
- "gaslighting": 1.3,
53
- "control": 1.2,
54
- "dismissiveness": 0.8,
55
- "blame shifting": 0.8,
56
- "contradictory statements": 0.75
57
  }
58
 
59
  EXPLANATIONS = {
60
- "blame shifting": "Blame-shifting is when one person redirects responsibility onto someone else to avoid accountability.",
61
- "contradictory statements": "Contradictory statements confuse the listener by flipping positions or denying previous claims.",
62
- "control": "Control restricts another person’s autonomy through coercion, manipulation, or threats.",
63
- "dismissiveness": "Dismissiveness is belittling or disregarding another person’s feelings, needs, or opinions.",
64
- "gaslighting": "Gaslighting involves making someone question their own reality, memory, or perceptions.",
65
- "guilt tripping": "Guilt-tripping uses guilt to manipulate someone’s actions or decisions.",
66
- "insults": "Insults are derogatory or demeaning remarks meant to shame, belittle, or hurt someone.",
67
- "obscure language": "Obscure language manipulates through complexity, vagueness, or superiority to confuse the other person.",
68
- "projection": "Projection accuses someone else of the very behaviors or intentions the speaker is exhibiting.",
69
- "recovery phase": "Recovery phase statements attempt to soothe or reset tension without acknowledging harm or change.",
70
- "threat": "Threats use fear of harm (physical, emotional, or relational) to control or intimidate someone."
71
  }
72
 
73
  RISK_SNIPPETS = {
74
- "low": (
75
- "🟒 Risk Level: Low",
76
- "The language patterns here do not strongly indicate abuse.",
77
- "Continue to check in with yourself and notice how you feel in response to repeated patterns."
78
- ),
79
- "moderate": (
80
- "⚠️ Risk Level: Moderate to High",
81
- "This language includes control, guilt, or reversal tactics.",
82
- "These patterns often lead to emotional confusion and reduced self-trust. Document these messages or talk with someone safe."
83
- ),
84
- "high": (
85
- "πŸ›‘ Risk Level: High",
86
- "Language includes threats or coercive control, which are strong indicators of escalation.",
87
- "Consider creating a safety plan or contacting a support line. Trust your sense of unease."
88
- )
89
  }
90
 
91
  def generate_risk_snippet(abuse_score, top_label):
92
- if abuse_score >= 85:
93
- risk_level = "high"
94
- elif abuse_score >= 60:
95
- risk_level = "moderate"
96
- else:
97
- risk_level = "low"
98
- title, summary, advice = RISK_SNIPPETS[risk_level]
99
- return f"\n\n{title}\n{summary} (Pattern: {top_label})\nπŸ’‘ {advice}"
 
 
100
 
101
- # --- DARVO Detection ---
102
  DARVO_PATTERNS = {
103
- "blame shifting", "projection", "dismissiveness", "guilt tripping", "contradictory statements"
104
  }
105
 
106
  DARVO_MOTIFS = [
107
- "i guess i’m the bad guy", "after everything i’ve done", "you always twist everything",
108
- "so now it’s all my fault", "i’m the villain", "i’m always wrong", "you never listen",
109
- "you’re attacking me", "i’m done trying", "i’m the only one who cares"
110
  ]
111
 
112
  def detect_contradiction(message):
113
- contradiction_flag = False
114
- contradiction_phrases = [
115
- (r"\b(i love you).{0,15}(i hate you|you ruin everything)", re.IGNORECASE),
116
- (r"\b(i’m sorry).{0,15}(but you|if you hadn’t)", re.IGNORECASE),
117
- (r"\b(i’m trying).{0,15}(you never|why do you)", re.IGNORECASE),
118
- (r"\b(do what you want).{0,15}(you’ll regret it|i always give everything)", re.IGNORECASE),
119
- (r"\b(i don’t care).{0,15}(you never think of me)", re.IGNORECASE),
120
- (r"\b(i guess i’m just).{0,15}(the bad guy|worthless|never enough)", re.IGNORECASE),
121
- ]
122
- for pattern, flags in contradiction_phrases:
123
- if re.search(pattern, message, flags):
124
- contradiction_flag = True
125
- break
126
- return contradiction_flag
127
 
128
  def calculate_darvo_score(patterns, sentiment_before, sentiment_after, motifs_found, contradiction_flag=False):
129
- pattern_hits = len([p.lower() for p in patterns if p.lower() in DARVO_PATTERNS])
130
- pattern_score = pattern_hits / len(DARVO_PATTERNS)
131
- sentiment_shift_score = max(0.0, sentiment_after - sentiment_before)
132
- motif_hits = len([m.lower() for m in motifs_found if m.lower() in DARVO_MOTIFS])
133
- motif_score = motif_hits / len(DARVO_MOTIFS)
134
- contradiction_score = 1.0 if contradiction_flag else 0.0
135
- darvo_score = (
136
- 0.3 * pattern_score +
137
- 0.3 * sentiment_shift_score +
138
- 0.25 * motif_score +
139
- 0.15 * contradiction_score
140
- )
141
- return round(min(darvo_score, 1.0), 3)
142
 
143
- # --- Sentiment Mapping ---
144
  def custom_sentiment(text):
145
- input_ids = sentiment_tokenizer(f"emotion: {text}", return_tensors="pt").input_ids
146
- with torch.no_grad():
147
- outputs = sentiment_model.generate(input_ids)
148
- emotion = sentiment_tokenizer.decode(outputs[0], skip_special_tokens=True).strip().lower()
149
- sentiment = EMOTION_TO_SENTIMENT.get(emotion, "undermining")
150
- return {"label": sentiment, "emotion": emotion}
151
 
152
- # --- Abuse Analysis Core ---
153
  def calculate_abuse_level(scores, thresholds, motif_hits=None, flag_multiplier=1.0):
154
- weighted_scores = [score * PATTERN_WEIGHTS.get(label, 1.0)
155
- for label, score in zip(LABELS, scores) if score > thresholds[label]]
156
- base_score = round(np.mean(weighted_scores) * 100, 2) if weighted_scores else 0.0
157
- base_score *= flag_multiplier
158
- return min(base_score, 100.0)
159
 
160
  def analyze_single_message(text, thresholds, motif_flags):
161
- motif_hits, matched_phrases = detect_motifs(text)
162
- sentiment = custom_sentiment(text)
163
- sentiment_score = 0.5 if sentiment["label"] == "undermining" else 0.0
164
- print(f"Detected emotion: {sentiment['emotion']} β†’ sentiment: {sentiment['label']}")
165
-
166
- adjusted_thresholds = {
167
- k: v * 0.8 for k, v in thresholds.items()
168
- } if sentiment["label"] == "undermining" else thresholds.copy()
169
-
170
- inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
171
- with torch.no_grad():
172
- outputs = model(**inputs)
173
- scores = torch.sigmoid(outputs.logits.squeeze(0)).numpy()
174
-
175
- threshold_labels = [label for label, score in zip(LABELS, scores) if score > adjusted_thresholds[label]]
176
- phrase_labels = [label for label, _ in matched_phrases]
177
- pattern_labels_used = list(set(threshold_labels + phrase_labels))
178
-
179
- abuse_level = calculate_abuse_level(scores, adjusted_thresholds, motif_hits)
180
- top_patterns = sorted([(label, score) for label, score in zip(LABELS, scores)],
181
- key=lambda x: x[1], reverse=True)[:2]
182
-
183
- motif_phrases = [text for _, text in matched_phrases]
184
- contradiction_flag = detect_contradiction(text)
185
- darvo_score = calculate_darvo_score(pattern_labels_used, 0.0, sentiment_score, motif_phrases, contradiction_flag)
186
 
187
- return abuse_level, pattern_labels_used, top_patterns, darvo_score, sentiment
188
-
189
- # --- Composite Message Analysis ---
190
  def analyze_composite(msg1, msg2, msg3, flags):
191
- thresholds = THRESHOLDS.copy()
192
- messages = [msg1, msg2, msg3]
193
- active_messages = [m for m in messages if m.strip()]
194
- if not active_messages:
195
- return "Please enter at least one message."
196
-
197
- results = []
198
- sentiment_labels = []
199
- sentiment_score_total = 0.0
200
-
201
- for m in active_messages:
202
- result = analyze_single_message(m, thresholds, flags)
203
- print(f"Message: {m}")
204
- print(f"Sentiment result: {result[4]}")
205
- results.append(result)
206
- sentiment_labels.append(result[4]["label"])
207
- if result[4]["label"] == "undermining":
208
- sentiment_score_total += 0.5 # fixed value from `analyze_single_message`
209
-
210
- # Sentiment adjustment based on average and balance
211
- undermining_count = sentiment_labels.count("undermining")
212
- supportive_count = sentiment_labels.count("supportive")
213
-
214
- if undermining_count > supportive_count:
215
- thresholds = {k: v * 0.9 for k, v in thresholds.items()}
216
- elif undermining_count and supportive_count:
217
- thresholds = {k: v * 0.95 for k, v in thresholds.items()} # very subtle if mixed
218
- print("βš–οΈ Detected conflicting sentiment across messages.")
219
-
220
- # Abuse scoring
221
- abuse_scores = [r[0] for r in results]
222
- darvo_scores = [r[3] for r in results]
223
- average_darvo = round(sum(darvo_scores) / len(darvo_scores), 3)
224
- base_score = sum(abuse_scores) / len(abuse_scores)
225
-
226
- label_sets = [[label for label, _ in r[2]] for r in results]
227
- label_counts = {label: sum(label in s for s in label_sets) for label in set().union(*label_sets)}
228
- top_label = max(label_counts.items(), key=lambda x: x[1])
229
- top_explanation = EXPLANATIONS.get(top_label[0], "")
230
-
231
- flag_weights = {
232
- "They've threatened harm": 6,
233
- "They isolate me": 5,
234
- "I’ve changed my behavior out of fear": 4,
235
- "They monitor/follow me": 4,
236
- "I feel unsafe when alone with them": 6
237
- }
238
- flag_boost = sum(flag_weights.get(f, 3) for f in flags) / len(active_messages)
239
- composite_score = min(base_score + flag_boost, 100)
240
- if len(active_messages) == 1:
241
- composite_score *= 0.85
242
- elif len(active_messages) == 2:
243
- composite_score *= 0.93
244
- composite_score = round(min(composite_score, 100), 2)
245
-
246
- result = f"These messages show a pattern of **{top_label[0]}** and are estimated to be {composite_score}% likely abusive."
247
- if top_explanation:
248
- result += f"\nβ€’ {top_explanation}"
249
- if average_darvo > 0.25:
250
- darvo_descriptor = "moderate" if average_darvo < 0.65 else "high"
251
- result += f"\n\nDARVO Score: {average_darvo} β†’ This indicates a **{darvo_descriptor} likelihood** of narrative reversal (DARVO), where the speaker may be denying, attacking, or reversing blame."
252
- result += generate_risk_snippet(composite_score, top_label[0])
253
-
254
- if undermining_count and supportive_count:
255
- result += "\n\nβš–οΈ These messages contain **conflicting emotional tones** β€” this may indicate mixed signals, ambivalence, or a push-pull dynamic. Use caution interpreting any one message alone."
256
-
257
- return result
258
 
259
- # --- Gradio Interface ---
260
  textbox_inputs = [
261
- gr.Textbox(label="Message 1"),
262
- gr.Textbox(label="Message 2"),
263
- gr.Textbox(label="Message 3")
264
  ]
265
 
266
- checkboxes = gr.CheckboxGroup(label="Contextual Flags", choices=[
267
- "They've threatened harm", "They isolate me", "I’ve changed my behavior out of fear",
268
- "They monitor/follow me", "I feel unsafe when alone with them"
269
  ])
270
 
271
  iface = gr.Interface(
272
- fn=analyze_composite,
273
- inputs=textbox_inputs + [checkboxes],
274
- outputs=gr.Textbox(label="Results"),
275
- title="Abuse Pattern Detector (Multi-Message)",
276
- allow_flagging="manual"
277
  )
278
 
279
- if __name__ == "__main__":
280
- iface.launch()
 
6
  from motif_tagging import detect_motifs
7
  import re
8
 
9
# --- Sentiment Model: T5-based Emotion Classifier ---
# Loads a T5 model fine-tuned for emotion classification. The generated
# emotion word is later mapped to a binary supportive/undermining sentiment
# via EMOTION_TO_SENTIMENT. (Fixed: the pasted version used typographic
# quotes and lost the `#` comment marker, which is a SyntaxError.)
sentiment_tokenizer = AutoTokenizer.from_pretrained("mrm8488/t5-base-finetuned-emotion")
sentiment_model = AutoModelForSeq2SeqLM.from_pretrained("mrm8488/t5-base-finetuned-emotion")
13
 
14
# Maps the raw emotion word emitted by the T5 classifier to the coarse
# binary sentiment used by the analysis pipeline. Unknown emotions default
# to "undermining" at the lookup site (see custom_sentiment).
# (Fixed: typographic quotes broke every key/value literal.)
EMOTION_TO_SENTIMENT = {
    "joy": "supportive",
    "love": "supportive",
    "surprise": "supportive",
    "neutral": "supportive",
    "sadness": "undermining",
    "anger": "undermining",
    "fear": "undermining",
    "disgust": "undermining",
    "shame": "undermining",
    "guilt": "undermining",
}
26
 
27
# --- Abuse Detection Model ---
# Multi-label RoBERTa classifier; one sigmoid score per entry in LABELS.
# (Fixed: typographic quotes around the model id and the lost `#` marker.)
model_name = "SamanthaStorm/autotrain-jlpi4-mllvp"
model = RobertaForSequenceClassification.from_pretrained(model_name, trust_remote_code=True)
tokenizer = RobertaTokenizer.from_pretrained(model_name, trust_remote_code=True)
32
 
33
# Output labels of the abuse model, in the order of its logits; zip()ed
# against the score vector everywhere downstream, so order matters.
# (Fixed: typographic quotes broke every literal.)
LABELS = [
    "blame shifting", "contradictory statements", "control", "dismissiveness",
    "gaslighting", "guilt tripping", "insults", "obscure language",
    "projection", "recovery phase", "threat",
]
38
 
39
# Per-label minimum sigmoid score for a pattern to count as detected.
# Keys must match LABELS exactly. (Fixed: typographic quotes.)
THRESHOLDS = {
    "blame shifting": 0.3,
    "contradictory statements": 0.32,
    "control": 0.48,
    "dismissiveness": 0.45,
    "gaslighting": 0.30,
    "guilt tripping": 0.20,
    "insults": 0.34,
    "obscure language": 0.25,
    "projection": 0.35,
    "recovery phase": 0.25,
    "threat": 0.25,
}
52
 
53
# Relative weight applied to a label's score when averaging the abuse
# level; labels not listed default to 1.0 (see calculate_abuse_level).
# (Fixed: typographic quotes.)
PATTERN_WEIGHTS = {
    "gaslighting": 1.3,
    "control": 1.2,
    "dismissiveness": 0.8,
    "blame shifting": 0.8,
    "contradictory statements": 0.75,
}
60
 
61
# One-sentence user-facing explanation per label, appended to the report
# for the dominant pattern. (Fixed: typographic quote delimiters; curly
# apostrophes *inside* the prose are intentional and preserved.)
EXPLANATIONS = {
    "blame shifting": "Blame-shifting is when one person redirects responsibility onto someone else to avoid accountability.",
    "contradictory statements": "Contradictory statements confuse the listener by flipping positions or denying previous claims.",
    "control": "Control restricts another person’s autonomy through coercion, manipulation, or threats.",
    "dismissiveness": "Dismissiveness is belittling or disregarding another person’s feelings, needs, or opinions.",
    "gaslighting": "Gaslighting involves making someone question their own reality, memory, or perceptions.",
    "guilt tripping": "Guilt-tripping uses guilt to manipulate someone’s actions or decisions.",
    "insults": "Insults are derogatory or demeaning remarks meant to shame, belittle, or hurt someone.",
    "obscure language": "Obscure language manipulates through complexity, vagueness, or superiority to confuse the other person.",
    "projection": "Projection accuses someone else of the very behaviors or intentions the speaker is exhibiting.",
    "recovery phase": "Recovery phase statements attempt to soothe or reset tension without acknowledging harm or change.",
    "threat": "Threats use fear of harm (physical, emotional, or relational) to control or intimidate someone.",
}
74
 
75
# (title, summary, advice) triples per risk tier; consumed by
# generate_risk_snippet. (Fixed: typographic quotes.)
RISK_SNIPPETS = {
    "low": (
        "🟒 Risk Level: Low",
        "The language patterns here do not strongly indicate abuse.",
        "Continue to check in with yourself and notice how you feel in response to repeated patterns."
    ),
    "moderate": (
        "⚠️ Risk Level: Moderate to High",
        "This language includes control, guilt, or reversal tactics.",
        "These patterns often lead to emotional confusion and reduced self-trust. Document these messages or talk with someone safe."
    ),
    "high": (
        "πŸ›‘ Risk Level: High",
        "Language includes threats or coercive control, which are strong indicators of escalation.",
        "Consider creating a safety plan or contacting a support line. Trust your sense of unease."
    ),
}
92
 
93
def generate_risk_snippet(abuse_score, top_label):
    """Map a 0-100 abuse score onto a risk tier and format the advisory text.

    Args:
        abuse_score: composite abuse score in [0, 100].
        top_label: name of the dominant detected pattern, echoed in the text.

    Returns:
        A newline-prefixed string with the tier title, summary, and advice
        from RISK_SNIPPETS. (Fixed: typographic quotes broke the string
        literals and the f-string delimiter.)
    """
    if abuse_score >= 85:
        risk_level = "high"
    elif abuse_score >= 60:
        risk_level = "moderate"
    else:
        risk_level = "low"
    title, summary, advice = RISK_SNIPPETS[risk_level]
    return f"\n\n{title}\n{summary} (Pattern: {top_label})\nπŸ’‘ {advice}"
102
+
103
# --- DARVO Detection ---
# Pattern labels considered DARVO-associated (Deny, Attack, Reverse
# Victim and Offender). (Fixed: lost `#` marker and typographic quotes.)
DARVO_PATTERNS = {
    "blame shifting", "projection", "dismissiveness", "guilt tripping", "contradictory statements"
}

# Canonical DARVO phrasings; compared lowercase against detected motif
# phrases. Curly apostrophes are deliberate — they must match the motif
# texts produced upstream by detect_motifs.
DARVO_MOTIFS = [
    "i guess i’m the bad guy", "after everything i’ve done", "you always twist everything",
    "so now it’s all my fault", "i’m the villain", "i’m always wrong", "you never listen",
    "you’re attacking me", "i’m done trying", "i’m the only one who cares"
]
114
 
115
def detect_contradiction(message):
    """Return True if *message* matches a known contradiction pattern.

    Each pattern pairs an affirming opener with an undermining follow-up
    within 15 characters (e.g. "i love you ... i hate you"), matched
    case-insensitively. Curly apostrophes in the patterns are deliberate
    and must match the input text style. (Fixed: typographic quotes broke
    the raw-string regex literals.)
    """
    contradiction_phrases = [
        (r"\b(i love you).{0,15}(i hate you|you ruin everything)", re.IGNORECASE),
        (r"\b(i’m sorry).{0,15}(but you|if you hadn’t)", re.IGNORECASE),
        (r"\b(i’m trying).{0,15}(you never|why do you)", re.IGNORECASE),
        (r"\b(do what you want).{0,15}(you’ll regret it|i always give everything)", re.IGNORECASE),
        (r"\b(i don’t care).{0,15}(you never think of me)", re.IGNORECASE),
        (r"\b(i guess i’m just).{0,15}(the bad guy|worthless|never enough)", re.IGNORECASE),
    ]
    return any(re.search(pattern, message, flags) for pattern, flags in contradiction_phrases)
 
 
 
 
 
125
 
126
def calculate_darvo_score(patterns, sentiment_before, sentiment_after, motifs_found, contradiction_flag=False):
    """Blend four signals into a DARVO likelihood score in [0, 1].

    Signals and fixed weights:
      * 0.30 — fraction of *patterns* that are DARVO-associated
      * 0.30 — positive sentiment shift (after minus before, floored at 0)
      * 0.25 — fraction of known DARVO motifs found in *motifs_found*
      * 0.15 — whether a contradiction pattern was detected

    Returns the weighted sum clamped to 1.0 and rounded to 3 decimals.
    """
    pattern_score = sum(p.lower() in DARVO_PATTERNS for p in patterns) / len(DARVO_PATTERNS)
    motif_score = sum(m.lower() in DARVO_MOTIFS for m in motifs_found) / len(DARVO_MOTIFS)
    shift_score = max(0.0, sentiment_after - sentiment_before)
    contradiction_score = 1.0 if contradiction_flag else 0.0
    total = (
        0.3 * pattern_score
        + 0.3 * shift_score
        + 0.25 * motif_score
        + 0.15 * contradiction_score
    )
    return round(min(total, 1.0), 3)
140
 
 
141
def custom_sentiment(text):
    """Classify *text* into a coarse sentiment via the T5 emotion model.

    The model is prompted with "emotion: <text>" and generates a free-form
    emotion word, which EMOTION_TO_SENTIMENT maps to "supportive" or
    "undermining"; unknown emotions default to "undermining".
    (Fixed: typographic quotes broke the f-string, the "pt" literal, and
    the returned dict keys.)

    Returns:
        dict with keys "label" (mapped sentiment) and "emotion" (raw output).
    """
    input_ids = sentiment_tokenizer(f"emotion: {text}", return_tensors="pt").input_ids
    with torch.no_grad():
        outputs = sentiment_model.generate(input_ids)
    emotion = sentiment_tokenizer.decode(outputs[0], skip_special_tokens=True).strip().lower()
    sentiment = EMOTION_TO_SENTIMENT.get(emotion, "undermining")
    return {"label": sentiment, "emotion": emotion}
148
 
 
149
def calculate_abuse_level(scores, thresholds, motif_hits=None, flag_multiplier=1.0):
    """Aggregate per-label model scores into an abuse level on a 0-100 scale.

    Only labels whose score exceeds their threshold contribute; each
    contribution is scaled by its PATTERN_WEIGHTS entry (default 1.0).
    The mean of the weighted contributions is converted to a percentage,
    multiplied by *flag_multiplier*, and capped at 100.

    *motif_hits* is accepted for call-site compatibility but is not used
    in this computation.
    """
    weighted = []
    for label, score in zip(LABELS, scores):
        if score > thresholds[label]:
            weighted.append(score * PATTERN_WEIGHTS.get(label, 1.0))
    if weighted:
        base_score = round(np.mean(weighted) * 100, 2)
    else:
        base_score = 0.0
    base_score *= flag_multiplier
    return min(base_score, 100.0)
 
154
 
155
def analyze_single_message(text, thresholds, motif_flags):
    """Run the full abuse analysis pipeline on a single message.

    Args:
        text: the message to analyze.
        thresholds: per-label detection thresholds (label -> float).
        motif_flags: contextual flags from the caller (passed through,
            currently unused in this function).

    Returns:
        Tuple of (abuse_level, pattern_labels_used, top_patterns,
        darvo_score, sentiment_dict).

    (Fixed: typographic quotes in the print f-string, the dict subscripts,
    and the string delimiters — all SyntaxErrors in the pasted version.)
    """
    motif_hits, matched_phrases = detect_motifs(text)
    sentiment = custom_sentiment(text)
    # Fixed shift value fed into the DARVO score when tone is undermining.
    sentiment_score = 0.5 if sentiment["label"] == "undermining" else 0.0
    print(f"Detected emotion: {sentiment['emotion']} β†’ sentiment: {sentiment['label']}")

    # Undermining tone lowers every threshold by 20% so patterns trip earlier.
    adjusted_thresholds = (
        {k: v * 0.8 for k, v in thresholds.items()}
        if sentiment["label"] == "undermining"
        else thresholds.copy()
    )
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
    scores = torch.sigmoid(outputs.logits.squeeze(0)).numpy()

    # Patterns flagged either by the classifier or by matched motif phrases.
    threshold_labels = [label for label, score in zip(LABELS, scores) if score > adjusted_thresholds[label]]
    phrase_labels = [label for label, _ in matched_phrases]
    pattern_labels_used = list(set(threshold_labels + phrase_labels))

    abuse_level = calculate_abuse_level(scores, adjusted_thresholds, motif_hits)
    top_patterns = sorted(zip(LABELS, scores), key=lambda x: x[1], reverse=True)[:2]

    # Renamed the loop variable (was `text`) to avoid shadowing the parameter.
    motif_phrases = [phrase for _, phrase in matched_phrases]
    contradiction_flag = detect_contradiction(text)
    darvo_score = calculate_darvo_score(pattern_labels_used, 0.0, sentiment_score, motif_phrases, contradiction_flag)
    return abuse_level, pattern_labels_used, top_patterns, darvo_score, sentiment
 
 
 
 
 
 
175
 
 
 
 
176
def analyze_composite(msg1, msg2, msg3, flags):
    """Analyze up to three messages plus contextual flags; build the report.

    Blank messages are ignored. Each remaining message is analyzed via
    analyze_single_message; abuse and DARVO scores are averaged, checkbox
    flags add a weighted boost, sparse input damps the score, and a risk
    snippet plus optional DARVO/mixed-tone notes are appended.

    Returns:
        Report string (or a prompt when no message was entered).

    (Fixed: typographic quotes around the early-return string; removed the
    `sentiment_score_total` accumulator, which was never read.)
    """
    thresholds = THRESHOLDS.copy()
    active_messages = [m for m in (msg1, msg2, msg3) if m.strip()]
    if not active_messages:
        return "Please enter at least one message."

    results = []
    sentiment_labels = []
    for m in active_messages:
        result = analyze_single_message(m, thresholds, flags)
        print(f"Message: {m}")
        print(f"Sentiment result: {result[4]}")
        results.append(result)
        sentiment_labels.append(result[4]["label"])

    # Sentiment balance across messages adjusts thresholds.
    # NOTE(review): the adjusted thresholds are never re-applied to the
    # already-computed results — confirm whether a re-analysis was intended.
    undermining_count = sentiment_labels.count("undermining")
    supportive_count = sentiment_labels.count("supportive")
    if undermining_count > supportive_count:
        thresholds = {k: v * 0.9 for k, v in thresholds.items()}
    elif undermining_count and supportive_count:
        thresholds = {k: v * 0.95 for k, v in thresholds.items()}  # very subtle if mixed
        print("βš–οΈ Detected conflicting sentiment across messages.")

    abuse_scores = [r[0] for r in results]
    darvo_scores = [r[3] for r in results]
    average_darvo = round(sum(darvo_scores) / len(darvo_scores), 3)
    base_score = sum(abuse_scores) / len(abuse_scores)

    # Most frequent label among each message's top-2 patterns.
    label_sets = [[label for label, _ in r[2]] for r in results]
    label_counts = {label: sum(label in s for s in label_sets) for label in set().union(*label_sets)}
    top_label = max(label_counts.items(), key=lambda x: x[1])
    top_explanation = EXPLANATIONS.get(top_label[0], "")

    # Keys must byte-match the CheckboxGroup choice strings (note the mixed
    # straight/curly apostrophes); unmatched flags fall back to weight 3.
    flag_weights = {
        "They've threatened harm": 6,
        "They isolate me": 5,
        "I’ve changed my behavior out of fear": 4,
        "They monitor/follow me": 4,
        "I feel unsafe when alone with them": 6
    }
    flag_boost = sum(flag_weights.get(f, 3) for f in flags) / len(active_messages)
    composite_score = min(base_score + flag_boost, 100)
    # Fewer messages means weaker evidence, so damp the composite score.
    if len(active_messages) == 1:
        composite_score *= 0.85
    elif len(active_messages) == 2:
        composite_score *= 0.93
    composite_score = round(min(composite_score, 100), 2)

    result = f"These messages show a pattern of **{top_label[0]}** and are estimated to be {composite_score}% likely abusive."
    if top_explanation:
        result += f"\nβ€’ {top_explanation}"
    if average_darvo > 0.25:
        darvo_descriptor = "moderate" if average_darvo < 0.65 else "high"
        result += f"\n\nDARVO Score: {average_darvo} β†’ This indicates a **{darvo_descriptor} likelihood** of narrative reversal (DARVO), where the speaker may be denying, attacking, or reversing blame."
    result += generate_risk_snippet(composite_score, top_label[0])
    if undermining_count and supportive_count:
        result += "\n\nβš–οΈ These messages contain **conflicting emotional tones** β€” this may indicate mixed signals, ambivalence, or a push-pull dynamic. Use caution interpreting any one message alone."
    return result
 
 
 
 
 
 
 
 
236
 
 
237
# --- Gradio Interface Inputs ---
textbox_inputs = [
    gr.Textbox(label="Message 1"),
    gr.Textbox(label="Message 2"),
    gr.Textbox(label="Message 3")
]

# IMPORTANT: these choice strings must byte-match the flag_weights keys in
# analyze_composite ("They've" uses a straight apostrophe, "I’ve" a curly
# one); otherwise the flag silently falls back to the default weight of 3.
# The pasted version curly-quoted "They’ve", breaking that match — fixed.
checkboxes = gr.CheckboxGroup(label="Contextual Flags", choices=[
    "They've threatened harm", "They isolate me", "I’ve changed my behavior out of fear",
    "They monitor/follow me", "I feel unsafe when alone with them"
])
247
 
248
# Wire the three message boxes plus the flag checkboxes into the analyzer.
# (Fixed: typographic quotes around every keyword-argument string.)
iface = gr.Interface(
    fn=analyze_composite,
    inputs=textbox_inputs + [checkboxes],
    outputs=gr.Textbox(label="Results"),
    title="Abuse Pattern Detector (Multi-Message)",
    allow_flagging="manual"
)
255
 
256
# Script entry point. (Fixed: the pasted version read `if name == "main":`,
# which raises NameError — the dunder underscores were stripped.)
if __name__ == "__main__":
    iface.launch()