SamanthaStorm committed on
Commit
1dbc865
Β·
verified Β·
1 Parent(s): cb6b46c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +207 -195
app.py CHANGED
@@ -6,252 +6,264 @@ from transformers import RobertaForSequenceClassification, RobertaTokenizer
6
  from motif_tagging import detect_motifs
7
  import re
8
 
9
- β€” Sentiment Model: T5-based Emotion Classifier β€”
10
-
11
- sentiment_tokenizer = AutoTokenizer.from_pretrained(β€œmrm8488/t5-base-finetuned-emotion”)
12
- sentiment_model = AutoModelForSeq2SeqLM.from_pretrained(β€œmrm8488/t5-base-finetuned-emotion”)
13
 
14
  EMOTION_TO_SENTIMENT = {
15
- β€œjoy”: β€œsupportive”,
16
- β€œlove”: β€œsupportive”,
17
- β€œsurprise”: β€œsupportive”,
18
- β€œneutral”: β€œsupportive”,
19
- β€œsadness”: β€œundermining”,
20
- β€œanger”: β€œundermining”,
21
- β€œfear”: β€œundermining”,
22
- β€œdisgust”: β€œundermining”,
23
- β€œshame”: β€œundermining”,
24
- β€œguilt”: β€œundermining”
25
  }
26
 
27
- β€” Abuse Detection Model β€”
28
-
29
- model_name = β€œSamanthaStorm/autotrain-jlpi4-mllvp”
30
  model = RobertaForSequenceClassification.from_pretrained(model_name, trust_remote_code=True)
31
  tokenizer = RobertaTokenizer.from_pretrained(model_name, trust_remote_code=True)
32
 
33
  LABELS = [
34
- β€œblame shifting”, β€œcontradictory statements”, β€œcontrol”, β€œdismissiveness”,
35
- β€œgaslighting”, β€œguilt tripping”, β€œinsults”, β€œobscure language”,
36
- β€œprojection”, β€œrecovery phase”, β€œthreat”
37
  ]
38
 
39
  THRESHOLDS = {
40
- β€œblame shifting”: 0.3,
41
- β€œcontradictory statements”: 0.32,
42
- β€œcontrol”: 0.48,
43
- β€œdismissiveness”: 0.45,
44
- β€œgaslighting”: 0.30,
45
- β€œguilt tripping”: 0.20,
46
- β€œinsults”: 0.34,
47
- β€œobscure language”: 0.25,
48
- β€œprojection”: 0.35,
49
- β€œrecovery phase”: 0.25,
50
- β€œthreat”: 0.25
51
  }
52
 
53
  PATTERN_WEIGHTS = {
54
- β€œgaslighting”: 1.3,
55
- β€œcontrol”: 1.2,
56
- β€œdismissiveness”: 0.8,
57
- β€œblame shifting”: 0.8,
58
- β€œcontradictory statements”: 0.75
59
  }
60
 
61
  EXPLANATIONS = {
62
- β€œblame shifting”: β€œBlame-shifting is when one person redirects responsibility onto someone else to avoid accountability.”,
63
- β€œcontradictory statements”: β€œContradictory statements confuse the listener by flipping positions or denying previous claims.”,
64
- β€œcontrol”: β€œControl restricts another person’s autonomy through coercion, manipulation, or threats.”,
65
- β€œdismissiveness”: β€œDismissiveness is belittling or disregarding another person’s feelings, needs, or opinions.”,
66
- β€œgaslighting”: β€œGaslighting involves making someone question their own reality, memory, or perceptions.”,
67
- β€œguilt tripping”: β€œGuilt-tripping uses guilt to manipulate someone’s actions or decisions.”,
68
- β€œinsults”: β€œInsults are derogatory or demeaning remarks meant to shame, belittle, or hurt someone.”,
69
- β€œobscure language”: β€œObscure language manipulates through complexity, vagueness, or superiority to confuse the other person.”,
70
- β€œprojection”: β€œProjection accuses someone else of the very behaviors or intentions the speaker is exhibiting.”,
71
- β€œrecovery phase”: β€œRecovery phase statements attempt to soothe or reset tension without acknowledging harm or change.”,
72
- β€œthreat”: β€œThreats use fear of harm (physical, emotional, or relational) to control or intimidate someone.”
73
  }
74
 
75
  RISK_SNIPPETS = {
76
- β€œlow”: (
77
- β€œπŸŸ’ Risk Level: Low”,
78
- β€œThe language patterns here do not strongly indicate abuse.”,
79
- β€œContinue to check in with yourself and notice how you feel in response to repeated patterns.”
80
- ),
81
- β€œmoderate”: (
82
- β€œβš οΈ Risk Level: Moderate to High”,
83
- β€œThis language includes control, guilt, or reversal tactics.”,
84
- β€œThese patterns often lead to emotional confusion and reduced self-trust. Document these messages or talk with someone safe.”
85
- ),
86
- β€œhigh”: (
87
- β€œπŸ›‘ Risk Level: High”,
88
- β€œLanguage includes threats or coercive control, which are strong indicators of escalation.”,
89
- β€œConsider creating a safety plan or contacting a support line. Trust your sense of unease.”
90
- )
91
  }
92
 
93
  def generate_risk_snippet(abuse_score, top_label):
94
- if abuse_score >= 85:
95
- risk_level = β€œhigh”
96
- elif abuse_score >= 60:
97
- risk_level = β€œmoderate”
98
- else:
99
- risk_level = β€œlow”
100
- title, summary, advice = RISK_SNIPPETS[risk_level]
101
- return f”\n\n{title}\n{summary} (Pattern: {top_label})\nπŸ’‘ {advice}”
102
-
103
- β€” DARVO Detection β€”
104
 
 
105
  DARVO_PATTERNS = {
106
- β€œblame shifting”, β€œprojection”, β€œdismissiveness”, β€œguilt tripping”, β€œcontradictory statements”
107
  }
108
 
109
  DARVO_MOTIFS = [
110
- β€œi guess i’m the bad guy”, β€œafter everything i’ve done”, β€œyou always twist everything”,
111
- β€œso now it’s all my fault”, β€œi’m the villain”, β€œi’m always wrong”, β€œyou never listen”,
112
- β€œyou’re attacking me”, β€œi’m done trying”, β€œi’m the only one who cares”
113
  ]
114
 
115
  def detect_contradiction(message):
116
- contradiction_phrases = [
117
- (r”\b(i love you).{0,15}(i hate you|you ruin everything)”, re.IGNORECASE),
118
- (r”\b(i’m sorry).{0,15}(but you|if you hadn’t)”, re.IGNORECASE),
119
- (r”\b(i’m trying).{0,15}(you never|why do you)”, re.IGNORECASE),
120
- (r”\b(do what you want).{0,15}(you’ll regret it|i always give everything)”, re.IGNORECASE),
121
- (r”\b(i don’t care).{0,15}(you never think of me)”, re.IGNORECASE),
122
- (r”\b(i guess i’m just).{0,15}(the bad guy|worthless|never enough)”, re.IGNORECASE),
123
- ]
124
- return any(re.search(pattern, message, flags) for pattern, flags in contradiction_phrases)
 
 
 
 
 
125
 
126
  def calculate_darvo_score(patterns, sentiment_before, sentiment_after, motifs_found, contradiction_flag=False):
127
- pattern_hits = len([p.lower() for p in patterns if p.lower() in DARVO_PATTERNS])
128
- pattern_score = pattern_hits / len(DARVO_PATTERNS)
129
- sentiment_shift_score = max(0.0, sentiment_after - sentiment_before)
130
- motif_hits = len([m.lower() for m in motifs_found if m.lower() in DARVO_MOTIFS])
131
- motif_score = motif_hits / len(DARVO_MOTIFS)
132
- contradiction_score = 1.0 if contradiction_flag else 0.0
133
- darvo_score = (
134
- 0.3 * pattern_score +
135
- 0.3 * sentiment_shift_score +
136
- 0.25 * motif_score +
137
- 0.15 * contradiction_score
138
- )
139
- return round(min(darvo_score, 1.0), 3)
140
 
 
141
  def custom_sentiment(text):
142
- input_ids = sentiment_tokenizer(f”emotion: {text}”, return_tensors=β€œpt”).input_ids
143
- with torch.no_grad():
144
- outputs = sentiment_model.generate(input_ids)
145
- emotion = sentiment_tokenizer.decode(outputs[0], skip_special_tokens=True).strip().lower()
146
- sentiment = EMOTION_TO_SENTIMENT.get(emotion, β€œundermining”)
147
- return {β€œlabel”: sentiment, β€œemotion”: emotion}
148
 
 
149
  def calculate_abuse_level(scores, thresholds, motif_hits=None, flag_multiplier=1.0):
150
- weighted_scores = [score * PATTERN_WEIGHTS.get(label, 1.0) for label, score in zip(LABELS, scores) if score > thresholds[label]]
151
- base_score = round(np.mean(weighted_scores) * 100, 2) if weighted_scores else 0.0
152
- base_score *= flag_multiplier
153
- return min(base_score, 100.0)
 
154
 
 
155
  def analyze_single_message(text, thresholds, motif_flags):
156
- motif_hits, matched_phrases = detect_motifs(text)
157
- sentiment = custom_sentiment(text)
158
- sentiment_score = 0.5 if sentiment[β€œlabel”] == β€œundermining” else 0.0
159
- print(f”Detected emotion: {sentiment[β€˜emotion’]} β†’ sentiment: {sentiment[β€˜label’]}”)
160
-
161
- adjusted_thresholds = {k: v * 0.8 for k, v in thresholds.items()} if sentiment["label"] == "undermining" else thresholds.copy()
162
- inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
163
- with torch.no_grad():
164
- outputs = model(**inputs)
165
- scores = torch.sigmoid(outputs.logits.squeeze(0)).numpy()
166
- threshold_labels = [label for label, score in zip(LABELS, scores) if score > adjusted_thresholds[label]]
167
- phrase_labels = [label for label, _ in matched_phrases]
168
- pattern_labels_used = list(set(threshold_labels + phrase_labels))
169
- abuse_level = calculate_abuse_level(scores, adjusted_thresholds, motif_hits)
170
- top_patterns = sorted([(label, score) for label, score in zip(LABELS, scores)], key=lambda x: x[1], reverse=True)[:2]
171
- motif_phrases = [text for _, text in matched_phrases]
172
- contradiction_flag = detect_contradiction(text)
173
- darvo_score = calculate_darvo_score(pattern_labels_used, 0.0, sentiment_score, motif_phrases, contradiction_flag)
174
- return abuse_level, pattern_labels_used, top_patterns, darvo_score, sentiment
175
 
 
176
  def analyze_composite(msg1, msg2, msg3, flags):
177
- thresholds = THRESHOLDS.copy()
178
- messages = [msg1, msg2, msg3]
179
- active_messages = [m for m in messages if m.strip()]
180
- if not active_messages:
181
- return β€œPlease enter at least one message.”
182
 
183
- results = []
184
- sentiment_labels = []
185
- sentiment_score_total = 0.0
186
- for m in active_messages:
187
- result = analyze_single_message(m, thresholds, flags)
188
- print(f"Message: {m}")
189
- print(f"Sentiment result: {result[4]}")
190
- results.append(result)
191
- sentiment_labels.append(result[4]["label"])
192
- if result[4]["label"] == "undermining":
193
- sentiment_score_total += 0.5
194
 
195
- undermining_count = sentiment_labels.count("undermining")
196
- supportive_count = sentiment_labels.count("supportive")
197
- if undermining_count > supportive_count:
198
- thresholds = {k: v * 0.9 for k, v in thresholds.items()}
199
- elif undermining_count and supportive_count:
200
- thresholds = {k: v * 0.95 for k, v in thresholds.items()}
201
- print("βš–οΈ Detected conflicting sentiment across messages.")
202
 
203
- abuse_scores = [r[0] for r in results]
204
- darvo_scores = [r[3] for r in results]
205
- average_darvo = round(sum(darvo_scores) / len(darvo_scores), 3)
206
- base_score = sum(abuse_scores) / len(abuse_scores)
207
- label_sets = [[label for label, _ in r[2]] for r in results]
208
- label_counts = {label: sum(label in s for s in label_sets) for label in set().union(*label_sets)}
209
- top_label = max(label_counts.items(), key=lambda x: x[1])
210
- top_explanation = EXPLANATIONS.get(top_label[0], "")
211
- flag_weights = {
212
- "They've threatened harm": 6,
213
- "They isolate me": 5,
214
- "I’ve changed my behavior out of fear": 4,
215
- "They monitor/follow me": 4,
216
- "I feel unsafe when alone with them": 6
217
- }
218
- flag_boost = sum(flag_weights.get(f, 3) for f in flags) / len(active_messages)
219
- composite_score = min(base_score + flag_boost, 100)
220
- if len(active_messages) == 1:
221
- composite_score *= 0.85
222
- elif len(active_messages) == 2:
223
- composite_score *= 0.93
224
- composite_score = round(min(composite_score, 100), 2)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
225
 
226
- result = f"These messages show a pattern of **{top_label[0]}** and are estimated to be {composite_score}% likely abusive."
227
- if top_explanation:
228
- result += f"\nβ€’ {top_explanation}"
229
- if average_darvo > 0.25:
230
- darvo_descriptor = "moderate" if average_darvo < 0.65 else "high"
231
- result += f"\n\nDARVO Score: {average_darvo} β†’ This indicates a **{darvo_descriptor} likelihood** of narrative reversal (DARVO), where the speaker may be denying, attacking, or reversing blame."
232
- result += generate_risk_snippet(composite_score, top_label[0])
233
- if undermining_count and supportive_count:
234
- result += "\n\nβš–οΈ These messages contain **conflicting emotional tones** β€” this may indicate mixed signals, ambivalence, or a push-pull dynamic. Use caution interpreting any one message alone."
235
- return result
236
 
 
237
  textbox_inputs = [
238
- gr.Textbox(label=β€œMessage 1”),
239
- gr.Textbox(label=β€œMessage 2”),
240
- gr.Textbox(label=β€œMessage 3”)
241
  ]
242
 
243
- checkboxes = gr.CheckboxGroup(label=β€œContextual Flags”, choices=[
244
- β€œThey’ve threatened harm”, β€œThey isolate me”, β€œI’ve changed my behavior out of fear”,
245
- β€œThey monitor/follow me”, β€œI feel unsafe when alone with them”
246
  ])
247
 
248
  iface = gr.Interface(
249
- fn=analyze_composite,
250
- inputs=textbox_inputs + [checkboxes],
251
- outputs=gr.Textbox(label=β€œResults”),
252
- title=β€œAbuse Pattern Detector (Multi-Message)”,
253
- allow_flagging=β€œmanual”
254
  )
255
 
256
- if name == β€œmain”:
257
- iface.launch()
 
6
  from motif_tagging import detect_motifs
7
  import re
8
 
9
# --- Sentiment Model: T5-based Emotion Classifier ---
# Single source of truth for the HF model id so tokenizer and model cannot
# drift apart (the literal was previously duplicated on both lines).
SENTIMENT_MODEL_NAME = "mrm8488/t5-base-finetuned-emotion"
sentiment_tokenizer = AutoTokenizer.from_pretrained(SENTIMENT_MODEL_NAME)
sentiment_model = AutoModelForSeq2SeqLM.from_pretrained(SENTIMENT_MODEL_NAME)
 
12
 
13
# Collapse the T5 emotion label into the coarse binary sentiment used by the
# rest of the pipeline ("supportive" vs "undermining").
_SUPPORTIVE_EMOTIONS = ("joy", "love", "surprise", "neutral")
_UNDERMINING_EMOTIONS = ("sadness", "anger", "fear", "disgust", "shame", "guilt")
EMOTION_TO_SENTIMENT = {
    **{emotion: "supportive" for emotion in _SUPPORTIVE_EMOTIONS},
    **{emotion: "undermining" for emotion in _UNDERMINING_EMOTIONS},
}
25
 
26
# --- Abuse Detection Model ---
# Multi-label RoBERTa classifier; its logits are zipped with LABELS (below)
# downstream, so the label order there must match the model's output heads.
model_name = "SamanthaStorm/autotrain-jlpi4-mllvp"
tokenizer = RobertaTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = RobertaForSequenceClassification.from_pretrained(model_name, trust_remote_code=True)
30
 
31
# Output labels of the abuse classifier, one per line, in the exact
# (alphabetical) order the model emits its logits.
LABELS = [
    "blame shifting",
    "contradictory statements",
    "control",
    "dismissiveness",
    "gaslighting",
    "guilt tripping",
    "insults",
    "obscure language",
    "projection",
    "recovery phase",
    "threat",
]
36
 
37
# Per-label sigmoid-score cutoffs; a label counts as detected only when its
# score exceeds its threshold. Keys must cover every entry in LABELS.
THRESHOLDS = {
    "blame shifting": 0.3,
    "contradictory statements": 0.32,
    "control": 0.48,
    "dismissiveness": 0.45,
    "gaslighting": 0.30,
    "guilt tripping": 0.20,
    "insults": 0.34,
    "obscure language": 0.25,
    "projection": 0.35,
    "recovery phase": 0.25,
    "threat": 0.25,
}
50
 
51
# Multipliers applied to a label's score when averaging the abuse level;
# labels not listed here implicitly weigh 1.0.
PATTERN_WEIGHTS = {
    "gaslighting": 1.3,
    "control": 1.2,
    "dismissiveness": 0.8,
    "blame shifting": 0.8,
    "contradictory statements": 0.75,
}
58
 
59
# One-sentence, user-facing description per abuse label, appended to the
# report for the dominant pattern.
EXPLANATIONS = {
    "blame shifting": "Blame-shifting is when one person redirects responsibility onto someone else to avoid accountability.",
    "contradictory statements": "Contradictory statements confuse the listener by flipping positions or denying previous claims.",
    "control": "Control restricts another person’s autonomy through coercion, manipulation, or threats.",
    "dismissiveness": "Dismissiveness is belittling or disregarding another person’s feelings, needs, or opinions.",
    "gaslighting": "Gaslighting involves making someone question their own reality, memory, or perceptions.",
    "guilt tripping": "Guilt-tripping uses guilt to manipulate someone’s actions or decisions.",
    "insults": "Insults are derogatory or demeaning remarks meant to shame, belittle, or hurt someone.",
    "obscure language": "Obscure language manipulates through complexity, vagueness, or superiority to confuse the other person.",
    "projection": "Projection accuses someone else of the very behaviors or intentions the speaker is exhibiting.",
    "recovery phase": "Recovery phase statements attempt to soothe or reset tension without acknowledging harm or change.",
    "threat": "Threats use fear of harm (physical, emotional, or relational) to control or intimidate someone.",
}
72
 
73
# (title, summary, advice) triple per risk band, rendered by
# generate_risk_snippet below.
RISK_SNIPPETS = {
    "low": (
        "🟒 Risk Level: Low",
        "The language patterns here do not strongly indicate abuse.",
        "Continue to check in with yourself and notice how you feel in response to repeated patterns."
    ),
    "moderate": (
        "⚠️ Risk Level: Moderate to High",
        "This language includes control, guilt, or reversal tactics.",
        "These patterns often lead to emotional confusion and reduced self-trust. Document these messages or talk with someone safe."
    ),
    "high": (
        "πŸ›‘ Risk Level: High",
        "Language includes threats or coercive control, which are strong indicators of escalation.",
        "Consider creating a safety plan or contacting a support line. Trust your sense of unease."
    )
}

def generate_risk_snippet(abuse_score, top_label):
    """Format the risk-band snippet for a composite abuse score.

    abuse_score: composite score on a 0-100 scale (>= 85 high, >= 60 moderate,
        otherwise low).
    top_label: dominant pattern name, interpolated into the summary line.
    Returns a multi-line string ready to append to the report.
    """
    band = "high" if abuse_score >= 85 else "moderate" if abuse_score >= 60 else "low"
    title, summary, advice = RISK_SNIPPETS[band]
    return f"\n\n{title}\n{summary} (Pattern: **{top_label}**)\nπŸ’‘ {advice}"
 
 
100
 
101
# --- DARVO Detection ---
# Labels that commonly co-occur in DARVO (Deny, Attack, Reverse Victim and
# Offender) narratives.
DARVO_PATTERNS = {
    "blame shifting", "projection", "dismissiveness", "guilt tripping", "contradictory statements"
}

# Literal phrases treated as DARVO motifs; membership tests below are
# case-insensitive (candidates are lowercased first).
DARVO_MOTIFS = [
    "i guess i’m the bad guy", "after everything i’ve done", "you always twist everything",
    "so now it’s all my fault", "i’m the villain", "i’m always wrong", "you never listen",
    "you’re attacking me", "i’m done trying", "i’m the only one who cares"
]

def detect_contradiction(message):
    """Return True when *message* pairs an affirming phrase with an
    undermining one within a short (0-15 character) window.
    """
    paired_phrases = [
        (r"\b(i love you).{0,15}(i hate you|you ruin everything)", re.IGNORECASE),
        (r"\b(i’m sorry).{0,15}(but you|if you hadn’t)", re.IGNORECASE),
        (r"\b(i’m trying).{0,15}(you never|why do you)", re.IGNORECASE),
        (r"\b(do what you want).{0,15}(you’ll regret it|i always give everything)", re.IGNORECASE),
        (r"\b(i don’t care).{0,15}(you never think of me)", re.IGNORECASE),
        (r"\b(i guess i’m just).{0,15}(the bad guy|worthless|never enough)", re.IGNORECASE),
    ]
    return any(re.search(rx, message, rx_flags) for rx, rx_flags in paired_phrases)

def calculate_darvo_score(patterns, sentiment_before, sentiment_after, motifs_found, contradiction_flag=False):
    """Blend four DARVO signals into a single score in [0.0, 1.0].

    patterns: detected abuse-label names (matched against DARVO_PATTERNS).
    sentiment_before / sentiment_after: numeric sentiment values; only a
        positive shift (after - before) contributes.
    motifs_found: candidate phrases matched against DARVO_MOTIFS.
    contradiction_flag: output of detect_contradiction().
    Returns the weighted sum (0.3 patterns, 0.3 shift, 0.25 motifs,
    0.15 contradiction), capped at 1.0 and rounded to 3 decimals.
    """
    pattern_score = sum(1 for p in patterns if p.lower() in DARVO_PATTERNS) / len(DARVO_PATTERNS)
    shift_score = max(0.0, sentiment_after - sentiment_before)
    motif_score = sum(1 for m in motifs_found if m.lower() in DARVO_MOTIFS) / len(DARVO_MOTIFS)
    contradiction_score = 1.0 if contradiction_flag else 0.0
    blended = (
        0.3 * pattern_score
        + 0.3 * shift_score
        + 0.25 * motif_score
        + 0.15 * contradiction_score
    )
    return round(min(blended, 1.0), 3)
142
 
143
# --- Sentiment Function ---
def custom_sentiment(text):
    """Classify *text* with the T5 emotion model and collapse the emotion to a
    binary sentiment via EMOTION_TO_SENTIMENT.

    Emotions missing from the mapping default to "undermining".
    Returns {"label": <"supportive"|"undermining">, "emotion": <raw emotion>}.
    """
    prompt_ids = sentiment_tokenizer(f"emotion: {text}", return_tensors="pt").input_ids
    with torch.no_grad():
        generated = sentiment_model.generate(prompt_ids)
    emotion = sentiment_tokenizer.decode(generated[0], skip_special_tokens=True).strip().lower()
    return {"label": EMOTION_TO_SENTIMENT.get(emotion, "undermining"), "emotion": emotion}
151
 
152
# --- Abuse Scoring ---
def calculate_abuse_level(scores, thresholds, motif_hits=None, flag_multiplier=1.0):
    """Average the weighted scores of every label that clears its threshold,
    scaled to 0-100 and capped at 100.

    scores: per-label scores aligned with LABELS.
    thresholds: per-label cutoffs (must contain every LABELS entry).
    motif_hits: accepted for interface compatibility but not used here.
    flag_multiplier: scales the averaged score before capping.
    Returns 0.0 when no label clears its threshold.
    """
    passing = []
    for label, score in zip(LABELS, scores):
        if score > thresholds[label]:
            passing.append(score * PATTERN_WEIGHTS.get(label, 1.0))
    base_score = round(np.mean(passing) * 100, 2) if passing else 0.0
    return min(base_score * flag_multiplier, 100.0)
159
 
160
# --- Single Message Analysis ---
def analyze_single_message(text, thresholds, motif_flags):
    """Run the full per-message pipeline: motif tagging, sentiment, abuse
    classification, and DARVO scoring.

    text: the message to analyze.
    thresholds: base per-label cutoffs (THRESHOLDS or a rescaled copy).
    motif_flags: NOTE(review) - currently unused by this function; confirm
        whether it should feed the scoring.
    Returns (abuse_level, pattern_labels_used, top_patterns, darvo_score, sentiment).
    """
    motif_hits, matched_phrases = detect_motifs(text)
    sentiment = custom_sentiment(text)
    undermining = sentiment["label"] == "undermining"
    sentiment_score = 0.5 if undermining else 0.0
    print(f"Detected emotion: {sentiment['emotion']} β†’ sentiment: {sentiment['label']}")

    # An undermining tone lowers every threshold by 20% (more sensitive pass).
    if undermining:
        adjusted_thresholds = {label: cutoff * 0.8 for label, cutoff in thresholds.items()}
    else:
        adjusted_thresholds = thresholds.copy()

    encoded = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        logits = model(**encoded).logits
    scores = torch.sigmoid(logits.squeeze(0)).numpy()

    # Union of model detections and phrase-matched motif labels.
    threshold_labels = [label for label, score in zip(LABELS, scores) if score > adjusted_thresholds[label]]
    phrase_labels = [label for label, _ in matched_phrases]
    pattern_labels_used = list(set(threshold_labels + phrase_labels))

    abuse_level = calculate_abuse_level(scores, adjusted_thresholds, motif_hits)
    top_patterns = sorted(zip(LABELS, scores), key=lambda pair: pair[1], reverse=True)[:2]

    motif_phrases = [phrase for _, phrase in matched_phrases]
    contradiction_flag = detect_contradiction(text)
    darvo_score = calculate_darvo_score(pattern_labels_used, 0.0, sentiment_score, motif_phrases, contradiction_flag)
    return abuse_level, pattern_labels_used, top_patterns, darvo_score, sentiment
 
180
 
181
# --- Composite Analysis ---
def analyze_composite(msg1, msg2, msg3, flags):
    """Analyze up to three messages plus contextual flags and return a
    human-readable risk report.

    msg1..msg3: message texts; blank/whitespace-only entries are ignored.
    flags: contextual-flag strings selected in the checkbox group.
    Returns the report string (or a prompt when no message was entered).
    """
    thresholds = THRESHOLDS.copy()
    messages = [msg1, msg2, msg3]
    active_messages = [m for m in messages if m.strip()]
    if not active_messages:
        return "Please enter at least one message."

    results = []
    sentiment_labels = []
    for message in active_messages:
        result = analyze_single_message(message, thresholds, flags)
        results.append(result)
        sentiment_labels.append(result[4]["label"])

    # Conflicting-tone diagnostic.
    undermining_count = sentiment_labels.count("undermining")
    supportive_count = sentiment_labels.count("supportive")
    # NOTE(review): a previous revision rescaled `thresholds` here (and kept a
    # running sentiment_score_total), but every message was already analyzed
    # above, so neither value was ever read again — the dead stores were
    # removed; only the original conflict print remains, under the same
    # condition it fired before (the elif branch of the old if/elif).
    if undermining_count <= supportive_count and undermining_count and supportive_count:
        print("βš–οΈ Detected conflicting sentiment across messages.")

    abuse_scores = [r[0] for r in results]
    darvo_scores = [r[3] for r in results]
    average_darvo = round(sum(darvo_scores) / len(darvo_scores), 3)
    base_score = sum(abuse_scores) / len(abuse_scores)

    # Dominant label = the one appearing most often among each message's
    # top-2 patterns.
    label_sets = [[label for label, _ in r[2]] for r in results]
    label_counts = {label: sum(label in s for s in label_sets) for label in set().union(*label_sets)}
    top_label = max(label_counts.items(), key=lambda x: x[1])
    top_explanation = EXPLANATIONS.get(top_label[0], "")

    # Contextual safety flags add a boost averaged over the message count;
    # unknown flag strings count 3.
    flag_weights = {
        "They've threatened harm": 6,
        "They isolate me": 5,
        "I’ve changed my behavior out of fear": 4,
        "They monitor/follow me": 4,
        "I feel unsafe when alone with them": 6
    }
    flag_boost = sum(flag_weights.get(f, 3) for f in flags) / len(active_messages)
    composite_score = min(base_score + flag_boost, 100)
    # Dampen confidence when fewer messages are available.
    if len(active_messages) == 1:
        composite_score *= 0.85
    elif len(active_messages) == 2:
        composite_score *= 0.93
    composite_score = round(min(composite_score, 100), 2)

    result = f"These messages show a pattern of **{top_label[0]}** and are estimated to be {composite_score}% likely abusive."
    if top_explanation:
        result += f"\nβ€’ {top_explanation}"
    if average_darvo > 0.25:
        darvo_descriptor = "moderate" if average_darvo < 0.65 else "high"
        result += f"\n\nDARVO Score: {average_darvo} β†’ This indicates a **{darvo_descriptor} likelihood** of narrative reversal (DARVO), where the speaker may be denying, attacking, or reversing blame."
    result += generate_risk_snippet(composite_score, top_label[0])

    if undermining_count and supportive_count:
        result += "\n\nβš–οΈ These messages contain **conflicting emotional tones** β€” this may indicate mixed signals, ambivalence, or a push-pull dynamic. Use caution interpreting any one message alone."

    return result
 
 
 
 
 
 
 
 
 
247
 
248
# --- Gradio Interface ---
# Three free-text message boxes feeding analyze_composite.
textbox_inputs = [gr.Textbox(label=f"Message {slot}") for slot in range(1, 4)]

# Contextual safety flags; the strings must match the flag_weights keys used
# inside analyze_composite.
checkboxes = gr.CheckboxGroup(label="Contextual Flags", choices=[
    "They've threatened harm", "They isolate me", "I’ve changed my behavior out of fear",
    "They monitor/follow me", "I feel unsafe when alone with them"
])

iface = gr.Interface(
    fn=analyze_composite,
    inputs=textbox_inputs + [checkboxes],
    outputs=gr.Textbox(label="Results"),
    title="Abuse Pattern Detector (Multi-Message)",
    allow_flagging="manual"
)

if __name__ == "__main__":
    iface.launch()