SamanthaStorm committed on
Commit
ca1d104
·
verified ·
1 Parent(s): cbc7dd5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -10
app.py CHANGED
@@ -21,7 +21,7 @@ LABELS = [
21
  ]
22
 
23
  THRESHOLDS = {
24
- "blame shifting": 0.3, "contradictory statements": 0.32, "control": 0.52, "dismissiveness": 0.45,
25
  "gaslighting": 0.30, "guilt tripping": 0.20, "insults": 0.34, "obscure language": 0.25,
26
  "projection": 0.35, "recovery phase": 0.25, "threat": 0.25
27
  }
@@ -118,7 +118,6 @@ def generate_risk_snippet(abuse_score, top_label):
118
  def analyze_single_message(text, thresholds, motif_flags):
119
  motif_hits, matched_phrases = detect_motifs(text)
120
 
121
- # SST Sentiment
122
  result = sst_pipeline(text)[0]
123
  sentiment = "supportive" if result['label'] == "POSITIVE" else "undermining"
124
  sentiment_score = result['score'] if sentiment == "undermining" else 0.0
@@ -136,10 +135,14 @@ def analyze_single_message(text, thresholds, motif_flags):
136
  outputs = model(**inputs)
137
  scores = torch.sigmoid(outputs.logits.squeeze(0)).numpy()
138
 
139
- threshold_labels = [
140
- label for label, score in zip(LABELS, scores)
141
- if score > adjusted_thresholds[label]
142
- ]
 
 
 
 
143
  top_patterns = sorted(
144
  [(label, score) for label, score in zip(LABELS, scores)],
145
  key=lambda x: x[1],
@@ -148,17 +151,18 @@ def analyze_single_message(text, thresholds, motif_flags):
148
 
149
  pattern_labels = threshold_labels + [label for label, _ in matched_phrases]
150
  darvo_score = calculate_darvo_score(pattern_labels, 0.0, sentiment_score, motifs, contradiction_flag)
151
-
152
  print("\n--- Debug Info ---")
153
  print(f"Text: {text}")
154
  print(f"Sentiment: {sentiment} (raw: {result['label']}, score: {result['score']:.3f})")
155
  print("Abuse Pattern Scores:")
156
  for label, score in zip(LABELS, scores):
157
- passed = "✅" if score > adjusted_thresholds[label] else "❌"
158
  print(f" {label:25} → {score:.3f} {passed}")
159
  print(f"Motifs: {motifs}")
160
  print(f"Contradiction: {contradiction_flag}")
161
  print("------------------\n")
 
162
  return (
163
  np.mean([score for _, score in top_patterns]) * 100,
164
  threshold_labels,
@@ -166,7 +170,6 @@ def analyze_single_message(text, thresholds, motif_flags):
166
  darvo_score,
167
  {"label": sentiment, "raw_label": result['label'], "score": result['score']}
168
  )
169
-
170
  def analyze_composite(msg1, msg2, msg3, *answers_and_none):
171
  responses = answers_and_none[:len(ESCALATION_QUESTIONS)]
172
  none_selected = answers_and_none[-1]
@@ -191,7 +194,7 @@ def analyze_composite(msg1, msg2, msg3, *answers_and_none):
191
 
192
  out = f"Abuse Intensity: {composite_abuse}%\n"
193
  out += f"Escalation Potential: {escalation_level} ({escalation_score}/{sum(w for _, w in ESCALATION_QUESTIONS)})"
194
- out += generate_risk_snippet(composite_abuse, top_label)
195
  if avg_darvo > 0.25:
196
  level = "moderate" if avg_darvo < 0.65 else "high"
197
  out += f"\n\nDARVO Score: {avg_darvo} → This indicates a **{level} likelihood** of narrative reversal (DARVO), where the speaker may be denying, attacking, or reversing blame."
 
21
  ]
22
 
23
  THRESHOLDS = {
24
+ "blame shifting": 0.3, "contradictory statements": 0.32, "control": 0.48, "dismissiveness": 0.45,
25
  "gaslighting": 0.30, "guilt tripping": 0.20, "insults": 0.34, "obscure language": 0.25,
26
  "projection": 0.35, "recovery phase": 0.25, "threat": 0.25
27
  }
 
118
  def analyze_single_message(text, thresholds, motif_flags):
119
  motif_hits, matched_phrases = detect_motifs(text)
120
 
 
121
  result = sst_pipeline(text)[0]
122
  sentiment = "supportive" if result['label'] == "POSITIVE" else "undermining"
123
  sentiment_score = result['score'] if sentiment == "undermining" else 0.0
 
135
  outputs = model(**inputs)
136
  scores = torch.sigmoid(outputs.logits.squeeze(0)).numpy()
137
 
138
+ threshold_labels = []
139
+ for label, score in zip(LABELS, scores):
140
+ if label in {"control", "dismissiveness", "blame shifting"}:
141
+ if sentiment == "undermining" and result["score"] > 0.85 and score > adjusted_thresholds[label]:
142
+ threshold_labels.append(label)
143
+ elif score > adjusted_thresholds[label]:
144
+ threshold_labels.append(label)
145
+
146
  top_patterns = sorted(
147
  [(label, score) for label, score in zip(LABELS, scores)],
148
  key=lambda x: x[1],
 
151
 
152
  pattern_labels = threshold_labels + [label for label, _ in matched_phrases]
153
  darvo_score = calculate_darvo_score(pattern_labels, 0.0, sentiment_score, motifs, contradiction_flag)
154
+
155
  print("\n--- Debug Info ---")
156
  print(f"Text: {text}")
157
  print(f"Sentiment: {sentiment} (raw: {result['label']}, score: {result['score']:.3f})")
158
  print("Abuse Pattern Scores:")
159
  for label, score in zip(LABELS, scores):
160
+ passed = "✅" if label in threshold_labels else "❌"
161
  print(f" {label:25} → {score:.3f} {passed}")
162
  print(f"Motifs: {motifs}")
163
  print(f"Contradiction: {contradiction_flag}")
164
  print("------------------\n")
165
+
166
  return (
167
  np.mean([score for _, score in top_patterns]) * 100,
168
  threshold_labels,
 
170
  darvo_score,
171
  {"label": sentiment, "raw_label": result['label'], "score": result['score']}
172
  )
 
173
  def analyze_composite(msg1, msg2, msg3, *answers_and_none):
174
  responses = answers_and_none[:len(ESCALATION_QUESTIONS)]
175
  none_selected = answers_and_none[-1]
 
194
 
195
  out = f"Abuse Intensity: {composite_abuse}%\n"
196
  out += f"Escalation Potential: {escalation_level} ({escalation_score}/{sum(w for _, w in ESCALATION_QUESTIONS)})"
197
+ out += generate_risk_snippet(composite_abuse, top_label, escalation_score)
198
  if avg_darvo > 0.25:
199
  level = "moderate" if avg_darvo < 0.65 else "high"
200
  out += f"\n\nDARVO Score: {avg_darvo} → This indicates a **{level} likelihood** of narrative reversal (DARVO), where the speaker may be denying, attacking, or reversing blame."