SamanthaStorm committed on
Commit
ca1d104
·
verified ·
1 Parent(s): cbc7dd5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -10
app.py CHANGED
@@ -21,7 +21,7 @@ LABELS = [
21
  ]
22
 
23
  THRESHOLDS = {
24
- "blame shifting": 0.3, "contradictory statements": 0.32, "control": 0.52, "dismissiveness": 0.45,
25
  "gaslighting": 0.30, "guilt tripping": 0.20, "insults": 0.34, "obscure language": 0.25,
26
  "projection": 0.35, "recovery phase": 0.25, "threat": 0.25
27
  }
@@ -118,7 +118,6 @@ def generate_risk_snippet(abuse_score, top_label):
118
  def analyze_single_message(text, thresholds, motif_flags):
119
  motif_hits, matched_phrases = detect_motifs(text)
120
 
121
- # SST Sentiment
122
  result = sst_pipeline(text)[0]
123
  sentiment = "supportive" if result['label'] == "POSITIVE" else "undermining"
124
  sentiment_score = result['score'] if sentiment == "undermining" else 0.0
@@ -136,10 +135,14 @@ def analyze_single_message(text, thresholds, motif_flags):
136
  outputs = model(**inputs)
137
  scores = torch.sigmoid(outputs.logits.squeeze(0)).numpy()
138
 
139
- threshold_labels = [
140
- label for label, score in zip(LABELS, scores)
141
- if score > adjusted_thresholds[label]
142
- ]
 
 
 
 
143
  top_patterns = sorted(
144
  [(label, score) for label, score in zip(LABELS, scores)],
145
  key=lambda x: x[1],
@@ -148,17 +151,18 @@ def analyze_single_message(text, thresholds, motif_flags):
148
 
149
  pattern_labels = threshold_labels + [label for label, _ in matched_phrases]
150
  darvo_score = calculate_darvo_score(pattern_labels, 0.0, sentiment_score, motifs, contradiction_flag)
151
-
152
  print("\n--- Debug Info ---")
153
  print(f"Text: {text}")
154
  print(f"Sentiment: {sentiment} (raw: {result['label']}, score: {result['score']:.3f})")
155
  print("Abuse Pattern Scores:")
156
  for label, score in zip(LABELS, scores):
157
- passed = "✅" if score > adjusted_thresholds[label] else "❌"
158
  print(f" {label:25} → {score:.3f} {passed}")
159
  print(f"Motifs: {motifs}")
160
  print(f"Contradiction: {contradiction_flag}")
161
  print("------------------\n")
 
162
  return (
163
  np.mean([score for _, score in top_patterns]) * 100,
164
  threshold_labels,
@@ -166,7 +170,6 @@ def analyze_single_message(text, thresholds, motif_flags):
166
  darvo_score,
167
  {"label": sentiment, "raw_label": result['label'], "score": result['score']}
168
  )
169
-
170
  def analyze_composite(msg1, msg2, msg3, *answers_and_none):
171
  responses = answers_and_none[:len(ESCALATION_QUESTIONS)]
172
  none_selected = answers_and_none[-1]
@@ -191,7 +194,7 @@ def analyze_composite(msg1, msg2, msg3, *answers_and_none):
191
 
192
  out = f"Abuse Intensity: {composite_abuse}%\n"
193
  out += f"Escalation Potential: {escalation_level} ({escalation_score}/{sum(w for _, w in ESCALATION_QUESTIONS)})"
194
- out += generate_risk_snippet(composite_abuse, top_label)
195
  if avg_darvo > 0.25:
196
  level = "moderate" if avg_darvo < 0.65 else "high"
197
  out += f"\n\nDARVO Score: {avg_darvo} → This indicates a **{level} likelihood** of narrative reversal (DARVO), where the speaker may be denying, attacking, or reversing blame."
 
21
  ]
22
 
23
  THRESHOLDS = {
24
+ "blame shifting": 0.3, "contradictory statements": 0.32, "control": 0.48, "dismissiveness": 0.45,
25
  "gaslighting": 0.30, "guilt tripping": 0.20, "insults": 0.34, "obscure language": 0.25,
26
  "projection": 0.35, "recovery phase": 0.25, "threat": 0.25
27
  }
 
118
  def analyze_single_message(text, thresholds, motif_flags):
119
  motif_hits, matched_phrases = detect_motifs(text)
120
 
 
121
  result = sst_pipeline(text)[0]
122
  sentiment = "supportive" if result['label'] == "POSITIVE" else "undermining"
123
  sentiment_score = result['score'] if sentiment == "undermining" else 0.0
 
135
  outputs = model(**inputs)
136
  scores = torch.sigmoid(outputs.logits.squeeze(0)).numpy()
137
 
138
+ threshold_labels = []
139
+ for label, score in zip(LABELS, scores):
140
+ if label in {"control", "dismissiveness", "blame shifting"}:
141
+ if sentiment == "undermining" and result["score"] > 0.85 and score > adjusted_thresholds[label]:
142
+ threshold_labels.append(label)
143
+ elif score > adjusted_thresholds[label]:
144
+ threshold_labels.append(label)
145
+
146
  top_patterns = sorted(
147
  [(label, score) for label, score in zip(LABELS, scores)],
148
  key=lambda x: x[1],
 
151
 
152
  pattern_labels = threshold_labels + [label for label, _ in matched_phrases]
153
  darvo_score = calculate_darvo_score(pattern_labels, 0.0, sentiment_score, motifs, contradiction_flag)
154
+
155
  print("\n--- Debug Info ---")
156
  print(f"Text: {text}")
157
  print(f"Sentiment: {sentiment} (raw: {result['label']}, score: {result['score']:.3f})")
158
  print("Abuse Pattern Scores:")
159
  for label, score in zip(LABELS, scores):
160
+ passed = "✅" if label in threshold_labels else "❌"
161
  print(f" {label:25} → {score:.3f} {passed}")
162
  print(f"Motifs: {motifs}")
163
  print(f"Contradiction: {contradiction_flag}")
164
  print("------------------\n")
165
+
166
  return (
167
  np.mean([score for _, score in top_patterns]) * 100,
168
  threshold_labels,
 
170
  darvo_score,
171
  {"label": sentiment, "raw_label": result['label'], "score": result['score']}
172
  )
 
173
  def analyze_composite(msg1, msg2, msg3, *answers_and_none):
174
  responses = answers_and_none[:len(ESCALATION_QUESTIONS)]
175
  none_selected = answers_and_none[-1]
 
194
 
195
  out = f"Abuse Intensity: {composite_abuse}%\n"
196
  out += f"Escalation Potential: {escalation_level} ({escalation_score}/{sum(w for _, w in ESCALATION_QUESTIONS)})"
197
+ out += generate_risk_snippet(composite_abuse, top_label, escalation_score)
198
  if avg_darvo > 0.25:
199
  level = "moderate" if avg_darvo < 0.65 else "high"
200
  out += f"\n\nDARVO Score: {avg_darvo} → This indicates a **{level} likelihood** of narrative reversal (DARVO), where the speaker may be denying, attacking, or reversing blame."