Spaces:

SamanthaStorm
/

Tether

Running on Zero

App Files Files Community

SamanthaStorm committed on Apr 21

Commit

c6c79a8

verified ·

1 Parent(s): 8240c34

Update app.py

Browse files

Files changed (1) hide show

app.py +50 -128

app.py CHANGED Viewed

@@ -1,8 +1,7 @@
 import gradio as gr
 import torch
 import numpy as np
-from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
-from transformers import RobertaForSequenceClassification, RobertaTokenizer
 from motif_tagging import detect_motifs
 import re
@@ -31,47 +30,13 @@ PATTERN_WEIGHTS = {
     "blame shifting": 0.8, "contradictory statements": 0.75
 }
-EXPLANATIONS = {
-    "blame shifting": "Blame-shifting redirects responsibility to avoid accountability.",
-    "contradictory statements": "Flipping positions or denying previous claims.",
-    "control": "Attempts to restrict another person’s autonomy.",
-    "dismissiveness": "Disregarding or belittling someone’s feelings or needs.",
-    "gaslighting": "Manipulating someone into questioning their reality.",
-    "guilt tripping": "Using guilt to control or pressure.",
-    "insults": "Derogatory or demeaning language.",
-    "obscure language": "Vague, superior, or confusing language used manipulatively.",
-    "projection": "Accusing someone else of your own behaviors.",
-    "recovery phase": "Resetting tension without real change.",
-    "threat": "Using fear or harm to control or intimidate."
 }
-RISK_SNIPPETS = {
-    "low": (
-        "🟢 Risk Level: Low",
-        "The language patterns here do not strongly indicate abuse.",
-        "Check in with yourself and monitor for repeated patterns."
-    ),
-    "moderate": (
-        "⚠️ Risk Level: Moderate to High",
-        "Language includes control, guilt, or reversal tactics.",
-        "These patterns reduce self-trust. Document or talk with someone safe."
-    ),
-    "high": (
-        "🛑 Risk Level: High",
-        "Strong indicators of coercive control or threat present.",
-        "Consider building a safety plan or contacting support."
-    )
-}
-DARVO_PATTERNS = {
-    "blame shifting", "projection", "dismissiveness", "guilt tripping", "contradictory statements"
-}
-DARVO_MOTIFS = [
-    "i guess i’m the bad guy", "after everything i’ve done", "you always twist everything",
-    "so now it’s all my fault", "i’m the villain", "i’m always wrong", "you never listen",
-    "you’re attacking me", "i’m done trying", "i’m the only one who cares"
-]
 ESCALATION_QUESTIONS = [
     ("Partner has access to firearms or weapons", 4),
     ("Partner threatened to kill you", 3),
@@ -96,50 +61,19 @@ def detect_contradiction(message):
     ]
     return any(re.search(p, message, flags) for p, flags in patterns)
-def calculate_darvo_score(patterns, sentiment_before, sentiment_after, motifs_found, contradiction_flag=False):
-    pattern_hits = len([p for p in patterns if p in DARVO_PATTERNS])
-    pattern_score = pattern_hits / len(DARVO_PATTERNS)
-    sentiment_shift_score = max(0.0, sentiment_after - sentiment_before)
-    motif_hits = len([m for m in motifs_found if m.lower() in DARVO_MOTIFS])
-    motif_score = motif_hits / len(DARVO_MOTIFS)
-    contradiction_score = 1.0 if contradiction_flag else 0.0
-    return round(min(0.3 * pattern_score + 0.3 * sentiment_shift_score + 0.25 * motif_score + 0.15 * contradiction_score, 1.0), 3)
-def generate_risk_snippet(abuse_score, top_label, escalation_score):
-    if abuse_score >= 85 or escalation_score >= 16:
-        risk_level = "high"
-    elif abuse_score >= 60 or escalation_score >= 8:
-        risk_level = "moderate"
-    else:
-        risk_level = "low"
-    pattern_label = top_label.split(" – ")[0]
-    pattern_score = top_label.split(" – ")[1] if " – " in top_label else ""
-    base = f"\n\n🛑 Risk Level: {risk_level.capitalize()}\n"
-    base += f"This message shows strong indicators of **{pattern_label}**. "
-    if risk_level == "high":
-        base += "The language may reflect patterns of emotional control, even when expressed in soft or caring terms.\n"
-    elif risk_level == "moderate":
-        base += "There are signs of emotional pressure or indirect control that may escalate if repeated.\n"
-    else:
-        base += "The message does not strongly indicate abuse, but it's important to monitor for patterns.\n"
-    base += "\n💡 *Why this might be flagged:*\n"
-    base += (
-        "This message may seem supportive, but language like “Do you need me to come home?” can sometimes carry implied pressure, especially if declining leads to guilt, tension, or emotional withdrawal. "
-        "The model looks for patterns that reflect subtle coercion, obligation, or reversal dynamics—even when not overtly aggressive.\n"
-    )
-    base += f"\nDetected Pattern: **{pattern_label} ({pattern_score})**\n"
-    base += "🧠 You can review the pattern in context. This tool highlights possible dynamics—not judgments."
-    return base
-def analyze_single_message(text, thresholds, motif_flags):
     motif_hits, matched_phrases = detect_motifs(text)
     result = sst_pipeline(text)[0]
     sentiment = "supportive" if result['label'] == "POSITIVE" else "undermining"
     sentiment_score = result['score'] if sentiment == "undermining" else 0.0
@@ -157,13 +91,10 @@ def analyze_single_message(text, thresholds, motif_flags):
         outputs = model(**inputs)
     scores = torch.sigmoid(outputs.logits.squeeze(0)).numpy()
-    threshold_labels = []
-    for label, score in zip(LABELS, scores):
-        if label in {"control", "dismissiveness", "blame shifting"}:
-            if sentiment == "undermining" and result["score"] > 0.85 and score > adjusted_thresholds[label]:
-                threshold_labels.append(label)
-        elif score > adjusted_thresholds[label]:
-            threshold_labels.append(label)
     top_patterns = sorted(
         [(label, score) for label, score in zip(LABELS, scores)],
@@ -171,55 +102,46 @@ def analyze_single_message(text, thresholds, motif_flags):
         reverse=True
     )[:2]
-    pattern_labels = threshold_labels + [label for label, _ in matched_phrases]
-    darvo_score = calculate_darvo_score(pattern_labels, 0.0, sentiment_score, motifs, contradiction_flag)
-    print("\n--- Debug Info ---")
-    print(f"Text: {text}")
-    print(f"Sentiment: {sentiment} (raw: {result['label']}, score: {result['score']:.3f})")
-    print("Abuse Pattern Scores:")
-    for label, score in zip(LABELS, scores):
-        passed = "✅" if label in threshold_labels else "❌"
-        print(f"  {label:25} → {score:.3f} {passed}")
-    print(f"Motifs: {motifs}")
-    print(f"Contradiction: {contradiction_flag}")
-    print("------------------\n")
-    return (
-        np.mean([score for _, score in top_patterns]) * 100,
-        threshold_labels,
-        top_patterns,
-        darvo_score,
-        {"label": sentiment, "raw_label": result['label'], "score": result['score']}
-    )
 def analyze_composite(msg1, msg2, msg3, *answers_and_none):
     responses = answers_and_none[:len(ESCALATION_QUESTIONS)]
     none_selected = answers_and_none[-1]
     escalation_score = 0 if none_selected else sum(w for (_, w), a in zip(ESCALATION_QUESTIONS, responses) if a)
-    escalation_level = "High" if escalation_score >= 16 else "Moderate" if escalation_score >= 8 else "Low"
     messages = [msg1, msg2, msg3]
     active = [m for m in messages if m.strip()]
     if not active:
         return "Please enter at least one message."
-    results = [analyze_single_message(m, THRESHOLDS.copy(), []) for m in active]
     abuse_scores = [r[0] for r in results]
-    darvo_scores = [r[3] for r in results]
-    top_pattern = max(
-    [(label, score) for r in results for label, score in r[2]],
-    key=lambda x: x[1]
-    )
-    top_label = f"{top_pattern[0]} – {int(round(top_pattern[1] * 100))}%"
     composite_abuse = int(round(sum(abuse_scores) / len(abuse_scores)))
-    avg_darvo = round(sum(darvo_scores) / len(darvo_scores), 3)
     out = f"Abuse Intensity: {composite_abuse}%\n"
-    out += f"Escalation Potential: {escalation_level} ({escalation_score}/{sum(w for _, w in ESCALATION_QUESTIONS)})"
-    out += generate_risk_snippet(composite_abuse, top_label, escalation_score)
-    if avg_darvo > 0.25:
-        level = "moderate" if avg_darvo < 0.65 else "high"
-        out += f"\n\nDARVO Score: {avg_darvo} → This indicates a **{level} likelihood** of narrative reversal (DARVO), where the speaker may be denying, attacking, or reversing blame."
     return out
 textbox_inputs = [gr.Textbox(label=f"Message {i+1}") for i in range(3)]
@@ -235,4 +157,4 @@ iface = gr.Interface(
 )
 if __name__ == "__main__":
-    iface.launch()

 import gradio as gr
 import torch
 import numpy as np
+from transformers import pipeline, RobertaForSequenceClassification, RobertaTokenizer
 from motif_tagging import detect_motifs
 import re
     "blame shifting": 0.8, "contradictory statements": 0.75
 }
# Display text for each risk stage number returned by get_risk_stage().
# Every entry is a two-line string: a headline followed by a one-sentence
# explanation.
RISK_STAGE_LABELS = {
    1: (
        "🌀 Risk Stage: Tension-Building\n"
        "This message reflects rising emotional pressure or subtle control attempts."
    ),
    2: (
        "🔥 Risk Stage: Escalation\n"
        "This message includes direct or aggressive patterns, suggesting active harm."
    ),
    3: (
        "🌧️ Risk Stage: Reconciliation\n"
        "This message reflects a reset attempt—apologies or emotional repair without accountability."
    ),
    4: (
        "🌸 Risk Stage: Calm / Honeymoon\n"
        "This message appears supportive but may follow prior harm, minimizing it."
    ),
}
 ESCALATION_QUESTIONS = [
     ("Partner has access to firearms or weapons", 4),
     ("Partner threatened to kill you", 3),
     ]
     return any(re.search(p, message, flags) for p, flags in patterns)
def get_risk_stage(patterns, sentiment):
    """Pick a risk-stage number (1-4) from detected patterns and sentiment.

    The rules are checked in priority order and the first match wins:

    * "threat" or "insults" present            -> 2
    * "recovery phase" present                 -> 3
    * "control" or "guilt tripping" present    -> 1
    * sentiment is "supportive" and "projection" or
      "dismissiveness" present                 -> 4
    * anything else                            -> 1

    Args:
        patterns: Container of pattern label strings (tested with ``in``).
        sentiment: Sentiment label string; only "supportive" is special-cased.

    Returns:
        The stage number; the numbers are the keys of RISK_STAGE_LABELS.
    """
    def has_any(*labels):
        # True as soon as one of the given labels is found in ``patterns``.
        return any(label in patterns for label in labels)

    if has_any("threat", "insults"):
        return 2
    if has_any("recovery phase"):
        return 3
    if has_any("control", "guilt tripping"):
        return 1
    if sentiment == "supportive" and has_any("projection", "dismissiveness"):
        return 4
    # Fallback when no rule matched.
    return 1
+def analyze_single_message(text, thresholds):
     motif_hits, matched_phrases = detect_motifs(text)
     result = sst_pipeline(text)[0]
     sentiment = "supportive" if result['label'] == "POSITIVE" else "undermining"
     sentiment_score = result['score'] if sentiment == "undermining" else 0.0
         outputs = model(**inputs)
     scores = torch.sigmoid(outputs.logits.squeeze(0)).numpy()
+    threshold_labels = [
+        label for label, score in zip(LABELS, scores)
+        if score > adjusted_thresholds[label]
+    ]
     top_patterns = sorted(
         [(label, score) for label, score in zip(LABELS, scores)],
         reverse=True
     )[:2]
+    weighted_scores = [(PATTERN_WEIGHTS.get(label, 1.0) * score) for label, score in top_patterns]
+    abuse_score = np.mean(weighted_scores) * 100
+    stage = get_risk_stage(threshold_labels, sentiment)
+    return abuse_score, threshold_labels, top_patterns, result, stage
 def analyze_composite(msg1, msg2, msg3, *answers_and_none):
     """Analyze up to three messages plus the escalation checklist.

     Args:
         msg1, msg2, msg3: Message texts; blank or missing ones are ignored.
         *answers_and_none: One truthy/falsy answer per entry of
             ESCALATION_QUESTIONS, followed by a final "none of the above"
             flag that forces the escalation score to 0 when truthy.

     Returns:
         A multi-line report string (abuse intensity, escalation potential,
         top pattern, risk stage), or a prompt asking for input when every
         message is blank.
     """
     responses = answers_and_none[:len(ESCALATION_QUESTIONS)]
     none_selected = answers_and_none[-1]
     if none_selected:
         escalation_score = 0
     else:
         escalation_score = sum(
             weight
             for (_, weight), answered in zip(ESCALATION_QUESTIONS, responses)
             if answered
         )
     max_escalation = sum(weight for _, weight in ESCALATION_QUESTIONS)

     # Guard against None as well as empty / whitespace-only text.
     active = [m for m in (msg1, msg2, msg3) if m and m.strip()]
     if not active:
         return "Please enter at least one message."

     # Each result is (abuse_score, threshold_labels, top_patterns, result, stage).
     results = [analyze_single_message(m, THRESHOLDS.copy()) for m in active]
     abuse_scores = [r[0] for r in results]
     stages = [r[4] for r in results]

     # Most frequent stage; sorting makes ties deterministic (lowest stage wins).
     most_common_stage = max(sorted(set(stages)), key=stages.count)
     stage_text = RISK_STAGE_LABELS[most_common_stage]

     # Report the strongest pattern across ALL messages, not just the first one.
     top_name, top_score = max(
         (pattern for r in results for pattern in r[2]),
         key=lambda pattern: pattern[1],
     )
     top_label = f"{top_name} – {int(round(top_score * 100))}%"

     composite_abuse = int(round(sum(abuse_scores) / len(abuse_scores)))
     if composite_abuse >= 85 or escalation_score >= 16:
         risk_level = "high"
     elif composite_abuse >= 60 or escalation_score >= 8:
         risk_level = "moderate"
     else:
         risk_level = "low"

     # NOTE(review): the "Escalation Potential" label is driven by risk_level,
     # which also depends on the abuse score -- confirm this is intended.
     out = f"Abuse Intensity: {composite_abuse}%\n"
     out += f"Escalation Potential: {risk_level.capitalize()} ({escalation_score}/{max_escalation})\n"
     out += f"Top Pattern: {top_label}\n"
     out += f"\n{stage_text}"
     return out
 textbox_inputs = [gr.Textbox(label=f"Message {i+1}") for i in range(3)]
 )
 if __name__ == "__main__":
+    iface.launch()