SamanthaStorm committed
Commit 83bf881 · verified · 1 Parent(s): 88862a6

Update app.py

Files changed (1)
  app.py +33 -99
app.py CHANGED
@@ -72,43 +72,6 @@ def get_risk_stage(patterns, sentiment):
         return 4
     return 1
 
-def analyze_single_message(text, thresholds):
-    motif_hits, matched_phrases = detect_motifs(text)
-    result = sst_pipeline(text)[0]
-    sentiment = "supportive" if result['label'] == "POSITIVE" else "undermining"
-    sentiment_score = result['score'] if sentiment == "undermining" else 0.0
-
-    adjusted_thresholds = {
-        k: v + 0.05 if sentiment == "supportive" else v
-        for k, v in thresholds.items()
-    }
-
-    contradiction_flag = detect_contradiction(text)
-    motifs = [phrase for _, phrase in matched_phrases]
-
-    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
-    with torch.no_grad():
-        outputs = model(**inputs)
-    scores = torch.sigmoid(outputs.logits.squeeze(0)).numpy()
-
-    threshold_labels = [
-        label for label, score in zip(LABELS, scores)
-        if score > adjusted_thresholds[label]
-    ]
-
-    top_patterns = sorted(
-        [(label, score) for label, score in zip(LABELS, scores)],
-        key=lambda x: x[1],
-        reverse=True
-    )[:2]
-
-    weighted_scores = [(PATTERN_WEIGHTS.get(label, 1.0) * score) for label, score in top_patterns]
-    abuse_score = min(np.mean(weighted_scores) * 100, 100)
-
-    stage = get_risk_stage(threshold_labels, sentiment)
-
-    return abuse_score, threshold_labels, top_patterns, result, stage
-
 def generate_risk_snippet(abuse_score, top_label, escalation_score):
     if abuse_score >= 85 or escalation_score >= 16:
         risk_level = "high"
@@ -120,73 +83,44 @@ def generate_risk_snippet(abuse_score, top_label, escalation_score):
     pattern_label = top_label.split(" – ")[0]
     pattern_score = top_label.split(" – ")[1] if " – " in top_label else ""
 
-    base = f"\n\n🛑 Risk Level: {risk_level.capitalize()}\n"
+    WHY_FLAGGED = {
+        "control": "This message may reflect efforts to restrict someone’s autonomy, even if it's framed as concern or care.",
+        "gaslighting": "This message could be manipulating someone into questioning their perception or feelings.",
+        "dismissiveness": "This message may include belittling, invalidating, or ignoring the other person’s experience.",
+        "insults": "Direct insults often appear in escalating abusive dynamics and can erode emotional safety.",
+        "threat": "This message includes threatening language, which is a strong predictor of harm.",
+        "blame shifting": "This message may redirect responsibility to avoid accountability, especially during conflict.",
+        "guilt tripping": "This message may induce guilt in order to control or manipulate behavior.",
+        "recovery phase": "This message may be part of a tension-reset cycle, appearing kind but avoiding change.",
+        "projection": "This message may involve attributing the abuser’s own behaviors to the victim.",
+        "default": "This message contains language patterns that may affect safety, clarity, or emotional autonomy."
+    }
+
+    explanation = WHY_FLAGGED.get(pattern_label.lower(), WHY_FLAGGED["default"])
+
+    base = f"
+
+🛑 Risk Level: {risk_level.capitalize()}
+"
     base += f"This message shows strong indicators of **{pattern_label}**. "
 
     if risk_level == "high":
-        base += "The language may reflect patterns of emotional control, even when expressed in soft or caring terms.\n"
+        base += "The language may reflect patterns of emotional control, even when expressed in soft or caring terms.
+"
     elif risk_level == "moderate":
-        base += "There are signs of emotional pressure or indirect control that may escalate if repeated.\n"
+        base += "There are signs of emotional pressure or indirect control that may escalate if repeated.
+"
     else:
-        base += "The message does not strongly indicate abuse, but it's important to monitor for patterns.\n"
-
-    base += "\n💡 *Why this might be flagged:*\n"
-    base += (
-        "This message may seem supportive, but language like “Do you need me to come home?” can sometimes carry implied pressure, especially if declining leads to guilt, tension, or emotional withdrawal. "
-        "The model looks for patterns that reflect subtle coercion, obligation, or reversal dynamics—even when not overtly aggressive.\n"
-    )
-
-    base += f"\nDetected Pattern: **{pattern_label} ({pattern_score})**\n"
+        base += "The message does not strongly indicate abuse, but it's important to monitor for patterns.
+"
+
+    base += f"
+💡 *Why this might be flagged:*
+{explanation}
+"
+    base += f"
+Detected Pattern: **{pattern_label} ({pattern_score})**
+"
     base += "🧠 You can review the pattern in context. This tool highlights possible dynamics—not judgments."
 
     return base
-
-def analyze_composite(msg1, msg2, msg3, *answers_and_none):
-    responses = answers_and_none[:len(ESCALATION_QUESTIONS)]
-    none_selected = answers_and_none[-1]
-    escalation_score = 0 if none_selected else sum(w for (_, w), a in zip(ESCALATION_QUESTIONS, responses) if a)
-    messages = [msg1, msg2, msg3]
-    active = [m for m in messages if m.strip()]
-    if not active:
-        return "Please enter at least one message."
-
-    results = [analyze_single_message(m, THRESHOLDS.copy()) for m in active]
-    abuse_scores = [r[0] for r in results]
-    top_labels = [r[2][0][0] for r in results]
-    top_scores = [r[2][0][1] for r in results]
-    sentiments = [r[3]['label'] for r in results]
-    stages = [r[4] for r in results]
-
-    most_common_stage = max(set(stages), key=stages.count)
-    stage_text = RISK_STAGE_LABELS[most_common_stage]
-
-    top_label = f"{top_labels[0]} – {int(round(top_scores[0] * 100))}%"
-    composite_abuse = int(round(sum(abuse_scores) / len(abuse_scores)))
-
-    if composite_abuse >= 85 or escalation_score >= 16:
-        risk_level = "high"
-    elif composite_abuse >= 60 or escalation_score >= 8:
-        risk_level = "moderate"
-    else:
-        risk_level = "low"
-
-    out = f"Abuse Intensity: {composite_abuse}%\n"
-    out += f"Escalation Potential: {risk_level.capitalize()} ({escalation_score}/{sum(w for _, w in ESCALATION_QUESTIONS)})"
-    out += generate_risk_snippet(composite_abuse, top_label, escalation_score)
-    out += f"\n\n{stage_text}"
-    return out
-
-textbox_inputs = [gr.Textbox(label=f"Message {i+1}") for i in range(3)]
-quiz_boxes = [gr.Checkbox(label=q) for q, _ in ESCALATION_QUESTIONS]
-none_box = gr.Checkbox(label="None of the above")
-
-iface = gr.Interface(
-    fn=analyze_composite,
-    inputs=textbox_inputs + quiz_boxes + [none_box],
-    outputs=gr.Textbox(label="Results"),
-    title="Abuse Pattern Detector + Escalation Quiz",
-    allow_flagging="manual"
-)
-
-if __name__ == "__main__":
-    iface.launch()