SamanthaStorm committed on
Commit
f32b7e3
·
verified ·
1 Parent(s): fd6c90c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +69 -104
app.py CHANGED
@@ -6,24 +6,17 @@ from transformers import RobertaForSequenceClassification, RobertaTokenizer
6
  from motif_tagging import detect_motifs
7
  import re
8
 
9
- # --- Sentiment Model: T5-based Emotion Classifier ---
10
  sentiment_tokenizer = AutoTokenizer.from_pretrained("mrm8488/t5-base-finetuned-emotion")
11
  sentiment_model = AutoModelForSeq2SeqLM.from_pretrained("mrm8488/t5-base-finetuned-emotion")
12
 
13
  EMOTION_TO_SENTIMENT = {
14
- "joy": "supportive",
15
- "love": "supportive",
16
- "surprise": "supportive",
17
- "neutral": "supportive",
18
- "sadness": "undermining",
19
- "anger": "undermining",
20
- "fear": "undermining",
21
- "disgust": "undermining",
22
- "shame": "undermining",
23
- "guilt": "undermining"
24
  }
25
 
26
- # --- Abuse Detection Model ---
27
  model_name = "SamanthaStorm/autotrain-jlpi4-mllvp"
28
  model = RobertaForSequenceClassification.from_pretrained(model_name, trust_remote_code=True)
29
  tokenizer = RobertaTokenizer.from_pretrained(model_name, trust_remote_code=True)
@@ -35,69 +28,48 @@ LABELS = [
35
  ]
36
 
37
  THRESHOLDS = {
38
- "blame shifting": 0.3,
39
- "contradictory statements": 0.32,
40
- "control": 0.48,
41
- "dismissiveness": 0.45,
42
- "gaslighting": 0.30,
43
- "guilt tripping": 0.20,
44
- "insults": 0.34,
45
- "obscure language": 0.25,
46
- "projection": 0.35,
47
- "recovery phase": 0.25,
48
- "threat": 0.25
49
  }
50
 
51
  PATTERN_WEIGHTS = {
52
- "gaslighting": 1.3,
53
- "control": 1.2,
54
- "dismissiveness": 0.8,
55
- "blame shifting": 0.8,
56
- "contradictory statements": 0.75
57
  }
58
 
59
  EXPLANATIONS = {
60
- "blame shifting": "Blame-shifting is when one person redirects responsibility onto someone else to avoid accountability.",
61
- "contradictory statements": "Contradictory statements confuse the listener by flipping positions or denying previous claims.",
62
- "control": "Control restricts another person’s autonomy through coercion, manipulation, or threats.",
63
- "dismissiveness": "Dismissiveness is belittling or disregarding another person’s feelings, needs, or opinions.",
64
- "gaslighting": "Gaslighting involves making someone question their own reality, memory, or perceptions.",
65
- "guilt tripping": "Guilt-tripping uses guilt to manipulate someone’s actions or decisions.",
66
- "insults": "Insults are derogatory or demeaning remarks meant to shame, belittle, or hurt someone.",
67
- "obscure language": "Obscure language manipulates through complexity, vagueness, or superiority to confuse the other person.",
68
- "projection": "Projection accuses someone else of the very behaviors or intentions the speaker is exhibiting.",
69
- "recovery phase": "Recovery phase statements attempt to soothe or reset tension without acknowledging harm or change.",
70
- "threat": "Threats use fear of harm (physical, emotional, or relational) to control or intimidate someone."
71
  }
72
 
73
  RISK_SNIPPETS = {
74
  "low": (
75
  "🟢 Risk Level: Low",
76
  "The language patterns here do not strongly indicate abuse.",
77
- "Continue to check in with yourself and notice how you feel in response to repeated patterns."
78
  ),
79
  "moderate": (
80
  "⚠️ Risk Level: Moderate to High",
81
- "This language includes control, guilt, or reversal tactics.",
82
- "These patterns often lead to emotional confusion and reduced self-trust. Document these messages or talk with someone safe."
83
  ),
84
  "high": (
85
  "🛑 Risk Level: High",
86
- "Language includes threats or coercive control, which are strong indicators of escalation.",
87
- "Consider creating a safety plan or contacting a support line. Trust your sense of unease."
88
  )
89
  }
90
 
91
- def generate_risk_snippet(abuse_score, top_label):
92
- if abuse_score >= 85:
93
- risk_level = "high"
94
- elif abuse_score >= 60:
95
- risk_level = "moderate"
96
- else:
97
- risk_level = "low"
98
- title, summary, advice = RISK_SNIPPETS[risk_level]
99
- return f"\n\n{title}\n{summary} (Pattern: **{top_label}**)\n💡 {advice}"
100
-
101
  DARVO_PATTERNS = {
102
  "blame shifting", "projection", "dismissiveness", "guilt tripping", "contradictory statements"
103
  }
@@ -107,8 +79,21 @@ DARVO_MOTIFS = [
107
  "you’re attacking me", "i’m done trying", "i’m the only one who cares"
108
  ]
109
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  def detect_contradiction(message):
111
- contradiction_phrases = [
112
  (r"\b(i love you).{0,15}(i hate you|you ruin everything)", re.IGNORECASE),
113
  (r"\b(i’m sorry).{0,15}(but you|if you hadn’t)", re.IGNORECASE),
114
  (r"\b(i’m trying).{0,15}(you never|why do you)", re.IGNORECASE),
@@ -116,72 +101,56 @@ def detect_contradiction(message):
116
  (r"\b(i don’t care).{0,15}(you never think of me)", re.IGNORECASE),
117
  (r"\b(i guess i’m just).{0,15}(the bad guy|worthless|never enough)", re.IGNORECASE)
118
  ]
119
- return any(re.search(pattern, message, flags) for pattern, flags in contradiction_phrases)
120
 
121
  def calculate_darvo_score(patterns, sentiment_before, sentiment_after, motifs_found, contradiction_flag=False):
122
- pattern_hits = len([p.lower() for p in patterns if p.lower() in DARVO_PATTERNS])
123
  pattern_score = pattern_hits / len(DARVO_PATTERNS)
124
  sentiment_shift_score = max(0.0, sentiment_after - sentiment_before)
125
- motif_hits = len([m.lower() for m in motifs_found if m.lower() in DARVO_MOTIFS])
126
  motif_score = motif_hits / len(DARVO_MOTIFS)
127
  contradiction_score = 1.0 if contradiction_flag else 0.0
128
- darvo_score = (
129
- 0.3 * pattern_score +
130
- 0.3 * sentiment_shift_score +
131
- 0.25 * motif_score +
132
- 0.15 * contradiction_score
133
- )
134
- return round(min(darvo_score, 1.0), 3)
135
 
136
- ESCALATION_QUESTIONS = [
137
- ("Partner has access to firearms or weapons", 4),
138
- ("Partner threatened to kill you", 3),
139
- ("Partner threatened you with a weapon", 3),
140
- ("Partner has ever choked you, even if you considered it consensual at the time", 4),
141
- ("Partner injured or threatened your pet(s)", 3),
142
- ("Partner has broken your things, punched or kicked walls, or thrown things ", 2),
143
- ("Partner forced or coerced you into unwanted sexual acts", 3),
144
- ("Partner threatened to take away your children", 2),
145
- ("Violence has increased in frequency or severity", 3),
146
- ("Partner monitors your calls/GPS/social media", 2)
147
- ]
148
 
149
  def analyze_single_message(text, thresholds, motif_flags):
150
  motif_hits, matched_phrases = detect_motifs(text)
151
 
152
- # Sentiment Analysis
153
  input_ids = sentiment_tokenizer(f"emotion: {text}", return_tensors="pt").input_ids
154
  with torch.no_grad():
155
- outputs = sentiment_model.generate(input_ids)
156
- emotion = sentiment_tokenizer.decode(outputs[0], skip_special_tokens=True).strip().lower()
157
  sentiment = EMOTION_TO_SENTIMENT.get(emotion, "undermining")
158
  sentiment_score = 0.5 if sentiment == "undermining" else 0.0
159
 
160
- # Contradiction Check
161
- contradiction_flag = detect_contradiction(text)
 
 
 
162
 
163
- # Motifs
164
- motifs = [phrase for _, phrase in matched_phrases]
165
 
166
- # Model Prediction
167
  inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
168
  with torch.no_grad():
169
  outputs = model(**inputs)
170
  scores = torch.sigmoid(outputs.logits.squeeze(0)).numpy()
171
 
172
- threshold_labels = [label for label, score in zip(LABELS, scores) if score > thresholds[label]]
173
- top_patterns = sorted([(label, score) for label, score in zip(LABELS, scores)], key=lambda x: x[1], reverse=True)[:2]
174
  pattern_labels = threshold_labels + [label for label, _ in matched_phrases]
175
 
 
176
  darvo_score = calculate_darvo_score(pattern_labels, 0.0, sentiment_score, motifs, contradiction_flag)
177
 
178
- return (
179
- np.mean([score for _, score in top_patterns]) * 100,
180
- threshold_labels,
181
- top_patterns,
182
- darvo_score,
183
- {"label": sentiment, "emotion": emotion}
184
- )
185
  def analyze_composite(msg1, msg2, msg3, *answers_and_none):
186
  responses = answers_and_none[:len(ESCALATION_QUESTIONS)]
187
  none_selected = answers_and_none[-1]
@@ -196,23 +165,19 @@ def analyze_composite(msg1, msg2, msg3, *answers_and_none):
196
  results = [analyze_single_message(m, THRESHOLDS.copy(), []) for m in active]
197
  abuse_scores = [r[0] for r in results]
198
  darvo_scores = [r[3] for r in results]
199
- top_pattern = max({label for r in results for label in r[2]}, key=lambda l: abuse_scores[0])
200
- composite_abuse = int(round(sum(abuse_scores)/len(abuse_scores)))
201
- avg_darvo = round(sum(darvo_scores)/len(darvo_scores), 3)
202
 
203
  out = f"Abuse Intensity: {composite_abuse}%\n"
204
- out += f"Escalation Potential: {escalation_level} ({escalation_score}/{sum(w for _,w in ESCALATION_QUESTIONS)})"
205
- out += generate_risk_snippet(composite_abuse, top_pattern[0])
206
  if avg_darvo > 0.25:
207
  level = "moderate" if avg_darvo < 0.65 else "high"
208
  out += f"\n\nDARVO Score: {avg_darvo} → This indicates a **{level} likelihood** of narrative reversal (DARVO), where the speaker may be denying, attacking, or reversing blame."
209
  return out
210
 
211
- textbox_inputs = [
212
- gr.Textbox(label="Message 1"),
213
- gr.Textbox(label="Message 2"),
214
- gr.Textbox(label="Message 3")
215
- ]
216
  quiz_boxes = [gr.Checkbox(label=q) for q, _ in ESCALATION_QUESTIONS]
217
  none_box = gr.Checkbox(label="None of the above")
218
 
 
6
  from motif_tagging import detect_motifs
7
  import re
8
 
9
# --- Sentiment model: T5 fine-tuned for emotion classification ---
# Produces a single emotion word (e.g. "joy", "anger") which is mapped to a
# supportive/undermining sentiment via EMOTION_TO_SENTIMENT.
_SENTIMENT_MODEL_ID = "mrm8488/t5-base-finetuned-emotion"
sentiment_tokenizer = AutoTokenizer.from_pretrained(_SENTIMENT_MODEL_ID)
sentiment_model = AutoModelForSeq2SeqLM.from_pretrained(_SENTIMENT_MODEL_ID)
12
 
13
# Collapse the model's fine-grained emotion label onto a two-way
# supportive/undermining axis used for threshold adjustment and DARVO scoring.
_SUPPORTIVE_EMOTIONS = ("joy", "love", "surprise", "neutral")
_UNDERMINING_EMOTIONS = ("sadness", "anger", "fear", "disgust", "shame", "guilt")
EMOTION_TO_SENTIMENT = {
    **{emotion: "supportive" for emotion in _SUPPORTIVE_EMOTIONS},
    **{emotion: "undermining" for emotion in _UNDERMINING_EMOTIONS},
}
18
 
19
# --- Abuse pattern classifier: multi-label RoBERTa checkpoint ---
# Logits are passed through a sigmoid per label (see analyze_single_message),
# so each abuse pattern is scored independently.
model_name = "SamanthaStorm/autotrain-jlpi4-mllvp"
tokenizer = RobertaTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = RobertaForSequenceClassification.from_pretrained(model_name, trust_remote_code=True)
 
28
  ]
29
 
30
# Per-label sigmoid-score cutoffs: a pattern is reported only when its score
# exceeds the (sentiment-adjusted) threshold in analyze_single_message.
THRESHOLDS = {
    "blame shifting": 0.3,
    "contradictory statements": 0.32,
    "control": 0.48,
    "dismissiveness": 0.45,
    "gaslighting": 0.30,
    "guilt tripping": 0.20,
    "insults": 0.34,
    "obscure language": 0.25,
    "projection": 0.35,
    "recovery phase": 0.25,
    "threat": 0.25,
}
35
 
36
# Relative severity multipliers applied to pattern scores when computing the
# abuse intensity; labels not listed here default to a weight of 1.0.
PATTERN_WEIGHTS = {
    "gaslighting": 1.3,
    "control": 1.2,
    "dismissiveness": 0.8,
    "blame shifting": 0.8,
    "contradictory statements": 0.75,
}
40
 
41
# One-sentence, user-facing description of each abuse pattern label.
# Keys mirror LABELS / THRESHOLDS exactly.
EXPLANATIONS = {
    "blame shifting": "Blame-shifting redirects responsibility to avoid accountability.",
    "contradictory statements": "Flipping positions or denying previous claims.",
    "control": "Attempts to restrict another person’s autonomy.",
    "dismissiveness": "Disregarding or belittling someone’s feelings or needs.",
    "gaslighting": "Manipulating someone into questioning their reality.",
    "guilt tripping": "Using guilt to control or pressure.",
    "insults": "Derogatory or demeaning language.",
    "obscure language": "Vague, superior, or confusing language used manipulatively.",
    "projection": "Accusing someone else of your own behaviors.",
    "recovery phase": "Resetting tension without real change.",
    "threat": "Using fear or harm to control or intimidate.",
}
54
 
55
# (title, summary, advice) text for each risk tier, consumed by
# generate_risk_snippet when formatting the composite report.
RISK_SNIPPETS = {
    "low": (
        "🟢 Risk Level: Low",
        "The language patterns here do not strongly indicate abuse.",
        "Check in with yourself and monitor for repeated patterns.",
    ),
    "moderate": (
        "⚠️ Risk Level: Moderate to High",
        "Language includes control, guilt, or reversal tactics.",
        "These patterns reduce self-trust. Document or talk with someone safe.",
    ),
    "high": (
        "🛑 Risk Level: High",
        "Strong indicators of coercive control or threat present.",
        "Consider building a safety plan or contacting support.",
    ),
}
72
 
 
 
 
 
 
 
 
 
 
 
73
# Abuse-pattern labels that count toward the DARVO
# (Deny, Attack, Reverse Victim and Offender) score.
DARVO_PATTERNS = {
    "blame shifting",
    "projection",
    "dismissiveness",
    "guilt tripping",
    "contradictory statements",
}
 
79
  "you’re attacking me", "i’m done trying", "i’m the only one who cares"
80
  ]
81
 
82
# Danger-assessment style checklist: (question text, weight) pairs. Weights
# are summed in analyze_composite to produce the escalation score.
# NOTE(review): "thrown things " keeps its original trailing space so the
# checkbox label stays byte-identical to existing data — confirm before trimming.
ESCALATION_QUESTIONS = [
    ("Partner has access to firearms or weapons", 4),
    ("Partner threatened to kill you", 3),
    ("Partner threatened you with a weapon", 3),
    ("Partner has ever choked you, even if you considered it consensual at the time", 4),
    ("Partner injured or threatened your pet(s)", 3),
    ("Partner has broken your things, punched or kicked walls, or thrown things ", 2),
    ("Partner forced or coerced you into unwanted sexual acts", 3),
    ("Partner threatened to take away your children", 2),
    ("Violence has increased in frequency or severity", 3),
    ("Partner monitors your calls/GPS/social media", 2),
]
94
+
95
  def detect_contradiction(message):
96
+ patterns = [
97
  (r"\b(i love you).{0,15}(i hate you|you ruin everything)", re.IGNORECASE),
98
  (r"\b(i’m sorry).{0,15}(but you|if you hadn’t)", re.IGNORECASE),
99
  (r"\b(i’m trying).{0,15}(you never|why do you)", re.IGNORECASE),
 
101
  (r"\b(i don’t care).{0,15}(you never think of me)", re.IGNORECASE),
102
  (r"\b(i guess i’m just).{0,15}(the bad guy|worthless|never enough)", re.IGNORECASE)
103
  ]
104
+ return any(re.search(p, message, flags) for p, flags in patterns)
105
 
106
def calculate_darvo_score(patterns, sentiment_before, sentiment_after, motifs_found, contradiction_flag=False):
    """Estimate how strongly a message exhibits DARVO dynamics
    (Deny, Attack, Reverse Victim and Offender).

    Args:
        patterns: iterable of detected abuse-pattern labels.
        sentiment_before: sentiment score before the message (numeric).
        sentiment_after: sentiment score after the message; only a positive
            shift (after - before) contributes.
        motifs_found: iterable of matched motif phrases.
        contradiction_flag: True when detect_contradiction matched.

    Returns:
        float: weighted DARVO score, clamped to [0.0, 1.0], rounded to 3 dp.
    """
    # Lowercase before membership tests so labels coming from motif tagging
    # match the lowercase DARVO_PATTERNS set regardless of casing (this
    # normalization existed previously and was dropped; restored for
    # consistency with the motif check below).
    pattern_hits = sum(1 for p in patterns if p.lower() in DARVO_PATTERNS)
    pattern_score = pattern_hits / len(DARVO_PATTERNS)
    sentiment_shift_score = max(0.0, sentiment_after - sentiment_before)
    motif_hits = sum(1 for m in motifs_found if m.lower() in DARVO_MOTIFS)
    motif_score = motif_hits / len(DARVO_MOTIFS)
    contradiction_score = 1.0 if contradiction_flag else 0.0
    darvo_score = (
        0.3 * pattern_score
        + 0.3 * sentiment_shift_score
        + 0.25 * motif_score
        + 0.15 * contradiction_score
    )
    return round(min(darvo_score, 1.0), 3)
 
 
 
 
 
 
114
 
115
def generate_risk_snippet(score, top_label):
    """Format a risk summary for an abuse-intensity score.

    Tier selection: score >= 85 -> "high", score >= 60 -> "moderate",
    otherwise "low". The matching (title, summary, advice) triple is read
    from RISK_SNIPPETS and the dominant pattern label is interpolated.
    """
    if score >= 85:
        level = "high"
    elif score >= 60:
        level = "moderate"
    else:
        level = "low"
    title, summary, advice = RISK_SNIPPETS[level]
    return f"\n\n{title}\n{summary} (Pattern: **{top_label}**)\n💡 {advice}"
 
 
 
 
 
 
 
 
119
 
120
def analyze_single_message(text, thresholds, motif_flags):
    """Run the full per-message analysis pipeline.

    Args:
        text: raw message string.
        thresholds: per-label cutoffs (usually a copy of THRESHOLDS).
        motif_flags: currently unused; kept for caller compatibility.

    Returns:
        Tuple of (abuse_score in [0, 100], threshold_labels, top_patterns,
        darvo_score, {"label": sentiment, "emotion": emotion}).
    """
    motif_hits, matched_phrases = detect_motifs(text)

    # Sentiment: T5 emotion model -> coarse supportive/undermining label.
    input_ids = sentiment_tokenizer(f"emotion: {text}", return_tensors="pt").input_ids
    with torch.no_grad():
        sentiment_out = sentiment_model.generate(input_ids)
    # strip() restored: a decode with stray whitespace would otherwise miss
    # the EMOTION_TO_SENTIMENT lookup and silently default to "undermining".
    emotion = sentiment_tokenizer.decode(sentiment_out[0], skip_special_tokens=True).strip().lower()
    sentiment = EMOTION_TO_SENTIMENT.get(emotion, "undermining")
    sentiment_score = 0.5 if sentiment == "undermining" else 0.0

    # Loosen thresholds for undermining messages, tighten for supportive ones.
    # sentiment is always one of these two values (see EMOTION_TO_SENTIMENT
    # plus the .get default above), so no third branch is needed.
    multiplier = 0.8 if sentiment == "undermining" else 1.2
    adjusted_thresholds = {label: cutoff * multiplier for label, cutoff in thresholds.items()}

    contradiction_flag = detect_contradiction(text)
    motifs = [phrase for _, phrase in matched_phrases]  # renamed to avoid shadowing `text`

    # Multi-label abuse classifier; independent sigmoid score per label.
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
    scores = torch.sigmoid(outputs.logits.squeeze(0)).numpy()

    threshold_labels = [l for l, s in zip(LABELS, scores) if s > adjusted_thresholds[l]]
    top_patterns = sorted(zip(LABELS, scores), key=lambda x: x[1], reverse=True)[:2]
    pattern_labels = threshold_labels + [label for label, _ in matched_phrases]

    # Weighted mean of the two strongest patterns. Clamp at 100: weights
    # above 1.0 in PATTERN_WEIGHTS could otherwise yield an intensity > 100%.
    weighted_mean = np.mean([s * PATTERN_WEIGHTS.get(l, 1.0) for l, s in top_patterns]) * 100
    abuse_score = round(min(float(weighted_mean), 100.0), 2)

    darvo_score = calculate_darvo_score(pattern_labels, 0.0, sentiment_score, motifs, contradiction_flag)

    return abuse_score, threshold_labels, top_patterns, darvo_score, {"label": sentiment, "emotion": emotion}
153
+
 
 
 
 
 
154
  def analyze_composite(msg1, msg2, msg3, *answers_and_none):
155
  responses = answers_and_none[:len(ESCALATION_QUESTIONS)]
156
  none_selected = answers_and_none[-1]
 
165
  results = [analyze_single_message(m, THRESHOLDS.copy(), []) for m in active]
166
  abuse_scores = [r[0] for r in results]
167
  darvo_scores = [r[3] for r in results]
168
+ top_label = max({label for r in results for label in r[2]}, key=lambda l: abuse_scores[0])
169
+ composite_abuse = int(round(sum(abuse_scores) / len(abuse_scores)))
170
+ avg_darvo = round(sum(darvo_scores) / len(darvo_scores), 3)
171
 
172
  out = f"Abuse Intensity: {composite_abuse}%\n"
173
+ out += f"Escalation Potential: {escalation_level} ({escalation_score}/{sum(w for _, w in ESCALATION_QUESTIONS)})"
174
+ out += generate_risk_snippet(composite_abuse, top_label)
175
  if avg_darvo > 0.25:
176
  level = "moderate" if avg_darvo < 0.65 else "high"
177
  out += f"\n\nDARVO Score: {avg_darvo} → This indicates a **{level} likelihood** of narrative reversal (DARVO), where the speaker may be denying, attacking, or reversing blame."
178
  return out
179
 
180
# --- Gradio inputs: three message boxes plus the escalation-risk quiz ---
textbox_inputs = [gr.Textbox(label=f"Message {n}") for n in (1, 2, 3)]
quiz_boxes = [gr.Checkbox(label=question) for question, _weight in ESCALATION_QUESTIONS]
none_box = gr.Checkbox(label="None of the above")
183