SamanthaStorm committed on
Commit 2f6ac5d · verified · 1 Parent(s): 0e53c22

Update app.py

Files changed (1)
  app.py +53 -28
app.py CHANGED
@@ -1,17 +1,30 @@
 import gradio as gr
 import torch
 import numpy as np
-from transformers import AutoModelForSequenceClassification, AutoTokenizer
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 from transformers import RobertaForSequenceClassification, RobertaTokenizer
 from motif_tagging import detect_motifs
 import re
 
-# custom fine-tuned sentiment model
-sentiment_model = AutoModelForSequenceClassification.from_pretrained("SamanthaStorm/tether-sentiment")
-sentiment_tokenizer = AutoTokenizer.from_pretrained("SamanthaStorm/tether-sentiment")
+# --- Sentiment Model: T5-based Emotion Classifier ---
+sentiment_tokenizer = AutoTokenizer.from_pretrained("mrm8488/t5-base-finetuned-emotion")
+sentiment_model = AutoModelForSeq2SeqLM.from_pretrained("mrm8488/t5-base-finetuned-emotion")
 
-# Load abuse pattern model
-model_name ="SamanthaStorm/autotrain-jlpi4-mllvp"
+EMOTION_TO_SENTIMENT = {
+    "joy": "supportive",
+    "love": "supportive",
+    "surprise": "supportive",
+    "neutral": "supportive",
+    "sadness": "undermining",
+    "anger": "undermining",
+    "fear": "undermining",
+    "disgust": "undermining",
+    "shame": "undermining",
+    "guilt": "undermining"
+}
+
+# --- Abuse Detection Model ---
+model_name = "SamanthaStorm/autotrain-jlpi4-mllvp"
 model = RobertaForSequenceClassification.from_pretrained(model_name, trust_remote_code=True)
 tokenizer = RobertaTokenizer.from_pretrained(model_name, trust_remote_code=True)
 
@@ -35,7 +48,13 @@ THRESHOLDS = {
     "threat": 0.25
 }
 
-PATTERN_LABELS = LABELS
+PATTERN_WEIGHTS = {
+    "gaslighting": 1.3,
+    "control": 1.2,
+    "dismissiveness": 0.8,
+    "blame shifting": 0.8,
+    "contradictory statements": 0.75
+}
 
 EXPLANATIONS = {
     "blame shifting": "Blame-shifting is when one person redirects responsibility onto someone else to avoid accountability.",
@@ -51,11 +70,6 @@ EXPLANATIONS = {
     "threat": "Threats use fear of harm (physical, emotional, or relational) to control or intimidate someone."
 }
 
-PATTERN_WEIGHTS = {
-    "gaslighting": 1.3, "control": 1.2, "dismissiveness": 0.8, "blame shifting": 0.8,
-    "contradictory statements": 0.75
-}
-
 RISK_SNIPPETS = {
     "low": (
         "🟢 Risk Level: Low",
@@ -82,12 +96,13 @@ def generate_risk_snippet(abuse_score, top_label):
     else:
         risk_level = "low"
     title, summary, advice = RISK_SNIPPETS[risk_level]
-    return f"\n\n{title}\n{summary} (Pattern: **{top_label}**)\n💡 {advice}"
+    return f"\n\n{title}\n{summary} (Pattern: {top_label})\n💡 {advice}"
 
-# --- DARVO Detection Tools ---
+# --- DARVO Detection ---
 DARVO_PATTERNS = {
     "blame shifting", "projection", "dismissiveness", "guilt tripping", "contradictory statements"
 }
+
 DARVO_MOTIFS = [
     "i guess i’m the bad guy", "after everything i’ve done", "you always twist everything",
     "so now it’s all my fault", "i’m the villain", "i’m always wrong", "you never listen",
@@ -125,17 +140,19 @@ def calculate_darvo_score(patterns, sentiment_before, sentiment_after, motifs_fo
     )
     return round(min(darvo_score, 1.0), 3)
 
+# --- Sentiment Mapping ---
 def custom_sentiment(text):
-    inputs = sentiment_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
+    input_ids = sentiment_tokenizer(f"emotion: {text}", return_tensors="pt").input_ids
     with torch.no_grad():
-        outputs = sentiment_model(**inputs)
-    probs = torch.nn.functional.softmax(outputs.logits, dim=1)
-    label_idx = torch.argmax(probs).item()
-    label_map = {0: "supportive", 1: "undermining"}
-    return {"label": label_map[label_idx], "score": probs[0][label_idx].item()}
+        outputs = sentiment_model.generate(input_ids)
+    emotion = sentiment_tokenizer.decode(outputs[0], skip_special_tokens=True).strip().lower()
+    sentiment = EMOTION_TO_SENTIMENT.get(emotion, "undermining")
+    return {"label": sentiment, "emotion": emotion}
 
+# --- Abuse Analysis Core ---
 def calculate_abuse_level(scores, thresholds, motif_hits=None, flag_multiplier=1.0):
-    weighted_scores = [score * PATTERN_WEIGHTS.get(label, 1.0) for label, score in zip(LABELS, scores) if score > thresholds[label]]
+    weighted_scores = [score * PATTERN_WEIGHTS.get(label, 1.0)
+                       for label, score in zip(LABELS, scores) if score > thresholds[label]]
     base_score = round(np.mean(weighted_scores) * 100, 2) if weighted_scores else 0.0
     base_score *= flag_multiplier
     return min(base_score, 100.0)
@@ -143,12 +160,12 @@ def calculate_abuse_level(scores, thresholds, motif_hits=None, flag_multiplier=1
 def analyze_single_message(text, thresholds, motif_flags):
     motif_hits, matched_phrases = detect_motifs(text)
     sentiment = custom_sentiment(text)
-    sentiment_score = sentiment["score"] if sentiment["label"] == "undermining" else 0.0
-
-    # TEMP: print sentiment to console for debugging
-    print(f"Sentiment label: {sentiment['label']}, score: {sentiment['score']}")
+    sentiment_score = 0.5 if sentiment["label"] == "undermining" else 0.0
+    print(f"Detected emotion: {sentiment['emotion']} → sentiment: {sentiment['label']}")
 
-    adjusted_thresholds = {k: v * 0.8 for k, v in thresholds.items()} if sentiment['label'] == "undermining" else thresholds.copy()
+    adjusted_thresholds = {
+        k: v * 0.8 for k, v in thresholds.items()
+    } if sentiment["label"] == "undermining" else thresholds.copy()
 
     inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
     with torch.no_grad():
@@ -160,13 +177,16 @@ def analyze_single_message(text, thresholds, motif_flags):
     pattern_labels_used = list(set(threshold_labels + phrase_labels))
 
     abuse_level = calculate_abuse_level(scores, adjusted_thresholds, motif_hits)
-    top_patterns = sorted([(label, score) for label, score in zip(LABELS, scores)], key=lambda x: x[1], reverse=True)[:2]
+    top_patterns = sorted([(label, score) for label, score in zip(LABELS, scores)],
+                          key=lambda x: x[1], reverse=True)[:2]
+
     motif_phrases = [text for _, text in matched_phrases]
     contradiction_flag = detect_contradiction(text)
     darvo_score = calculate_darvo_score(pattern_labels_used, 0.0, sentiment_score, motif_phrases, contradiction_flag)
 
     return abuse_level, pattern_labels_used, top_patterns, darvo_score, sentiment
 
+# --- Composite Message Analysis ---
 def analyze_composite(msg1, msg2, msg3, flags):
     thresholds = THRESHOLDS
     messages = [msg1, msg2, msg3]
@@ -180,15 +200,17 @@ def analyze_composite(msg1, msg2, msg3, flags):
         print(f"Message: {m}")
         print(f"Sentiment result: {result[4]}")
         results.append(result)
+
     abuse_scores = [r[0] for r in results]
     darvo_scores = [r[3] for r in results]
     average_darvo = round(sum(darvo_scores) / len(darvo_scores), 3)
     base_score = sum(abuse_scores) / len(abuse_scores)
+
     label_sets = [[label for label, _ in r[2]] for r in results]
     label_counts = {label: sum(label in s for s in label_sets) for label in set().union(*label_sets)}
     top_label = max(label_counts.items(), key=lambda x: x[1])
     top_explanation = EXPLANATIONS.get(top_label[0], "")
-    danger_weight = 5
+
     flag_weights = {
         "They've threatened harm": 6,
         "They isolate me": 5,
@@ -196,6 +218,7 @@ def analyze_composite(msg1, msg2, msg3, flags):
         "They monitor/follow me": 4,
         "I feel unsafe when alone with them": 6
     }
+
     flag_boost = sum(flag_weights.get(f, 3) for f in flags) / len(active_messages)
     composite_score = min(base_score + flag_boost, 100)
     if len(active_messages) == 1:
@@ -203,6 +226,7 @@ def analyze_composite(msg1, msg2, msg3, flags):
     elif len(active_messages) == 2:
         composite_score *= 0.93
     composite_score = round(min(composite_score, 100), 2)
+
     result = f"These messages show a pattern of **{top_label[0]}** and are estimated to be {composite_score}% likely abusive."
    if top_explanation:
         result += f"\n• {top_explanation}"
@@ -212,6 +236,7 @@ def analyze_composite(msg1, msg2, msg3, flags):
     result += generate_risk_snippet(composite_score, top_label[0])
     return result
 
+# --- Gradio Interface ---
 textbox_inputs = [
     gr.Textbox(label="Message 1"),
     gr.Textbox(label="Message 2"),
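For reviewers, a minimal standalone sketch of the sentiment path this commit introduces. The model name and the EMOTION_TO_SENTIMENT mapping are taken from the diff above; the __main__ harness and the sample output are illustrative assumptions, not part of app.py.

import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Model and mapping as in the diff above.
sentiment_tokenizer = AutoTokenizer.from_pretrained("mrm8488/t5-base-finetuned-emotion")
sentiment_model = AutoModelForSeq2SeqLM.from_pretrained("mrm8488/t5-base-finetuned-emotion")

EMOTION_TO_SENTIMENT = {
    "joy": "supportive", "love": "supportive", "surprise": "supportive", "neutral": "supportive",
    "sadness": "undermining", "anger": "undermining", "fear": "undermining",
    "disgust": "undermining", "shame": "undermining", "guilt": "undermining",
}

def custom_sentiment(text):
    # T5 is prompted with an "emotion:" prefix and generates an emotion word,
    # which is then collapsed to the binary supportive/undermining label.
    input_ids = sentiment_tokenizer(f"emotion: {text}", return_tensors="pt").input_ids
    with torch.no_grad():
        outputs = sentiment_model.generate(input_ids)
    emotion = sentiment_tokenizer.decode(outputs[0], skip_special_tokens=True).strip().lower()
    # Emotions missing from the mapping fall back to "undermining", the conservative default in the diff.
    return {"label": EMOTION_TO_SENTIMENT.get(emotion, "undermining"), "emotion": emotion}

if __name__ == "__main__":
    # Hypothetical smoke test; the actual emotion returned depends on the model.
    print(custom_sentiment("You always twist everything I say."))

Note that because the seq2seq classifier emits a word rather than a probability, the diff also replaces the old softmax confidence with a flat sentiment_score of 0.5 for "undermining" messages before feeding the DARVO calculation.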