Spaces:

SamanthaStorm
/

Tether

Running on Zero

App Files Files Community

SamanthaStorm committed on Jun 2

Commit

8dc9e3c

verified ·

1 Parent(s): 90bf0d2

Update app.py

Browse files

Files changed (1) hide show

app.py +73 -52

app.py CHANGED Viewed

@@ -2,14 +2,40 @@ import gradio as gr
 import spaces
 import torch
 import numpy as np
-from transformers import pipeline, RobertaForSequenceClassification, RobertaTokenizer
-from motif_tagging import detect_motifs
 import re
 import matplotlib.pyplot as plt
 import io
 from PIL import Image
 from datetime import datetime
-from transformers import pipeline as hf_pipeline  # prevent name collision with gradio pipeline
 def get_emotion_profile(text):
     emotions = emotion_pipeline(text)
@@ -87,9 +113,9 @@ THRESHOLDS = {
 }
 PATTERN_WEIGHTS = {
-    "recovery": 0.7,
     "control": 1.4,
-    "gaslighting": 1.50,
     "guilt tripping": 0.9,
     "dismissiveness": 0.9,
     "blame shifting": 0.8,
@@ -382,7 +408,6 @@ THREAT_MOTIFS = [
 ]
-@spaces.GPU
 @spaces.GPU
 def compute_abuse_score(matched_scores, sentiment):
     """
@@ -429,17 +454,15 @@ def compute_abuse_score(matched_scores, sentiment):
         base_score *= 1.05  # Reduced
     # Sentiment modifier (more nuanced)
-    if sentiment == "supportive":
-        manipulative_patterns = {'guilt tripping', 'gaslighting', 'blame shifting', 'love bombing'}
-        if any(label in manipulative_patterns for label, score, _ in matched_scores if score > 0.6): # Higher threshold
-            base_score *= 0.95  # Smaller reduction for strongly manipulative "support"
-        elif any(label in manipulative_patterns for label, score, _ in matched_scores if score > 0.4): # Moderate threshold
-            base_score *= 0.9  # Moderate reduction for manipulative "support"
-        else:
-            base_score *= 0.8  # Larger reduction for genuine support
-    elif sentiment == "undermining":
-        base_score *= 1.15
     # Reduce minimum score and threshold for activation
     if any(score > 0.9 for _, score, _ in matched_scores):  # Higher threshold
@@ -449,6 +472,7 @@ def compute_abuse_score(matched_scores, sentiment):
     return min(round(base_score, 1), 100.0)
 def analyze_single_message(text, thresholds):
     print("⚡ ENTERED analyze_single_message")
     stage = 1
@@ -459,19 +483,21 @@ def analyze_single_message(text, thresholds):
     sentiment_score = emotion_profile.get("anger", 0) + emotion_profile.get("disgust", 0)
     # Get model scores
-    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
     with torch.no_grad():
         outputs = model(**inputs)
     scores = torch.sigmoid(outputs.logits.squeeze(0)).numpy()
     # Sentiment override
     if emotion_profile.get("neutral", 0) > 0.85 and any(
-        scores[LABELS.index(l)] > thresholds[l]
-        for l in ["control", "blame shifting"]
     ):
         sentiment = "undermining"
     else:
-        sentiment = "undermining" if sentiment_score > 0.25 else "supportive"
     weapon_flag = detect_weapon_language(text)
@@ -486,10 +512,10 @@ def analyze_single_message(text, thresholds):
         label for label, score in zip(LABELS, scores)
         if score > adjusted_thresholds[label]
     ]
-    # Early exit if nothing passed
     if not threshold_labels:
-        return 0.0, [], [], {"label": sentiment}, 1, 0.0, "supportive"
     top_patterns = sorted(
         [(label, score) for label, score in zip(LABELS, scores)],
@@ -497,34 +523,25 @@ def analyze_single_message(text, thresholds):
         reverse=True
     )[:2]
-    matched_scores = [
-        (label, score, PATTERN_WEIGHTS.get(label, 1.0))
-        for label, score in zip(LABELS, scores)
-        if score > adjusted_thresholds[label]
-    ]
-    # Cap subtle insults to avoid excessive abuse score
-    if (
-        len(threshold_labels) == 1 and "insults" in threshold_labels
-        and emotion_profile.get("neutral", 0) > 0.85
-    ):
-        abuse_score_raw = min(abuse_score_raw, 40)
     # Abuse score
-    abuse_score_raw = compute_abuse_score(matched_scores, sentiment)
-    # Weapon adjustment
     if weapon_flag:
-        abuse_score_raw = min(abuse_score_raw + 25, 100)
         if stage < 2:
             stage = 2
-    abuse_score = min(abuse_score_raw, 100 if "control" in threshold_labels else 95)
-    # Tone tag
     tone_tag = get_emotional_tone_tag(emotion_profile, sentiment, threshold_labels, abuse_score)
     # Remove recovery tag if tone is fake
     if "recovery" in threshold_labels and tone_tag == "forced accountability flip":
         threshold_labels.remove("recovery")
@@ -563,7 +580,7 @@ def analyze_single_message(text, thresholds):
     return abuse_score, threshold_labels, top_patterns, {"label": sentiment}, stage, darvo_score, tone_tag
-import spaces
 @spaces.GPU
 def analyze_composite(msg1, msg2, msg3, *answers_and_none):
@@ -612,7 +629,7 @@ def analyze_composite(msg1, msg2, msg3, *answers_and_none):
     immediate_threats = [detect_threat_motifs(m, THREAT_MOTIFS) for m, _ in active]
     flat_threats = [t for sublist in immediate_threats for t in sublist]
     threat_risk = "Yes" if flat_threats else "No"
-    results = [(analyze_single_message(m, THRESHOLDS.copy()), d) for m, d in active]
     abuse_scores = [r[0][0] for r in results]
     stages = [r[0][4] for r in results]
@@ -713,14 +730,18 @@ def analyze_composite(msg1, msg2, msg3, *answers_and_none):
             f"• Checklist Risk: {checklist_escalation_risk}\n"
             f"• Escalation Bump: +{escalation_bump} (from DARVO, tone, intensity, etc.)"
         )
-    # Composite Abuse Score
     composite_abuse_scores = []
-    for result, _ in results:
-        _, _, top_patterns, sentiment, _, _, _ = result
-        matched_scores = [(label, score, PATTERN_WEIGHTS.get(label, 1.0)) for label, score in top_patterns]
-        final_score = compute_abuse_score(matched_scores, sentiment["label"])
-        composite_abuse_scores.append(final_score)
-    composite_abuse = int(round(sum(composite_abuse_scores) / len(composite_abuse_scores)))
     most_common_stage = max(set(stages), key=stages.count)
     stage_text = RISK_STAGE_LABELS[most_common_stage]
@@ -763,7 +784,7 @@ def analyze_composite(msg1, msg2, msg3, *answers_and_none):
     pats[0][0] if (pats := r[0][2]) else "none"
     for r in results
     ]
-    timeline_image = generate_abuse_score_chart(dates_used, abuse_scores, top_labels)
     out += "\n\n" + escalation_text
     return out, timeline_image

 import spaces
 import torch
 import numpy as np
 import re
 import matplotlib.pyplot as plt
 import io
 from PIL import Image
 from datetime import datetime
+from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
+from motif_tagging import detect_motifs
+from functools import lru_cache
+from torch.nn.functional import sigmoid
+# ----- Models -----
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+# Emotion model (CPU for stability)
+emotion_pipeline = pipeline(
+    "text-classification",
+    model="j-hartmann/emotion-english-distilroberta-base",
+    top_k=6,
+    truncation=True,
+    device=-1  # Force CPU usage
+)
+# Abuse Model
+model_name = "SamanthaStorm/tether-multilabel-v4"  # Or your HF Hub path
+model = AutoModelForSequenceClassification.from_pretrained(model_name)
+tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
+model.to(device)
+# DARVO Model
+darvo_model = AutoModelForSequenceClassification.from_pretrained("SamanthaStorm/tether-darvo-regressor-v1")
+darvo_tokenizer = AutoTokenizer.from_pretrained("SamanthaStorm/tether-darvo-regressor-v1", use_fast=False)
+darvo_model.eval()
+darvo_model.to(device)
 def get_emotion_profile(text):
     emotions = emotion_pipeline(text)
 }
 PATTERN_WEIGHTS = {
+    "recovery": 0.5,
     "control": 1.4,
+    "gaslighting": 1.0,
     "guilt tripping": 0.9,
     "dismissiveness": 0.9,
     "blame shifting": 0.8,
 ]
 @spaces.GPU
 def compute_abuse_score(matched_scores, sentiment):
     """
         base_score *= 1.05  # Reduced
     # Sentiment modifier (more nuanced)
+    if emotion_profile.get("neutral", 0) > 0.85 and any(
+        scores[LABELS.index(l)] > thresholds[l] * 0.8  # Scale down thresholds for neutral sentiment
+        for l in ["control", "blame shifting", "insults", "guilt tripping"] # Consider more labels
+    ):
+        sentiment = "undermining"  # Only override if multiple patterns are present with moderate confidence
+    elif sentiment_score > 0.35:  # Increased threshold
+        sentiment = "undermining"
+    else:
+        sentiment = "supportive"
     # Reduce minimum score and threshold for activation
     if any(score > 0.9 for _, score, _ in matched_scores):  # Higher threshold
     return min(round(base_score, 1), 100.0)
+@lru_cache(maxsize=1024)  # Cache results for performance
 def analyze_single_message(text, thresholds):
     print("⚡ ENTERED analyze_single_message")
     stage = 1
     sentiment_score = emotion_profile.get("anger", 0) + emotion_profile.get("disgust", 0)
     # Get model scores
+    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True).to(device)
     with torch.no_grad():
         outputs = model(**inputs)
     scores = torch.sigmoid(outputs.logits.squeeze(0)).numpy()
     # Sentiment override
     if emotion_profile.get("neutral", 0) > 0.85 and any(
+        scores[LABELS.index(l)] > thresholds[l] * 0.8  # Scale down thresholds for neutral sentiment
+        for l in ["control", "blame shifting", "insults", "guilt tripping"]  # Consider more labels
     ):
+        sentiment = "undermining"  # Only override if multiple patterns are present with moderate confidence
+    elif sentiment_score > 0.35:  # Increased threshold
         sentiment = "undermining"
     else:
+        sentiment = "supportive"
     weapon_flag = detect_weapon_language(text)
         label for label, score in zip(LABELS, scores)
         if score > adjusted_thresholds[label]
     ]
+    matched_scores = [(label, score, PATTERN_WEIGHTS.get(label, 1.0)) for label, score in zip(LABELS, scores) if score > adjusted_thresholds[label]]
     if not threshold_labels:
+        return 0.0, [], [], {"label": sentiment}, 1, 0.0, None
     top_patterns = sorted(
         [(label, score) for label, score in zip(LABELS, scores)],
         reverse=True
     )[:2]
     # Abuse score
+    abuse_score = compute_abuse_score(matched_scores, sentiment) # Calculate before adjustments
     if weapon_flag:
+        abuse_score = min(abuse_score + 25, 100)  # Apply weapon adjustment directly to abuse_score
         if stage < 2:
             stage = 2
+    abuse_score = min(abuse_score, 100 if "control" in threshold_labels else 95) # Apply cap after weapon adjustment
     tone_tag = get_emotional_tone_tag(emotion_profile, sentiment, threshold_labels, abuse_score)
+    threshold_labels = [label for label, score in zip(LABELS, scores) if score > adjusted_thresholds[label]]
+    matched_scores = [(label, score, PATTERN_WEIGHTS.get(label, 1.0)) for label, score in zip(LABELS, scores) if score > adjusted_thresholds[label]]
+    if not threshold_labels:
+        return 0.0, [], [], {"label": sentiment}, 1, 0.0, None
     # Remove recovery tag if tone is fake
     if "recovery" in threshold_labels and tone_tag == "forced accountability flip":
         threshold_labels.remove("recovery")
     return abuse_score, threshold_labels, top_patterns, {"label": sentiment}, stage, darvo_score, tone_tag
 @spaces.GPU
 def analyze_composite(msg1, msg2, msg3, *answers_and_none):
     immediate_threats = [detect_threat_motifs(m, THREAT_MOTIFS) for m, _ in active]
     flat_threats = [t for sublist in immediate_threats for t in sublist]
     threat_risk = "Yes" if flat_threats else "No"
+    results = [(analyze_single_message(m.lower(), THRESHOLDS.copy()), d) for m, d in active]
     abuse_scores = [r[0][0] for r in results]
     stages = [r[0][4] for r in results]
             f"• Checklist Risk: {checklist_escalation_risk}\n"
             f"• Escalation Bump: +{escalation_bump} (from DARVO, tone, intensity, etc.)"
         )
+    # Composite Abuse Score (weighted average based on message length)
     composite_abuse_scores = []
+    message_lengths = [len(m.split()) for m, _ in active]
+    total_length = sum(message_lengths)
+    for result, length in zip(results, message_lengths):
+        abuse_score = result[0][0]
+        weight = length / total_length if total_length > 0 else 1 / len(results) if len(results) > 0 else 1
+        composite_abuse_scores.append(abuse_score * weight)
+    composite_abuse = int(round(sum(composite_abuse_scores)))
     most_common_stage = max(set(stages), key=stages.count)
     stage_text = RISK_STAGE_LABELS[most_common_stage]
     pats[0][0] if (pats := r[0][2]) else "none"
     for r in results
     ]
+    timeline_image = generate_abuse_score_chart(dates_used, abuse_scores, top_labels)
     out += "\n\n" + escalation_text
     return out, timeline_image