""" | |
Bias detection processor for analyzing political bias in text responses | |
""" | |
import nltk | |
from nltk.sentiment import SentimentIntensityAnalyzer | |
from sklearn.feature_extraction.text import CountVectorizer | |
import re | |
import json | |
import os | |
import numpy as np | |


# Ensure NLTK resources are available
def download_nltk_resources():
    """Download required NLTK resources if not already downloaded."""
    try:
        nltk.download('vader_lexicon', quiet=True)
    except Exception:
        # A failed download (e.g. no network access) should not prevent module
        # import; SentimentIntensityAnalyzer will raise later if the lexicon
        # is truly missing.
        pass


download_nltk_resources()


# Dictionary of partisan-leaning words
# These are simplified examples; a real implementation would use a more comprehensive lexicon
PARTISAN_WORDS = {
    "liberal": [
        "progressive", "equity", "climate", "reform", "collective",
        "diversity", "inclusive", "sustainable", "justice", "regulation"
    ],
    "conservative": [
        "traditional", "freedom", "liberty", "individual", "faith",
        "values", "efficient", "deregulation", "patriot", "security"
    ]
}

# Dictionary of framing patterns
FRAMING_PATTERNS = {
    "economic": [
        r"econom(y|ic|ics)", r"tax(es|ation)", r"budget", r"spend(ing)?",
        r"jobs?", r"wage", r"growth", r"inflation", r"invest(ment)?"
    ],
    "moral": [
        r"values?", r"ethic(s|al)", r"moral(s|ity)", r"right(s|eous)",
        r"wrong", r"good", r"bad", r"faith", r"belief", r"tradition(al)?"
    ],
    "security": [
        r"secur(e|ity)", r"defense", r"protect(ion)?", r"threat",
        r"danger(ous)?", r"safe(ty)?", r"nation(al)?", r"terror(ism|ist)"
    ],
    "social_welfare": [
        r"health(care)?", r"education", r"welfare", r"benefit", r"program",
        r"help", r"assist(ance)?", r"support", r"service", r"care"
    ]
}
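
# Each entry above is a regular expression, so one pattern covers several word
# forms: for example, r"econom(y|ic|ics)" matches "economy", "economic", and
# "economics", while r"invest(ment)?" matches both "invest" and "investment".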


def detect_sentiment_bias(text):
    """
    Analyze the sentiment of a text to identify potential bias.

    Args:
        text (str): The text to analyze

    Returns:
        dict: Sentiment analysis results
    """
    sia = SentimentIntensityAnalyzer()
    sentiment = sia.polarity_scores(text)

    # Determine if sentiment indicates bias
    if sentiment['compound'] >= 0.25:
        bias_direction = "positive"
        bias_strength = min(1.0, sentiment['compound'] * 2)  # Scale to 0-1
    elif sentiment['compound'] <= -0.25:
        bias_direction = "negative"
        bias_strength = min(1.0, abs(sentiment['compound']) * 2)  # Scale to 0-1
    else:
        bias_direction = "neutral"
        bias_strength = 0.0

    return {
        "sentiment_scores": sentiment,
        "bias_direction": bias_direction,
        "bias_strength": bias_strength
    }
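
# Illustrative usage (exact scores depend on the installed VADER lexicon, so
# the values below are indicative rather than exact):
#
#     result = detect_sentiment_bias("This policy is a disastrous, shameful failure.")
#     # result["bias_direction"] -> "negative"
#     # result["bias_strength"]  -> a value in (0.0, 1.0]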


def detect_partisan_leaning(text):
    """
    Analyze text for partisan-leaning language.

    Args:
        text (str): The text to analyze

    Returns:
        dict: Partisan leaning analysis results
    """
    text_lower = text.lower()

    # Count partisan words
    liberal_count = 0
    conservative_count = 0
    liberal_matches = []
    conservative_matches = []

    # Search for partisan words in text
    for word in PARTISAN_WORDS["liberal"]:
        matches = re.findall(r'\b' + word + r'\b', text_lower)
        if matches:
            liberal_count += len(matches)
            liberal_matches.extend(matches)

    for word in PARTISAN_WORDS["conservative"]:
        matches = re.findall(r'\b' + word + r'\b', text_lower)
        if matches:
            conservative_count += len(matches)
            conservative_matches.extend(matches)

    # Calculate partisan lean score (-1 to 1, negative = liberal, positive = conservative)
    total_count = liberal_count + conservative_count
    if total_count > 0:
        lean_score = (conservative_count - liberal_count) / total_count
    else:
        lean_score = 0.0

    # Determine leaning based on score
    if lean_score <= -0.2:
        leaning = "liberal"
        strength = min(1.0, abs(lean_score) * 2)
    elif lean_score >= 0.2:
        leaning = "conservative"
        strength = min(1.0, lean_score * 2)
    else:
        leaning = "balanced"
        strength = 0.0

    return {
        "liberal_count": liberal_count,
        "conservative_count": conservative_count,
        "liberal_terms": liberal_matches,
        "conservative_terms": conservative_matches,
        "lean_score": lean_score,
        "leaning": leaning,
        "strength": strength
    }
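
# Illustrative usage (counts are determined entirely by the PARTISAN_WORDS
# lexicon defined above):
#
#     result = detect_partisan_leaning("We need sustainable, inclusive reform.")
#     # result["liberal_count"]      -> 3 ("sustainable", "inclusive", "reform")
#     # result["conservative_count"] -> 0
#     # result["leaning"]            -> "liberal"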


def detect_framing_bias(text):
    """
    Analyze how the text frames issues.

    Args:
        text (str): The text to analyze

    Returns:
        dict: Framing analysis results
    """
    text_lower = text.lower()
    framing_counts = {}
    framing_examples = {}

    # Count framing patterns
    for frame, patterns in FRAMING_PATTERNS.items():
        framing_counts[frame] = 0
        framing_examples[frame] = []
        for pattern in patterns:
            # Use finditer with group(0) so the full matched word is recorded;
            # findall would return only the capturing-group contents
            # (e.g. "ics" for r"econom(y|ic|ics)").
            matches = [m.group(0) for m in re.finditer(pattern, text_lower)]
            if matches:
                framing_counts[frame] += len(matches)
                # Store up to 5 unique examples of each frame
                unique_matches = set(matches)
                framing_examples[frame].extend(list(unique_matches)[:5])

    # Calculate dominant frame
    total_framing = sum(framing_counts.values())
    framing_distribution = {}

    if total_framing > 0:
        for frame, count in framing_counts.items():
            framing_distribution[frame] = count / total_framing
        dominant_frame = max(framing_counts.items(), key=lambda x: x[1])[0]
        frame_bias_strength = max(0.0, framing_distribution[dominant_frame] - 0.25)
    else:
        dominant_frame = "none"
        frame_bias_strength = 0.0
        framing_distribution = {frame: 0.0 for frame in FRAMING_PATTERNS.keys()}

    return {
        "framing_counts": framing_counts,
        "framing_examples": framing_examples,
        "framing_distribution": framing_distribution,
        "dominant_frame": dominant_frame,
        "frame_bias_strength": frame_bias_strength
    }
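
# Illustrative usage (matches are driven entirely by FRAMING_PATTERNS above):
#
#     result = detect_framing_bias("Taxes and spending will shape jobs and wage growth.")
#     # result["dominant_frame"] -> "economic"
#     # result["framing_counts"] -> {"economic": 5, "moral": 0, "security": 0, "social_welfare": 0}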


def compare_bias(text1, text2, model_names=None):
    """
    Compare potential bias in two texts.

    Args:
        text1 (str): First text to analyze
        text2 (str): Second text to analyze
        model_names (list): Optional names of the models being compared

    Returns:
        dict: Comparative bias analysis
    """
    # Set default model names if not provided
    if model_names is None or len(model_names) < 2:
        model_names = ["Model 1", "Model 2"]
    model1_name, model2_name = model_names[0], model_names[1]

    # Analyze each text
    sentiment_results1 = detect_sentiment_bias(text1)
    sentiment_results2 = detect_sentiment_bias(text2)
    partisan_results1 = detect_partisan_leaning(text1)
    partisan_results2 = detect_partisan_leaning(text2)
    framing_results1 = detect_framing_bias(text1)
    framing_results2 = detect_framing_bias(text2)

    # Determine if there's a significant difference in bias
    sentiment_difference = abs(sentiment_results1["bias_strength"] - sentiment_results2["bias_strength"])

    # For partisan leaning, compare the scores (negative is liberal, positive is conservative)
    partisan_difference = abs(partisan_results1["lean_score"] - partisan_results2["lean_score"])

    # Calculate overall bias difference
    overall_difference = (sentiment_difference + partisan_difference) / 2

    # Compare dominant frames
    frame_difference = framing_results1["dominant_frame"] != framing_results2["dominant_frame"] and \
        (framing_results1["frame_bias_strength"] > 0.1 or framing_results2["frame_bias_strength"] > 0.1)

    # Create comparative analysis
    comparative = {
        "sentiment": {
            model1_name: sentiment_results1["bias_direction"],
            model2_name: sentiment_results2["bias_direction"],
            "difference": sentiment_difference,
            "significant": sentiment_difference > 0.3
        },
        "partisan": {
            model1_name: partisan_results1["leaning"],
            model2_name: partisan_results2["leaning"],
            "difference": partisan_difference,
            "significant": partisan_difference > 0.4
        },
        "framing": {
            model1_name: framing_results1["dominant_frame"],
            model2_name: framing_results2["dominant_frame"],
            "different_frames": frame_difference
        },
        "overall": {
            "difference": overall_difference,
            "significant_bias_difference": overall_difference > 0.35
        }
    }

    return {
        "models": model_names,
        model1_name: {
            "sentiment": sentiment_results1,
            "partisan": partisan_results1,
            "framing": framing_results1
        },
        model2_name: {
            "sentiment": sentiment_results2,
            "partisan": partisan_results2,
            "framing": framing_results2
        },
        "comparative": comparative
    }
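

if __name__ == "__main__":
    # Minimal smoke test with hand-written example texts and made-up model
    # names; both are illustrative only and not tied to any real model output.
    sample_a = (
        "We need progressive, sustainable reform: inclusive programs and "
        "stronger regulation to deliver climate justice."
    )
    sample_b = (
        "We should defend individual freedom and traditional values, cut "
        "spending, and pursue deregulation to keep the nation secure."
    )
    report = compare_bias(sample_a, sample_b, model_names=["Model A", "Model B"])
    print(json.dumps(report["comparative"], indent=2))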