"""
Bias detection processor for analyzing political bias in text responses.

The module offers three independent detectors (sentiment, partisan vocabulary
and issue framing) plus ``compare_bias``, which runs all three on two texts
and summarises how they differ. Every detector is best-effort: on failure it
logs the error and returns a neutral default that carries an ``"error"`` key
instead of raising.
"""

import functools
import json  # noqa: F401  (not used below; kept so existing module attributes remain)
import logging
import os  # noqa: F401  (not used below; kept so existing module attributes remain)
import re

import numpy as np  # noqa: F401  (not used below; kept so existing module attributes remain)

# NLTK is only needed for the sentiment detector. Guarding the import lets the
# lexicon-based detectors keep working when it is missing; the sentiment
# detector then falls back to its neutral default.
try:
    import nltk
    from nltk.sentiment import SentimentIntensityAnalyzer
except ImportError:
    nltk = None
    SentimentIntensityAnalyzer = None

# CountVectorizer is not used by anything in this module; the name is kept
# available when scikit-learn is installed.
try:
    from sklearn.feature_extraction.text import CountVectorizer  # noqa: F401
except ImportError:
    CountVectorizer = None

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger('bias_detection')

# NLTK packages requested at import time.
_NLTK_RESOURCES = ("vader_lexicon", "punkt", "stopwords")

# |compound| at or above this value is reported as positive/negative sentiment.
_SENTIMENT_THRESHOLD = 0.25
# |lean_score| at or above this value is reported as a partisan leaning.
_PARTISAN_THRESHOLD = 0.2
# Maximum number of distinct example matches reported per frame.
_MAX_FRAME_EXAMPLES = 5


# Ensure NLTK resources are available
def download_nltk_resources():
    """
    Request the NLTK resources used for bias detection.

    Best-effort: any problem is logged and never raised, so importing this
    module cannot fail because a resource is unavailable.
    """
    if nltk is None:
        logger.warning("NLTK is not installed; sentiment analysis will return neutral defaults")
        return

    try:
        logger.info("Downloading NLTK resources for bias detection...")
        for resource in _NLTK_RESOURCES:
            # With quiet=True a failed download only shows up as a falsy
            # return value, so report it explicitly.
            if not nltk.download(resource, quiet=True):
                logger.warning("NLTK resource '%s' could not be downloaded", resource)
    except Exception as e:
        logger.error("Error downloading NLTK resources: %s", e)


download_nltk_resources()

# Dictionary of partisan-leaning words
# These are simplified examples; a real implementation would use a more comprehensive lexicon
PARTISAN_WORDS = {
    "liberal": [
        "progressive", "equity", "climate", "reform", "collective",
        "diversity", "inclusive", "sustainable", "justice", "regulation",
        "social", "community", "rights", "environment",
        "equality", "welfare", "public", "protection", "universal",
    ],
    "conservative": [
        "traditional", "freedom", "liberty", "individual", "faith",
        "values", "efficient", "deregulation", "patriot", "security",
        "family", "business", "market", "nation", "protect",
        "heritage", "responsibility", "constitution", "fiscal", "private",
    ],
}

# Dictionary of framing patterns
# NOTE(review): these patterns are unanchored (e.g. "care" also matches inside
# "scare") and several suffix groups are mandatory (e.g. r"tax(es|ation)" does
# not match a bare "tax"). Left unchanged here; confirm whether that is intended.
FRAMING_PATTERNS = {
    "economic": [
        r"econom(y|ic|ics)", r"tax(es|ation)", r"budget", r"spend(ing)",
        r"jobs?", r"wage", r"growth", r"inflation", r"invest(ment)?",
    ],
    "moral": [
        r"values?", r"ethic(s|al)", r"moral(s|ity)", r"right(s|eous)",
        r"wrong", r"good", r"bad", r"faith", r"belief", r"tradition(al)?",
    ],
    "security": [
        r"secur(e|ity)", r"defense", r"protect(ion)?", r"threat",
        r"danger(ous)?", r"safe(ty)?", r"nation(al)?", r"terror(ism|ist)",
    ],
    "social_welfare": [
        r"health(care)?", r"education", r"welfare", r"benefit",
        r"program", r"help", r"assist(ance)?", r"support", r"service", r"care",
    ],
}


@functools.lru_cache(maxsize=1)
def _get_sentiment_analyzer():
    """Build the VADER analyzer once and reuse it on later calls."""
    if SentimentIntensityAnalyzer is None:
        raise RuntimeError("NLTK is not installed; sentiment analysis is unavailable")
    return SentimentIntensityAnalyzer()


def _find_terms(text_lower, words):
    """Return all whole-word occurrences of ``words`` in ``text_lower``, grouped in lexicon order."""
    found = []
    # dict.fromkeys drops repeated lexicon entries (keeping order) so that a
    # word listed twice is not counted twice.
    for word in dict.fromkeys(words):
        found.extend(re.findall(r'\b' + re.escape(word) + r'\b', text_lower))
    return found


def detect_sentiment_bias(text):
    """
    Analyze the sentiment of a text to identify potential bias

    Args:
        text (str): The text to analyze

    Returns:
        dict: ``sentiment_scores`` (the analyzer's polarity scores),
            ``bias_direction`` ("positive", "negative" or "neutral") and
            ``bias_strength`` (0.0-1.0). On failure a neutral result with an
            additional ``error`` key is returned.
    """
    try:
        logger.info("Starting sentiment analysis on text (length: %d)", len(text))

        sentiment = _get_sentiment_analyzer().polarity_scores(text)
        compound = sentiment['compound']

        # Determine if sentiment indicates bias
        if compound >= _SENTIMENT_THRESHOLD:
            bias_direction = "positive"
            bias_strength = min(1.0, compound * 2)  # Scale to 0-1
        elif compound <= -_SENTIMENT_THRESHOLD:
            bias_direction = "negative"
            bias_strength = min(1.0, abs(compound * 2))  # Scale to 0-1
        else:
            bias_direction = "neutral"
            bias_strength = 0.0

        logger.info("Sentiment analysis complete. Direction: %s, Strength: %.2f",
                    bias_direction, bias_strength)

        return {
            "sentiment_scores": sentiment,
            "bias_direction": bias_direction,
            "bias_strength": bias_strength,
        }
    except Exception as e:
        logger.error("Error in sentiment analysis: %s", e)
        # Return a default neutral sentiment to prevent failures
        return {
            "sentiment_scores": {"pos": 0, "neg": 0, "neu": 1, "compound": 0},
            "bias_direction": "neutral",
            "bias_strength": 0.0,
            "error": str(e),
        }


def detect_partisan_leaning(text):
    """
    Analyze text for partisan-leaning language

    Args:
        text (str): The text to analyze

    Returns:
        dict: Per-side counts and matched terms, ``lean_score`` in [-1, 1]
            (negative = liberal, positive = conservative), ``leaning``
            ("liberal", "conservative" or "balanced") and ``strength``
            (0.0-1.0). On failure a balanced result with an additional
            ``error`` key is returned.
    """
    try:
        logger.info("Starting partisan leaning analysis on text (length: %d)", len(text))

        text_lower = text.lower()

        # Search for partisan words in text
        liberal_matches = _find_terms(text_lower, PARTISAN_WORDS["liberal"])
        conservative_matches = _find_terms(text_lower, PARTISAN_WORDS["conservative"])
        liberal_count = len(liberal_matches)
        conservative_count = len(conservative_matches)

        logger.info("Found %d liberal terms and %d conservative terms",
                    liberal_count, conservative_count)

        # Calculate partisan lean score (-1 to 1, negative = liberal, positive = conservative)
        total_count = liberal_count + conservative_count
        if total_count > 0:
            lean_score = (conservative_count - liberal_count) / total_count
        else:
            lean_score = 0

        # Determine leaning based on score
        if lean_score <= -_PARTISAN_THRESHOLD:
            leaning = "liberal"
            strength = min(1.0, abs(lean_score * 2))
        elif lean_score >= _PARTISAN_THRESHOLD:
            leaning = "conservative"
            strength = min(1.0, lean_score * 2)
        else:
            leaning = "balanced"
            strength = 0.0

        logger.info("Partisan analysis complete. Leaning: %s, Score: %.2f", leaning, lean_score)

        return {
            "liberal_count": liberal_count,
            "conservative_count": conservative_count,
            "liberal_terms": liberal_matches,
            "conservative_terms": conservative_matches,
            "lean_score": lean_score,
            "leaning": leaning,
            "strength": strength,
        }
    except Exception as e:
        logger.error("Error in partisan leaning analysis: %s", e)
        # Return default balanced values to prevent failures
        return {
            "liberal_count": 0,
            "conservative_count": 0,
            "liberal_terms": [],
            "conservative_terms": [],
            "lean_score": 0,
            "leaning": "balanced",
            "strength": 0.0,
            "error": str(e),
        }


def detect_framing_bias(text):
    """
    Analyze how the text frames issues

    Args:
        text (str): The text to analyze

    Returns:
        dict: ``framing_counts`` (matches per frame), ``framing_examples``
            (up to 5 distinct matched strings per frame, in first-seen
            order), ``framing_distribution`` (share of all matches per
            frame), ``dominant_frame`` (frame with the most matches, or
            "none") and ``frame_bias_strength`` (dominant share minus 0.25,
            floored at 0). On failure an empty result with an additional
            ``error`` key is returned.
    """
    try:
        logger.info("Starting framing analysis on text (length: %d)", len(text))

        text_lower = text.lower()

        framing_counts = {}
        framing_examples = {}

        # Count framing patterns
        for frame, patterns in FRAMING_PATTERNS.items():
            count = 0
            seen = {}  # insertion-ordered set of distinct matched strings
            for pattern in patterns:
                # finditer + group(0) yields the full matched text; findall
                # would return only the capture-group contents for patterns
                # such as r"econom(y|ic|ics)".
                for match in re.finditer(pattern, text_lower):
                    count += 1
                    seen.setdefault(match.group(0), None)
            framing_counts[frame] = count
            # Store up to 5 examples of each frame
            framing_examples[frame] = list(seen)[:_MAX_FRAME_EXAMPLES]

        logger.info("Frame counts: %s", framing_counts)

        # Calculate dominant frame
        total_framing = sum(framing_counts.values())

        if total_framing > 0:
            framing_distribution = {
                frame: count / total_framing for frame, count in framing_counts.items()
            }
            # On a tie the frame listed first in FRAMING_PATTERNS wins.
            dominant_frame = max(framing_counts, key=framing_counts.get)
            frame_bias_strength = max(0.0, framing_distribution[dominant_frame] - 0.25)
        else:
            dominant_frame = "none"
            frame_bias_strength = 0.0
            framing_distribution = {frame: 0.0 for frame in FRAMING_PATTERNS}

        logger.info("Framing analysis complete. Dominant frame: %s", dominant_frame)

        return {
            "framing_counts": framing_counts,
            "framing_examples": framing_examples,
            "framing_distribution": framing_distribution,
            "dominant_frame": dominant_frame,
            "frame_bias_strength": frame_bias_strength,
        }
    except Exception as e:
        logger.error("Error in framing analysis: %s", e)
        # Return default values to prevent failures
        return {
            "framing_counts": {frame: 0 for frame in FRAMING_PATTERNS},
            "framing_examples": {frame: [] for frame in FRAMING_PATTERNS},
            "framing_distribution": {frame: 0.0 for frame in FRAMING_PATTERNS},
            "dominant_frame": "none",
            "frame_bias_strength": 0.0,
            "error": str(e),
        }


def compare_bias(text1, text2, model_names=None):
    """
    Compare potential bias in two texts

    Args:
        text1 (str): First text to analyze
        text2 (str): Second text to analyze
        model_names (list): Optional names of models being compared; the
            defaults "Model 1" / "Model 2" are used when fewer than two
            names are given

    Returns:
        dict: Comparative bias analysis with the keys ``models``,
            ``comparative`` and one entry per model name holding that
            text's sentiment, partisan and framing results. If the analysis
            fails (including when a text is not a ``str``) a neutral result
            with an additional ``error`` key is returned instead of raising.
    """
    logger.info("Starting bias comparison analysis")

    # Set default model names if not provided
    if model_names is None or len(model_names) < 2:
        logger.info("Using default model names")
        model_names = ["Model 1", "Model 2"]
    else:
        logger.info("Using provided model names: %s", model_names)

    model1_name, model2_name = model_names[0], model_names[1]

    try:
        # Validate inside the try block so that bad input produces the error
        # result below rather than an exception for the caller.
        for label, text in (("text1", text1), ("text2", text2)):
            if not isinstance(text, str):
                raise TypeError(f"{label} must be a str, got {type(text).__name__}")

        logger.info("Text lengths - Text1: %d, Text2: %d", len(text1), len(text2))

        # Analyze each text
        sentiment_results1 = detect_sentiment_bias(text1)
        sentiment_results2 = detect_sentiment_bias(text2)

        partisan_results1 = detect_partisan_leaning(text1)
        partisan_results2 = detect_partisan_leaning(text2)

        framing_results1 = detect_framing_bias(text1)
        framing_results2 = detect_framing_bias(text2)

        # Determine if there's a significant difference in bias
        sentiment_difference = abs(
            sentiment_results1["bias_strength"] - sentiment_results2["bias_strength"]
        )

        # For partisan leaning, compare the scores (negative is liberal, positive is conservative)
        partisan_difference = abs(
            partisan_results1["lean_score"] - partisan_results2["lean_score"]
        )

        # Calculate overall bias difference
        overall_difference = (sentiment_difference + partisan_difference) / 2

        # Compare dominant frames: they count as different only when at least
        # one of the texts shows a noticeable framing skew.
        frame_difference = (
            framing_results1["dominant_frame"] != framing_results2["dominant_frame"]
            and (
                framing_results1["frame_bias_strength"] > 0.1
                or framing_results2["frame_bias_strength"] > 0.1
            )
        )

        logger.info("Differences calculated - Sentiment: %.2f, Partisan: %.2f",
                    sentiment_difference, partisan_difference)

        # Create comparative analysis
        comparative = {
            "sentiment": {
                model1_name: sentiment_results1["bias_direction"],
                model2_name: sentiment_results2["bias_direction"],
                "difference": sentiment_difference,
                "significant": sentiment_difference > 0.3,
            },
            "partisan": {
                model1_name: partisan_results1["leaning"],
                model2_name: partisan_results2["leaning"],
                "difference": partisan_difference,
                "significant": partisan_difference > 0.4,
            },
            "framing": {
                model1_name: framing_results1["dominant_frame"],
                model2_name: framing_results2["dominant_frame"],
                "different_frames": frame_difference,
            },
            "overall": {
                "difference": overall_difference,
                "significant_bias_difference": overall_difference > 0.35,
            },
        }

        # Assemble the complete result
        result = {
            "models": model_names,
            model1_name: {
                "sentiment": sentiment_results1,
                "partisan": partisan_results1,
                "framing": framing_results1,
            },
            model2_name: {
                "sentiment": sentiment_results2,
                "partisan": partisan_results2,
                "framing": framing_results2,
            },
            "comparative": comparative,
        }

        logger.info("Bias comparison complete. Result has %d top-level keys.", len(result))
        logger.info("Result keys: %s", result.keys())
        return result

    except Exception as e:
        # logger.exception appends the traceback to the message.
        logger.exception("Error in bias comparison: %s", e)

        # Return an error result that won't break visualization
        return {
            "models": model_names,
            "error": str(e),
            "comparative": {
                "sentiment": {model1_name: "neutral", model2_name: "neutral", "difference": 0, "significant": False},
                "partisan": {model1_name: "balanced", model2_name: "balanced", "difference": 0, "significant": False},
                "framing": {model1_name: "none", model2_name: "none", "different_frames": False},
                "overall": {"difference": 0, "significant_bias_difference": False},
            },
            model1_name: {
                "sentiment": {"bias_direction": "neutral", "bias_strength": 0},
                "partisan": {"leaning": "balanced", "strength": 0},
                "framing": {"dominant_frame": "none"},
            },
            model2_name: {
                "sentiment": {"bias_direction": "neutral", "bias_strength": 0},
                "partisan": {"leaning": "balanced", "strength": 0},
                "framing": {"dominant_frame": "none"},
            },
        }