import spacy
from textblob import TextBlob

nlp = spacy.load('en_core_web_sm')

# Comprehensive list of biased terms and phrases to screen against.
biased_terms = [
    "motherhood", "fatherhood", "stay-at-home parent", "single parent",
    "working mom", "working dad", "manpower", "man-hours", "man-made",
    "young", "old", "youthful", "elderly", "fresh", "experienced", "seasoned",
    "retirement", "pensioner", "generation gap", "junior", "senior",
    "race", "ethnicity", "color", "origin", "black", "white", "Asian", "Hispanic",
    "minority", "majority", "ethnic", "racial", "caucasian", "African-American",
    "Latino", "foreigner", "native", "immigrant",
    "rich", "poor", "wealthy", "impoverished", "affluent", "destitute",
    "low-income", "high-income", "upper class", "lower class", "social status",
    "blue-collar", "white-collar",
    "able-bodied", "disabled", "handicapped", "impaired", "crippled", "invalid",
    "wheelchair-bound", "mentally challenged", "deaf", "blind",
    "religion", "faith", "belief", "Christian", "Muslim", "Hindu", "Jewish",
    "atheist", "agnostic", "god", "divine", "holy", "sacred",
    "gay", "lesbian", "bisexual", "heterosexual", "LGBT", "LGBTQIA", "coming out",
    "partner", "same-sex", "straight", "homosexual", "transgender",
    "married", "single", "divorced", "widowed", "husband", "wife", "spouse",
    "children", "kids", "family", "homemaker", "breadwinner", "caretaker",
    "guardian", "dependent",
    "accomplished", "inexperienced", "intermediate", "novice", "beginner",
    "skilled", "talented", "gifted",
    "active", "energetic", "lively", "vigorous", "enthusiastic", "spirited",
    "dynamic", "passive", "inactive", "lethargic", "sluggish", "apathetic",
    "unmotivated",
    "introvert", "extrovert", "ambivert", "shy", "outgoing", "sociable",
    "reserved", "gregarious",
    "optimistic", "pessimistic", "realistic", "pragmatic", "idealistic", "dreamer",
    "curious", "inquisitive", "interested", "uninterested", "indifferent",
    "brave", "courageous", "fearless", "bold", "daring", "audacious", "intrepid",
    "scared", "frightened", "afraid", "timid", "cowardly", "nervous", "anxious",
    "happy", "joyful", "cheerful", "content", "delighted", "pleased", "ecstatic",
    "sad", "unhappy", "sorrowful", "depressed", "miserable", "melancholic",
    "angry", "furious", "irate", "enraged", "mad", "upset", "annoyed", "frustrated",
    "calm", "peaceful", "serene", "tranquil", "relaxed", "composed", "collected",
    "confident", "assured", "self-assured", "self-confident", "assertive",
    "insecure", "self-doubting", "unconfident", "hesitant", "tentative",
    "loyal", "faithful", "trustworthy", "reliable", "dependable",
    "disloyal", "unfaithful", "untrustworthy", "unreliable",
    "generous", "kind", "benevolent", "charitable", "philanthropic", "magnanimous",
    "selfish", "greedy", "stingy", "miserly", "self-centered", "egotistical",
    "intelligent", "smart", "clever", "wise", "knowledgeable", "brilliant",
    "dumb", "stupid", "foolish", "ignorant", "unintelligent",
    "beautiful", "attractive", "handsome", "pretty", "gorgeous",
    "ugly", "unattractive", "plain", "homely", "unsightly",
]


def screen_for_bias(question):
    """Return True if the question contains no biased term or phrase, False otherwise."""
    doc = nlp(question)
    tokens = {token.text.lower() for token in doc}
    text = question.lower()
    for term in biased_terms:
        term_lower = term.lower()  # case-insensitive comparison
        if " " in term_lower or "-" in term_lower:
            # Multi-word and hyphenated phrases never surface as a single spaCy
            # token, so match them against the raw lowercased text instead.
            if term_lower in text:
                return False  # Question is biased
        elif term_lower in tokens:
            return False  # Question is biased
    return True  # Question is unbiased


def screen_for_offensive_language(question):
    """Return True unless the question's sentiment is strongly negative."""
    sentiment = TextBlob(question).sentiment
    if sentiment.polarity < -0.5:  # Threshold for strongly negative sentiment
        return False  # Question is offensive
    return True  # Question is not offensive


def screen_questions(questions):
    """
    Screen a list of questions for bias and offensive language.

    Returns a tuple (valid_questions, invalid_questions, accuracy, validity), where
    accuracy is the ratio of valid questions to total questions and validity is a
    per-question flag list (0 = passed screening, 1 = flagged).
    """
    valid_questions = []
    invalid_questions = []
    validity = []
    for question in questions:
        if screen_for_bias(question) and screen_for_offensive_language(question):
            valid_questions.append(question)
            validity.append(0)
        else:
            invalid_questions.append(question)
            validity.append(1)
    accuracy = len(valid_questions) / len(questions) if questions else 0
    return valid_questions, invalid_questions, accuracy, validity


if __name__ == "__main__":
    # For testing purposes: use a sample list of 4 questions.
    generated_questions = [
        "What motivated you to apply for this role?",
        "How do you handle tight deadlines and manage stress?",
        "Can you describe a challenging project you worked on?",
        "Do you think being young gives you an edge in today's market?",
    ]
    valid, invalid, acc, validity = screen_questions(generated_questions)
    print("Valid Questions:")
    for q in valid:
        print(q)
    print("\nInvalid Questions:")
    for q in invalid:
        print(q)
    print("Accuracy is", acc * 100)
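

# Optional sketch, not used by the pipeline above: spaCy's PhraseMatcher can match
# multi-word phrases such as "stay-at-home parent" at the token level rather than via
# substring checks. The helper name below is illustrative, not an existing part of
# this script.
def screen_for_bias_with_phrasematcher(question):
    """Return True when the PhraseMatcher finds no biased term or phrase in the question."""
    from spacy.matcher import PhraseMatcher

    matcher = PhraseMatcher(nlp.vocab, attr="LOWER")  # attr="LOWER" makes matching case-insensitive
    matcher.add("BIASED", [nlp.make_doc(term) for term in biased_terms])
    return len(matcher(nlp(question))) == 0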