import spacy
from textblob import TextBlob

# Load spaCy's small English pipeline
# (install it first with: python -m spacy download en_core_web_sm)
nlp = spacy.load('en_core_web_sm')

# Terms and phrases that may signal bias in an interview question,
# grouped loosely by theme (parenthood, age, race, class, disability, ...)
biased_terms = [
"motherhood", "fatherhood", "stay-at-home parent", "single parent", "working mom", "working dad",
"manpower", "man-hours", "man-made",
"young", "old", "youthful", "elderly", "fresh", "experienced", "seasoned", "retirement", "pensioner",
"generation gap", "junior", "senior",
"race", "ethnicity", "color", "origin", "black", "white", "Asian", "Hispanic", "minority", "majority", "ethnic", "racial", "caucasian", "African-American", "Latino", "foreigner", "native", "immigrant",
"rich", "poor", "wealthy", "impoverished", "affluent", "destitute", "low-income", "high-income", "upper class", "lower class", "social status", "blue-collar", "white-collar",
"able-bodied", "disabled", "handicapped", "impaired", "crippled", "invalid", "wheelchair-bound", "mentally challenged", "deaf", "blind",
"religion", "faith", "belief", "Christian", "Muslim", "Hindu", "Jewish", "atheist", "agnostic", "god", "divine", "holy", "sacred",
"gay", "lesbian", "bisexual", "heterosexual", "LGBT", "LGBTQIA", "coming out", "partner", "same-sex", "straight", "homosexual", "transgender",
"married", "single", "divorced", "widowed", "husband", "wife", "spouse", "children", "kids", "family",
"dumb", "homemaker", "breadwinner", "caretaker", "guardian", "dependent",
"accomplished", "inexperienced", "intermediate", "novice", "beginner", "skilled", "talented", "gifted",
"active", "energetic", "lively", "vigorous", "enthusiastic", "spirited", "dynamic",
"passive", "inactive", "lethargic", "sluggish", "apathetic", "unmotivated",
"introvert", "extrovert", "ambivert", "shy", "outgoing", "sociable", "reserved", "gregarious",
"optimistic", "pessimistic", "realistic", "pragmatic", "idealistic", "dreamer",
"curious", "inquisitive", "interested", "uninterested", "indifferent", "apathetic",
"brave", "courageous", "fearless", "bold", "daring", "audacious", "intrepid",
"scared", "frightened", "afraid", "timid", "cowardly", "nervous", "anxious",
"happy", "joyful", "cheerful", "content", "delighted", "pleased", "ecstatic",
"sad", "unhappy", "sorrowful", "depressed", "miserable", "melancholic",
"angry", "furious", "irate", "enraged", "mad", "upset", "annoyed", "frustrated",
"calm", "peaceful", "serene", "tranquil", "relaxed", "composed", "collected",
"confident", "assured", "self-assured", "self-confident", "assertive", "bold",
"insecure", "self-doubting", "unconfident", "hesitant", "tentative",
"loyal", "faithful", "trustworthy", "reliable", "dependable",
"disloyal", "unfaithful", "untrustworthy", "unreliable",
"generous", "kind", "benevolent", "charitable", "philanthropic", "magnanimous",
"selfish", "greedy", "stingy", "miserly", "self-centered", "egotistical",
"intelligent", "smart", "clever", "wise", "knowledgeable", "brilliant",
"dumb", "stupid", "foolish", "ignorant", "unintelligent",
"beautiful", "attractive", "handsome", "pretty", "gorgeous",
"ugly", "unattractive", "plain", "homely", "unsightly"
]
def screen_for_bias(question):
    tokens = {token.text.lower() for token in nlp(question)}
    lowered = question.lower()
    for term in biased_terms:
        if " " in term or "-" in term:
            # Multi-word/hyphenated phrases never match a single spaCy token,
            # so fall back to a substring check on the raw text.
            if term.lower() in lowered:
                return False  # Question contains a flagged phrase
        elif term.lower() in tokens:
            return False  # Question contains a flagged term
    return True  # Question is unbiased
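# Alternative sketch (not wired into the pipeline above): spaCy's PhraseMatcher
# matches multi-word phrases at the token level, avoiding substring false
# positives such as "old" inside "golden". The matcher name and helper below
# are illustrative; they reuse the `nlp` and `biased_terms` defined above.
from spacy.matcher import PhraseMatcher

_bias_matcher = PhraseMatcher(nlp.vocab, attr="LOWER")
_bias_matcher.add("BIASED_TERMS", [nlp.make_doc(term) for term in biased_terms])

def screen_for_bias_with_matcher(question):
    # True when the matcher finds no flagged term or phrase.
    return len(_bias_matcher(nlp(question))) == 0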
def screen_for_offensive_language(question):
    # TextBlob polarity ranges from -1.0 (very negative) to +1.0 (very positive).
    sentiment = TextBlob(question).sentiment
    if sentiment.polarity < -0.5:  # Threshold for strongly negative sentiment
        return False  # Question is offensive
    return True  # Question is not offensive
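# Variant sketch (an assumption, not the original screen): only flag a question
# when the negativity is also highly subjective (opinion-like), which reduces
# false flags on neutral but negatively-worded questions. The cutoff values
# are illustrative defaults, not tuned thresholds.
def screen_for_offensive_language_variant(question,
                                          polarity_cutoff=-0.5,
                                          subjectivity_cutoff=0.8):
    sentiment = TextBlob(question).sentiment
    # TextBlob subjectivity ranges from 0.0 (objective) to 1.0 (subjective).
    if sentiment.polarity < polarity_cutoff and sentiment.subjectivity > subjectivity_cutoff:
        return False
    return True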
def screen_questions(questions):
    """
    Screens a list of questions for bias and offensive language.
    Returns a tuple: (valid_questions, invalid_questions, accuracy, validity),
    where accuracy is the ratio of valid questions to total questions and
    validity holds one flag per question (0 = passed both screens, 1 = flagged).
    """
    valid_questions = []
    invalid_questions = []
    validity = []
    for question in questions:
        if screen_for_bias(question) and screen_for_offensive_language(question):
            valid_questions.append(question)
            validity.append(0)  # 0 = passed both screens
        else:
            invalid_questions.append(question)
            validity.append(1)  # 1 = flagged as biased or offensive
    accuracy = len(valid_questions) / len(questions) if questions else 0
    return valid_questions, invalid_questions, accuracy, validity
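# Convenience sketch (hypothetical helper, not used by the pipeline above):
# pair each question with its 0/1 flag so callers can see which questions
# were rejected without re-running the screens.
def label_questions(questions):
    _, _, _, validity = screen_questions(questions)
    return list(zip(questions, validity))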
if __name__ == "__main__":
    # For testing purposes: use a sample list of 4 questions.
    generated_questions = [
        "What motivated you to apply for this role?",
        "How do you handle tight deadlines and manage stress?",
        "Can you describe a challenging project you worked on?",
        "Do you think being young gives you an edge in today's market?"
    ]
    valid, invalid, acc, validity = screen_questions(generated_questions)
    print("Valid Questions:")
    for q in valid:
        print(q)
    print("\nInvalid Questions:")
    for q in invalid:
        print(q)
    print(f"\nAccuracy: {acc * 100:.1f}%")