# Page-header text captured along with this file (not part of the program):
# Spaces:
# No application file
# No application file
import spacy
from textblob import TextBlob

# Shared spaCy pipeline, loaded once at import time and reused by the
# screening helpers below (requires the 'en_core_web_sm' model to be installed).
nlp = spacy.load('en_core_web_sm')
# Comprehensive list of biased terms/phrases used to screen questions.
#
# NOTE: entries are stored as written -- some are capitalized ("Asian",
# "LGBT"), some are hyphenated ("low-income") and some span several words
# ("single parent"). Any matcher consuming this list has to compare
# case-insensitively and handle multi-token phrases.
# Each term appears exactly once.
biased_terms = [
    # Parenthood and gendered wording
    "motherhood", "fatherhood", "stay-at-home parent", "single parent",
    "working mom", "working dad",
    "manpower", "man-hours", "man-made",
    # Age
    "young", "old", "youthful", "elderly", "fresh", "experienced",
    "seasoned", "retirement", "pensioner",
    "generation gap", "junior", "senior",
    # Race, ethnicity and origin
    "race", "ethnicity", "color", "origin", "black", "white", "Asian",
    "Hispanic", "minority", "majority", "ethnic", "racial", "caucasian",
    "African-American", "Latino", "foreigner", "native", "immigrant",
    # Socio-economic status
    "rich", "poor", "wealthy", "impoverished", "affluent", "destitute",
    "low-income", "high-income", "upper class", "lower class",
    "social status", "blue-collar", "white-collar",
    # Disability
    "able-bodied", "disabled", "handicapped", "impaired", "crippled",
    "invalid", "wheelchair-bound", "mentally challenged", "deaf", "blind",
    # Religion
    "religion", "faith", "belief", "Christian", "Muslim", "Hindu", "Jewish",
    "atheist", "agnostic", "god", "divine", "holy", "sacred",
    # Sexual orientation and gender identity
    "gay", "lesbian", "bisexual", "heterosexual", "LGBT", "LGBTQIA",
    "coming out", "partner", "same-sex", "straight", "homosexual",
    "transgender",
    # Marital and family status
    "married", "single", "divorced", "widowed", "husband", "wife", "spouse",
    "children", "kids", "family",
    "homemaker", "breadwinner", "caretaker", "guardian", "dependent",
    # Skill level
    "accomplished", "inexperienced", "intermediate", "novice", "beginner",
    "skilled", "talented", "gifted",
    # Energy and temperament
    "active", "energetic", "lively", "vigorous", "enthusiastic", "spirited",
    "dynamic",
    "passive", "inactive", "lethargic", "sluggish", "apathetic",
    "unmotivated",
    "introvert", "extrovert", "ambivert", "shy", "outgoing", "sociable",
    "reserved", "gregarious",
    "optimistic", "pessimistic", "realistic", "pragmatic", "idealistic",
    "dreamer",
    "curious", "inquisitive", "interested", "uninterested", "indifferent",
    "brave", "courageous", "fearless", "bold", "daring", "audacious",
    "intrepid",
    "scared", "frightened", "afraid", "timid", "cowardly", "nervous",
    "anxious",
    # Mood
    "happy", "joyful", "cheerful", "content", "delighted", "pleased",
    "ecstatic",
    "sad", "unhappy", "sorrowful", "depressed", "miserable", "melancholic",
    "angry", "furious", "irate", "enraged", "mad", "upset", "annoyed",
    "frustrated",
    "calm", "peaceful", "serene", "tranquil", "relaxed", "composed",
    "collected",
    # Confidence
    "confident", "assured", "self-assured", "self-confident", "assertive",
    "insecure", "self-doubting", "unconfident", "hesitant", "tentative",
    # Character
    "loyal", "faithful", "trustworthy", "reliable", "dependable",
    "disloyal", "unfaithful", "untrustworthy", "unreliable",
    "generous", "kind", "benevolent", "charitable", "philanthropic",
    "magnanimous",
    "selfish", "greedy", "stingy", "miserly", "self-centered", "egotistical",
    # Intelligence
    "intelligent", "smart", "clever", "wise", "knowledgeable", "brilliant",
    "dumb", "stupid", "foolish", "ignorant", "unintelligent",
    # Appearance
    "beautiful", "attractive", "handsome", "pretty", "gorgeous",
    "ugly", "unattractive", "plain", "homely", "unsightly",
]
def screen_for_bias(question):
    """Return True when *question* contains none of the ``biased_terms``.

    Matching is case-insensitive and works on token sequences, so
    capitalized entries ("Asian"), hyphenated entries ("low-income") and
    multi-word entries ("single parent") are detected as well as single
    lowercase words.

    Args:
        question: The question text to screen.

    Returns:
        False if any biased term occurs in the question (question is
        biased), True otherwise (question is unbiased).
    """
    # Only token texts are needed, so run the tokenizer alone instead of the
    # full pipeline.
    question_tokens = [token.text.lower() for token in nlp.make_doc(question)]

    for term in biased_terms:
        # Tokenize the term with the same tokenizer so that hyphenated and
        # multi-word terms split exactly like they do inside the question.
        term_tokens = [token.text.lower() for token in nlp.make_doc(term)]
        if not term_tokens:
            continue  # an empty term would otherwise match everywhere
        width = len(term_tokens)
        for start in range(len(question_tokens) - width + 1):
            if question_tokens[start:start + width] == term_tokens:
                return False  # Question is biased
    return True  # Question is unbiased
def screen_for_offensive_language(question, threshold=-0.5):
    """Return True when *question* is not considered offensive.

    Offensiveness is approximated by TextBlob's sentiment polarity: a
    question whose polarity falls below *threshold* is rejected.

    NOTE(review): sentiment polarity is only a proxy -- a strongly negative
    but harmless question can be rejected, and an offensive one with
    neutral wording can pass.

    Args:
        question: The question text to screen.
        threshold: Lowest acceptable polarity. Defaults to -0.5, the value
            that was previously hard-coded.

    Returns:
        False if the polarity is below *threshold* (question is offensive),
        True otherwise (question is not offensive).
    """
    polarity = TextBlob(question).sentiment.polarity
    if polarity < threshold:  # Threshold for negative sentiment
        return False  # Question is offensive
    return True  # Question is not offensive
def screen_questions(questions):
    """Screen questions for bias and offensive language.

    A question is valid only if it passes both ``screen_for_bias`` and
    ``screen_for_offensive_language``.

    Args:
        questions: Iterable of question strings. It is materialized once, so
            generators and other non-sized iterables are accepted.

    Returns:
        A 4-tuple ``(valid_questions, invalid_questions, accuracy, validity)``:

        * ``valid_questions`` -- questions that passed both screens, in
          input order.
        * ``invalid_questions`` -- questions that failed either screen, in
          input order.
        * ``accuracy`` -- ratio of valid questions to total questions, or 0
          when there are no questions.
        * ``validity`` -- one flag per input question, in input order:
          0 for a valid question, 1 for an invalid one.
    """
    # Materialize so that len() and the emptiness check work for any iterable.
    questions = list(questions)

    valid_questions = []
    invalid_questions = []
    validity = []
    for question in questions:
        if screen_for_bias(question) and screen_for_offensive_language(question):
            valid_questions.append(question)
            validity.append(0)
        else:
            invalid_questions.append(question)
            validity.append(1)

    # Guard against division by zero on empty input.
    accuracy = len(valid_questions) / len(questions) if questions else 0
    return valid_questions, invalid_questions, accuracy, validity
if __name__ == "__main__":
    # For testing purposes: use a sample list of 4 questions.
    generated_questions = [
        "What motivated you to apply for this role?",
        "How do you handle tight deadlines and manage stress?",
        "Can you describe a challenging project you worked on?",
        "Do you think being young gives you an edge in today's market?",
    ]

    # screen_questions returns four values; the per-question validity flags
    # are not needed for this demo, but must still be unpacked to avoid a
    # "too many values to unpack" ValueError.
    valid, invalid, acc, _validity = screen_questions(generated_questions)

    print("Valid Questions:")
    for q in valid:
        print(q)
    print("\nInvalid Questions:")
    for q in invalid:
        print(q)
    print('Accuracy is ', acc * 100)