Spaces:
Sleeping
Sleeping
Zekun Wu
committed on
Commit
·
076d436
1
Parent(s):
91e8e74
update
Browse files- util/evaluation.py +214 -47
util/evaluation.py
CHANGED
@@ -10,12 +10,66 @@ from scipy.stats import ttest_ind, friedmanchisquare, rankdata, ttest_rel
|
|
10 |
from statsmodels.stats.multicomp import pairwise_tukeyhsd
|
11 |
from scipy.stats import ttest_1samp
|
12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
def calculate_impact_ratio(selection_rates):
    """Calculate the impact ratio for each category.

    Each category's selection rate is divided by the highest selection
    rate, so the most-selected category has an impact ratio of 1.0.
    """
    top_rate = max(selection_rates.values())
    return {group: rate / top_rate for group, rate in selection_rates.items()}
|
18 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
def statistical_parity_difference(selection_rates):
|
20 |
"""Calculate statistical parity difference."""
|
21 |
most_selected_rate = max(selection_rates.values())
|
@@ -33,48 +87,34 @@ def statistical_tests(data):
|
|
33 |
rank_suffix = '_Rank'
|
34 |
score_suffix = '_Avg_Score'
|
35 |
|
36 |
-
# Calculate average ranks
|
37 |
rank_columns = [v + rank_suffix for v in variables]
|
38 |
average_ranks = data[rank_columns].mean()
|
39 |
average_scores = data[[v + score_suffix for v in variables]].mean()
|
40 |
|
41 |
-
# Statistical tests
|
42 |
rank_data = [data[col] for col in rank_columns]
|
|
|
43 |
|
44 |
-
|
45 |
-
pairs = [
|
46 |
-
('Privilege', 'Protect'),
|
47 |
-
('Protect', 'Neutral'),
|
48 |
-
('Privilege', 'Neutral')
|
49 |
-
]
|
50 |
|
51 |
-
|
52 |
-
|
53 |
-
}
|
54 |
-
|
55 |
-
for (var1, var2) in pairs:
|
56 |
-
pair_name_score = f'{var1}{score_suffix} vs {var2}{score_suffix}'
|
57 |
pair_rank_score = f'{var1}{rank_suffix} vs {var2}{rank_suffix}'
|
58 |
-
|
59 |
-
# Wilcoxon Signed-Rank Test
|
60 |
if len(data) > 20:
|
61 |
wilcoxon_stat, wilcoxon_p = wilcoxon(data[f'{var1}{rank_suffix}'], data[f'{var2}{rank_suffix}'])
|
62 |
else:
|
63 |
wilcoxon_stat, wilcoxon_p = np.nan, "Sample size too small for Wilcoxon test."
|
64 |
pairwise_results['Wilcoxon Test'][pair_rank_score] = {"Statistic": wilcoxon_stat, "p-value": wilcoxon_p}
|
65 |
|
66 |
-
# Levene's Test for Equality of Variances
|
67 |
-
levene_results = {
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
levene_results['Privilege vs Neutral'] = {"Statistic": levene_privilege_neutral.statistic,
|
75 |
-
"p-value": levene_privilege_neutral.pvalue}
|
76 |
-
levene_results['Protect vs Neutral'] = {"Statistic": levene_protect_neutral.statistic,
|
77 |
-
"p-value": levene_protect_neutral.pvalue}
|
78 |
|
79 |
# Calculate variances for ranks
|
80 |
variances = {col: data[col].var() for col in rank_columns}
|
@@ -84,36 +124,45 @@ def statistical_tests(data):
|
|
84 |
'Protect_Rank vs Neutral_Rank': variances['Protect_Rank'] > variances['Neutral_Rank']
|
85 |
}
|
86 |
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
}
|
92 |
impact_ratios_Avg_Score = calculate_impact_ratio(selection_rates_Avg_Score)
|
93 |
spd_result_Avg_Score = statistical_parity_difference(selection_rates_Avg_Score)
|
94 |
adverse_impact_Avg_Score = calculate_four_fifths_rule(impact_ratios_Avg_Score)
|
95 |
|
96 |
-
|
97 |
-
# rank version of bias metrics
|
98 |
-
selection_rates_rank = {
|
99 |
-
'Privilege': data['Privilege_Rank'].mean(),
|
100 |
-
'Protect': data['Protect_Rank'].mean(),
|
101 |
-
'Neutral': data['Neutral_Rank'].mean()
|
102 |
-
}
|
103 |
impact_ratios_rank = calculate_impact_ratio(selection_rates_rank)
|
104 |
spd_result_rank = statistical_parity_difference(selection_rates_rank)
|
105 |
adverse_impact_rank = calculate_four_fifths_rule(impact_ratios_rank)
|
106 |
|
107 |
-
|
108 |
# Friedman test
|
109 |
friedman_stat, friedman_p = friedmanchisquare(*rank_data)
|
110 |
-
|
111 |
-
rank_matrix = data[rank_columns].values
|
112 |
-
rank_matrix_transposed = np.transpose(rank_matrix)
|
113 |
posthoc_results = posthoc_nemenyi(rank_matrix_transposed)
|
114 |
-
#posthoc_results = posthoc_friedman(data, variables, rank_suffix)
|
115 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
116 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
117 |
|
118 |
results = {
|
119 |
"Average Ranks": average_ranks.to_dict(),
|
@@ -124,7 +173,7 @@ def statistical_tests(data):
|
|
124 |
"Post-hoc": posthoc_results
|
125 |
},
|
126 |
**pairwise_results,
|
127 |
-
"Levene's Test for Equality of Variances": levene_results,
|
128 |
"Pairwise Comparisons of Variances": pairwise_variances,
|
129 |
"Statistical Parity Difference": {
|
130 |
"Avg_Score": spd_result_Avg_Score,
|
@@ -137,12 +186,130 @@ def statistical_tests(data):
|
|
137 |
"Four-Fifths Rule": {
|
138 |
"Avg_Score": adverse_impact_Avg_Score,
|
139 |
"Rank": adverse_impact_rank
|
140 |
-
}
|
|
|
141 |
}
|
142 |
|
143 |
return results
|
144 |
|
145 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
146 |
def hellinger_distance(p, q):
    """Calculate the Hellinger distance between two probability distributions."""
    sqrt_p = np.sqrt(p)
    sqrt_q = np.sqrt(q)
    return np.sqrt(0.5 * np.sum((sqrt_p - sqrt_q) ** 2))
|
|
|
10 |
from statsmodels.stats.multicomp import pairwise_tukeyhsd
|
11 |
from scipy.stats import ttest_1samp
|
12 |
|
13 |
+
|
14 |
+
def test_statistic_variance_ratio(x, y):
|
15 |
+
return np.var(x, ddof=1) / np.var(y, ddof=1)
|
16 |
+
|
17 |
+
|
18 |
+
def test_statistic_mean_difference(x, y):
|
19 |
+
return np.mean(x) - np.mean(y)
|
20 |
+
|
21 |
+
|
22 |
+
def permutation_test_variance(x, y, num_permutations=10000):
    """Permutation test for equality of variances between two samples.

    Parameters
    ----------
    x, y : array-like
        The two samples to compare.
    num_permutations : int, optional
        Number of random relabelings to draw (default 10000). Uses the
        global NumPy random state, so results are reproducible under
        ``np.random.seed``.

    Returns
    -------
    tuple
        (observed sample-variance ratio var(x)/var(y), permutation p-value).
    """
    T_obs = test_statistic_variance_ratio(x, y)
    pooled_data = np.concatenate([x, y])
    n_A = len(x)  # split point; the second group keeps its original size implicitly

    perm_test_stats = np.empty(num_permutations)
    for i in range(num_permutations):
        # Under H0 the group labels are exchangeable: shuffle the pooled
        # sample and split it back into two groups of the original sizes.
        np.random.shuffle(pooled_data)
        perm_test_stats[i] = test_statistic_variance_ratio(
            pooled_data[:n_A], pooled_data[n_A:]
        )

    # The variance ratio is non-negative, so the original abs() was a
    # no-op; the p-value is the fraction of permuted statistics at least
    # as large as the observed one.
    p_value = np.mean(perm_test_stats >= T_obs)

    return T_obs, p_value
|
39 |
+
|
40 |
+
|
41 |
+
def permutation_test_mean(x, y, num_permutations=10000):
    """Two-sided permutation test for equality of means of two samples.

    Parameters
    ----------
    x, y : array-like
        The two samples to compare.
    num_permutations : int, optional
        Number of random relabelings to draw (default 10000). Uses the
        global NumPy random state, so results are reproducible under
        ``np.random.seed``.

    Returns
    -------
    tuple
        (observed mean difference mean(x) - mean(y), permutation p-value).
    """
    T_obs = test_statistic_mean_difference(x, y)
    pooled_data = np.concatenate([x, y])
    n_A = len(x)  # split point; the second group keeps its original size implicitly

    perm_test_stats = np.empty(num_permutations)
    for i in range(num_permutations):
        # Under H0 the group labels are exchangeable: shuffle the pooled
        # sample and split it back into two groups of the original sizes.
        np.random.shuffle(pooled_data)
        perm_test_stats[i] = test_statistic_mean_difference(
            pooled_data[:n_A], pooled_data[n_A:]
        )

    # Two-sided p-value: fraction of |permuted difference| at least as
    # large as |observed difference|.
    p_value = np.mean(np.abs(perm_test_stats) >= np.abs(T_obs))

    return T_obs, p_value
|
58 |
+
|
59 |
def calculate_impact_ratio(selection_rates):
    """Calculate the impact ratio for each category.

    Rates are normalized against the largest selection rate, so the
    most-selected category gets a ratio of 1.0.
    """
    highest = max(selection_rates.values())
    ratios = {}
    for category, rate in selection_rates.items():
        ratios[category] = rate / highest
    return ratios
|
64 |
|
65 |
+
def statistical_parity_difference(y_true, y_pred=None, reference_group='Privilege'):
    """Statistical parity difference of each group versus a reference group.

    *y_pred*, when provided, is used as the mapping of group name to
    selection rate; otherwise *y_true* is used. Returns a dict mapping
    each group to ``rate - reference_rate`` (the reference group maps to 0).
    """
    if y_pred is not None:
        selection_rates = y_pred
    else:
        selection_rates = y_true
    base_rate = selection_rates[reference_group]
    return {group: rate - base_rate for group, rate in selection_rates.items()}
|
70 |
+
|
71 |
+
|
72 |
+
|
73 |
def statistical_parity_difference(selection_rates):
|
74 |
"""Calculate statistical parity difference."""
|
75 |
most_selected_rate = max(selection_rates.values())
|
|
|
87 |
rank_suffix = '_Rank'
|
88 |
score_suffix = '_Avg_Score'
|
89 |
|
90 |
+
# Calculate average ranks and scores
|
91 |
rank_columns = [v + rank_suffix for v in variables]
|
92 |
average_ranks = data[rank_columns].mean()
|
93 |
average_scores = data[[v + score_suffix for v in variables]].mean()
|
94 |
|
95 |
+
# Statistical tests setup
|
96 |
rank_data = [data[col] for col in rank_columns]
|
97 |
+
pairs = [('Privilege', 'Protect'), ('Protect', 'Neutral'), ('Privilege', 'Neutral')]
|
98 |
|
99 |
+
pairwise_results = {'Wilcoxon Test': {}}
|
|
|
|
|
|
|
|
|
|
|
100 |
|
101 |
+
# Pairwise Wilcoxon Signed-Rank Test
|
102 |
+
for var1, var2 in pairs:
|
|
|
|
|
|
|
|
|
103 |
pair_rank_score = f'{var1}{rank_suffix} vs {var2}{rank_suffix}'
|
|
|
|
|
104 |
if len(data) > 20:
|
105 |
wilcoxon_stat, wilcoxon_p = wilcoxon(data[f'{var1}{rank_suffix}'], data[f'{var2}{rank_suffix}'])
|
106 |
else:
|
107 |
wilcoxon_stat, wilcoxon_p = np.nan, "Sample size too small for Wilcoxon test."
|
108 |
pairwise_results['Wilcoxon Test'][pair_rank_score] = {"Statistic": wilcoxon_stat, "p-value": wilcoxon_p}
|
109 |
|
110 |
+
# # Levene's Test for Equality of Variances
|
111 |
+
# levene_results = {
|
112 |
+
# 'Privilege vs Protect': levene(data['Privilege_Rank'], data['Protect_Rank']),
|
113 |
+
# 'Privilege vs Neutral': levene(data['Privilege_Rank'], data['Neutral_Rank']),
|
114 |
+
# 'Protect vs Neutral': levene(data['Protect_Rank'], data['Neutral_Rank'])
|
115 |
+
# }
|
116 |
+
#
|
117 |
+
# levene_results = {key: {"Statistic": res.statistic, "p-value": res.pvalue} for key, res in levene_results.items()}
|
|
|
|
|
|
|
|
|
118 |
|
119 |
# Calculate variances for ranks
|
120 |
variances = {col: data[col].var() for col in rank_columns}
|
|
|
124 |
'Protect_Rank vs Neutral_Rank': variances['Protect_Rank'] > variances['Neutral_Rank']
|
125 |
}
|
126 |
|
127 |
+
# Bias metrics calculations
|
128 |
+
selection_rates_Avg_Score = {v: data[f'{v}{score_suffix}'].mean() for v in variables}
|
129 |
+
selection_rates_rank = {v: data[f'{v}{rank_suffix}'].mean() for v in variables}
|
130 |
+
|
|
|
131 |
impact_ratios_Avg_Score = calculate_impact_ratio(selection_rates_Avg_Score)
|
132 |
spd_result_Avg_Score = statistical_parity_difference(selection_rates_Avg_Score)
|
133 |
adverse_impact_Avg_Score = calculate_four_fifths_rule(impact_ratios_Avg_Score)
|
134 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
135 |
impact_ratios_rank = calculate_impact_ratio(selection_rates_rank)
|
136 |
spd_result_rank = statistical_parity_difference(selection_rates_rank)
|
137 |
adverse_impact_rank = calculate_four_fifths_rule(impact_ratios_rank)
|
138 |
|
|
|
139 |
# Friedman test
|
140 |
friedman_stat, friedman_p = friedmanchisquare(*rank_data)
|
141 |
+
rank_matrix_transposed = np.transpose(data[rank_columns].values)
|
|
|
|
|
142 |
posthoc_results = posthoc_nemenyi(rank_matrix_transposed)
|
|
|
143 |
|
144 |
+
# Perform permutation tests for variances
|
145 |
+
T_priv_prot_var, p_priv_prot_var = permutation_test_variance(data['Privilege_Rank'], data['Protect_Rank'])
|
146 |
+
T_neut_prot_var, p_neut_prot_var = permutation_test_variance(data['Neutral_Rank'], data['Protect_Rank'])
|
147 |
+
T_neut_priv_var, p_neut_priv_var = permutation_test_variance(data['Neutral_Rank'], data['Privilege_Rank'])
|
148 |
+
|
149 |
+
# Perform permutation tests for means
|
150 |
+
T_priv_prot_mean, p_priv_prot_mean = permutation_test_mean(data['Privilege_Rank'], data['Protect_Rank'])
|
151 |
+
T_neut_prot_mean, p_neut_prot_mean = permutation_test_mean(data['Neutral_Rank'], data['Protect_Rank'])
|
152 |
+
T_neut_priv_mean, p_neut_priv_mean = permutation_test_mean(data['Neutral_Rank'], data['Privilege_Rank'])
|
153 |
|
154 |
+
permutation_results = {
|
155 |
+
"Permutation Tests for Variances": {
|
156 |
+
"Privilege vs. Protect": {"Statistic": T_priv_prot_var, "p-value": p_priv_prot_var},
|
157 |
+
"Neutral vs. Protect": {"Statistic": T_neut_prot_var, "p-value": p_neut_prot_var},
|
158 |
+
"Neutral vs. Privilege": {"Statistic": T_neut_priv_var, "p-value": p_neut_priv_var}
|
159 |
+
},
|
160 |
+
"Permutation Tests for Means": {
|
161 |
+
"Privilege vs. Protect": {"Statistic": T_priv_prot_mean, "p-value": p_priv_prot_mean},
|
162 |
+
"Neutral vs. Protect": {"Statistic": T_neut_prot_mean, "p-value": p_neut_prot_mean},
|
163 |
+
"Neutral vs. Privilege": {"Statistic": T_neut_priv_mean, "p-value": p_neut_priv_mean}
|
164 |
+
}
|
165 |
+
}
|
166 |
|
167 |
results = {
|
168 |
"Average Ranks": average_ranks.to_dict(),
|
|
|
173 |
"Post-hoc": posthoc_results
|
174 |
},
|
175 |
**pairwise_results,
|
176 |
+
#"Levene's Test for Equality of Variances": levene_results,
|
177 |
"Pairwise Comparisons of Variances": pairwise_variances,
|
178 |
"Statistical Parity Difference": {
|
179 |
"Avg_Score": spd_result_Avg_Score,
|
|
|
186 |
"Four-Fifths Rule": {
|
187 |
"Avg_Score": adverse_impact_Avg_Score,
|
188 |
"Rank": adverse_impact_rank
|
189 |
+
},
|
190 |
+
**permutation_results
|
191 |
}
|
192 |
|
193 |
return results
|
194 |
|
195 |
|
196 |
+
#
|
197 |
+
# def statistical_tests(data):
|
198 |
+
# """Perform various statistical tests to evaluate potential biases."""
|
199 |
+
# variables = ['Privilege', 'Protect', 'Neutral']
|
200 |
+
# rank_suffix = '_Rank'
|
201 |
+
# score_suffix = '_Avg_Score'
|
202 |
+
#
|
203 |
+
# # Calculate average ranks
|
204 |
+
# rank_columns = [v + rank_suffix for v in variables]
|
205 |
+
# average_ranks = data[rank_columns].mean()
|
206 |
+
# average_scores = data[[v + score_suffix for v in variables]].mean()
|
207 |
+
#
|
208 |
+
# # Statistical tests
|
209 |
+
# rank_data = [data[col] for col in rank_columns]
|
210 |
+
#
|
211 |
+
# # Pairwise tests
|
212 |
+
# pairs = [
|
213 |
+
# ('Privilege', 'Protect'),
|
214 |
+
# ('Protect', 'Neutral'),
|
215 |
+
# ('Privilege', 'Neutral')
|
216 |
+
# ]
|
217 |
+
#
|
218 |
+
# pairwise_results = {
|
219 |
+
# 'Wilcoxon Test': {}
|
220 |
+
# }
|
221 |
+
#
|
222 |
+
# for (var1, var2) in pairs:
|
223 |
+
# pair_name_score = f'{var1}{score_suffix} vs {var2}{score_suffix}'
|
224 |
+
# pair_rank_score = f'{var1}{rank_suffix} vs {var2}{rank_suffix}'
|
225 |
+
#
|
226 |
+
# # Wilcoxon Signed-Rank Test
|
227 |
+
# if len(data) > 20:
|
228 |
+
# wilcoxon_stat, wilcoxon_p = wilcoxon(data[f'{var1}{rank_suffix}'], data[f'{var2}{rank_suffix}'])
|
229 |
+
# else:
|
230 |
+
# wilcoxon_stat, wilcoxon_p = np.nan, "Sample size too small for Wilcoxon test."
|
231 |
+
# pairwise_results['Wilcoxon Test'][pair_rank_score] = {"Statistic": wilcoxon_stat, "p-value": wilcoxon_p}
|
232 |
+
#
|
233 |
+
# # Levene's Test for Equality of Variances
|
234 |
+
# levene_results = {}
|
235 |
+
# levene_privilege_protect = levene(data['Privilege_Rank'], data['Protect_Rank'])
|
236 |
+
# levene_privilege_neutral = levene(data['Privilege_Rank'], data['Neutral_Rank'])
|
237 |
+
# levene_protect_neutral = levene(data['Protect_Rank'], data['Neutral_Rank'])
|
238 |
+
#
|
239 |
+
# levene_results['Privilege vs Protect'] = {"Statistic": levene_privilege_protect.statistic,
|
240 |
+
# "p-value": levene_privilege_protect.pvalue}
|
241 |
+
# levene_results['Privilege vs Neutral'] = {"Statistic": levene_privilege_neutral.statistic,
|
242 |
+
# "p-value": levene_privilege_neutral.pvalue}
|
243 |
+
# levene_results['Protect vs Neutral'] = {"Statistic": levene_protect_neutral.statistic,
|
244 |
+
# "p-value": levene_protect_neutral.pvalue}
|
245 |
+
#
|
246 |
+
# # Calculate variances for ranks
|
247 |
+
# variances = {col: data[col].var() for col in rank_columns}
|
248 |
+
# pairwise_variances = {
|
249 |
+
# 'Privilege_Rank vs Protect_Rank': variances['Privilege_Rank'] > variances['Protect_Rank'],
|
250 |
+
# 'Privilege_Rank vs Neutral_Rank': variances['Privilege_Rank'] > variances['Neutral_Rank'],
|
251 |
+
# 'Protect_Rank vs Neutral_Rank': variances['Protect_Rank'] > variances['Neutral_Rank']
|
252 |
+
# }
|
253 |
+
#
|
254 |
+
# selection_rates_Avg_Score = {
|
255 |
+
# 'Privilege': data['Privilege_Avg_Score'].mean(),
|
256 |
+
# 'Protect': data['Protect_Avg_Score'].mean(),
|
257 |
+
# 'Neutral': data['Neutral_Avg_Score'].mean()
|
258 |
+
# }
|
259 |
+
# impact_ratios_Avg_Score = calculate_impact_ratio(selection_rates_Avg_Score)
|
260 |
+
# spd_result_Avg_Score = statistical_parity_difference(selection_rates_Avg_Score)
|
261 |
+
# adverse_impact_Avg_Score = calculate_four_fifths_rule(impact_ratios_Avg_Score)
|
262 |
+
#
|
263 |
+
#
|
264 |
+
# # rank version of bias metrics
|
265 |
+
# selection_rates_rank = {
|
266 |
+
# 'Privilege': data['Privilege_Rank'].mean(),
|
267 |
+
# 'Protect': data['Protect_Rank'].mean(),
|
268 |
+
# 'Neutral': data['Neutral_Rank'].mean()
|
269 |
+
# }
|
270 |
+
# impact_ratios_rank = calculate_impact_ratio(selection_rates_rank)
|
271 |
+
# spd_result_rank = statistical_parity_difference(selection_rates_rank)
|
272 |
+
# adverse_impact_rank = calculate_four_fifths_rule(impact_ratios_rank)
|
273 |
+
#
|
274 |
+
#
|
275 |
+
# # Friedman test
|
276 |
+
# friedman_stat, friedman_p = friedmanchisquare(*rank_data)
|
277 |
+
#
|
278 |
+
# rank_matrix = data[rank_columns].values
|
279 |
+
# rank_matrix_transposed = np.transpose(rank_matrix)
|
280 |
+
# posthoc_results = posthoc_nemenyi(rank_matrix_transposed)
|
281 |
+
# #posthoc_results = posthoc_friedman(data, variables, rank_suffix)
|
282 |
+
#
|
283 |
+
#
|
284 |
+
#
|
285 |
+
# results = {
|
286 |
+
# "Average Ranks": average_ranks.to_dict(),
|
287 |
+
# "Average Scores": average_scores.to_dict(),
|
288 |
+
# "Friedman Test": {
|
289 |
+
# "Statistic": friedman_stat,
|
290 |
+
# "p-value": friedman_p,
|
291 |
+
# "Post-hoc": posthoc_results
|
292 |
+
# },
|
293 |
+
# **pairwise_results,
|
294 |
+
# "Levene's Test for Equality of Variances": levene_results,
|
295 |
+
# "Pairwise Comparisons of Variances": pairwise_variances,
|
296 |
+
# "Statistical Parity Difference": {
|
297 |
+
# "Avg_Score": spd_result_Avg_Score,
|
298 |
+
# "Rank": spd_result_rank
|
299 |
+
# },
|
300 |
+
# "Disparate Impact Ratios": {
|
301 |
+
# "Avg_Score": impact_ratios_Avg_Score,
|
302 |
+
# "Rank": impact_ratios_rank
|
303 |
+
# },
|
304 |
+
# "Four-Fifths Rule": {
|
305 |
+
# "Avg_Score": adverse_impact_Avg_Score,
|
306 |
+
# "Rank": adverse_impact_rank
|
307 |
+
# }
|
308 |
+
# }
|
309 |
+
#
|
310 |
+
# return results
|
311 |
+
|
312 |
+
|
313 |
def hellinger_distance(p, q):
    """Calculate the Hellinger distance between two probability distributions."""
    diff = np.sqrt(p) - np.sqrt(q)
    return np.sqrt(np.sum(diff ** 2) * 0.5)
|