Spaces:
Build error
Build error
Merge branch 'main' of https://huggingface.co/spaces/huggingface/data-measurements-tool-2 into main
Browse files
- data_measurements/streamlit_utils.py +7 -1
- requirements.txt +2 -2
data_measurements/streamlit_utils.py
CHANGED
|
@@ -434,10 +434,16 @@ def npmi_show(paired_results):
|
|
| 434 |
s.index.name = "word"
|
| 435 |
npmi_cols = s.filter(like="npmi").columns
|
| 436 |
count_cols = s.filter(like="count").columns
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 437 |
# TODO: This is very different look than the duplicates table above. Should probably standardize.
|
| 438 |
cm = sns.palplot(sns.diverging_palette(270, 36, s=99, l=48, n=16))
|
| 439 |
out_df = (
|
| 440 |
-
s.style.background_gradient(subset=npmi_cols, cmap=cm)
|
| 441 |
.format(subset=npmi_cols, formatter="{:,.3f}")
|
| 442 |
.format(subset=count_cols, formatter=int)
|
| 443 |
.set_properties(
|
|
|
|
| 434 |
s.index.name = "word"
|
| 435 |
npmi_cols = s.filter(like="npmi").columns
|
| 436 |
count_cols = s.filter(like="count").columns
|
| 437 |
+
if s.shape[0] > 10000:
|
| 438 |
+
bias_thres = max(abs(s["npmi-bias"][5000]), abs(s["npmi-bias"][-5000]))
|
| 439 |
+
print(f"filtering with bias threshold: {bias_thres}")
|
| 440 |
+
s_filtered = s[s["npmi-bias"].abs() > bias_thres]
|
| 441 |
+
else:
|
| 442 |
+
s_filtered = s
|
| 443 |
# TODO: This is very different look than the duplicates table above. Should probably standardize.
|
| 444 |
cm = sns.palplot(sns.diverging_palette(270, 36, s=99, l=48, n=16))
|
| 445 |
out_df = (
|
| 446 |
+
s_filtered.style.background_gradient(subset=npmi_cols, cmap=cm)
|
| 447 |
.format(subset=npmi_cols, formatter="{:,.3f}")
|
| 448 |
.format(subset=count_cols, formatter=int)
|
| 449 |
.set_properties(
|
requirements.txt
CHANGED
|
@@ -10,7 +10,7 @@ iso_639==0.4.5
|
|
| 10 |
datasets==1.15.1
|
| 11 |
powerlaw==1.5
|
| 12 |
numpy==1.19.5
|
| 13 |
-
pandas==1.
|
| 14 |
dataclasses==0.6
|
| 15 |
iso639==0.1.4
|
| 16 |
python_igraph==0.9.6
|
|
@@ -23,4 +23,4 @@ numexpr==2.7.3
|
|
| 23 |
scikit-learn~=0.24.2
|
| 24 |
scipy~=1.7.3
|
| 25 |
tqdm~=4.62.3
|
| 26 |
-
pyarrow~=6.0.1
|
|
|
|
| 10 |
datasets==1.15.1
|
| 11 |
powerlaw==1.5
|
| 12 |
numpy==1.19.5
|
| 13 |
+
pandas==1.0.0
|
| 14 |
dataclasses==0.6
|
| 15 |
iso639==0.1.4
|
| 16 |
python_igraph==0.9.6
|
|
|
|
| 23 |
scikit-learn~=0.24.2
|
| 24 |
scipy~=1.7.3
|
| 25 |
tqdm~=4.62.3
|
| 26 |
+
pyarrow~=6.0.1
|