Commit dba927a
Parent(s): dd7b0db

Phase II Version 1

Files changed:
- app.py +39 -13
- ui/evaluation.py +0 -18
- ui/leaderboard.py +16 -30
- ui/submission.py +3 -4
app.py
CHANGED
@@ -3,7 +3,7 @@
 
 import pandas as pd
 import gradio as gr
-from ui.leaderboard import render_leader_board, render_info_html, render_citation, render_dataset_list
+from ui.leaderboard import render_leader_board, render_info_html, render_citation
 from ui.evaluation import render_eval_info
 from ui.coming_soon import render_coming_soon
 from ui.submission import render_submission_page
@@ -15,8 +15,14 @@ import os
 import json
 
 REPO_ID = os.getenv('REPO_ID')
-DB_ERR_PATH = './data/data/eers.csv'
+
+DB_ERR_PATH = './data/data/eers.csv'
+DB_ACC_PATH = './data/data/accuracy.csv'
+DB_F1_PATH = './data/data/f1.csv'
+
 CITATIONS_PATH = f'./data/data/model_citations.json'
+LICENSES_PATH = f'./data/data/model_licenses.json'
+PARAMS_PATH = f'./data/data/model_parameter_count.json'
 
 if not os.path.exists('./data/data'):
     snapshot_download(repo_id=REPO_ID,
@@ -25,33 +31,53 @@ if not os.path.exists('./data/data'):
 with open(CITATIONS_PATH, 'r') as f:
     model_citations = json.load(f)
 
+with open(LICENSES_PATH, 'r') as f:
+    model_licenses = json.load(f)
+
+with open(PARAMS_PATH, 'r') as f:
+    model_params = json.load(f)
+
+
 # Load leaderboard data
 leaderboard_df_err = load_leaderboard(DB_ERR_PATH)
+leaderboard_df_f1 = load_leaderboard(DB_F1_PATH)
+leaderboard_df_acc = load_leaderboard(DB_ACC_PATH)
+
+
 
 def create_ui():
-    with gr.Blocks(theme=gr.themes.Soft(text_size=gr.themes.sizes.text_md)) as demo:
-
-        gr.Image(f'./data/data/df_arena.jpg', interactive=False,
+    # with gr.Blocks(theme=gr.themes.Soft(text_size=gr.themes.sizes.text_md)) as demo:
+    with gr.Blocks(theme=gr.themes.Soft(text_size=gr.themes.sizes.text_md)) as demo:
+
+        gr.Image(f'./data/data/df_arena.jpg', interactive=False,
                  show_fullscreen_button=False, show_share_button=False, show_label=False)
 
         with gr.Tabs():
            with gr.Tab("🏆 Leaderboard"):
                with gr.Column():
                    render_info_html()
-                   render_leader_board(leaderboard_df_err, model_citations)
-
+                   render_leader_board(leaderboard_df_err, model_citations, model_licenses, model_params, True)
+                   gr.HTML("<div style='text-align:center; font-style:italic;'>Table: EER (%)</div>")
+                   gr.Markdown("<br>")  # add spacing
+                   render_leader_board(leaderboard_df_acc, model_citations, model_licenses, model_params, False)
+                   gr.HTML("<div style='text-align:center; font-style:italic;'>Table: Accuracy (%)</div>")
+                   gr.Markdown("<br>")  # add spacing
+                   render_leader_board(leaderboard_df_f1, model_citations, model_licenses, model_params, False)
+                   gr.HTML("<div style='text-align:center; font-style:italic;'>Table: F1 scores</div>")
+                   gr.Markdown("<br>")  # add spacing
                    render_citation()
-                   render_dataset_list()
 
-           with gr.Tab("📊
+           with gr.Tab("📊 Compute your scores"):
                render_eval_info()
 
            with gr.Tab("🤖 Submit your own system !"):
                render_submission_page()
-
-           with gr.Tab("🚀 Coming Soon"):
-               render_coming_soon()
+
    return demo
 
 # Launch the app
-create_ui().launch()
+# create_ui().launch(share=True)
+
+if __name__ == "__main__":
+    demo = create_ui()
+    demo.launch()
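The `load_leaderboard` helper called above is imported elsewhere in app.py and its definition is not part of this diff. A minimal sketch of what such a helper might look like, assuming each metric CSV holds one row per system with per-dataset score columns; only the function name and the CSV paths are confirmed by the commit:

```python
# Hypothetical sketch of the load_leaderboard helper (real definition not in
# this diff); assumes a 'System' column followed by per-dataset score columns.
import os

import pandas as pd


def load_leaderboard(path: str) -> pd.DataFrame:
    """Read a metric CSV; return an empty frame if the file is missing,
    which matches the empty-DataFrame fallback in render_leader_board."""
    if not os.path.exists(path):
        return pd.DataFrame()
    return pd.read_csv(path)
```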
ui/evaluation.py
CHANGED
@@ -3,24 +3,6 @@ import gradio as gr
 def render_eval_info():
     text = r"""
 
-We use **Equal Error Rate (EER %)**, a standard metric used in biometric and anti-spoofing systems.
-
-### **What is EER?**
-Equal Error Rate (EER) is a performance metric used to evaluate biometric systems. It represents the point at which the **False Acceptance Rate (FAR)** and **False Rejection Rate (FRR)** are equal. A lower EER indicates a more accurate system.
-
-#### **False Acceptance Rate (FAR)**
-FAR is the proportion of **unauthorized** users incorrectly accepted by the system.
-
-$FAR = \frac{\text{False Acceptances}}{\text{Total Imposter Attempts}}$
-
-#### **False Rejection Rate (FRR)**
-FRR is the proportion of **genuine** users incorrectly rejected by the system.
-
-$FRR = \frac{\text{False Rejections}}{\text{Total Genuine Attempts}}$
-
-
-- EER is the point at which FAR and FRR are equal.
-
 ### How to compute your own EER score file?
 
 In order to streamline the evaluation process across many models and datasets, we
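The text removed above defined EER through FAR and FRR. For reference, a minimal sketch of computing EER from raw detection scores following exactly those definitions; the score arrays and threshold sweep are illustrative, not the leaderboard's actual scoring code:

```python
# Illustrative EER computation: sweep thresholds, find where FAR == FRR.
import numpy as np


def compute_eer(genuine_scores: np.ndarray, impostor_scores: np.ndarray) -> float:
    """Return the EER (%), the operating point where FAR and FRR are equal."""
    thresholds = np.sort(np.concatenate([genuine_scores, impostor_scores]))
    # FAR: impostor attempts accepted (score >= threshold) / total impostor attempts
    far = np.array([(impostor_scores >= t).mean() for t in thresholds])
    # FRR: genuine attempts rejected (score < threshold) / total genuine attempts
    frr = np.array([(genuine_scores < t).mean() for t in thresholds])
    # The curves cross where |FAR - FRR| is smallest; average the two rates there
    idx = np.argmin(np.abs(far - frr))
    return 100 * (far[idx] + frr[idx]) / 2


rng = np.random.default_rng(0)
genuine = rng.normal(1.0, 1.0, 1000)    # higher score = more likely genuine
impostor = rng.normal(-1.0, 1.0, 1000)
print(f"EER: {compute_eer(genuine, impostor):.2f}%")
```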
ui/leaderboard.py
CHANGED
@@ -14,12 +14,12 @@ def render_info_html():
    # "to detect deep fakes. With research moving at such a rapid pace, it becomes challenging to keep track of generalizability " \
    # "of SOTA DF detection systems. This leaderboard thus presents a comprehensive benchmark of 10 SOTA speech antispoofing " \
    # "systems across 13 popular speech deep fake detection datasets."
-    info_text = """The advent of machine generated speech calls for dedicated research to develop countermeasure systems to protect against their misuse.
-
-
-
-
-
+    info_text = """The advent of machine generated speech calls for dedicated research to develop countermeasure systems to protect against their misuse through deepfakes.
+The Speech DF Arena leaderboard provides a standardized benchmarking platform for both commercial and open source systems to compare different detection approaches and rank them using standard metrics.
+This leaderboard is an evolving initiative where new systems and attacks can be added upon request to keep it up to date with the latest advancements in the field. Check out the Submit Your System section to learn how to submit your system.
+
+Below we report the Equal Error Rate (EER %), Accuracy (%) and F1 scores. The table consists of both pooled and average results. Pooled results are computed
+by using thresholds obtained across all datasets, while average results are computed by simply averaging the dataset-level results. We rank the systems according to the pooled results.
    """
 
    # HTML formatted info text
@@ -28,13 +28,16 @@ def render_info_html():
 def highlight_min(s, props=''):
    return np.where(s == np.nanmin(s.values), props, '')
 
+def highlight_max(s, props=''):
+    return np.where(s == np.nanmax(s.values), props, '')
 
-def render_leader_board(leaderboard_df, model_citations):
+def render_leader_board(leaderboard_df, model_citations, model_licenses, model_params, _ascending):
 
-    if not leaderboard_df.empty:
-        print(leaderboard_df.shape)
-        leaderboard_df.insert(3, 'Average EER(%)', leaderboard_df.iloc[:, 3:].mean(axis=1))
+    if not leaderboard_df.empty:
+        leaderboard_df.insert(1, "License", leaderboard_df["System"].apply(lambda x: model_licenses.get(x)))
+        leaderboard_df.insert(2, "Num Params (M)", leaderboard_df["System"].apply(lambda x: model_params.get(x)))
+        print(leaderboard_df.head())
+        leaderboard_df = leaderboard_df.sort_values(by='Pooled', ascending=_ascending).reset_index(drop=True)
 
-
 
        # Assign rank emojis 🥇🥈🥉
        leaderboard_df["System"] = leaderboard_df["System"].apply(lambda x: f"[{x}]({model_citations.get(x, '#')})")
@@ -51,10 +54,10 @@ def render_leader_board(leaderboard_df, model_citations):
            leaderboard_df
            .style \
            .format(precision=2)
-            .apply(highlight_min, props='color:green', axis=0)
+            .apply(highlight_min if _ascending else highlight_max, props='color:green', axis=0)
        )
 
-        return gr.Dataframe(styler, datatype=['markdown'] *
+        return gr.Dataframe(styler, datatype=['markdown'] * 3 + ['number'] * 15, elem_id="leaderboard-table", pinned_columns=1, column_widths=["200px", "150px", "150px"] + ["100px"] * 15)
    return gr.HTML(value="<p>No data available in the leaderboard.</p>")
 
 def render_citation():
@@ -70,20 +73,3 @@ def render_citation():
    howpublished = "\url{link}"
    }
    ```""")
-
-def render_dataset_list():
-    return gr.Markdown(r"""
-List of all the datasets used in the benchmarking:
-- [ASVSpoof 2019: A large-scale public database of synthesized, converted and replayed speech](https://www.asvspoof.org/index2019.html)
-- [ASVspoof 2021 DF & LA: accelerating progress in spoofed and deepfake speech detection](https://www.asvspoof.org/index2021.html)
-- [ASVspoof 5: Crowdsourced speech data, deepfakes, and adversarial attacks at scale](https://www.asvspoof.org/workshop2024)
-- [FakeOrReal: FoR: A Dataset for Synthetic Speech Detection](https://bil.eecs.yorku.ca/datasets/)
-- [Codecfake, Yuankun et al.](https://github.com/xieyuankun/Codecfake)
-- [ADD 2022 Track 2 & 3: the First Audio Deep Synthesis Detection Challenge](http://addchallenge.cn/add2022)
-- [ADD 2023 Round 2 & 3: the Second Audio Deepfake Detection Challenge](http://addchallenge.cn/add2023)
-- [DFADD: The Diffusion and Flow-Matching Based Audio Deepfake Dataset](https://github.com/isjwdu/DFADD)
-- [LibriVoc: Exposing AI-Synthesized Human Voices Using Neural Vocoder Artifacts](https://github.com/csun22/Synthetic-Voice-Detection-Vocoder-Artifacts)
-- [SONAR: A Synthetic AI-Audio Detection Framework and Benchmark](https://github.com/Jessegator/SONAR)
-- [In-the-Wild: Does Audio Deepfake Detection Generalize?](https://deepfake-total.com/in_the_wild)
-
-""")
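The new `_ascending` flag switches between `highlight_min` (EER, where lower is better) and `highlight_max` (Accuracy and F1, where higher is better). A self-contained sketch of how this column-wise Styler highlighting behaves, on toy data rather than the real leaderboard tables; the `subset` argument is added here so the string column is skipped, whereas the commit applies the function to all columns:

```python
# Toy demonstration of the column-wise highlighting used in render_leader_board.
import numpy as np
import pandas as pd


def highlight_min(s, props=''):
    # Mark the minimum of each column (best system when lower is better)
    return np.where(s == np.nanmin(s.values), props, '')


def highlight_max(s, props=''):
    # Mark the maximum of each column (best system when higher is better)
    return np.where(s == np.nanmax(s.values), props, '')


df = pd.DataFrame({'System': ['A', 'B'],
                   'Pooled': [4.2, 7.9],
                   'ASVspoof2019': [1.3, 0.8]})
_ascending = True  # EER table: lower is better, so highlight the minimum
styler = (
    df
    .style
    .format(precision=2)
    .apply(highlight_min if _ascending else highlight_max,
           props='color:green', axis=0,
           subset=['Pooled', 'ASVspoof2019'])  # numeric columns only
)
print(styler.to_html()[:200])  # the Styler is what gr.Dataframe renders
```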
ui/submission.py
CHANGED
@@ -1,10 +1,9 @@
 import gradio as gr
 
 def render_submission_page():
-    text = r"""Want to submit your own system to the leaderboard?
-
-
-
+    text = r"""Want to submit your own system to the leaderboard? We accept submissions from both open source and closed source systems.
+Instructions and the submission form can be found here: [Submission Form](https://drive.google.com/file/d/1YmW3da68hYAWeTmMAJOcEgUlJG3iGXGx/view?usp=sharing). We request submitting teams to fill out this form
+and reach out to us at <[email protected]>. Below is the list of currently included datasets in the leaderboard:
 
 - [ASVSpoof2019](https://www.asvspoof.org/index2019.html)
 - [ASVSpoof2021LA](https://www.asvspoof.org/index2021.html)