Commit
·
f510c1c
1
Parent(s):
d71af44
Initial commit
Browse files- .gitignore +3 -0
- app.py +56 -0
- ui/coming_soon.py +40 -0
- ui/evaluation.py +61 -0
- ui/leaderboard.py +72 -0
- ui/metrics.py +27 -0
- ui/submission.py +29 -0
- utils.py +28 -0
.gitignore
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
__pycache__/*
|
2 |
+
*.pyc
|
3 |
+
ui/__pycache__
|
app.py
ADDED
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
|
3 |
+
|
4 |
+
import pandas as pd
|
5 |
+
import gradio as gr
|
6 |
+
from ui.leaderboard import render_leader_board, render_info_html, render_citation
|
7 |
+
from ui.evaluation import render_eval_info
|
8 |
+
from ui.coming_soon import render_coming_soon
|
9 |
+
from ui.submission import render_submission_page
|
10 |
+
import os
|
11 |
+
from utils import load_leaderboard, custom_css
|
12 |
+
from huggingface_hub import snapshot_download
|
13 |
+
import gradio as gr
|
14 |
+
import os
|
15 |
+
import json
|
16 |
+
|
17 |
+
# Configuration: dataset repo comes from the environment; data lives under ./data/data.
REPO_ID = os.getenv('REPO_ID')
# Plain string literals — the original used f-strings with no placeholders.
DB_ERR_PATH = './data/data/leaderboard_err.csv'      # per-system EER table
CITATIONS_PATH = './data/data/model_citations.json'  # maps system name -> citation URL

# Download the dataset snapshot once; skip when it is already cached locally.
if not os.path.exists('./data/data'):
    snapshot_download(repo_id=REPO_ID,
                      repo_type="dataset", local_dir='./data/data')

# System-name -> citation-URL mapping used to hyperlink leaderboard entries.
with open(CITATIONS_PATH, 'r') as f:
    model_citations = json.load(f)

# Load leaderboard data
leaderboard_df_err = load_leaderboard(DB_ERR_PATH)
|
30 |
+
|
31 |
+
def create_ui():
    """Build the Gradio Blocks app: a banner image plus four tabs.

    Reads the module-level ``leaderboard_df_err`` and ``model_citations``
    globals; returns the (un-launched) ``gr.Blocks`` demo.
    """
    with gr.Blocks(theme=gr.themes.Soft(text_size=gr.themes.sizes.text_md), css=custom_css) as demo:
        # gr.Markdown("# Speech Deep Fake Arena")
        # Banner image stands in for the plain-text title above.
        gr.Image('./data/data/df_arena_2.jpg', interactive=False,
                 show_fullscreen_button=False, show_share_button=False, show_label=False)

        with gr.Tabs():
            with gr.Tab("🏆 Leaderboard"):
                with gr.Column():
                    render_info_html()
                    gr.Markdown("Table for Equal Error Rate (EER %) for different systems")
                    render_leader_board(leaderboard_df_err, model_citations)  # Adjust this to work with Gradio components
                    render_citation()

            with gr.Tab("📊 Metrics"):
                render_eval_info()

            with gr.Tab("📤 Submit your own system !"):
                render_submission_page()

            with gr.Tab("🔜 Coming Soon"):
                render_coming_soon()
    return demo
|
54 |
+
|
55 |
+
# Launch the app only when executed as a script, not when imported
# (the unguarded call would start the server on any `import app`).
if __name__ == "__main__":
    create_ui().launch()
|
ui/coming_soon.py
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
|
3 |
+
def render_coming_soon():
    """Render the 'Coming Soon' tab: planned metrics, datasets, models and demos."""
    upcoming = r"""

### **1. More evaluation metrics**
- Accuracy
- Precision, Recall and F1 Score
- minDCF

#### **2. More Datasets and Models:**

**Datasets:**

- MLAAD
- Latin-American-Spanish-Deepfake-Dataset
- CodecFake-Omni
- Hindi audio-video-Deepfake
- SpoofCeleb
- VoiceWukong
- Codecfake Yi Lu et.al.
- CodecFake Haibin Wu et.al.
- LRPD
- EmoFake


**Models:**
- Wav2Vec2-AASIST
- RawNet3
- AASIST2

#### **3. Top performing DF systems live demo**

Run inference using your own audio samples on top performing DF systems. Get probability scores for each system

"""
    return gr.Markdown(upcoming)
|
ui/evaluation.py
ADDED
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
|
3 |
+
def render_eval_info():
    """Render the Metrics tab: what EER is and how to produce score files.

    Returns a ``gr.Markdown`` with ``$...$`` LaTeX delimiters enabled so the
    FAR/FRR formulas render.
    """
    # Fixed typo "bimoretric" -> "biometric" and broken CLI flag
    # "--num workers" -> "--num_workers" (flags cannot contain spaces).
    text = r"""

We use **Equal Error Rate (EER %)** a standard method used in biometric and anti-spoofing systems.

### **What is EER?**
Equal Error Rate (EER) is a performance metric used to evaluate biometric systems. It represents the point at which the **False Acceptance Rate (FAR)** and **False Rejection Rate (FRR)** are equal. A lower EER indicates a more accurate system.

#### **False Acceptance Rate (FAR)**
FAR is the proportion of **unauthorized** users incorrectly accepted by the system.

$FAR = \frac{\text{False Acceptances}}{\text{Total Imposter Attempts}}$

#### **False Rejection Rate (FRR)**
FRR is the proportion of **genuine** users incorrectly rejected by the system.

$FRR = \frac{\text{False Rejections}}{\text{Total Genuine Attempts}}$


- EER is the point at which FAR and FRR are equal.

### How to compute your own EER score file ?

In order to streamline the evaluation process across many models and datasets, we
have developed df_arena_toolkit which can be used to compute score files for evaluation.
The tool can be found at https://github.com/Speech-Arena/speech_df_arena.

### Usage
#### 1. Data Preparation
Create metadata.csv for your desired dataset with below format:

```
file_name,label
/path/to/audio1,spoof
/path/to/audio2,bonafide
...

```
NOTE : The labels should contain "spoof" for spoofed samples and "bonafide" for real samples.
All the file_name paths should be absolute

#### 2. Evaluation

Example usage :
```py
python evaluation.py --model_name wavlm_ecapa
                     --batch_size 32
                     --protocol_file_path /path/to/metadata.csv
                     --model_path /path/to/model.ckpt
                     --out_score_file_name scores.txt
                     --trim pad
                     --num_workers 4
```

NOTES
- Checkpoints and config files for the open source systems used in our benchmark can be found at :
- Example inference command for every model can be found at :
"""
    return gr.Markdown(text, latex_delimiters=[{ "left": "$", "right": "$", "display": True }])
|
ui/leaderboard.py
ADDED
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
import pandas as pd
|
3 |
+
import gradio as gr
|
4 |
+
from utils import load_leaderboard
|
5 |
+
import numpy as np
|
6 |
+
from huggingface_hub import snapshot_download
|
7 |
+
|
8 |
+
|
9 |
+
def make_clickable(url, name):
    """Wrap *name* in an HTML anchor that opens *url* in a new tab."""
    template = '<a href="{}" target="_blank">{}</a>'
    return template.format(url, name)
|
11 |
+
|
12 |
+
def render_info_html():
    """Render the introductory blurb shown at the top of the Leaderboard tab."""
    blurb = """The advent of machine generated speech calls for dedicated research to develop countermeasure systems to protect against their misuse.
The speech DF arena leaderboard provides a standardized platform to compare different speech deepfake detection approaches and ranks them on Huggingface.
By assessing models across diverse datasets and attack scenarios, the speech DF arena leaderboard aims to help researchers and developers enhance the reliability and robustness of deepfake detection systems
, ensuring safer and more trustworthy audio communications. We report the average EER (lower the better).
Models are ranked based on their Average EER, from lowest to highest. Check the Metrics tab to understand how the models are evaluated.
If you want results for a model that is not listed here, you can submit a request for it to be included through under "submit your own system" tab.
The leaderboard currently focuses on English and Chinese speech deepfake detection datasets."""
    return gr.Markdown(blurb)
|
27 |
+
|
28 |
+
def highlight_min(s, props=''):
    """Return a per-cell style array: *props* where *s* equals its (nan-safe) minimum, '' elsewhere."""
    smallest = np.nanmin(s.values)
    return np.where(s == smallest, props, '')
|
30 |
+
|
31 |
+
def render_leader_board(leaderboard_df, model_citations):
    """Render the EER leaderboard as a styled ``gr.Dataframe``.

    Args:
        leaderboard_df: DataFrame with 'System', 'Training Data' and one EER
            column per dataset (columns 3 onward are averaged). Not mutated.
        model_citations: mapping of system name -> citation URL ('#' fallback).

    Returns a ``gr.Dataframe`` (or a ``gr.HTML`` placeholder when empty).
    """
    if leaderboard_df.empty:
        return gr.HTML(value="<p>No data available in the leaderboard.</p>")

    # Work on a copy: the original mutated the caller's (module-global) frame,
    # so rendering twice would insert 'Average EER(%)' twice.
    df = leaderboard_df.copy()
    print(df.shape)

    # Mean of every per-dataset EER column (columns 3 onward).
    df.insert(2, 'Average EER(%)', df.iloc[:, 3:].mean(axis=1))
    df = df.sort_values(by="Average EER(%)", ascending=True).reset_index(drop=True)

    # Hyperlink each system name to its citation.
    df["System"] = df["System"].apply(lambda x: f"[{x}]({model_citations.get(x, '#')})")

    # Assign rank emojis 🥇🥈🥉 — guarded, so fewer than three rows no longer
    # raises a KeyError as the hard-coded .System[1]/.System[2] lookups did.
    for rank, emoji in enumerate(["🥇", "🥈", "🥉"][:len(df)]):
        df.loc[rank, "System"] = f"{emoji} {df.System[rank]}"

    # 'Training Data' is free text; highlighting its "minimum" is meaningless.
    columns_to_style = [col for col in df.columns if col != 'Training Data']

    styler = (
        df
        .style
        .format(precision=2)
        .apply(highlight_min, props='color:green', axis=0, subset=columns_to_style)
    )

    # First two columns render as markdown (linked System, Training Data);
    # the rest are numeric — derived from the frame instead of hard-coded 16.
    numeric_cols = max(len(df.columns) - 2, 0)
    return gr.Dataframe(styler, datatype=['markdown'] * 2 + ['number'] * numeric_cols)
|
59 |
+
|
60 |
+
def render_citation():
    """Render the BibTeX snippet users should cite for Speech DF Arena."""
    bibtex = r"""
If you use Speech DF Arena in your work, it can be cited as:

```bibtex
@misc{speecharena-df-leaderboard,
    title = {Speech Arena: DeepFake Leaderboard},
    author = {Speech Arena},
    year = 2025,
    publisher = {Hugging Face},
    howpublished = "\url{link}"
}
```"""
    return gr.Markdown(bibtex)
|
ui/metrics.py
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
|
3 |
+
def render_metrics():
    """Render a Markdown explainer of EER, FAR and FRR with inline LaTeX."""
    # Fixed typo "bimoretric" -> "biometric".
    text = r"""
We use **Equal Error Rate (EER %)** a standard method used in biometric and anti-spoofing systems.

### **What is EER?**
Equal Error Rate (EER) is a performance metric used to evaluate biometric systems. It represents the point at which the **False Acceptance Rate (FAR)** and **False Rejection Rate (FRR)** are equal. A lower EER indicates a more accurate system.

#### **False Acceptance Rate (FAR)**
FAR is the proportion of **unauthorized** users incorrectly accepted by the system.

$FAR = \frac{\text{False Acceptances}}{\text{Total Imposter Attempts}}$

A high FAR means the system is too lenient, allowing unauthorized access.

#### **False Rejection Rate (FRR)**
FRR is the proportion of **genuine** users incorrectly rejected by the system.

$FRR = \frac{\text{False Rejections}}{\text{Total Genuine Attempts}}$

A high FRR means the system is too strict, denying access to legitimate users.

### EER is the point at which FAR and FRR are equal.
"""

    return gr.Markdown(text, latex_delimiters=[ {"left": "$", "right": "$", "display": False }])
|
ui/submission.py
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
|
3 |
+
def render_submission_page():
    """Render submission instructions plus the list of supported datasets."""
    instructions = r"""Want to submit your own system to the leaderboard? Submit
all the scores files for your system across evaluation sets of all the below supported datasets at
<[email protected]> and we will handle the rest. Score files can be generated using the df_arena_toolkit.


- [ASVSpoof2019](https://www.asvspoof.org/index2019.html)
- [ASVSpoof2021LA](https://www.asvspoof.org/index2021.html)
- [ASVSpoof2021DF](https://www.asvspoof.org/index2021.html)
- [ASVSpoof2024-Dev](https://www.asvspoof.org/workshop2024)
- [ASVSpoof2024-Eval](https://www.asvspoof.org/workshop2024)
- [FakeOrReal](https://bil.eecs.yorku.ca/datasets/)
- [codecfake3](https://github.com/xieyuankun/Codecfake)
- [ADD2022 Track 1](http://addchallenge.cn/add2022)
- [ADD2022 Track 3](http://addchallenge.cn/add2022)
- [ADD 2023 R1](http://addchallenge.cn/add2023)
- [ADD2023 R2](http://addchallenge.cn/add2023)
- [DFADD](https://github.com/isjwdu/DFADD)
- [LibriVoc](https://github.com/csun22/Synthetic-Voice-Detection-Vocoder-Artifacts)
- [SONAR](https://github.com/Jessegator/SONAR)
"""
    return gr.Markdown(instructions)
|
27 |
+
|
28 |
+
|
29 |
+
|
utils.py
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import sqlite3
|
2 |
+
import pandas as pd
|
3 |
+
|
4 |
+
def load_leaderboard(db_path):
    """Read the leaderboard CSV at *db_path* into a pandas DataFrame."""
    return pd.read_csv(db_path)
|
8 |
+
|
9 |
+
# Global CSS injected into gr.Blocks(css=custom_css).
# Fixed invalid selector "h1, {" (a trailing comma in a selector list makes
# browsers drop the entire rule, so the heading styles never applied).
custom_css = """
h1 {
    font-size: 48px !important;    /* Increase heading sizes */
    line-height: 2.0 !important;   /* Increase line spacing */
    text-align: center !important; /* Center align headings */
}

.gradio-container {
    padding: 30px !important;  /* Increase padding around the UI */
}

.markdown-body p {
    font-size: 28px !important;   /* Increase text size */
    line-height: 2.0 !important;  /* More space between lines */
}

.gradio-container .gr-block {
    margin-bottom: 20px !important;  /* Add more space between elements */
}
"""
|