Spaces:

NLP-UNED
/

dipromats2024-task2-leaderboard

Sleeping

App Files Files Community

anselp committed on Jan 28

Commit

7d55365

verified ·

1 Parent(s): d9c89bb

Update app.py

Browse files

Files changed (1) hide show

app.py +124 -62

app.py CHANGED Viewed

@@ -2,38 +2,92 @@ import gradio as gr
 import json
 import os
 import random
-import datasets #load_dataset, save_to_disk, load_from_disk
-# Primero crea el token en los Settings de tu Usuario
-# Después cópialo a Secrets en los Settings del Space
-HF_TOKEN = os.environ['HF_DIPROMATS2024_T2_LEADERBOARD_TOKEN']
-# Use the Hugging Face dataset
 DATASET_NAME = "NLP-UNED/dipromats2024-t2_leaderboard-data"
-SPLIT = 'results'
 try:
-    dataset = datasets.load_dataset(DATASET_NAME, token=HF_TOKEN)
 except Exception as e:
-    print(f"Error loading dataset: {e}")
-    dataset = datasets.Dataset.from_dict({"email": [], "team_name": [], "run_id": [], "description": [], "lenient_f1": [], "strict_f1": [], "average_f1": []})
-    dataset = datasets.DatasetDict({SPLIT: dataset})
 # Function to convert the dataset into a table
-def data_to_table():
-    global dataset
     table_data = []
-    for item in dataset[SPLIT]:
         table_data.append([item.get("team_name", ""), item.get("run_id", ""),
                            item.get("lenient_f1", ""), item.get("strict_f1", ""), item.get("average_f1", "")])
     return table_data
 # Function to upload the results to the leaderboard
-def update_leaderboard(email, team_input, run_id, description, lenient_f1, strict_f1, average_f1):
-    global datataset
-    new_data = dataset[SPLIT].add_item({
         "email": email,
         "team_name": team_input,
         "run_id": run_id,
@@ -42,13 +96,21 @@ def update_leaderboard(email, team_input, run_id, description, lenient_f1, stric
         "strict_f1": strict_f1,
         "average_f1": average_f1
     })
-    dataset[SPLIT] = new_data
-    new_data.push_to_hub(DATASET_NAME, split=SPLIT, token=HF_TOKEN)
-    return data_to_table(), gr.Tabs(selected=0), gr.Button(visible=True), gr.Button(visible=False), "", "", "", "", None, None, None, None
 # Function to evaluate the results
-def evaluate_results(file_path):
     lenient_f1 = random.random()
     strict_f1 = random.random()
     average_f1 = (lenient_f1 + strict_f1) / 2
@@ -56,33 +118,18 @@ def evaluate_results(file_path):
 # Function to process the results file
-def process_file(file_path, team_input, run_id, description, email):
     warn = False
     if not file_path:
         gr.Warning("File cannot be blank")
         warn=True
-    if not team_input:
-        gr.Warning("Team name cannot be blank")
-        warn=True
-    if not run_id:
-        gr.Warning("Run ID cannot be blank")
-        warn=True
-    if not file_path:
-        gr.Warning("File cannot be blank")
-        warn=True
-    if not description:
-        gr.Warning("Description cannot be blank")
-        warn=True
-    if not email:
-        gr.Warning("Email cannot be blank")
-        warn=True
     if warn:
-        return gr.Button(visible=True), gr.Button(visible=False), None, None, None
-    lenient_f1, strict_f1, average_f1 = evaluate_results(file_path)
-    return gr.Button(visible=False), gr.Button(visible=True), lenient_f1, strict_f1, average_f1
 # Main
@@ -93,20 +140,31 @@ with gr.Blocks() as leaderboard:
         """
         # Dipromats 2024 Task 2 Leaderboard
         # Automatic Detection of Narratives from Diplomats of Major Powers
-        This is...
-        You can...
         """)
     with gr.Tabs() as tabs:
-        # Tab Leaderboard
-        with gr.TabItem("Leaderboard", id=0):
             gr.Markdown(
                 """
-                #
-                # Leaderboard
                 """)
-            leaderboard_table = gr.Dataframe(headers=["Team", "Run ID", "Lenient F1", "Strict F1", "Average F1"],
-                        value=data_to_table(),
                         interactive=False)
         # Tab Evaluate
@@ -117,17 +175,11 @@ with gr.Blocks() as leaderboard:
                 Then you can decide to submit your results to the leaderboard or not.
                 Make sure that you upload a file with the json format described in...
                 """)
-            # Submission Form
             with gr.Row():
                 with gr.Column():
-                    with gr.Row():
-                        team_input = gr.Textbox(label="Team Name")
-                        run_id = gr.Textbox(label="Run ID")
-                    email_input = gr.Textbox(label="Email (only for submission verification, it won't be shown)")
-                description_input = gr.Textbox(label="System description", lines=6)
-            file_input = gr.File(label="Upload a JSON file", file_types=[".json"], type="filepath", file_count="single")
-            evaluate_button = gr.Button("Evaluate")
             # System results table
             with gr.Row(visible=True):
@@ -136,12 +188,22 @@ with gr.Blocks() as leaderboard:
                 average_f1 = gr.Number(label="Average F1", interactive=False)
             # Submit to leaderboard
-            submit_button = gr.Button("Submit to leaderboard", visible=False)
     evaluate_button.click(process_file,
-                        inputs=[file_input, team_input, run_id, description_input, email_input],
-                        outputs=[evaluate_button,submit_button,lenient_f1, strict_f1, average_f1])
     submit_button.click(update_leaderboard,
-                        inputs=[email_input, team_input, run_id, description_input, lenient_f1, strict_f1, average_f1],
-                        outputs=[leaderboard_table, tabs, evaluate_button, submit_button, team_input, run_id, description_input, email_input, file_input,lenient_f1, strict_f1, average_f1])
 leaderboard.launch()

 import json
 import os
 import random
+import datasets
+# First, create a token in your User Settings that grants read and write access to this dataset only
+try:
+    from google.colab import userdata
+    # Token must be copied and activated in Colab Secrets
+    HF_TOKEN = userdata.get('HF_DIPROMATS2024_T2_LEADERBOARD_TOKEN')
+except:
+    # Assume running in HF Space
+    # Token must be copied in a Secret under Space Settings
+    #HF_TOKEN = os.environ['HF_DIPROMATS2024_T2_LEADERBOARD_TOKEN']
+    HF_TOKEN = os.getenv('HF_DIPROMATS2024_T2_LEADERBOARD_TOKEN')
+# Hugging Face dataset
 DATASET_NAME = "NLP-UNED/dipromats2024-t2_leaderboard-data"
+SPLIT_EN = 'results_en'
+SPLIT_ES = 'results_es'
+# Define the features with their correct data types
+FEATURES = datasets.Features({
+        "email": datasets.Value("string"),
+        "team_name": datasets.Value("string"),
+        "run_id": datasets.Value("string"),
+        "description": datasets.Value("string"),
+        "lenient_f1": datasets.Value("float64"),
+        "strict_f1": datasets.Value("float64"),
+        "average_f1": datasets.Value("float64") })
+# Load the English dataset or create an empty one instead
+try:
+    dataset_en = datasets.load_dataset(DATASET_NAME, split=SPLIT_EN, token=HF_TOKEN)
+except Exception as e:
+    print(f"Error loading English dataset: {e}")
+    dataset_en = datasets.Dataset.from_dict({"email": [], "team_name": [], "run_id": [], "description": [], "lenient_f1": [], "strict_f1": [], "average_f1": []}, features=FEATURES, split=SPLIT_EN)
+    dataset_en.push_to_hub(DATASET_NAME, split=SPLIT_EN, token=HF_TOKEN)
+# Load the Spanish dataset or create an empty one instead
 try:
+    dataset_es = datasets.load_dataset(DATASET_NAME, split=SPLIT_ES, token=HF_TOKEN)
 except Exception as e:
+    print(f"Error loading Spanish dataset: {e}")
+    dataset_es = datasets.Dataset.from_dict({"email": [], "team_name": [], "run_id": [], "description": [], "lenient_f1": [], "strict_f1": [], "average_f1": []}, features=FEATURES, split=SPLIT_ES)
+    dataset_es.push_to_hub(DATASET_NAME, split=SPLIT_ES, token=HF_TOKEN)
 # Function to convert the dataset into a table
+def data_to_table(dataset):
     table_data = []
+    for item in dataset:
         table_data.append([item.get("team_name", ""), item.get("run_id", ""),
                            item.get("lenient_f1", ""), item.get("strict_f1", ""), item.get("average_f1", "")])
     return table_data
 # Function to upload the results to the leaderboard
+def update_leaderboard(lang, file_path, email, team_input, run_id, description, lenient_f1, strict_f1, average_f1):
+    global dataset_en
+    global dataset_es
+    if lang == "en":
+        dataset = dataset_en
+    else:
+        dataset = dataset_es
+    warn = False
+    if not email:
+        gr.Warning("Email cannot be blank")
+        warn=True
+    if not team_input:
+        gr.Warning("Team name cannot be blank")
+        warn=True
+    if not run_id:
+        gr.Warning("Run ID cannot be blank")
+        warn=True
+    if not file_path:
+        gr.Warning("File cannot be blank")
+        warn=True
+    if not description:
+        gr.Warning("Description cannot be blank")
+        warn=True
+    if warn:
+        return data_to_table(dataset_en), data_to_table(dataset_es), gr.Tabs(selected=1), gr.Button(visible=False), gr.Column(visible=True), team_input, run_id, description, email, file_path, lenient_f1, strict_f1, average_f1
+    dataset = dataset.add_item({
         "email": email,
         "team_name": team_input,
         "run_id": run_id,
         "strict_f1": strict_f1,
         "average_f1": average_f1
     })
+    # Save change in database
+    dataset.push_to_hub(DATASET_NAME, token=HF_TOKEN)
+    # Update dataset in memory
+    if lang == "en":
+        dataset_en = dataset
+    else:
+        dataset_es = dataset
+    # Outputs: leaderboard_table_en, leaderboard_table_es, tabs, evaluate_button, submission_col, team_input, run_id, description_input, email_input, file_input, lenient_f1, strict_f1, average_f1
+    return data_to_table(dataset_en), data_to_table(dataset_es), gr.Tabs(selected=0), gr.Button(visible=True), gr.Column(visible=False), "", "", "", "", None, None, None, None
 # Function to evaluate the results
+def evaluate_results(lang, file_path):
     lenient_f1 = random.random()
     strict_f1 = random.random()
     average_f1 = (lenient_f1 + strict_f1) / 2
 # Function to process the results file
+def process_file(lang, file_path):
     warn = False
     if not file_path:
         gr.Warning("File cannot be blank")
         warn=True
     if warn:
+        return gr.Button(visible=True), gr.Row(visible=False), None, None, None
+    lenient_f1, strict_f1, average_f1 = evaluate_results(lang, file_path)
+    return gr.Button(visible=False), gr.Row(visible=True), lenient_f1, strict_f1, average_f1
 # Main
         """
         # Dipromats 2024 Task 2 Leaderboard
         # Automatic Detection of Narratives from Diplomats of Major Powers
+        These are the leaderboards for DIPROMATS 2024 Task 2 described in <a href=https://nlp.uned.es/dipromats2024>nlp.uned.es/dipromats2024</a>.
+        The Gold Standard is not publicly available so LLMs cannot be contamined with them.
+        However, you can submit your results here and get your system automatically evaluated.
+        Then you will have the choice to submit your results to the leaderboard.
         """)
     with gr.Tabs() as tabs:
+        # Tab English Leaderboard
+        with gr.TabItem("English Leaderboard", id=0):
             gr.Markdown(
                 """
+                # English Leaderboard
                 """)
+            leaderboard_table_en = gr.Dataframe(headers=["Team", "Run ID", "Lenient F1", "Strict F1", "Average F1"],
+                        value=data_to_table(dataset_en),
+                        interactive=False)
+        # Tab Spanish Leaderboard
+        with gr.TabItem("Spanish Leaderboard", id=2):
+            gr.Markdown(
+                """
+                # Spanish Leaderboard
+                """)
+            leaderboard_table_es = gr.Dataframe(headers=["Team", "Run ID", "Lenient F1", "Strict F1", "Average F1"],
+                        value=data_to_table(dataset_es),
                         interactive=False)
         # Tab Evaluate
                 Then you can decide to submit your results to the leaderboard or not.
                 Make sure that you upload a file with the json format described in...
                 """)
             with gr.Row():
+                file_input = gr.File(label="Upload a JSON file", file_types=[".json"], type="filepath", file_count="single")
                 with gr.Column():
+                    lang = gr.Dropdown(label="Language", choices=["en", "es"], interactive=True)
+                    evaluate_button = gr.Button("Evaluate")
             # System results table
             with gr.Row(visible=True):
                 average_f1 = gr.Number(label="Average F1", interactive=False)
             # Submit to leaderboard
+            with gr.Column(visible=False) as submission_col:
+                with gr.Row():
+                    with gr.Column():
+                        with gr.Row():
+                            team_input = gr.Textbox(label="Team Name")
+                            run_id = gr.Textbox(label="Run ID")
+                        email_input = gr.Textbox(label="Email (only for submission verification, it won't be shown)")
+                    description_input = gr.Textbox(label="System description", lines=6)
+                submit_button = gr.Button("Submit to leaderboard")
     evaluate_button.click(process_file,
+                        inputs=[lang, file_input],
+                        outputs=[evaluate_button, submission_col,lenient_f1, strict_f1, average_f1])
     submit_button.click(update_leaderboard,
+                        inputs=[lang, file_input, email_input, team_input, run_id, description_input, lenient_f1, strict_f1, average_f1],
+                        outputs=[leaderboard_table_en,leaderboard_table_es, tabs, evaluate_button, submission_col, team_input, run_id, description_input, email_input, file_input, lenient_f1, strict_f1, average_f1])
 leaderboard.launch()