Spaces:

NLP-UNED
/

dipromats2024-task2-leaderboard

Sleeping

App Files Files Community

anselp committed on Feb 7

Commit

0e52835

verified ·

1 Parent(s): a2032b5

Update app.py

Browse files

Files changed (1) hide show

app.py +61 -45

app.py CHANGED Viewed

@@ -4,25 +4,19 @@ import os
 import random
 import datasets
-# Before, you must create a Token in User Settings to give read and write access only to the dataset
-try:
-    from google.colab import userdata
-    # Token must be copied and activated in Colab Secrets
-    HF_TOKEN = userdata.get('HF_DIPROMATS2024_T2_LEADERBOARD_TOKEN')
-except:
-    # Assume running in HF Space
-    # Token must be copied in a Secret under Space Settings
-    #HF_TOKEN = os.environ['HF_DIPROMATS2024_T2_LEADERBOARD_TOKEN']
-    HF_TOKEN = os.getenv('HF_DIPROMATS2024_T2_LEADERBOARD_TOKEN')
-# Hugging Face dataset
-DATASET_NAME = "NLP-UNED/dipromats2024-t2_leaderboard-data"
 SPLIT_EN = 'results_en'
 SPLIT_ES = 'results_es'
-# Define the features with their correct data types
-FEATURES = datasets.Features({
-        "email": datasets.Value("string"),
         "team_name": datasets.Value("string"),
         "run_id": datasets.Value("string"),
         "description": datasets.Value("string"),
@@ -30,24 +24,43 @@ FEATURES = datasets.Features({
         "strict_f1": datasets.Value("float64"),
         "average_f1": datasets.Value("float64") })
-# Load the English dataset or create an empty one instead
 try:
-    dataset_en = datasets.load_dataset(DATASET_NAME, split=SPLIT_EN, token=HF_TOKEN)
 except Exception as e:
-    print(f"Error loading English dataset: {e}")
-    dataset_en = datasets.Dataset.from_dict({"email": [], "team_name": [], "run_id": [], "description": [], "lenient_f1": [], "strict_f1": [], "average_f1": []}, features=FEATURES, split=SPLIT_EN)
-    dataset_en.push_to_hub(DATASET_NAME, split=SPLIT_EN, token=HF_TOKEN)
 # Load the Spanish dataset or create an empty one instead
 try:
-    dataset_es = datasets.load_dataset(DATASET_NAME, split=SPLIT_ES, token=HF_TOKEN)
 except Exception as e:
-    print(f"Error loading Spanish dataset: {e}")
-    dataset_es = datasets.Dataset.from_dict({"email": [], "team_name": [], "run_id": [], "description": [], "lenient_f1": [], "strict_f1": [], "average_f1": []}, features=FEATURES, split=SPLIT_ES)
-    dataset_es.push_to_hub(DATASET_NAME, split=SPLIT_ES, token=HF_TOKEN)
 # Función para convertir el dataset en tabla
 def data_to_table(dataset):
@@ -88,7 +101,6 @@ def update_leaderboard(lang, file_path, email, team_input, run_id, description,
         return data_to_table(dataset_en), data_to_table(dataset_es), gr.Tabs(selected=1), gr.Button(visible=False), gr.Column(visible=True), team_input, run_id, description, email, file_path, lenient_f1, strict_f1, average_f1
     dataset = dataset.add_item({
-        "email": email,
         "team_name": team_input,
         "run_id": run_id,
         "description": description,
@@ -97,7 +109,7 @@ def update_leaderboard(lang, file_path, email, team_input, run_id, description,
         "average_f1": average_f1
     })
     # Save change in database
-    dataset.push_to_hub(DATASET_NAME, token=HF_TOKEN)
     # Update dataset in memory
     if lang == "en":
@@ -108,26 +120,29 @@ def update_leaderboard(lang, file_path, email, team_input, run_id, description,
     #output: leaderboard_table, tabs, evaluate_button, submission_col, team_input, run_id, description_input, email_input, file_input, lenient_f1, strict_f1, average_f1
     return data_to_table(dataset_en), data_to_table(dataset_es), gr.Tabs(selected=0), gr.Button(visible=True), gr.Column(visible=False), "", "", "", "", None, None, None, None
-# Función para evaluar los resultados
-def evaluate_results(lang, file_path):
-    lenient_f1 = random.random()
-    strict_f1 = random.random()
-    average_f1 = (lenient_f1 + strict_f1) / 2
-    return lenient_f1, strict_f1, average_f1
 # Función para procesar el archivo de resultados
 def process_file(lang, file_path):
-    warn = False
     if not file_path:
         gr.Warning("File cannot be blank")
-        warn=True
-    if warn:
         return gr.Button(visible=True), gr.Row(visible=False), None, None, None
-    lenient_f1, strict_f1, average_f1 = evaluate_results(lang, file_path)
     return gr.Button(visible=False), gr.Row(visible=True), lenient_f1, strict_f1, average_f1
@@ -201,9 +216,10 @@ with gr.Blocks() as leaderboard:
     evaluate_button.click(process_file,
                         inputs=[lang, file_input],
                         outputs=[evaluate_button, submission_col,lenient_f1, strict_f1, average_f1])
     submit_button.click(update_leaderboard,
                         inputs=[lang, file_input, email_input, team_input, run_id, description_input, lenient_f1, strict_f1, average_f1],
-                        outputs=[leaderboard_table_en,leaderboard_table_es, tabs, evaluate_button, submission_col, team_input, run_id, description_input, email_input, file_input, lenient_f1, strict_f1, average_f1])
-leaderboard.launch()

 import random
 import datasets
+from dipromats_evaluation_v2 import evaluate_results
+# CONSTANTS
+# Hugging Face datasets
+DATASET_GOLD = "NLP-UNED/dipromats2024-t2_leaderboard-gold"
+FILE_GOLD = 'gold_test.json'
+DATASET_RESULTS = "NLP-UNED/dipromats2024-t2_leaderboard-results"
 SPLIT_EN = 'results_en'
 SPLIT_ES = 'results_es'
+FEATURES_RESULTS = datasets.Features({
         "team_name": datasets.Value("string"),
         "run_id": datasets.Value("string"),
         "description": datasets.Value("string"),
         "strict_f1": datasets.Value("float64"),
         "average_f1": datasets.Value("float64") })
+EMPTY_RESULT={"team_name": [], "run_id": [], "description": [], "lenient_f1": [], "strict_f1": [], "average_f1": []}
+# Beforehand, create the tokens in HF User Settings, granting read and write access only to these datasets
 try:
+    from google.colab import userdata
+    # Token must be copied and activated in Colab Secrets
+    HF_TOKEN_GOLD = userdata.get('HF_DIPROMATS2024_T2_GOLD_TOKEN')
+    HF_TOKEN_RESULTS = userdata.get('HF_DIPROMATS2024_T2_RESULTS_TOKEN')
+except:
+    # Assume running in HF Space
+    # Tokens must be copied in Secrets under Space Settings
+    HF_TOKEN_GOLD = os.getenv('HF_DIPROMATS2024_T2_GOLD_TOKEN')
+    HF_TOKEN_RESULTS = os.getenv('HF_DIPROMATS2024_T2_RESULTS_TOKEN')
+# LOAD DATASETS
+# Load the Gold Standard data
+# FILE_GOLD was uploaded directly through HF web, and the default split is train
+dataset_gold = datasets.load_dataset(DATASET_GOLD, split='train', data_files=FILE_GOLD, token=HF_TOKEN_GOLD)
+# Load the English dataset or create an empty one instead
+try:
+    dataset_en = datasets.load_dataset(DATASET_RESULTS, split=SPLIT_EN)
 except Exception as e:
+    print(f"Error loading English dataset: {e}. Creating it...")
+    dataset_en = datasets.Dataset.from_dict(EMPTY_RESULT, features=FEATURES_RESULTS, split=SPLIT_EN)
+    dataset_en.push_to_hub(DATASET_RESULTS, split=SPLIT_EN, token=HF_TOKEN_RESULTS)
 # Load the Spanish dataset or create an empty one instead
 try:
+    dataset_es = datasets.load_dataset(DATASET_RESULTS, split=SPLIT_ES)
 except Exception as e:
+    print(f"Error loading Spanish dataset: {e}. Creating it...")
+    dataset_es = datasets.Dataset.from_dict(EMPTY_RESULT, features=FEATURES_RESULTS, split=SPLIT_ES)
+    dataset_es.push_to_hub(DATASET_RESULTS, split=SPLIT_ES, token=HF_TOKEN_RESULTS)
+# AUX FUNCTIONS
 # Función para convertir el dataset en tabla
 def data_to_table(dataset):
         return data_to_table(dataset_en), data_to_table(dataset_es), gr.Tabs(selected=1), gr.Button(visible=False), gr.Column(visible=True), team_input, run_id, description, email, file_path, lenient_f1, strict_f1, average_f1
     dataset = dataset.add_item({
         "team_name": team_input,
         "run_id": run_id,
         "description": description,
         "average_f1": average_f1
     })
     # Save change in database
+    dataset.push_to_hub(DATASET_RESULTS, token=HF_TOKEN_RESULTS)
     # Update dataset in memory
     if lang == "en":
     #output: leaderboard_table, tabs, evaluate_button, submission_col, team_input, run_id, description_input, email_input, file_input, lenient_f1, strict_f1, average_f1
     return data_to_table(dataset_en), data_to_table(dataset_es), gr.Tabs(selected=0), gr.Button(visible=True), gr.Column(visible=False), "", "", "", "", None, None, None, None
 # Función para procesar el archivo de resultados
 def process_file(lang, file_path):
+    global dataset_gold
     if not file_path:
         gr.Warning("File cannot be blank")
+        return gr.Button(visible=True), gr.Row(visible=False), None, None, None
+    with open(file_path, 'r') as f:
+        test = json.load(f)
+    try:
+        results = evaluate_results(lang, dataset_gold, test)
+        #print(results)
+    except Exception as e:
+        gr.Warning("Invalid JSON file or Incorrect Language")
+        print(f"Error in function evaluate_results: {e}.")
+        print(dataset_gold)
         return gr.Button(visible=True), gr.Row(visible=False), None, None, None
+    lenient_f1 = results['lenient']['micro']['scores']['f1-score']
+    strict_f1 = results['strict']['micro']['scores']['f1-score']
+    average_f1 = (lenient_f1 + strict_f1) / 2
     return gr.Button(visible=False), gr.Row(visible=True), lenient_f1, strict_f1, average_f1
     evaluate_button.click(process_file,
                         inputs=[lang, file_input],
                         outputs=[evaluate_button, submission_col,lenient_f1, strict_f1, average_f1])
     submit_button.click(update_leaderboard,
                         inputs=[lang, file_input, email_input, team_input, run_id, description_input, lenient_f1, strict_f1, average_f1],
+                        outputs=[leaderboard_table_en, leaderboard_table_es, tabs, evaluate_button, submission_col, team_input, run_id, description_input, email_input, file_input, lenient_f1, strict_f1, average_f1])
+leaderboard.launch()