Update app.py
Browse files
app.py
CHANGED
@@ -4,25 +4,19 @@ import os
|
|
4 |
import random
|
5 |
import datasets
|
6 |
|
7 |
-
|
8 |
-
try:
|
9 |
-
from google.colab import userdata
|
10 |
-
# Token must be copied and activated in Colab Secrets
|
11 |
-
HF_TOKEN = userdata.get('HF_DIPROMATS2024_T2_LEADERBOARD_TOKEN')
|
12 |
-
except:
|
13 |
-
# Assume running in HF Space
|
14 |
-
# Token must be copied in a Secret under Space Settings
|
15 |
-
#HF_TOKEN = os.environ['HF_DIPROMATS2024_T2_LEADERBOARD_TOKEN']
|
16 |
-
HF_TOKEN = os.getenv('HF_DIPROMATS2024_T2_LEADERBOARD_TOKEN')
|
17 |
|
18 |
-
#
|
19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
SPLIT_EN = 'results_en'
|
21 |
SPLIT_ES = 'results_es'
|
22 |
-
|
23 |
-
# Define the features with their correct data types
|
24 |
-
FEATURES = datasets.Features({
|
25 |
-
"email": datasets.Value("string"),
|
26 |
"team_name": datasets.Value("string"),
|
27 |
"run_id": datasets.Value("string"),
|
28 |
"description": datasets.Value("string"),
|
@@ -30,24 +24,43 @@ FEATURES = datasets.Features({
|
|
30 |
"strict_f1": datasets.Value("float64"),
|
31 |
"average_f1": datasets.Value("float64") })
|
32 |
|
33 |
-
|
|
|
|
|
34 |
try:
|
35 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
except Exception as e:
|
38 |
-
print(f"Error loading English dataset: {e}")
|
39 |
-
dataset_en = datasets.Dataset.from_dict(
|
40 |
-
dataset_en.push_to_hub(
|
41 |
|
42 |
# Load the Spanish dataset or create an empty one instead
|
43 |
try:
|
44 |
-
dataset_es = datasets.load_dataset(
|
45 |
-
|
46 |
except Exception as e:
|
47 |
-
print(f"Error loading Spanish dataset: {e}")
|
48 |
-
dataset_es = datasets.Dataset.from_dict(
|
49 |
-
dataset_es.push_to_hub(
|
50 |
|
|
|
51 |
|
52 |
# Función para convertir el dataset en tabla
|
53 |
def data_to_table(dataset):
|
@@ -88,7 +101,6 @@ def update_leaderboard(lang, file_path, email, team_input, run_id, description,
|
|
88 |
return data_to_table(dataset_en), data_to_table(dataset_es), gr.Tabs(selected=1), gr.Button(visible=False), gr.Column(visible=True), team_input, run_id, description, email, file_path, lenient_f1, strict_f1, average_f1
|
89 |
|
90 |
dataset = dataset.add_item({
|
91 |
-
"email": email,
|
92 |
"team_name": team_input,
|
93 |
"run_id": run_id,
|
94 |
"description": description,
|
@@ -97,7 +109,7 @@ def update_leaderboard(lang, file_path, email, team_input, run_id, description,
|
|
97 |
"average_f1": average_f1
|
98 |
})
|
99 |
# Save change in database
|
100 |
-
dataset.push_to_hub(
|
101 |
|
102 |
# Update dataset in memory
|
103 |
if lang == "en":
|
@@ -108,26 +120,29 @@ def update_leaderboard(lang, file_path, email, team_input, run_id, description,
|
|
108 |
#output: leaderboard_table, tabs, evaluate_button, submission_col, team_input, run_id, description_input, email_input, file_input, lenient_f1, strict_f1, average_f1
|
109 |
return data_to_table(dataset_en), data_to_table(dataset_es), gr.Tabs(selected=0), gr.Button(visible=True), gr.Column(visible=False), "", "", "", "", None, None, None, None
|
110 |
|
111 |
-
|
112 |
-
# Función para evaluar los resultados
|
113 |
-
def evaluate_results(lang, file_path):
|
114 |
-
lenient_f1 = random.random()
|
115 |
-
strict_f1 = random.random()
|
116 |
-
average_f1 = (lenient_f1 + strict_f1) / 2
|
117 |
-
return lenient_f1, strict_f1, average_f1
|
118 |
-
|
119 |
-
|
120 |
# Función para procesar el archivo de resultados
|
121 |
def process_file(lang, file_path):
|
122 |
-
|
|
|
123 |
if not file_path:
|
124 |
gr.Warning("File cannot be blank")
|
125 |
-
|
126 |
|
127 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
128 |
return gr.Button(visible=True), gr.Row(visible=False), None, None, None
|
129 |
|
130 |
-
lenient_f1
|
|
|
|
|
131 |
|
132 |
return gr.Button(visible=False), gr.Row(visible=True), lenient_f1, strict_f1, average_f1
|
133 |
|
@@ -201,9 +216,10 @@ with gr.Blocks() as leaderboard:
|
|
201 |
evaluate_button.click(process_file,
|
202 |
inputs=[lang, file_input],
|
203 |
outputs=[evaluate_button, submission_col,lenient_f1, strict_f1, average_f1])
|
204 |
-
|
205 |
submit_button.click(update_leaderboard,
|
206 |
inputs=[lang, file_input, email_input, team_input, run_id, description_input, lenient_f1, strict_f1, average_f1],
|
207 |
-
outputs=[leaderboard_table_en,leaderboard_table_es, tabs, evaluate_button, submission_col, team_input, run_id, description_input, email_input, file_input, lenient_f1, strict_f1, average_f1])
|
208 |
-
|
209 |
-
leaderboard.launch()
|
|
|
|
4 |
import random
|
5 |
import datasets
|
6 |
|
7 |
+
from dipromats_evaluation_v2 import evaluate_results
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
|
9 |
+
# CONSTANTS
|
10 |
+
|
11 |
+
# Hugging Face datasets
|
12 |
+
|
13 |
+
DATASET_GOLD = "NLP-UNED/dipromats2024-t2_leaderboard-gold"
|
14 |
+
FILE_GOLD = 'gold_test.json'
|
15 |
+
|
16 |
+
DATASET_RESULTS = "NLP-UNED/dipromats2024-t2_leaderboard-results"
|
17 |
SPLIT_EN = 'results_en'
|
18 |
SPLIT_ES = 'results_es'
|
19 |
+
FEATURES_RESULTS = datasets.Features({
|
|
|
|
|
|
|
20 |
"team_name": datasets.Value("string"),
|
21 |
"run_id": datasets.Value("string"),
|
22 |
"description": datasets.Value("string"),
|
|
|
24 |
"strict_f1": datasets.Value("float64"),
|
25 |
"average_f1": datasets.Value("float64") })
|
26 |
|
27 |
+
EMPTY_RESULT={"team_name": [], "run_id": [], "description": [], "lenient_f1": [], "strict_f1": [], "average_f1": []}
|
28 |
+
|
29 |
+
# Before, you must create the Tokens in HF User Settings to give read and write access only to the datasets
|
30 |
try:
|
31 |
+
from google.colab import userdata
|
32 |
+
# Token must be copied and activated in Colab Secrets
|
33 |
+
HF_TOKEN_GOLD = userdata.get('HF_DIPROMATS2024_T2_GOLD_TOKEN')
|
34 |
+
HF_TOKEN_RESULTS = userdata.get('HF_DIPROMATS2024_T2_RESULTS_TOKEN')
|
35 |
+
except:
|
36 |
+
# Assume running in HF Space
|
37 |
+
# Tokens must be copied in Secrets under Space Settings
|
38 |
+
HF_TOKEN_GOLD = os.getenv('HF_DIPROMATS2024_T2_GOLD_TOKEN')
|
39 |
+
HF_TOKEN_RESULTS = os.getenv('HF_DIPROMATS2024_T2_RESULTS_TOKEN')
|
40 |
|
41 |
+
# LOAD DATASETS
|
42 |
+
|
43 |
+
# Load the Gold Standard data
|
44 |
+
# FILE_GOLD was uploaded directly through HF web, and the default split is train
|
45 |
+
dataset_gold = datasets.load_dataset(DATASET_GOLD, split='train', data_files=FILE_GOLD, token=HF_TOKEN_GOLD)
|
46 |
+
|
47 |
+
# Load the English dataset or create an empty one instead
|
48 |
+
try:
|
49 |
+
dataset_en = datasets.load_dataset(DATASET_RESULTS, split=SPLIT_EN)
|
50 |
except Exception as e:
|
51 |
+
print(f"Error loading English dataset: {e}. Creating it...")
|
52 |
+
dataset_en = datasets.Dataset.from_dict(EMPTY_RESULT, features=FEATURES_RESULTS, split=SPLIT_EN)
|
53 |
+
dataset_en.push_to_hub(DATASET_RESULTS, split=SPLIT_EN, token=HF_TOKEN_RESULTS)
|
54 |
|
55 |
# Load the Spanish dataset or create an empty one instead
|
56 |
try:
|
57 |
+
dataset_es = datasets.load_dataset(DATASET_RESULTS, split=SPLIT_ES)
|
|
|
58 |
except Exception as e:
|
59 |
+
print(f"Error loading Spanish dataset: {e}. Creating it...")
|
60 |
+
dataset_es = datasets.Dataset.from_dict(EMPTY_RESULT, features=FEATURES_RESULTS, split=SPLIT_ES)
|
61 |
+
dataset_es.push_to_hub(DATASET_RESULTS, split=SPLIT_ES, token=HF_TOKEN_RESULTS)
|
62 |
|
63 |
+
# AUX FUNCTIONS
|
64 |
|
65 |
# Función para convertir el dataset en tabla
|
66 |
def data_to_table(dataset):
|
|
|
101 |
return data_to_table(dataset_en), data_to_table(dataset_es), gr.Tabs(selected=1), gr.Button(visible=False), gr.Column(visible=True), team_input, run_id, description, email, file_path, lenient_f1, strict_f1, average_f1
|
102 |
|
103 |
dataset = dataset.add_item({
|
|
|
104 |
"team_name": team_input,
|
105 |
"run_id": run_id,
|
106 |
"description": description,
|
|
|
109 |
"average_f1": average_f1
|
110 |
})
|
111 |
# Save change in database
|
112 |
+
dataset.push_to_hub(DATASET_RESULTS, token=HF_TOKEN_RESULTS)
|
113 |
|
114 |
# Update dataset in memory
|
115 |
if lang == "en":
|
|
|
120 |
#output: leaderboard_table, tabs, evaluate_button, submission_col, team_input, run_id, description_input, email_input, file_input, lenient_f1, strict_f1, average_f1
|
121 |
return data_to_table(dataset_en), data_to_table(dataset_es), gr.Tabs(selected=0), gr.Button(visible=True), gr.Column(visible=False), "", "", "", "", None, None, None, None
|
122 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
123 |
# Función para procesar el archivo de resultados
|
124 |
def process_file(lang, file_path):
|
125 |
+
global dataset_gold
|
126 |
+
|
127 |
if not file_path:
|
128 |
gr.Warning("File cannot be blank")
|
129 |
+
return gr.Button(visible=True), gr.Row(visible=False), None, None, None
|
130 |
|
131 |
+
with open(file_path, 'r') as f:
|
132 |
+
test = json.load(f)
|
133 |
+
|
134 |
+
try:
|
135 |
+
results = evaluate_results(lang, dataset_gold, test)
|
136 |
+
#print(results)
|
137 |
+
except Exception as e:
|
138 |
+
gr.Warning("Invalid JSON file or Incorrect Language")
|
139 |
+
print(f"Error in function evaluate_results: {e}.")
|
140 |
+
print(dataset_gold)
|
141 |
return gr.Button(visible=True), gr.Row(visible=False), None, None, None
|
142 |
|
143 |
+
lenient_f1 = results['lenient']['micro']['scores']['f1-score']
|
144 |
+
strict_f1 = results['strict']['micro']['scores']['f1-score']
|
145 |
+
average_f1 = (lenient_f1 + strict_f1) / 2
|
146 |
|
147 |
return gr.Button(visible=False), gr.Row(visible=True), lenient_f1, strict_f1, average_f1
|
148 |
|
|
|
216 |
evaluate_button.click(process_file,
|
217 |
inputs=[lang, file_input],
|
218 |
outputs=[evaluate_button, submission_col,lenient_f1, strict_f1, average_f1])
|
219 |
+
|
220 |
submit_button.click(update_leaderboard,
|
221 |
inputs=[lang, file_input, email_input, team_input, run_id, description_input, lenient_f1, strict_f1, average_f1],
|
222 |
+
outputs=[leaderboard_table_en, leaderboard_table_es, tabs, evaluate_button, submission_col, team_input, run_id, description_input, email_input, file_input, lenient_f1, strict_f1, average_f1])
|
223 |
+
|
224 |
+
leaderboard.launch()
|
225 |
+
|