Update app.py
Browse files
app.py
CHANGED
@@ -2,38 +2,92 @@ import gradio as gr
|
|
2 |
import json
|
3 |
import os
|
4 |
import random
|
5 |
-
import datasets
|
6 |
|
7 |
-
#
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
DATASET_NAME = "NLP-UNED/dipromats2024-t2_leaderboard-data"
|
13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
try:
|
16 |
-
|
17 |
|
18 |
except Exception as e:
|
19 |
-
print(f"Error loading dataset: {e}")
|
20 |
-
|
21 |
-
|
|
|
22 |
|
23 |
# Función para convertir el dataset en tabla
|
24 |
-
def data_to_table():
|
25 |
-
global dataset
|
26 |
table_data = []
|
27 |
-
for item in dataset
|
28 |
table_data.append([item.get("team_name", ""), item.get("run_id", ""),
|
29 |
item.get("lenient_f1", ""), item.get("strict_f1", ""), item.get("average_f1", "")])
|
30 |
return table_data
|
31 |
|
32 |
|
33 |
# Función para subir los resultados al leaderboard
|
34 |
-
def update_leaderboard(email, team_input, run_id, description, lenient_f1, strict_f1, average_f1):
|
35 |
-
global
|
36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
"email": email,
|
38 |
"team_name": team_input,
|
39 |
"run_id": run_id,
|
@@ -42,13 +96,21 @@ def update_leaderboard(email, team_input, run_id, description, lenient_f1, stric
|
|
42 |
"strict_f1": strict_f1,
|
43 |
"average_f1": average_f1
|
44 |
})
|
45 |
-
|
46 |
-
|
47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
|
49 |
|
50 |
# Función para evaluar los resultados
|
51 |
-
def evaluate_results(file_path):
|
52 |
lenient_f1 = random.random()
|
53 |
strict_f1 = random.random()
|
54 |
average_f1 = (lenient_f1 + strict_f1) / 2
|
@@ -56,33 +118,18 @@ def evaluate_results(file_path):
|
|
56 |
|
57 |
|
58 |
# Función para procesar el archivo de resultados
|
59 |
-
def process_file(
|
60 |
warn = False
|
61 |
if not file_path:
|
62 |
gr.Warning("File cannot be blank")
|
63 |
warn=True
|
64 |
-
if not team_input:
|
65 |
-
gr.Warning("Team name cannot be blank")
|
66 |
-
warn=True
|
67 |
-
if not run_id:
|
68 |
-
gr.Warning("Run ID cannot be blank")
|
69 |
-
warn=True
|
70 |
-
if not file_path:
|
71 |
-
gr.Warning("File cannot be blank")
|
72 |
-
warn=True
|
73 |
-
if not description:
|
74 |
-
gr.Warning("Description cannot be blank")
|
75 |
-
warn=True
|
76 |
-
if not email:
|
77 |
-
gr.Warning("Email cannot be blank")
|
78 |
-
warn=True
|
79 |
|
80 |
if warn:
|
81 |
-
return gr.Button(visible=True), gr.
|
82 |
|
83 |
-
lenient_f1, strict_f1, average_f1 = evaluate_results(file_path)
|
84 |
|
85 |
-
return gr.Button(visible=False), gr.
|
86 |
|
87 |
|
88 |
# Main
|
@@ -93,20 +140,31 @@ with gr.Blocks() as leaderboard:
|
|
93 |
"""
|
94 |
# Dipromats 2024 Task 2 Leaderboard
|
95 |
# Automatic Detection of Narratives from Diplomats of Major Powers
|
96 |
-
|
97 |
-
|
|
|
|
|
98 |
""")
|
99 |
with gr.Tabs() as tabs:
|
100 |
|
101 |
-
# Tab Leaderboard
|
102 |
-
with gr.TabItem("Leaderboard", id=0):
|
103 |
gr.Markdown(
|
104 |
"""
|
105 |
-
#
|
106 |
-
# Leaderboard
|
107 |
""")
|
108 |
-
|
109 |
-
value=data_to_table(),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
110 |
interactive=False)
|
111 |
|
112 |
# Tab Evaluate
|
@@ -117,17 +175,11 @@ with gr.Blocks() as leaderboard:
|
|
117 |
Then you can decide to submit your results to the leaderboard or not.
|
118 |
Make sure that you upload a file with the json format described in...
|
119 |
""")
|
120 |
-
|
121 |
-
# Submission Form
|
122 |
with gr.Row():
|
|
|
123 |
with gr.Column():
|
124 |
-
|
125 |
-
|
126 |
-
run_id = gr.Textbox(label="Run ID")
|
127 |
-
email_input = gr.Textbox(label="Email (only for submission verification, it won't be shown)")
|
128 |
-
description_input = gr.Textbox(label="System description", lines=6)
|
129 |
-
file_input = gr.File(label="Upload a JSON file", file_types=[".json"], type="filepath", file_count="single")
|
130 |
-
evaluate_button = gr.Button("Evaluate")
|
131 |
|
132 |
# System results table
|
133 |
with gr.Row(visible=True):
|
@@ -136,12 +188,22 @@ with gr.Blocks() as leaderboard:
|
|
136 |
average_f1 = gr.Number(label="Average F1", interactive=False)
|
137 |
|
138 |
# Submit to leaderboard
|
139 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
140 |
|
141 |
evaluate_button.click(process_file,
|
142 |
-
inputs=[
|
143 |
-
outputs=[evaluate_button,
|
|
|
144 |
submit_button.click(update_leaderboard,
|
145 |
-
inputs=[email_input, team_input, run_id, description_input, lenient_f1, strict_f1, average_f1],
|
146 |
-
outputs=[
|
|
|
147 |
leaderboard.launch()
|
|
|
2 |
import json
|
3 |
import os
|
4 |
import random
|
5 |
+
import datasets
|
6 |
|
7 |
+
# First, create a token in User Settings that grants read and write access to this dataset only
|
8 |
+
try:
|
9 |
+
from google.colab import userdata
|
10 |
+
# Token must be copied and activated in Colab Secrets
|
11 |
+
HF_TOKEN = userdata.get('HF_DIPROMATS2024_T2_LEADERBOARD_TOKEN')
|
12 |
+
except:
|
13 |
+
# Assume running in HF Space
|
14 |
+
# Token must be copied in a Secret under Space Settings
|
15 |
+
#HF_TOKEN = os.environ['HF_DIPROMATS2024_T2_LEADERBOARD_TOKEN']
|
16 |
+
HF_TOKEN = os.getenv('HF_DIPROMATS2024_T2_LEADERBOARD_TOKEN')
|
17 |
+
|
18 |
+
# Hugging Face dataset
|
19 |
DATASET_NAME = "NLP-UNED/dipromats2024-t2_leaderboard-data"
|
20 |
+
SPLIT_EN = 'results_en'
|
21 |
+
SPLIT_ES = 'results_es'
|
22 |
+
|
23 |
+
# Define the features with their correct data types
|
24 |
+
FEATURES = datasets.Features({
|
25 |
+
"email": datasets.Value("string"),
|
26 |
+
"team_name": datasets.Value("string"),
|
27 |
+
"run_id": datasets.Value("string"),
|
28 |
+
"description": datasets.Value("string"),
|
29 |
+
"lenient_f1": datasets.Value("float64"),
|
30 |
+
"strict_f1": datasets.Value("float64"),
|
31 |
+
"average_f1": datasets.Value("float64") })
|
32 |
+
|
33 |
+
# Load the English dataset or create an empty one instead
|
34 |
+
try:
|
35 |
+
dataset_en = datasets.load_dataset(DATASET_NAME, split=SPLIT_EN, token=HF_TOKEN)
|
36 |
|
37 |
+
except Exception as e:
|
38 |
+
print(f"Error loading English dataset: {e}")
|
39 |
+
dataset_en = datasets.Dataset.from_dict({"email": [], "team_name": [], "run_id": [], "description": [], "lenient_f1": [], "strict_f1": [], "average_f1": []}, features=FEATURES, split=SPLIT_EN)
|
40 |
+
dataset_en.push_to_hub(DATASET_NAME, split=SPLIT_EN, token=HF_TOKEN)
|
41 |
+
|
42 |
+
# Load the Spanish dataset or create an empty one instead
|
43 |
try:
|
44 |
+
dataset_es = datasets.load_dataset(DATASET_NAME, split=SPLIT_ES, token=HF_TOKEN)
|
45 |
|
46 |
except Exception as e:
|
47 |
+
print(f"Error loading Spanish dataset: {e}")
|
48 |
+
dataset_es = datasets.Dataset.from_dict({"email": [], "team_name": [], "run_id": [], "description": [], "lenient_f1": [], "strict_f1": [], "average_f1": []}, features=FEATURES, split=SPLIT_ES)
|
49 |
+
dataset_es.push_to_hub(DATASET_NAME, split=SPLIT_ES, token=HF_TOKEN)
|
50 |
+
|
51 |
|
52 |
# Función para convertir el dataset en tabla
|
53 |
+
def data_to_table(dataset):
|
|
|
54 |
table_data = []
|
55 |
+
for item in dataset:
|
56 |
table_data.append([item.get("team_name", ""), item.get("run_id", ""),
|
57 |
item.get("lenient_f1", ""), item.get("strict_f1", ""), item.get("average_f1", "")])
|
58 |
return table_data
|
59 |
|
60 |
|
61 |
# Función para subir los resultados al leaderboard
|
62 |
+
def update_leaderboard(lang, file_path, email, team_input, run_id, description, lenient_f1, strict_f1, average_f1):
|
63 |
+
global dataset_en
|
64 |
+
global dataset_es
|
65 |
+
if lang == "en":
|
66 |
+
dataset = dataset_en
|
67 |
+
else:
|
68 |
+
dataset = dataset_es
|
69 |
+
|
70 |
+
warn = False
|
71 |
+
if not email:
|
72 |
+
gr.Warning("Email cannot be blank")
|
73 |
+
warn=True
|
74 |
+
if not team_input:
|
75 |
+
gr.Warning("Team name cannot be blank")
|
76 |
+
warn=True
|
77 |
+
if not run_id:
|
78 |
+
gr.Warning("Run ID cannot be blank")
|
79 |
+
warn=True
|
80 |
+
if not file_path:
|
81 |
+
gr.Warning("File cannot be blank")
|
82 |
+
warn=True
|
83 |
+
if not description:
|
84 |
+
gr.Warning("Description cannot be blank")
|
85 |
+
warn=True
|
86 |
+
|
87 |
+
if warn:
|
88 |
+
return data_to_table(dataset_en), data_to_table(dataset_es), gr.Tabs(selected=1), gr.Button(visible=False), gr.Column(visible=True), team_input, run_id, description, email, file_path, lenient_f1, strict_f1, average_f1
|
89 |
+
|
90 |
+
dataset = dataset.add_item({
|
91 |
"email": email,
|
92 |
"team_name": team_input,
|
93 |
"run_id": run_id,
|
|
|
96 |
"strict_f1": strict_f1,
|
97 |
"average_f1": average_f1
|
98 |
})
|
99 |
+
# Save change in database
|
100 |
+
dataset.push_to_hub(DATASET_NAME, token=HF_TOKEN)
|
101 |
+
|
102 |
+
# Update dataset in memory
|
103 |
+
if lang == "en":
|
104 |
+
dataset_en = dataset
|
105 |
+
else:
|
106 |
+
dataset_es = dataset
|
107 |
+
|
108 |
+
# Outputs: leaderboard_table_en, leaderboard_table_es, tabs, evaluate_button, submission_col, team_input, run_id, description_input, email_input, file_input, lenient_f1, strict_f1, average_f1
|
109 |
+
return data_to_table(dataset_en), data_to_table(dataset_es), gr.Tabs(selected=0), gr.Button(visible=True), gr.Column(visible=False), "", "", "", "", None, None, None, None
|
110 |
|
111 |
|
112 |
# Función para evaluar los resultados
|
113 |
+
def evaluate_results(lang, file_path):
|
114 |
lenient_f1 = random.random()
|
115 |
strict_f1 = random.random()
|
116 |
average_f1 = (lenient_f1 + strict_f1) / 2
|
|
|
118 |
|
119 |
|
120 |
# Función para procesar el archivo de resultados
|
121 |
+
def process_file(lang, file_path):
|
122 |
warn = False
|
123 |
if not file_path:
|
124 |
gr.Warning("File cannot be blank")
|
125 |
warn=True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
126 |
|
127 |
if warn:
|
128 |
+
return gr.Button(visible=True), gr.Row(visible=False), None, None, None
|
129 |
|
130 |
+
lenient_f1, strict_f1, average_f1 = evaluate_results(lang, file_path)
|
131 |
|
132 |
+
return gr.Button(visible=False), gr.Row(visible=True), lenient_f1, strict_f1, average_f1
|
133 |
|
134 |
|
135 |
# Main
|
|
|
140 |
"""
|
141 |
# Dipromats 2024 Task 2 Leaderboard
|
142 |
# Automatic Detection of Narratives from Diplomats of Major Powers
|
143 |
+
These are the leaderboards for DIPROMATS 2024 Task 2 described in <a href=https://nlp.uned.es/dipromats2024>nlp.uned.es/dipromats2024</a>.
|
144 |
+
The Gold Standard is not publicly available so LLMs cannot be contaminated with it.
|
145 |
+
However, you can submit your results here and get your system automatically evaluated.
|
146 |
+
Then you will have the choice to submit your results to the leaderboard.
|
147 |
""")
|
148 |
with gr.Tabs() as tabs:
|
149 |
|
150 |
+
# Tab English Leaderboard
|
151 |
+
with gr.TabItem("English Leaderboard", id=0):
|
152 |
gr.Markdown(
|
153 |
"""
|
154 |
+
# English Leaderboard
|
|
|
155 |
""")
|
156 |
+
leaderboard_table_en = gr.Dataframe(headers=["Team", "Run ID", "Lenient F1", "Strict F1", "Average F1"],
|
157 |
+
value=data_to_table(dataset_en),
|
158 |
+
interactive=False)
|
159 |
+
|
160 |
+
# Tab Spanish Leaderboard
|
161 |
+
with gr.TabItem("Spanish Leaderboard", id=2):
|
162 |
+
gr.Markdown(
|
163 |
+
"""
|
164 |
+
# Spanish Leaderboard
|
165 |
+
""")
|
166 |
+
leaderboard_table_es = gr.Dataframe(headers=["Team", "Run ID", "Lenient F1", "Strict F1", "Average F1"],
|
167 |
+
value=data_to_table(dataset_es),
|
168 |
interactive=False)
|
169 |
|
170 |
# Tab Evaluate
|
|
|
175 |
Then you can decide to submit your results to the leaderboard or not.
|
176 |
Make sure that you upload a file with the json format described in...
|
177 |
""")
|
|
|
|
|
178 |
with gr.Row():
|
179 |
+
file_input = gr.File(label="Upload a JSON file", file_types=[".json"], type="filepath", file_count="single")
|
180 |
with gr.Column():
|
181 |
+
lang = gr.Dropdown(label="Language", choices=["en", "es"], interactive=True)
|
182 |
+
evaluate_button = gr.Button("Evaluate")
|
|
|
|
|
|
|
|
|
|
|
183 |
|
184 |
# System results table
|
185 |
with gr.Row(visible=True):
|
|
|
188 |
average_f1 = gr.Number(label="Average F1", interactive=False)
|
189 |
|
190 |
# Submit to leaderboard
|
191 |
+
with gr.Column(visible=False) as submission_col:
|
192 |
+
with gr.Row():
|
193 |
+
with gr.Column():
|
194 |
+
with gr.Row():
|
195 |
+
team_input = gr.Textbox(label="Team Name")
|
196 |
+
run_id = gr.Textbox(label="Run ID")
|
197 |
+
email_input = gr.Textbox(label="Email (only for submission verification, it won't be shown)")
|
198 |
+
description_input = gr.Textbox(label="System description", lines=6)
|
199 |
+
submit_button = gr.Button("Submit to leaderboard")
|
200 |
|
201 |
evaluate_button.click(process_file,
|
202 |
+
inputs=[lang, file_input],
|
203 |
+
outputs=[evaluate_button, submission_col,lenient_f1, strict_f1, average_f1])
|
204 |
+
|
205 |
submit_button.click(update_leaderboard,
|
206 |
+
inputs=[lang, file_input, email_input, team_input, run_id, description_input, lenient_f1, strict_f1, average_f1],
|
207 |
+
outputs=[leaderboard_table_en,leaderboard_table_es, tabs, evaluate_button, submission_col, team_input, run_id, description_input, email_input, file_input, lenient_f1, strict_f1, average_f1])
|
208 |
+
|
209 |
leaderboard.launch()
|