anselp committed
Commit: 7d55365 (verified)
Parent: d9c89bb

Update app.py

Files changed (1): app.py (+124 -62)
app.py CHANGED
@@ -2,38 +2,92 @@ import gradio as gr
 import json
 import os
 import random
-import datasets #load_dataset, save_to_disk, load_from_disk
+import datasets
 
-# First create the token in your User Settings
-# Then copy it to Secrets in the Space Settings
-HF_TOKEN = os.environ['HF_DIPROMATS2024_T2_LEADERBOARD_TOKEN']
-
-# Use the Hugging Face dataset
+# First, create a token in User Settings that gives read and write access only to the dataset
+try:
+    from google.colab import userdata
+    # The token must be copied and activated in Colab Secrets
+    HF_TOKEN = userdata.get('HF_DIPROMATS2024_T2_LEADERBOARD_TOKEN')
+except:
+    # Assume we are running in a HF Space
+    # The token must be copied into a Secret under the Space Settings
+    #HF_TOKEN = os.environ['HF_DIPROMATS2024_T2_LEADERBOARD_TOKEN']
+    HF_TOKEN = os.getenv('HF_DIPROMATS2024_T2_LEADERBOARD_TOKEN')
+
+# Hugging Face dataset
 DATASET_NAME = "NLP-UNED/dipromats2024-t2_leaderboard-data"
-SPLIT = 'results'
+SPLIT_EN = 'results_en'
+SPLIT_ES = 'results_es'
+
+# Define the features with their correct data types
+FEATURES = datasets.Features({
+    "email": datasets.Value("string"),
+    "team_name": datasets.Value("string"),
+    "run_id": datasets.Value("string"),
+    "description": datasets.Value("string"),
+    "lenient_f1": datasets.Value("float64"),
+    "strict_f1": datasets.Value("float64"),
+    "average_f1": datasets.Value("float64") })
+
+# Load the English dataset or create an empty one instead
+try:
+    dataset_en = datasets.load_dataset(DATASET_NAME, split=SPLIT_EN, token=HF_TOKEN)
 
+except Exception as e:
+    print(f"Error loading English dataset: {e}")
+    dataset_en = datasets.Dataset.from_dict({"email": [], "team_name": [], "run_id": [], "description": [], "lenient_f1": [], "strict_f1": [], "average_f1": []}, features=FEATURES, split=SPLIT_EN)
+    dataset_en.push_to_hub(DATASET_NAME, split=SPLIT_EN, token=HF_TOKEN)
+
+# Load the Spanish dataset or create an empty one instead
 try:
-    dataset = datasets.load_dataset(DATASET_NAME, token=HF_TOKEN)
+    dataset_es = datasets.load_dataset(DATASET_NAME, split=SPLIT_ES, token=HF_TOKEN)
 
 except Exception as e:
-    print(f"Error loading dataset: {e}")
-    dataset = datasets.Dataset.from_dict({"email": [], "team_name": [], "run_id": [], "description": [], "lenient_f1": [], "strict_f1": [], "average_f1": []})
-    dataset = datasets.DatasetDict({SPLIT: dataset})
+    print(f"Error loading Spanish dataset: {e}")
+    dataset_es = datasets.Dataset.from_dict({"email": [], "team_name": [], "run_id": [], "description": [], "lenient_f1": [], "strict_f1": [], "average_f1": []}, features=FEATURES, split=SPLIT_ES)
+    dataset_es.push_to_hub(DATASET_NAME, split=SPLIT_ES, token=HF_TOKEN)
+
 
 # Function to convert the dataset into a table
-def data_to_table():
-    global dataset
+def data_to_table(dataset):
     table_data = []
-    for item in dataset[SPLIT]:
+    for item in dataset:
         table_data.append([item.get("team_name", ""), item.get("run_id", ""),
                            item.get("lenient_f1", ""), item.get("strict_f1", ""), item.get("average_f1", "")])
     return table_data
 
 
 # Function to upload results to the leaderboard
-def update_leaderboard(email, team_input, run_id, description, lenient_f1, strict_f1, average_f1):
-    global datataset
-    new_data = dataset[SPLIT].add_item({
+def update_leaderboard(lang, file_path, email, team_input, run_id, description, lenient_f1, strict_f1, average_f1):
+    global dataset_en
+    global dataset_es
+    if lang == "en":
+        dataset = dataset_en
+    else:
+        dataset = dataset_es
+
+    warn = False
+    if not email:
+        gr.Warning("Email cannot be blank")
+        warn=True
+    if not team_input:
+        gr.Warning("Team name cannot be blank")
+        warn=True
+    if not run_id:
+        gr.Warning("Run ID cannot be blank")
+        warn=True
+    if not file_path:
+        gr.Warning("File cannot be blank")
+        warn=True
+    if not description:
+        gr.Warning("Description cannot be blank")
+        warn=True
+
+    if warn:
+        return data_to_table(dataset_en), data_to_table(dataset_es), gr.Tabs(selected=1), gr.Button(visible=False), gr.Column(visible=True), team_input, run_id, description, email, file_path, lenient_f1, strict_f1, average_f1
+
+    dataset = dataset.add_item({
         "email": email,
         "team_name": team_input,
         "run_id": run_id,
@@ -42,13 +96,21 @@ def update_leaderboard(email, team_input, run_id, description, lenient_f1, stric
         "strict_f1": strict_f1,
         "average_f1": average_f1
     })
-    dataset[SPLIT] = new_data
-    new_data.push_to_hub(DATASET_NAME, split=SPLIT, token=HF_TOKEN)
-    return data_to_table(), gr.Tabs(selected=0), gr.Button(visible=True), gr.Button(visible=False), "", "", "", "", None, None, None, None
+    # Save the change in the database
+    dataset.push_to_hub(DATASET_NAME, token=HF_TOKEN)
+
+    # Update the dataset in memory
+    if lang == "en":
+        dataset_en = dataset
+    else:
+        dataset_es = dataset
+
+    # output: leaderboard_table, tabs, evaluate_button, submission_col, team_input, run_id, description_input, email_input, file_input, lenient_f1, strict_f1, average_f1
+    return data_to_table(dataset_en), data_to_table(dataset_es), gr.Tabs(selected=0), gr.Button(visible=True), gr.Column(visible=False), "", "", "", "", None, None, None, None
 
 
 # Function to evaluate the results
-def evaluate_results(file_path):
+def evaluate_results(lang, file_path):
     lenient_f1 = random.random()
     strict_f1 = random.random()
     average_f1 = (lenient_f1 + strict_f1) / 2
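Note that `evaluate_results` remains a placeholder: it returns random scores regardless of the uploaded file. Purely as a hypothetical illustration of the kind of computation that would eventually replace it (the official DIPROMATS 2024 strict/lenient definitions are not part of this commit), a micro-averaged F1 over (instance, label) pairs could look like:

```python
def micro_f1(gold, pred):
    """Micro-averaged F1 over two sets of (instance_id, label) pairs."""
    tp = len(gold & pred)                          # true positives
    precision = tp / len(pred) if pred else 0.0
    recall = tp / len(gold) if gold else 0.0
    return 0.0 if precision + recall == 0 else 2 * precision * recall / (precision + recall)

# Hypothetical usage: a strict score might compare exact labels, a lenient one a coarser mapping
gold = {("t1", "narrative_A"), ("t2", "narrative_B")}
pred = {("t1", "narrative_A"), ("t2", "narrative_C")}
print(micro_f1(gold, pred))  # 0.5
```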
@@ -56,33 +118,18 @@ def evaluate_results(file_path):
 
 
 # Function to process the results file
-def process_file(file_path, team_input, run_id, description, email):
+def process_file(lang, file_path):
     warn = False
     if not file_path:
         gr.Warning("File cannot be blank")
         warn=True
-    if not team_input:
-        gr.Warning("Team name cannot be blank")
-        warn=True
-    if not run_id:
-        gr.Warning("Run ID cannot be blank")
-        warn=True
-    if not file_path:
-        gr.Warning("File cannot be blank")
-        warn=True
-    if not description:
-        gr.Warning("Description cannot be blank")
-        warn=True
-    if not email:
-        gr.Warning("Email cannot be blank")
-        warn=True
 
     if warn:
-        return gr.Button(visible=True), gr.Button(visible=False), None, None, None
+        return gr.Button(visible=True), gr.Row(visible=False), None, None, None
 
-    lenient_f1, strict_f1, average_f1 = evaluate_results(file_path)
+    lenient_f1, strict_f1, average_f1 = evaluate_results(lang, file_path)
 
-    return gr.Button(visible=False), gr.Button(visible=True), lenient_f1, strict_f1, average_f1
+    return gr.Button(visible=False), gr.Row(visible=True), lenient_f1, strict_f1, average_f1
 
 
 # Main
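With the form fields moved to `update_leaderboard`, `process_file` now only checks that a file was provided. Since the expected JSON format is referenced but not spelled out here, a format-agnostic sanity check is about all that can be sketched without guessing the schema (hypothetical helper, not part of the commit):

```python
import json

def is_valid_json(file_path):
    """Return True if the uploaded file parses as JSON (the schema itself is not checked here)."""
    try:
        with open(file_path, encoding="utf-8") as f:
            json.load(f)
        return True
    except (OSError, json.JSONDecodeError):
        return False
```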
@@ -93,20 +140,31 @@ with gr.Blocks() as leaderboard:
         """
         # Dipromats 2024 Task 2 Leaderboard
         # Automatic Detection of Narratives from Diplomats of Major Powers
-        This is...
-        You can...
+        These are the leaderboards for DIPROMATS 2024 Task 2 described in <a href=https://nlp.uned.es/dipromats2024>nlp.uned.es/dipromats2024</a>.
+        The Gold Standard is not publicly available, so LLMs cannot be contaminated with it.
+        However, you can submit your results here and get your system automatically evaluated.
+        Then you will have the choice to submit your results to the leaderboard.
         """)
     with gr.Tabs() as tabs:
 
-        # Tab Leaderboard
-        with gr.TabItem("Leaderboard", id=0):
+        # Tab English Leaderboard
+        with gr.TabItem("English Leaderboard", id=0):
             gr.Markdown(
                 """
-                #
-                # Leaderboard
+                # English Leaderboard
                 """)
-            leaderboard_table = gr.Dataframe(headers=["Team", "Run ID", "Lenient F1", "Strict F1", "Average F1"],
-                                             value=data_to_table(),
+            leaderboard_table_en = gr.Dataframe(headers=["Team", "Run ID", "Lenient F1", "Strict F1", "Average F1"],
+                                                value=data_to_table(dataset_en),
+                                                interactive=False)
+
+        # Tab Spanish Leaderboard
+        with gr.TabItem("Spanish Leaderboard", id=2):
+            gr.Markdown(
+                """
+                # Spanish Leaderboard
+                """)
+            leaderboard_table_es = gr.Dataframe(headers=["Team", "Run ID", "Lenient F1", "Strict F1", "Average F1"],
+                                                value=data_to_table(dataset_es),
                                                 interactive=False)
 
         # Tab Evaluate
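For reference, the ids given to the `gr.TabItem` components above are what let `update_leaderboard` jump back to a leaderboard tab by returning `gr.Tabs(selected=0)`. A minimal, self-contained sketch of that Gradio pattern (assuming a recent Gradio 4.x; not part of the commit):

```python
import gradio as gr

with gr.Blocks() as demo:
    with gr.Tabs() as tabs:
        with gr.TabItem("Leaderboard", id=0):
            gr.Markdown("Results go here")
        with gr.TabItem("Evaluate", id=1):
            btn = gr.Button("Submit and go back to the leaderboard")
    # Returning gr.Tabs(selected=...) to the Tabs component switches the active tab
    btn.click(lambda: gr.Tabs(selected=0), inputs=None, outputs=tabs)

demo.launch()
```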
@@ -117,17 +175,11 @@
             Then you can decide to submit your results to the leaderboard or not.
             Make sure that you upload a file with the json format described in...
             """)
-
-            # Submission Form
             with gr.Row():
+                file_input = gr.File(label="Upload a JSON file", file_types=[".json"], type="filepath", file_count="single")
                 with gr.Column():
-                    with gr.Row():
-                        team_input = gr.Textbox(label="Team Name")
-                        run_id = gr.Textbox(label="Run ID")
-                    email_input = gr.Textbox(label="Email (only for submission verification, it won't be shown)")
-                    description_input = gr.Textbox(label="System description", lines=6)
-                    file_input = gr.File(label="Upload a JSON file", file_types=[".json"], type="filepath", file_count="single")
-                    evaluate_button = gr.Button("Evaluate")
+                    lang = gr.Dropdown(label="Language", choices=["en", "es"], interactive=True)
+                    evaluate_button = gr.Button("Evaluate")
 
             # System results table
             with gr.Row(visible=True):
@@ -136,12 +188,22 @@
                 average_f1 = gr.Number(label="Average F1", interactive=False)
 
             # Submit to leaderboard
-            submit_button = gr.Button("Submit to leaderboard", visible=False)
+            with gr.Column(visible=False) as submission_col:
+                with gr.Row():
+                    with gr.Column():
+                        with gr.Row():
+                            team_input = gr.Textbox(label="Team Name")
+                            run_id = gr.Textbox(label="Run ID")
+                        email_input = gr.Textbox(label="Email (only for submission verification, it won't be shown)")
+                        description_input = gr.Textbox(label="System description", lines=6)
+                submit_button = gr.Button("Submit to leaderboard")
 
     evaluate_button.click(process_file,
-                          inputs=[file_input, team_input, run_id, description_input, email_input],
-                          outputs=[evaluate_button, submit_button, lenient_f1, strict_f1, average_f1])
+                          inputs=[lang, file_input],
+                          outputs=[evaluate_button, submission_col, lenient_f1, strict_f1, average_f1])
+
     submit_button.click(update_leaderboard,
-                        inputs=[email_input, team_input, run_id, description_input, lenient_f1, strict_f1, average_f1],
-                        outputs=[leaderboard_table, tabs, evaluate_button, submit_button, team_input, run_id, description_input, email_input, file_input, lenient_f1, strict_f1, average_f1])
+                        inputs=[lang, file_input, email_input, team_input, run_id, description_input, lenient_f1, strict_f1, average_f1],
+                        outputs=[leaderboard_table_en, leaderboard_table_es, tabs, evaluate_button, submission_col, team_input, run_id, description_input, email_input, file_input, lenient_f1, strict_f1, average_f1])
+
 leaderboard.launch()
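After a successful submission, the new row is pushed to the Hub dataset, so the leaderboard contents can also be inspected outside the app by loading the corresponding split directly; a short sketch assuming a token with read access is set in the environment:

```python
import os
import datasets

token = os.getenv("HF_DIPROMATS2024_T2_LEADERBOARD_TOKEN")
ds = datasets.load_dataset("NLP-UNED/dipromats2024-t2_leaderboard-data", split="results_en", token=token)
for row in ds:
    print(row["team_name"], row["run_id"], row["average_f1"])
```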
 