anselp committed on
Commit
0e52835
verified
1 Parent(s): a2032b5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +61 -45
app.py CHANGED
@@ -4,25 +4,19 @@ import os
4
  import random
5
  import datasets
6
 
7
- # Before, you must create a Token in User Settings to give read and write access only to the dataset
8
- try:
9
- from google.colab import userdata
10
- # Token must be copied and activated in Colab Secrets
11
- HF_TOKEN = userdata.get('HF_DIPROMATS2024_T2_LEADERBOARD_TOKEN')
12
- except:
13
- # Assume running in HF Space
14
- # Token must be copied in a Secret under Space Settings
15
- #HF_TOKEN = os.environ['HF_DIPROMATS2024_T2_LEADERBOARD_TOKEN']
16
- HF_TOKEN = os.getenv('HF_DIPROMATS2024_T2_LEADERBOARD_TOKEN')
17
 
18
- # Hugging Face dataset
19
- DATASET_NAME = "NLP-UNED/dipromats2024-t2_leaderboard-data"
 
 
 
 
 
 
20
  SPLIT_EN = 'results_en'
21
  SPLIT_ES = 'results_es'
22
-
23
- # Define the features with their correct data types
24
- FEATURES = datasets.Features({
25
- "email": datasets.Value("string"),
26
  "team_name": datasets.Value("string"),
27
  "run_id": datasets.Value("string"),
28
  "description": datasets.Value("string"),
@@ -30,24 +24,43 @@ FEATURES = datasets.Features({
30
  "strict_f1": datasets.Value("float64"),
31
  "average_f1": datasets.Value("float64") })
32
 
33
- # Load the English dataset or create an empty one instead
 
 
34
  try:
35
- dataset_en = datasets.load_dataset(DATASET_NAME, split=SPLIT_EN, token=HF_TOKEN)
 
 
 
 
 
 
 
 
36
 
 
 
 
 
 
 
 
 
 
37
  except Exception as e:
38
- print(f"Error loading English dataset: {e}")
39
- dataset_en = datasets.Dataset.from_dict({"email": [], "team_name": [], "run_id": [], "description": [], "lenient_f1": [], "strict_f1": [], "average_f1": []}, features=FEATURES, split=SPLIT_EN)
40
- dataset_en.push_to_hub(DATASET_NAME, split=SPLIT_EN, token=HF_TOKEN)
41
 
42
  # Load the Spanish dataset or create an empty one instead
43
  try:
44
- dataset_es = datasets.load_dataset(DATASET_NAME, split=SPLIT_ES, token=HF_TOKEN)
45
-
46
  except Exception as e:
47
- print(f"Error loading Spanish dataset: {e}")
48
- dataset_es = datasets.Dataset.from_dict({"email": [], "team_name": [], "run_id": [], "description": [], "lenient_f1": [], "strict_f1": [], "average_f1": []}, features=FEATURES, split=SPLIT_ES)
49
- dataset_es.push_to_hub(DATASET_NAME, split=SPLIT_ES, token=HF_TOKEN)
50
 
 
51
 
52
  # Función para convertir el dataset en tabla
53
  def data_to_table(dataset):
@@ -88,7 +101,6 @@ def update_leaderboard(lang, file_path, email, team_input, run_id, description,
88
  return data_to_table(dataset_en), data_to_table(dataset_es), gr.Tabs(selected=1), gr.Button(visible=False), gr.Column(visible=True), team_input, run_id, description, email, file_path, lenient_f1, strict_f1, average_f1
89
 
90
  dataset = dataset.add_item({
91
- "email": email,
92
  "team_name": team_input,
93
  "run_id": run_id,
94
  "description": description,
@@ -97,7 +109,7 @@ def update_leaderboard(lang, file_path, email, team_input, run_id, description,
97
  "average_f1": average_f1
98
  })
99
  # Save change in database
100
- dataset.push_to_hub(DATASET_NAME, token=HF_TOKEN)
101
 
102
  # Update dataset in memory
103
  if lang == "en":
@@ -108,26 +120,29 @@ def update_leaderboard(lang, file_path, email, team_input, run_id, description,
108
  #output: leaderboard_table, tabs, evaluate_button, submission_col, team_input, run_id, description_input, email_input, file_input, lenient_f1, strict_f1, average_f1
109
  return data_to_table(dataset_en), data_to_table(dataset_es), gr.Tabs(selected=0), gr.Button(visible=True), gr.Column(visible=False), "", "", "", "", None, None, None, None
110
 
111
-
112
- # Función para evaluar los resultados
113
- def evaluate_results(lang, file_path):
114
- lenient_f1 = random.random()
115
- strict_f1 = random.random()
116
- average_f1 = (lenient_f1 + strict_f1) / 2
117
- return lenient_f1, strict_f1, average_f1
118
-
119
-
120
  # Función para procesar el archivo de resultados
121
  def process_file(lang, file_path):
122
- warn = False
 
123
  if not file_path:
124
  gr.Warning("File cannot be blank")
125
- warn=True
126
 
127
- if warn:
 
 
 
 
 
 
 
 
 
128
  return gr.Button(visible=True), gr.Row(visible=False), None, None, None
129
 
130
- lenient_f1, strict_f1, average_f1 = evaluate_results(lang, file_path)
 
 
131
 
132
  return gr.Button(visible=False), gr.Row(visible=True), lenient_f1, strict_f1, average_f1
133
 
@@ -201,9 +216,10 @@ with gr.Blocks() as leaderboard:
201
  evaluate_button.click(process_file,
202
  inputs=[lang, file_input],
203
  outputs=[evaluate_button, submission_col,lenient_f1, strict_f1, average_f1])
204
-
205
  submit_button.click(update_leaderboard,
206
  inputs=[lang, file_input, email_input, team_input, run_id, description_input, lenient_f1, strict_f1, average_f1],
207
- outputs=[leaderboard_table_en,leaderboard_table_es, tabs, evaluate_button, submission_col, team_input, run_id, description_input, email_input, file_input, lenient_f1, strict_f1, average_f1])
208
-
209
- leaderboard.launch()
 
 
4
  import random
5
  import datasets
6
 
7
+ from dipromats_evaluation_v2 import evaluate_results
 
 
 
 
 
 
 
 
 
8
 
9
+ # CONSTANTS
10
+
11
+ # Hugging Face datasets
12
+
13
+ DATASET_GOLD = "NLP-UNED/dipromats2024-t2_leaderboard-gold"
14
+ FILE_GOLD = 'gold_test.json'
15
+
16
+ DATASET_RESULTS = "NLP-UNED/dipromats2024-t2_leaderboard-results"
17
  SPLIT_EN = 'results_en'
18
  SPLIT_ES = 'results_es'
19
+ FEATURES_RESULTS = datasets.Features({
 
 
 
20
  "team_name": datasets.Value("string"),
21
  "run_id": datasets.Value("string"),
22
  "description": datasets.Value("string"),
 
24
  "strict_f1": datasets.Value("float64"),
25
  "average_f1": datasets.Value("float64") })
26
 
27
+ EMPTY_RESULT={"team_name": [], "run_id": [], "description": [], "lenient_f1": [], "strict_f1": [], "average_f1": []}
28
+
29
+ # Before, you must create the Tokens in HF User Settings to give read and write access only to the datasets
30
  try:
31
+ from google.colab import userdata
32
+ # Token must be copied and activated in Colab Secrets
33
+ HF_TOKEN_GOLD = userdata.get('HF_DIPROMATS2024_T2_GOLD_TOKEN')
34
+ HF_TOKEN_RESULTS = userdata.get('HF_DIPROMATS2024_T2_RESULTS_TOKEN')
35
+ except:
36
+ # Assume running in HF Space
37
+ # Tokens must be copied in Secrets under Space Settings
38
+ HF_TOKEN_GOLD = os.getenv('HF_DIPROMATS2024_T2_GOLD_TOKEN')
39
+ HF_TOKEN_RESULTS = os.getenv('HF_DIPROMATS2024_T2_RESULTS_TOKEN')
40
 
41
+ # LOAD DATASETS
42
+
43
+ # Load the Gold Standard data
44
+ # FILE_GOLD was uploaded directly through HF web, and the default split is train
45
+ dataset_gold = datasets.load_dataset(DATASET_GOLD, split='train', data_files=FILE_GOLD, token=HF_TOKEN_GOLD)
46
+
47
+ # Load the English dataset or create an empty one instead
48
+ try:
49
+ dataset_en = datasets.load_dataset(DATASET_RESULTS, split=SPLIT_EN)
50
  except Exception as e:
51
+ print(f"Error loading English dataset: {e}. Creating it...")
52
+ dataset_en = datasets.Dataset.from_dict(EMPTY_RESULT, features=FEATURES_RESULTS, split=SPLIT_EN)
53
+ dataset_en.push_to_hub(DATASET_RESULTS, split=SPLIT_EN, token=HF_TOKEN_RESULTS)
54
 
55
  # Load the Spanish dataset or create an empty one instead
56
  try:
57
+ dataset_es = datasets.load_dataset(DATASET_RESULTS, split=SPLIT_ES)
 
58
  except Exception as e:
59
+ print(f"Error loading Spanish dataset: {e}. Creating it...")
60
+ dataset_es = datasets.Dataset.from_dict(EMPTY_RESULT, features=FEATURES_RESULTS, split=SPLIT_ES)
61
+ dataset_es.push_to_hub(DATASET_RESULTS, split=SPLIT_ES, token=HF_TOKEN_RESULTS)
62
 
63
+ # AUX FUNCTIONS
64
 
65
  # Función para convertir el dataset en tabla
66
  def data_to_table(dataset):
 
101
  return data_to_table(dataset_en), data_to_table(dataset_es), gr.Tabs(selected=1), gr.Button(visible=False), gr.Column(visible=True), team_input, run_id, description, email, file_path, lenient_f1, strict_f1, average_f1
102
 
103
  dataset = dataset.add_item({
 
104
  "team_name": team_input,
105
  "run_id": run_id,
106
  "description": description,
 
109
  "average_f1": average_f1
110
  })
111
  # Save change in database
112
+ dataset.push_to_hub(DATASET_RESULTS, token=HF_TOKEN_RESULTS)
113
 
114
  # Update dataset in memory
115
  if lang == "en":
 
120
  #output: leaderboard_table, tabs, evaluate_button, submission_col, team_input, run_id, description_input, email_input, file_input, lenient_f1, strict_f1, average_f1
121
  return data_to_table(dataset_en), data_to_table(dataset_es), gr.Tabs(selected=0), gr.Button(visible=True), gr.Column(visible=False), "", "", "", "", None, None, None, None
122
 
 
 
 
 
 
 
 
 
 
123
  # Función para procesar el archivo de resultados
124
  def process_file(lang, file_path):
125
+ global dataset_gold
126
+
127
  if not file_path:
128
  gr.Warning("File cannot be blank")
129
+ return gr.Button(visible=True), gr.Row(visible=False), None, None, None
130
 
131
+ with open(file_path, 'r') as f:
132
+ test = json.load(f)
133
+
134
+ try:
135
+ results = evaluate_results(lang, dataset_gold, test)
136
+ #print(results)
137
+ except Exception as e:
138
+ gr.Warning("Invalid JSON file or Incorrect Language")
139
+ print(f"Error in function evaluate_results: {e}.")
140
+ print(dataset_gold)
141
  return gr.Button(visible=True), gr.Row(visible=False), None, None, None
142
 
143
+ lenient_f1 = results['lenient']['micro']['scores']['f1-score']
144
+ strict_f1 = results['strict']['micro']['scores']['f1-score']
145
+ average_f1 = (lenient_f1 + strict_f1) / 2
146
 
147
  return gr.Button(visible=False), gr.Row(visible=True), lenient_f1, strict_f1, average_f1
148
 
 
216
  evaluate_button.click(process_file,
217
  inputs=[lang, file_input],
218
  outputs=[evaluate_button, submission_col,lenient_f1, strict_f1, average_f1])
219
+
220
  submit_button.click(update_leaderboard,
221
  inputs=[lang, file_input, email_input, team_input, run_id, description_input, lenient_f1, strict_f1, average_f1],
222
+ outputs=[leaderboard_table_en, leaderboard_table_es, tabs, evaluate_button, submission_col, team_input, run_id, description_input, email_input, file_input, lenient_f1, strict_f1, average_f1])
223
+
224
+ leaderboard.launch()
225
+