Julio Cesar Contreras Huerta committed on
Commit
1a0754f
·
1 Parent(s): e6039dd
Files changed (2) hide show
  1. app.py +80 -203
  2. evaluate.py +34 -0
app.py CHANGED
@@ -1,204 +1,81 @@
1
  import gradio as gr
2
- from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
3
- import pandas as pd
4
- from apscheduler.schedulers.background import BackgroundScheduler
5
- from huggingface_hub import snapshot_download
6
-
7
- from src.about import (
8
- CITATION_BUTTON_LABEL,
9
- CITATION_BUTTON_TEXT,
10
- EVALUATION_QUEUE_TEXT,
11
- INTRODUCTION_TEXT,
12
- LLM_BENCHMARKS_TEXT,
13
- TITLE,
14
- )
15
- from src.display.css_html_js import custom_css
16
- from src.display.utils import (
17
- BENCHMARK_COLS,
18
- COLS,
19
- EVAL_COLS,
20
- EVAL_TYPES,
21
- AutoEvalColumn,
22
- ModelType,
23
- fields,
24
- WeightType,
25
- Precision
26
- )
27
- from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
28
- from src.populate import get_evaluation_queue_df, get_leaderboard_df
29
- from src.submission.submit import add_new_eval
30
-
31
-
32
- def restart_space():
33
- API.restart_space(repo_id=REPO_ID)
34
-
35
- ### Space initialisation
36
- try:
37
- print(EVAL_REQUESTS_PATH)
38
- snapshot_download(
39
- repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
40
- )
41
- except Exception:
42
- restart_space()
43
- try:
44
- print(EVAL_RESULTS_PATH)
45
- snapshot_download(
46
- repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
47
- )
48
- except Exception:
49
- restart_space()
50
-
51
-
52
- LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
53
-
54
- (
55
- finished_eval_queue_df,
56
- running_eval_queue_df,
57
- pending_eval_queue_df,
58
- ) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
59
-
60
- def init_leaderboard(dataframe):
61
- if dataframe is None or dataframe.empty:
62
- raise ValueError("Leaderboard DataFrame is empty or None.")
63
- return Leaderboard(
64
- value=dataframe,
65
- datatype=[c.type for c in fields(AutoEvalColumn)],
66
- select_columns=SelectColumns(
67
- default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
68
- cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
69
- label="Select Columns to Display:",
70
- ),
71
- search_columns=[AutoEvalColumn.model.name, AutoEvalColumn.license.name],
72
- hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
73
- filter_columns=[
74
- ColumnFilter(AutoEvalColumn.model_type.name, type="checkboxgroup", label="Model types"),
75
- ColumnFilter(AutoEvalColumn.precision.name, type="checkboxgroup", label="Precision"),
76
- ColumnFilter(
77
- AutoEvalColumn.params.name,
78
- type="slider",
79
- min=0.01,
80
- max=150,
81
- label="Select the number of parameters (B)",
82
- ),
83
- ColumnFilter(
84
- AutoEvalColumn.still_on_hub.name, type="boolean", label="Deleted/incomplete", default=True
85
- ),
86
- ],
87
- bool_checkboxgroup_label="Hide models",
88
- interactive=False,
89
- )
90
-
91
-
92
- demo = gr.Blocks(css=custom_css)
93
- with demo:
94
- gr.HTML(TITLE)
95
- gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
96
-
97
- with gr.Tabs(elem_classes="tab-buttons") as tabs:
98
- with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
99
- leaderboard = init_leaderboard(LEADERBOARD_DF)
100
-
101
- with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
102
- gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
103
-
104
- with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
105
- with gr.Column():
106
- with gr.Row():
107
- gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
108
-
109
- with gr.Column():
110
- with gr.Accordion(
111
- f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
112
- open=False,
113
- ):
114
- with gr.Row():
115
- finished_eval_table = gr.components.Dataframe(
116
- value=finished_eval_queue_df,
117
- headers=EVAL_COLS,
118
- datatype=EVAL_TYPES,
119
- row_count=5,
120
- )
121
- with gr.Accordion(
122
- f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
123
- open=False,
124
- ):
125
- with gr.Row():
126
- running_eval_table = gr.components.Dataframe(
127
- value=running_eval_queue_df,
128
- headers=EVAL_COLS,
129
- datatype=EVAL_TYPES,
130
- row_count=5,
131
- )
132
-
133
- with gr.Accordion(
134
- f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
135
- open=False,
136
- ):
137
- with gr.Row():
138
- pending_eval_table = gr.components.Dataframe(
139
- value=pending_eval_queue_df,
140
- headers=EVAL_COLS,
141
- datatype=EVAL_TYPES,
142
- row_count=5,
143
- )
144
- with gr.Row():
145
- gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
146
-
147
- with gr.Row():
148
- with gr.Column():
149
- model_name_textbox = gr.Textbox(label="Model name")
150
- revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
151
- model_type = gr.Dropdown(
152
- choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
153
- label="Model type",
154
- multiselect=False,
155
- value=None,
156
- interactive=True,
157
- )
158
-
159
- with gr.Column():
160
- precision = gr.Dropdown(
161
- choices=[i.value.name for i in Precision if i != Precision.Unknown],
162
- label="Precision",
163
- multiselect=False,
164
- value="float16",
165
- interactive=True,
166
- )
167
- weight_type = gr.Dropdown(
168
- choices=[i.value.name for i in WeightType],
169
- label="Weights type",
170
- multiselect=False,
171
- value="Original",
172
- interactive=True,
173
- )
174
- base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
175
-
176
- submit_button = gr.Button("Submit Eval")
177
- submission_result = gr.Markdown()
178
- submit_button.click(
179
- add_new_eval,
180
- [
181
- model_name_textbox,
182
- base_model_name_textbox,
183
- revision_name_textbox,
184
- precision,
185
- weight_type,
186
- model_type,
187
- ],
188
- submission_result,
189
- )
190
-
191
- with gr.Row():
192
- with gr.Accordion("📙 Citation", open=False):
193
- citation_button = gr.Textbox(
194
- value=CITATION_BUTTON_TEXT,
195
- label=CITATION_BUTTON_LABEL,
196
- lines=20,
197
- elem_id="citation-button",
198
- show_copy_button=True,
199
- )
200
-
201
- scheduler = BackgroundScheduler()
202
- scheduler.add_job(restart_space, "interval", seconds=1800)
203
- scheduler.start()
204
- demo.queue(default_concurrency_limit=40).launch()
 
1
  import gradio as gr
2
+ import requests
3
+ import os
4
+ import json
5
+ # from huggingface_hub import HfApi, HfFolder
6
+ # from evaluate import evaluate_prediction # importas tu función
7
+
8
# Ground-truth reference hosted on the HF Hub; fetched once at Space startup.
REFERENCE_FILE_URL = "https://huggingface.co/datasets/juliocontrerash/my-challenge-data/resolve/main/reference.nc"
LOCAL_REF_PATH = "reference.nc"


def download_reference():
    """Download the reference netCDF file if it is not cached locally.

    Fixes over the original: a request timeout (the bare ``requests.get``
    could hang the Space forever), an HTTP status check (a 404/500 error
    page would previously have been written to disk as if it were the
    reference file), and streaming so the file is not held fully in
    memory before being written.
    """
    if not os.path.exists(LOCAL_REF_PATH):
        r = requests.get(REFERENCE_FILE_URL, timeout=60, stream=True)
        r.raise_for_status()  # fail loudly instead of saving an error page
        with open(LOCAL_REF_PATH, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1 << 20):
                f.write(chunk)


download_reference()  # fetch the reference once when the Space starts
18
+
19
def evaluate_and_save(pred_file, participant_name):
    """Evaluate an uploaded prediction file against the local reference.

    1. Resolves the path of the uploaded file.
    2. Runs ``evaluate_prediction`` (returns a dict with ``mre_mean``,
       ``rmse`` and ``mre_spectrum``).
    3. (TODO) Record (participant_name, results, timestamp) in a
       submissions dataset on the HF Hub.
    4. Returns a summary string and an inline ``<img>`` tag with the
       per-wavelength error plot for the ``gr.HTML`` output.
    """
    # Bug fix: the module-level import of evaluate_prediction is commented
    # out, so calling it raised NameError. Import it at call time instead.
    from evaluate import evaluate_prediction

    if not pred_file:
        return "No file uploaded", None

    # Gradio's File component exposes the uploaded temp-file path via .name
    pred_path = pred_file.name

    results = evaluate_prediction(pred_path, LOCAL_REF_PATH)

    # TODO: append a line with participant_name/results/time to
    # submissions.jsonl and push it back to the Hub via huggingface_hub.

    # Lazy imports: matplotlib must select the headless Agg backend before
    # pyplot is imported, and none of this is needed unless a submission
    # is actually evaluated.
    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    import io
    import base64
    import numpy as np

    mre_spectrum = results["mre_spectrum"]
    plt.figure(figsize=(6, 4))
    plt.plot(np.arange(len(mre_spectrum)), mre_spectrum, label='MRE Spectrum')
    plt.xlabel('Wavelength index')
    plt.ylabel('Error')
    plt.title('Spectral Error')
    plt.legend()

    # Render the figure into an in-memory PNG and embed it as a data URI.
    buf = io.BytesIO()
    plt.savefig(buf, format='png')
    plt.close()
    buf.seek(0)
    b64 = base64.b64encode(buf.read()).decode('utf-8')
    # Bug fix: the output component is gr.HTML, which renders HTML — a bare
    # data URI string was displayed as text. Wrap it in an <img> tag.
    img_html = f'<img src="data:image/png;base64,{b64}" alt="Spectral Error"/>'

    message = f"Participant: {participant_name}\nMRE mean: {results['mre_mean']:.4f}\nRMSE: {results['rmse']:.4f}"
    return message, img_html
66
+
67
# Build the Gradio UI: an upload form wired to the evaluation callback.
# Component creation order is preserved — it defines the page layout.
with gr.Blocks() as demo:
    gr.Markdown("# My Challenge\nSube tu archivo de predicciones para evaluar tu modelo.")

    # --- inputs ---
    name_box = gr.Textbox(label="Nombre del participante")
    upload_box = gr.File(label="Subir archivo (csv, netcdf, etc.)")

    # --- outputs ---
    result_box = gr.Textbox(label="Resultados")
    plot_html = gr.HTML(label="Gráfica")

    # --- trigger ---
    run_button = gr.Button("Evaluar")
    run_button.click(
        fn=evaluate_and_save,
        inputs=[upload_box, name_box],
        outputs=[result_box, plot_html],
    )

demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
evaluate.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import xarray as xr # si usas netCDF
3
+ # o from netCDF4 import Dataset
4
+ # o import csv etc. según tu formato
5
+
6
def evaluate_prediction(pred_file_path, reference_file_path):
    """Compare a participant's predicted spectrum against the ground truth.

    Args:
        pred_file_path: Path to the netCDF file uploaded by the participant.
        reference_file_path: Path to the local ground-truth netCDF file.

    Returns:
        dict with:
            "mre_mean":     mean relative error over all bands (float)
            "rmse":         root-mean-square error (float)
            "mre_spectrum": per-band relative error (list of floats)

    Raises:
        ValueError: if the two "spectrum" variables differ in shape.
    """
    # Use context managers so the underlying netCDF file handles are
    # closed even on error (the original left both datasets open).
    with xr.open_dataset(pred_file_path) as pred_data, \
            xr.open_dataset(reference_file_path) as ref_data:
        # .values materialises the arrays, so they remain valid after close.
        pred_values = pred_data["spectrum"].values
        ref_values = ref_data["spectrum"].values

    # Fail with a clear message instead of an opaque numpy broadcast error.
    if pred_values.shape != ref_values.shape:
        raise ValueError(
            f"Shape mismatch: prediction {pred_values.shape} vs "
            f"reference {ref_values.shape}"
        )

    # Per-band relative error. NOTE(review): bands where the reference is
    # exactly 0 yield inf/nan here — confirm the reference spectrum is
    # strictly non-zero before relying on mre_mean.
    mre = np.abs((pred_values - ref_values) / ref_values)
    mre_mean = mre.mean()

    rmse = np.sqrt(((pred_values - ref_values) ** 2).mean())

    return {
        "mre_mean": float(mre_mean),
        "rmse": float(rmse),
        "mre_spectrum": mre.tolist(),  # full per-band error spectrum
    }