Julio Cesar Contreras Huerta committed on
Commit
1a0754f
·
1 Parent(s): e6039dd
Files changed (2) hide show
  1. app.py +80 -203
  2. evaluate.py +34 -0
app.py CHANGED
@@ -1,204 +1,81 @@
1
  import gradio as gr
2
- from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
3
- import pandas as pd
4
- from apscheduler.schedulers.background import BackgroundScheduler
5
- from huggingface_hub import snapshot_download
6
-
7
- from src.about import (
8
- CITATION_BUTTON_LABEL,
9
- CITATION_BUTTON_TEXT,
10
- EVALUATION_QUEUE_TEXT,
11
- INTRODUCTION_TEXT,
12
- LLM_BENCHMARKS_TEXT,
13
- TITLE,
14
- )
15
- from src.display.css_html_js import custom_css
16
- from src.display.utils import (
17
- BENCHMARK_COLS,
18
- COLS,
19
- EVAL_COLS,
20
- EVAL_TYPES,
21
- AutoEvalColumn,
22
- ModelType,
23
- fields,
24
- WeightType,
25
- Precision
26
- )
27
- from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
28
- from src.populate import get_evaluation_queue_df, get_leaderboard_df
29
- from src.submission.submit import add_new_eval
30
-
31
-
32
- def restart_space():
33
- API.restart_space(repo_id=REPO_ID)
34
-
35
- ### Space initialisation
36
- try:
37
- print(EVAL_REQUESTS_PATH)
38
- snapshot_download(
39
- repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
40
- )
41
- except Exception:
42
- restart_space()
43
- try:
44
- print(EVAL_RESULTS_PATH)
45
- snapshot_download(
46
- repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
47
- )
48
- except Exception:
49
- restart_space()
50
-
51
-
52
- LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
53
-
54
- (
55
- finished_eval_queue_df,
56
- running_eval_queue_df,
57
- pending_eval_queue_df,
58
- ) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
59
-
60
- def init_leaderboard(dataframe):
61
- if dataframe is None or dataframe.empty:
62
- raise ValueError("Leaderboard DataFrame is empty or None.")
63
- return Leaderboard(
64
- value=dataframe,
65
- datatype=[c.type for c in fields(AutoEvalColumn)],
66
- select_columns=SelectColumns(
67
- default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
68
- cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
69
- label="Select Columns to Display:",
70
- ),
71
- search_columns=[AutoEvalColumn.model.name, AutoEvalColumn.license.name],
72
- hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
73
- filter_columns=[
74
- ColumnFilter(AutoEvalColumn.model_type.name, type="checkboxgroup", label="Model types"),
75
- ColumnFilter(AutoEvalColumn.precision.name, type="checkboxgroup", label="Precision"),
76
- ColumnFilter(
77
- AutoEvalColumn.params.name,
78
- type="slider",
79
- min=0.01,
80
- max=150,
81
- label="Select the number of parameters (B)",
82
- ),
83
- ColumnFilter(
84
- AutoEvalColumn.still_on_hub.name, type="boolean", label="Deleted/incomplete", default=True
85
- ),
86
- ],
87
- bool_checkboxgroup_label="Hide models",
88
- interactive=False,
89
- )
90
-
91
-
92
- demo = gr.Blocks(css=custom_css)
93
- with demo:
94
- gr.HTML(TITLE)
95
- gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
96
-
97
- with gr.Tabs(elem_classes="tab-buttons") as tabs:
98
- with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
99
- leaderboard = init_leaderboard(LEADERBOARD_DF)
100
-
101
- with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
102
- gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
103
-
104
- with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
105
- with gr.Column():
106
- with gr.Row():
107
- gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
108
-
109
- with gr.Column():
110
- with gr.Accordion(
111
- f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
112
- open=False,
113
- ):
114
- with gr.Row():
115
- finished_eval_table = gr.components.Dataframe(
116
- value=finished_eval_queue_df,
117
- headers=EVAL_COLS,
118
- datatype=EVAL_TYPES,
119
- row_count=5,
120
- )
121
- with gr.Accordion(
122
- f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
123
- open=False,
124
- ):
125
- with gr.Row():
126
- running_eval_table = gr.components.Dataframe(
127
- value=running_eval_queue_df,
128
- headers=EVAL_COLS,
129
- datatype=EVAL_TYPES,
130
- row_count=5,
131
- )
132
-
133
- with gr.Accordion(
134
- f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
135
- open=False,
136
- ):
137
- with gr.Row():
138
- pending_eval_table = gr.components.Dataframe(
139
- value=pending_eval_queue_df,
140
- headers=EVAL_COLS,
141
- datatype=EVAL_TYPES,
142
- row_count=5,
143
- )
144
- with gr.Row():
145
- gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
146
-
147
- with gr.Row():
148
- with gr.Column():
149
- model_name_textbox = gr.Textbox(label="Model name")
150
- revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
151
- model_type = gr.Dropdown(
152
- choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
153
- label="Model type",
154
- multiselect=False,
155
- value=None,
156
- interactive=True,
157
- )
158
-
159
- with gr.Column():
160
- precision = gr.Dropdown(
161
- choices=[i.value.name for i in Precision if i != Precision.Unknown],
162
- label="Precision",
163
- multiselect=False,
164
- value="float16",
165
- interactive=True,
166
- )
167
- weight_type = gr.Dropdown(
168
- choices=[i.value.name for i in WeightType],
169
- label="Weights type",
170
- multiselect=False,
171
- value="Original",
172
- interactive=True,
173
- )
174
- base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
175
-
176
- submit_button = gr.Button("Submit Eval")
177
- submission_result = gr.Markdown()
178
- submit_button.click(
179
- add_new_eval,
180
- [
181
- model_name_textbox,
182
- base_model_name_textbox,
183
- revision_name_textbox,
184
- precision,
185
- weight_type,
186
- model_type,
187
- ],
188
- submission_result,
189
- )
190
-
191
- with gr.Row():
192
- with gr.Accordion("📙 Citation", open=False):
193
- citation_button = gr.Textbox(
194
- value=CITATION_BUTTON_TEXT,
195
- label=CITATION_BUTTON_LABEL,
196
- lines=20,
197
- elem_id="citation-button",
198
- show_copy_button=True,
199
- )
200
-
201
- scheduler = BackgroundScheduler()
202
- scheduler.add_job(restart_space, "interval", seconds=1800)
203
- scheduler.start()
204
- demo.queue(default_concurrency_limit=40).launch()
 
1
  import gradio as gr
2
+ import requests
3
+ import os
4
+ import json
5
+ # from huggingface_hub import HfApi, HfFolder
6
+ # from evaluate import evaluate_prediction # importas tu función
7
+
8
# Ground-truth reference hosted on the HF Hub; fetched once at Space startup.
REFERENCE_FILE_URL = "https://huggingface.co/datasets/juliocontrerash/my-challenge-data/resolve/main/reference.nc"
LOCAL_REF_PATH = "reference.nc"


def download_reference():
    """Download the reference netCDF file if it is not cached locally.

    Fixes over the original: a request timeout (the bare ``requests.get``
    could hang the Space forever), an HTTP status check (a 404/500 error
    page would previously have been written to disk as if it were the
    reference file), and streaming so the file is not held fully in
    memory before being written.
    """
    if not os.path.exists(LOCAL_REF_PATH):
        r = requests.get(REFERENCE_FILE_URL, timeout=60, stream=True)
        r.raise_for_status()  # fail loudly instead of saving an error page
        with open(LOCAL_REF_PATH, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1 << 20):
                f.write(chunk)


download_reference()  # fetch the reference once when the Space starts
18
+
19
def evaluate_and_save(pred_file, participant_name):
    """Evaluate an uploaded prediction file against the local reference.

    1. Resolves the path of the uploaded file.
    2. Runs ``evaluate_prediction`` (returns a dict with ``mre_mean``,
       ``rmse`` and ``mre_spectrum``).
    3. (TODO) Record (participant_name, results, timestamp) in a
       submissions dataset on the HF Hub.
    4. Returns a summary string and an inline ``<img>`` tag with the
       per-wavelength error plot for the ``gr.HTML`` output.
    """
    # Bug fix: the module-level import of evaluate_prediction is commented
    # out, so calling it raised NameError. Import it at call time instead.
    from evaluate import evaluate_prediction

    if not pred_file:
        return "No file uploaded", None

    # Gradio's File component exposes the uploaded temp-file path via .name
    pred_path = pred_file.name

    results = evaluate_prediction(pred_path, LOCAL_REF_PATH)

    # TODO: append a line with participant_name/results/time to
    # submissions.jsonl and push it back to the Hub via huggingface_hub.

    # Lazy imports: matplotlib must select the headless Agg backend before
    # pyplot is imported, and none of this is needed unless a submission
    # is actually evaluated.
    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    import io
    import base64
    import numpy as np

    mre_spectrum = results["mre_spectrum"]
    plt.figure(figsize=(6, 4))
    plt.plot(np.arange(len(mre_spectrum)), mre_spectrum, label='MRE Spectrum')
    plt.xlabel('Wavelength index')
    plt.ylabel('Error')
    plt.title('Spectral Error')
    plt.legend()

    # Render the figure into an in-memory PNG and embed it as a data URI.
    buf = io.BytesIO()
    plt.savefig(buf, format='png')
    plt.close()
    buf.seek(0)
    b64 = base64.b64encode(buf.read()).decode('utf-8')
    # Bug fix: the output component is gr.HTML, which renders HTML — a bare
    # data URI string was displayed as text. Wrap it in an <img> tag.
    img_html = f'<img src="data:image/png;base64,{b64}" alt="Spectral Error"/>'

    message = f"Participant: {participant_name}\nMRE mean: {results['mre_mean']:.4f}\nRMSE: {results['rmse']:.4f}"
    return message, img_html
66
+
67
# Build the Gradio UI: an upload form wired to the evaluation callback.
# Component creation order is preserved — it defines the page layout.
with gr.Blocks() as demo:
    gr.Markdown("# My Challenge\nSube tu archivo de predicciones para evaluar tu modelo.")

    # --- inputs ---
    name_box = gr.Textbox(label="Nombre del participante")
    upload_box = gr.File(label="Subir archivo (csv, netcdf, etc.)")

    # --- outputs ---
    result_box = gr.Textbox(label="Resultados")
    plot_html = gr.HTML(label="Gráfica")

    # --- trigger ---
    run_button = gr.Button("Evaluar")
    run_button.click(
        fn=evaluate_and_save,
        inputs=[upload_box, name_box],
        outputs=[result_box, plot_html],
    )

demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
evaluate.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import xarray as xr # si usas netCDF
3
+ # o from netCDF4 import Dataset
4
+ # o import csv etc. según tu formato
5
+
6
def evaluate_prediction(pred_file_path, reference_file_path):
    """Compare a participant's predicted spectrum against the ground truth.

    Args:
        pred_file_path: Path to the netCDF file uploaded by the participant.
        reference_file_path: Path to the local ground-truth netCDF file.

    Returns:
        dict with:
            "mre_mean":     mean relative error over all bands (float)
            "rmse":         root-mean-square error (float)
            "mre_spectrum": per-band relative error (list of floats)

    Raises:
        ValueError: if the two "spectrum" variables differ in shape.
    """
    # Use context managers so the underlying netCDF file handles are
    # closed even on error (the original left both datasets open).
    with xr.open_dataset(pred_file_path) as pred_data, \
            xr.open_dataset(reference_file_path) as ref_data:
        # .values materialises the arrays, so they remain valid after close.
        pred_values = pred_data["spectrum"].values
        ref_values = ref_data["spectrum"].values

    # Fail with a clear message instead of an opaque numpy broadcast error.
    if pred_values.shape != ref_values.shape:
        raise ValueError(
            f"Shape mismatch: prediction {pred_values.shape} vs "
            f"reference {ref_values.shape}"
        )

    # Per-band relative error. NOTE(review): bands where the reference is
    # exactly 0 yield inf/nan here — confirm the reference spectrum is
    # strictly non-zero before relying on mre_mean.
    mre = np.abs((pred_values - ref_values) / ref_values)
    mre_mean = mre.mean()

    rmse = np.sqrt(((pred_values - ref_values) ** 2).mean())

    return {
        "mre_mean": float(mre_mean),
        "rmse": float(rmse),
        "mre_spectrum": mre.tolist(),  # full per-band error spectrum
    }