Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
feat: add submission implementation
Browse files
app.py
CHANGED
|
@@ -299,14 +299,22 @@ with demo:
|
|
| 299 |
interactive=True,
|
| 300 |
label="AIR-Bench Version")
|
| 301 |
with gr.Column():
|
| 302 |
-
|
| 303 |
with gr.Column():
|
| 304 |
model_url = gr.Textbox(label="Model URL")
|
| 305 |
with gr.Row():
|
| 306 |
file_output = gr.File()
|
| 307 |
with gr.Row():
|
| 308 |
-
upload_button = gr.UploadButton("Click to submit evaluation", file_count="
|
| 309 |
-
upload_button.upload(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 310 |
|
| 311 |
with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=3):
|
| 312 |
gr.Markdown(BENCHMARKS_TEXT, elem_classes="markdown-text")
|
|
|
|
| 299 |
interactive=True,
|
| 300 |
label="AIR-Bench Version")
|
| 301 |
with gr.Column():
|
| 302 |
+
model_name = gr.Textbox(label="Model name")
|
| 303 |
with gr.Column():
|
| 304 |
model_url = gr.Textbox(label="Model URL")
|
| 305 |
with gr.Row():
|
| 306 |
file_output = gr.File()
|
| 307 |
with gr.Row():
|
| 308 |
+
upload_button = gr.UploadButton("Click to submit evaluation", file_count="single")
|
| 309 |
+
upload_button.upload(
|
| 310 |
+
upload_file,
|
| 311 |
+
[
|
| 312 |
+
upload_button,
|
| 313 |
+
model_name,
|
| 314 |
+
model_url,
|
| 315 |
+
benchmark_version,
|
| 316 |
+
],
|
| 317 |
+
file_output)
|
| 318 |
|
| 319 |
with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=3):
|
| 320 |
gr.Markdown(BENCHMARKS_TEXT, elem_classes="markdown-text")
|
utils.py
CHANGED
|
@@ -1,10 +1,17 @@
|
|
|
|
|
| 1 |
from typing import List
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
import pandas as pd
|
| 4 |
|
| 5 |
from src.benchmarks import BENCHMARK_COLS_QA, BENCHMARK_COLS_LONG_DOC, BenchmarksQA, BenchmarksLongDoc
|
| 6 |
from src.display.utils import AutoEvalColumnQA, AutoEvalColumnLongDoc, COLS_QA, COLS_LONG_DOC, COL_NAME_RANK, COL_NAME_AVG, COL_NAME_RERANKING_MODEL, COL_NAME_RETRIEVAL_MODEL
|
| 7 |
from src.leaderboard.read_evals import FullEvalResult, get_leaderboard_df
|
|
|
|
| 8 |
|
| 9 |
|
| 10 |
def filter_models(df: pd.DataFrame, reranking_query: list) -> pd.DataFrame:
|
|
@@ -131,12 +138,40 @@ def update_metric(
|
|
| 131 |
)
|
| 132 |
|
| 133 |
|
| 134 |
-
def upload_file(
|
| 135 |
-
|
| 136 |
-
print(f"file uploaded: {
|
| 137 |
-
#
|
| 138 |
-
#
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
from typing import List
|
| 3 |
+
import os
|
| 4 |
+
from datetime import datetime
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
|
| 7 |
+
import pytz
|
| 8 |
|
| 9 |
import pandas as pd
|
| 10 |
|
| 11 |
from src.benchmarks import BENCHMARK_COLS_QA, BENCHMARK_COLS_LONG_DOC, BenchmarksQA, BenchmarksLongDoc
|
| 12 |
from src.display.utils import AutoEvalColumnQA, AutoEvalColumnLongDoc, COLS_QA, COLS_LONG_DOC, COL_NAME_RANK, COL_NAME_AVG, COL_NAME_RERANKING_MODEL, COL_NAME_RETRIEVAL_MODEL
|
| 13 |
from src.leaderboard.read_evals import FullEvalResult, get_leaderboard_df
|
| 14 |
+
from src.envs import API, SEARCH_RESULTS_REPO, CACHE_PATH
|
| 15 |
|
| 16 |
|
| 17 |
def filter_models(df: pd.DataFrame, reranking_query: list) -> pd.DataFrame:
|
|
|
|
| 138 |
)
|
| 139 |
|
| 140 |
|
| 141 |
+
def upload_file(
|
| 142 |
+
filepath: str, model: str, model_url: str, version: str="AIR-Bench_24.04"):
|
| 143 |
+
print(f"file uploaded: {filepath}")
|
| 144 |
+
# model = "bge-small-en-v1.5"
|
| 145 |
+
# version = "AIR-Bench_24.04"
|
| 146 |
+
if not filepath.endswith(".zip"):
|
| 147 |
+
print(f"file uploading aborted. wrong file type: {filepath}")
|
| 148 |
+
return filepath
|
| 149 |
+
|
| 150 |
+
# rename the uploaded file
|
| 151 |
+
input_fp = Path(filepath)
|
| 152 |
+
timezone = pytz.timezone('UTC')
|
| 153 |
+
timestamp = datetime.now(timezone).strftime('%Y%m%d%H%M%S')
|
| 154 |
+
output_fn = f"{timestamp}-{input_fp.name}"
|
| 155 |
+
input_folder_path = input_fp.parent
|
| 156 |
+
API.upload_file(
|
| 157 |
+
path_or_fileobj=filepath,
|
| 158 |
+
path_in_repo=f"{version}/{model}/{output_fn}",
|
| 159 |
+
repo_id=SEARCH_RESULTS_REPO,
|
| 160 |
+
repo_type="dataset",
|
| 161 |
+
commit_message=f"feat: submit {model} to evaluate")
|
| 162 |
+
|
| 163 |
+
output_config_fn = f"{output_fn.removesuffix('.zip')}.json"
|
| 164 |
+
output_config = {
|
| 165 |
+
"model_name": f"{model}",
|
| 166 |
+
"model_url": f"{model_url}",
|
| 167 |
+
"version": f"{version}"
|
| 168 |
+
}
|
| 169 |
+
with open(input_folder_path / output_config_fn, "w") as f:
|
| 170 |
+
json.dump(output_config, f, ensure_ascii=False)
|
| 171 |
+
API.upload_file(
|
| 172 |
+
path_or_fileobj=input_folder_path / output_config_fn,
|
| 173 |
+
path_in_repo= f"{version}/{model}/{output_config_fn}",
|
| 174 |
+
repo_id=SEARCH_RESULTS_REPO,
|
| 175 |
+
repo_type="dataset",
|
| 176 |
+
commit_message=f"feat: submit {model} config")
|
| 177 |
+
return filepath
|