Spaces:
Runtime error
Runtime error
refactor: remove the legacy directory
Browse files
- app.py +1 -1
- src/{leaderboard/read_evals.py → read_evals.py} +0 -0
- tests/src/leaderboard/test_read_evals.py +1 -1
- utils.py +15 -12
app.py
CHANGED
|
@@ -12,7 +12,7 @@ from src.benchmarks import DOMAIN_COLS_QA, LANG_COLS_QA, DOMAIN_COLS_LONG_DOC, L
|
|
| 12 |
DEFAULT_METRIC
|
| 13 |
from src.display.css_html_js import custom_css
|
| 14 |
from src.envs import API, EVAL_RESULTS_PATH, REPO_ID, RESULTS_REPO, TOKEN
|
| 15 |
-
from src.leaderboard.read_evals import get_raw_eval_results, get_leaderboard_df
|
| 16 |
from utils import update_table, update_metric, update_table_long_doc, upload_file, get_default_cols, submit_results
|
| 17 |
|
| 18 |
|
|
|
|
| 12 |
DEFAULT_METRIC
|
| 13 |
from src.display.css_html_js import custom_css
|
| 14 |
from src.envs import API, EVAL_RESULTS_PATH, REPO_ID, RESULTS_REPO, TOKEN
|
| 15 |
+
from src.read_evals import get_raw_eval_results, get_leaderboard_df
|
| 16 |
from utils import update_table, update_metric, update_table_long_doc, upload_file, get_default_cols, submit_results
|
| 17 |
|
| 18 |
|
src/{leaderboard/read_evals.py → read_evals.py}
RENAMED
|
File without changes
|
tests/src/leaderboard/test_read_evals.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
from pathlib import Path
|
| 2 |
|
| 3 |
-
from src.leaderboard.read_evals import FullEvalResult, get_raw_eval_results, get_leaderboard_df
|
| 4 |
|
| 5 |
cur_fp = Path(__file__)
|
| 6 |
|
|
|
|
| 1 |
from pathlib import Path
|
| 2 |
|
| 3 |
+
from src.read_evals import FullEvalResult, get_raw_eval_results, get_leaderboard_df
|
| 4 |
|
| 5 |
cur_fp = Path(__file__)
|
| 6 |
|
utils.py
CHANGED
|
@@ -1,16 +1,16 @@
|
|
| 1 |
import json
|
| 2 |
-
from typing import List
|
| 3 |
-
import os
|
| 4 |
from datetime import datetime, timezone
|
| 5 |
from pathlib import Path
|
|
|
|
| 6 |
|
| 7 |
import pandas as pd
|
| 8 |
|
| 9 |
from src.benchmarks import BENCHMARK_COLS_QA, BENCHMARK_COLS_LONG_DOC, BenchmarksQA, BenchmarksLongDoc
|
| 10 |
-
from src.display.utils import COLS_QA, TYPES_QA, COLS_LONG_DOC, TYPES_LONG_DOC, COL_NAME_RANK, COL_NAME_AVG, COL_NAME_RERANKING_MODEL, COL_NAME_RETRIEVAL_MODEL, COL_NAME_REVISION, COL_NAME_TIMESTAMP, AutoEvalColumnQA, AutoEvalColumnLongDoc, get_default_auto_eval_column_dict
|
| 11 |
-
from src.leaderboard.read_evals import FullEvalResult, get_leaderboard_df
|
| 12 |
-
from src.envs import API, SEARCH_RESULTS_REPO, CACHE_PATH
|
| 13 |
from src.display.formatting import styled_message, styled_error
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
|
| 15 |
|
| 16 |
def filter_models(df: pd.DataFrame, reranking_query: list) -> pd.DataFrame:
|
|
@@ -43,7 +43,7 @@ def search_table(df: pd.DataFrame, query: str) -> pd.DataFrame:
|
|
| 43 |
return df[(df[COL_NAME_RETRIEVAL_MODEL].str.contains(query, case=False))]
|
| 44 |
|
| 45 |
|
| 46 |
-
def get_default_cols(task: str, columns: list, add_fix_cols: bool=True) -> list:
|
| 47 |
cols = []
|
| 48 |
types = []
|
| 49 |
if task == "qa":
|
|
@@ -69,8 +69,8 @@ def get_default_cols(task: str, columns: list, add_fix_cols: bool=True) -> list:
|
|
| 69 |
types = FIXED_COLS_TYPES + types
|
| 70 |
return cols, types
|
| 71 |
|
| 72 |
-
fixed_cols = get_default_auto_eval_column_dict()[:-2]
|
| 73 |
|
|
|
|
| 74 |
|
| 75 |
FIXED_COLS = [c.name for _, _, c in fixed_cols]
|
| 76 |
FIXED_COLS_TYPES = [c.type for _, _, c in fixed_cols]
|
|
@@ -160,6 +160,7 @@ def upload_file(filepath: str):
|
|
| 160 |
return filepath
|
| 161 |
return filepath
|
| 162 |
|
|
|
|
| 163 |
from huggingface_hub import ModelCard
|
| 164 |
from huggingface_hub.utils import EntryNotFoundError
|
| 165 |
|
|
@@ -177,7 +178,7 @@ def get_iso_format_timestamp():
|
|
| 177 |
return iso_format_timestamp, filename_friendly_timestamp
|
| 178 |
|
| 179 |
|
| 180 |
-
def submit_results(filepath: str, model: str, model_url: str, version: str="AIR-Bench_24.04", anonymous=False):
|
| 181 |
if not filepath.endswith(".zip"):
|
| 182 |
return styled_error(f"file uploading aborted. wrong file type: {filepath}")
|
| 183 |
|
|
@@ -187,15 +188,17 @@ def submit_results(filepath: str, model: str, model_url: str, version: str="AIR-
|
|
| 187 |
|
| 188 |
# validate model url
|
| 189 |
if not model_url.startswith("https://huggingface.co/"):
|
| 190 |
-
return styled_error(
|
|
|
|
| 191 |
|
| 192 |
# validate model card
|
| 193 |
-
repo_id=model_url.removeprefix("https://huggingface.co/")
|
| 194 |
try:
|
| 195 |
card = ModelCard.load(repo_id)
|
| 196 |
except EntryNotFoundError as e:
|
| 197 |
print(e)
|
| 198 |
-
return styled_error(
|
|
|
|
| 199 |
|
| 200 |
# rename the uploaded file
|
| 201 |
input_fp = Path(filepath)
|
|
@@ -223,7 +226,7 @@ def submit_results(filepath: str, model: str, model_url: str, version: str="AIR-
|
|
| 223 |
json.dump(output_config, f, ensure_ascii=False)
|
| 224 |
API.upload_file(
|
| 225 |
path_or_fileobj=input_folder_path / output_config_fn,
|
| 226 |
-
path_in_repo=
|
| 227 |
repo_id=SEARCH_RESULTS_REPO,
|
| 228 |
repo_type="dataset",
|
| 229 |
commit_message=f"feat: submit {model} config")
|
|
|
|
| 1 |
import json
|
|
|
|
|
|
|
| 2 |
from datetime import datetime, timezone
|
| 3 |
from pathlib import Path
|
| 4 |
+
from typing import List
|
| 5 |
|
| 6 |
import pandas as pd
|
| 7 |
|
| 8 |
from src.benchmarks import BENCHMARK_COLS_QA, BENCHMARK_COLS_LONG_DOC, BenchmarksQA, BenchmarksLongDoc
|
|
|
|
|
|
|
|
|
|
| 9 |
from src.display.formatting import styled_message, styled_error
|
| 10 |
+
from src.display.utils import COLS_QA, TYPES_QA, COLS_LONG_DOC, TYPES_LONG_DOC, COL_NAME_RANK, COL_NAME_AVG, \
|
| 11 |
+
COL_NAME_RERANKING_MODEL, COL_NAME_RETRIEVAL_MODEL, get_default_auto_eval_column_dict
|
| 12 |
+
from src.envs import API, SEARCH_RESULTS_REPO
|
| 13 |
+
from src.read_evals import FullEvalResult, get_leaderboard_df
|
| 14 |
|
| 15 |
|
| 16 |
def filter_models(df: pd.DataFrame, reranking_query: list) -> pd.DataFrame:
|
|
|
|
| 43 |
return df[(df[COL_NAME_RETRIEVAL_MODEL].str.contains(query, case=False))]
|
| 44 |
|
| 45 |
|
| 46 |
+
def get_default_cols(task: str, columns: list, add_fix_cols: bool = True) -> list:
|
| 47 |
cols = []
|
| 48 |
types = []
|
| 49 |
if task == "qa":
|
|
|
|
| 69 |
types = FIXED_COLS_TYPES + types
|
| 70 |
return cols, types
|
| 71 |
|
|
|
|
| 72 |
|
| 73 |
+
fixed_cols = get_default_auto_eval_column_dict()[:-2]
|
| 74 |
|
| 75 |
FIXED_COLS = [c.name for _, _, c in fixed_cols]
|
| 76 |
FIXED_COLS_TYPES = [c.type for _, _, c in fixed_cols]
|
|
|
|
| 160 |
return filepath
|
| 161 |
return filepath
|
| 162 |
|
| 163 |
+
|
| 164 |
from huggingface_hub import ModelCard
|
| 165 |
from huggingface_hub.utils import EntryNotFoundError
|
| 166 |
|
|
|
|
| 178 |
return iso_format_timestamp, filename_friendly_timestamp
|
| 179 |
|
| 180 |
|
| 181 |
+
def submit_results(filepath: str, model: str, model_url: str, version: str = "AIR-Bench_24.04", anonymous=False):
|
| 182 |
if not filepath.endswith(".zip"):
|
| 183 |
return styled_error(f"file uploading aborted. wrong file type: {filepath}")
|
| 184 |
|
|
|
|
| 188 |
|
| 189 |
# validate model url
|
| 190 |
if not model_url.startswith("https://huggingface.co/"):
|
| 191 |
+
return styled_error(
|
| 192 |
+
f"failed to submit. Model url must be a link to a valid HuggingFace model on HuggingFace space. Illegal model url: {model_url}")
|
| 193 |
|
| 194 |
# validate model card
|
| 195 |
+
repo_id = model_url.removeprefix("https://huggingface.co/")
|
| 196 |
try:
|
| 197 |
card = ModelCard.load(repo_id)
|
| 198 |
except EntryNotFoundError as e:
|
| 199 |
print(e)
|
| 200 |
+
return styled_error(
|
| 201 |
+
f"failed to submit. Model url must be a link to a valid HuggingFace model on HuggingFace space. Could not get model {repo_id}")
|
| 202 |
|
| 203 |
# rename the uploaded file
|
| 204 |
input_fp = Path(filepath)
|
|
|
|
| 226 |
json.dump(output_config, f, ensure_ascii=False)
|
| 227 |
API.upload_file(
|
| 228 |
path_or_fileobj=input_folder_path / output_config_fn,
|
| 229 |
+
path_in_repo=f"{version}/{model}/{output_config_fn}",
|
| 230 |
repo_id=SEARCH_RESULTS_REPO,
|
| 231 |
repo_type="dataset",
|
| 232 |
commit_message=f"feat: submit {model} config")
|