Clémentine
committed on
Commit
·
c1b8a96
1
Parent(s):
910a08e
doc
Browse files- src/leaderboard/read_evals.py +2 -0
- src/populate.py +2 -0
- src/submission/check_validity.py +2 -0
src/leaderboard/read_evals.py
CHANGED
|
@@ -14,6 +14,8 @@ from src.submission.check_validity import is_model_on_hub
|
|
| 14 |
|
| 15 |
@dataclass
|
| 16 |
class EvalResult:
|
|
|
|
|
|
|
| 17 |
eval_name: str # org_model_precision (uid)
|
| 18 |
full_model: str # org/model (path on hub)
|
| 19 |
org: str
|
|
|
|
| 14 |
|
| 15 |
@dataclass
|
| 16 |
class EvalResult:
|
| 17 |
+
"""Represents one full evaluation. Built from a combination of the result and request file for a given run.
|
| 18 |
+
"""
|
| 19 |
eval_name: str # org_model_precision (uid)
|
| 20 |
full_model: str # org/model (path on hub)
|
| 21 |
org: str
|
src/populate.py
CHANGED
|
@@ -9,6 +9,7 @@ from src.leaderboard.read_evals import get_raw_eval_results
|
|
| 9 |
|
| 10 |
|
| 11 |
def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
|
|
|
|
| 12 |
raw_data = get_raw_eval_results(results_path, requests_path)
|
| 13 |
all_data_json = [v.to_dict() for v in raw_data]
|
| 14 |
|
|
@@ -22,6 +23,7 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
|
|
| 22 |
|
| 23 |
|
| 24 |
def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
|
|
|
|
| 25 |
entries = [entry for entry in os.listdir(save_path) if not entry.startswith(".")]
|
| 26 |
all_evals = []
|
| 27 |
|
|
|
|
| 9 |
|
| 10 |
|
| 11 |
def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
|
| 12 |
+
"""Creates a dataframe from all the individual experiment results"""
|
| 13 |
raw_data = get_raw_eval_results(results_path, requests_path)
|
| 14 |
all_data_json = [v.to_dict() for v in raw_data]
|
| 15 |
|
|
|
|
| 23 |
|
| 24 |
|
| 25 |
def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
|
| 26 |
+
"""Creates the different dataframes for the evaluation queues requestes"""
|
| 27 |
entries = [entry for entry in os.listdir(save_path) if not entry.startswith(".")]
|
| 28 |
all_evals = []
|
| 29 |
|
src/submission/check_validity.py
CHANGED
|
@@ -32,6 +32,7 @@ def check_model_card(repo_id: str) -> tuple[bool, str]:
|
|
| 32 |
return True, ""
|
| 33 |
|
| 34 |
def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_remote_code=False, test_tokenizer=False) -> tuple[bool, str]:
|
|
|
|
| 35 |
try:
|
| 36 |
config = AutoConfig.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token)
|
| 37 |
if test_tokenizer:
|
|
@@ -74,6 +75,7 @@ def get_model_arch(model_info: ModelInfo):
|
|
| 74 |
return model_info.config.get("architectures", "Unknown")
|
| 75 |
|
| 76 |
def already_submitted_models(requested_models_dir: str) -> set[str]:
|
|
|
|
| 77 |
depth = 1
|
| 78 |
file_names = []
|
| 79 |
users_to_submission_dates = defaultdict(list)
|
|
|
|
| 32 |
return True, ""
|
| 33 |
|
| 34 |
def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_remote_code=False, test_tokenizer=False) -> tuple[bool, str]:
|
| 35 |
+
"""Checks if the model model_name is on the hub, and whether it (and its tokenizer) can be loaded with AutoClasses."""
|
| 36 |
try:
|
| 37 |
config = AutoConfig.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token)
|
| 38 |
if test_tokenizer:
|
|
|
|
| 75 |
return model_info.config.get("architectures", "Unknown")
|
| 76 |
|
| 77 |
def already_submitted_models(requested_models_dir: str) -> set[str]:
|
| 78 |
+
"""Gather a list of already submitted models to avoid duplicates"""
|
| 79 |
depth = 1
|
| 80 |
file_names = []
|
| 81 |
users_to_submission_dates = defaultdict(list)
|