Alina Lozovskaia committed
Commit dadbd30 · Parent(s): 705a80c

improved logging
src/leaderboard/filter_models.py
CHANGED
@@ -1,6 +1,9 @@
+import logging
 from src.display.formatting import model_hyperlink
 from src.display.utils import AutoEvalColumn
 
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+
 # Models which have been flagged by users as being problematic for a reason or another
 # (Model name to forum discussion link)
 FLAGGED_MODELS = {

@@ -137,10 +140,8 @@ def flag_models(leaderboard_data: list[dict]):
             flag_key = "merged"
         else:
             flag_key = model_data[AutoEvalColumn.fullname.name]
-
-        print(f"model check: {flag_key}")
         if flag_key in FLAGGED_MODELS:
-
+            logging.info(f"Flagged model: {flag_key}")
             issue_num = FLAGGED_MODELS[flag_key].split("/")[-1]
             issue_link = model_hyperlink(
                 FLAGGED_MODELS[flag_key],
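For reference, a minimal sketch of what the new logging setup in filter_models.py emits. The configuration and the logging.info call are copied from the diff; the model key is hypothetical:

import logging

# Same root-logger configuration as the one added above
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

flag_key = "org/flagged-model"  # hypothetical flagged-model key
logging.info(f"Flagged model: {flag_key}")
# Output resembles: 2024-01-01 12:00:00,000 - INFO - Flagged model: org/flagged-model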
src/leaderboard/read_evals.py
CHANGED

@@ -1,5 +1,6 @@
 import json
 from pathlib import Path
+import dateutil.parser
 from json import JSONDecodeError
 import logging
 import math

@@ -96,6 +97,11 @@ class EvalResult:
         """Finds the relevant request file for the current model and updates info with it."""
         try:
             request_file = get_request_file_for_model(requests_path, self.full_model, self.precision.value.name)
+            if request_file is None:
+                logging.warning(f"No request file for {self.org}/{self.model}")
+                self.status = "FAILED"
+                return
+
             with open(request_file, "r") as f:
                 request = json.load(f)
 

@@ -108,7 +114,7 @@ class EvalResult:
 
         except FileNotFoundError:
             self.status = "FAILED"
-            logging.error(f"Request file not found for {self.org}/{self.model}")
+            logging.error(f"Request file: {request_file} not found for {self.org}/{self.model}")
         except JSONDecodeError:
             self.status = "FAILED"
             logging.error(f"Error decoding JSON from the request file for {self.org}/{self.model}")
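Taken together, the two hunks above make the failure path explicit. A standalone sketch of the same pattern, with a hypothetical read_request helper standing in for update_with_request_file (without the None check, open(None) raises a TypeError instead of marking the eval FAILED):

import json
import logging
from json import JSONDecodeError

def read_request(request_file, org="org", model="model"):
    # Hypothetical helper; the org/model defaults are placeholders.
    try:
        if request_file is None:
            logging.warning(f"No request file for {org}/{model}")
            return None
        with open(request_file, "r") as f:
            return json.load(f)
    except FileNotFoundError:
        logging.error(f"Request file: {request_file} not found for {org}/{model}")
    except JSONDecodeError:
        logging.error(f"Error decoding JSON from the request file for {org}/{model}")
    return None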

@@ -173,32 +179,40 @@ def get_request_file_for_model(requests_path, model_name, precision):
     request_files.sort(reverse=True)
 
     # Select the correct request file based on 'status' and 'precision'
+    request_file = None
     for request_file in request_files:
         with request_file.open("r") as f:
             req_content = json.load(f)
             if req_content["status"] == "FINISHED" and req_content["precision"] == precision.split(".")[-1]:
-                return str(request_file)
+                request_file = str(request_file)
 
     # Return empty string if no file found that matches criteria
-    return
+    return request_file
 
 def get_raw_eval_results(results_path: str, requests_path: str, dynamic_path: str) -> list[EvalResult]:
     """From the path of the results folder root, extract all needed info for results"""
     model_result_filepaths = []
 
-    for root, _, files in os.walk(results_path):
-        # We should only have json files in model results
-        if len(files) == 0 or any([not f.endswith(".json") for f in files]):
+    results_path = Path(results_path)
+
+    for root in results_path.rglob('*'):
+        # root is now a Path object representing directories
+        files = list(root.glob('*.json'))  # Collect all .json files directly
+
+        # Check if the directory is empty or contains no .json files
+        if not files:
             continue
 
-        # Sort the files by date
+        # Sort the files by extracting the datetime from filenames assumed to be of the form "results_YYYYMMDD.json"
         try:
-            files.sort(key=lambda x: x.removesuffix(".json").removeprefix("results_")[:-7])
+            files.sort(key=lambda x: x.stem.removeprefix("results_"))
         except dateutil.parser._parser.ParserError:
             files = [files[-1]]
 
         for file in files:
-            model_result_filepaths.append(os.path.join(root, file))
+            # Construct file path correctly, ensuring no duplication of path parts
+            model_result_filepath = file.resolve()
+            model_result_filepaths.append(model_result_filepath)
 
     with open(dynamic_path) as f:
         dynamic_data = json.load(f)
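A self-contained sketch of the new pathlib-based discovery in get_raw_eval_results (Python 3.9+ for str.removeprefix), assuming a hypothetical layout of results/<org>/<model>/results_<timestamp>.json:

from pathlib import Path

results_path = Path("results")  # hypothetical results root
model_result_filepaths = []
for root in results_path.rglob('*'):
    # rglob('*') yields files as well as directories; glob() on a
    # non-directory yields nothing, so those entries fail the check below
    files = list(root.glob('*.json'))
    if not files:
        continue
    # Lexicographic order of the timestamp suffix is chronological
    # as long as the timestamps are zero-padded
    files.sort(key=lambda x: x.stem.removeprefix("results_"))
    model_result_filepaths.extend(f.resolve() for f in files)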

@@ -208,8 +222,6 @@ def get_raw_eval_results(results_path: str, requests_path: str, dynamic_path: str) -> list[EvalResult]:
         # Creation of result
         eval_result = EvalResult.init_from_json_file(model_result_filepath)
         eval_result.update_with_request_file(requests_path)
-        if eval_result.full_model == "databricks/dbrx-base":
-            print("WE HERE")
         if eval_result.full_model in dynamic_data:
             eval_result.update_with_dynamic_file_dict(dynamic_data[eval_result.full_model])
         # Hardcoding because of gating problem
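Finally, a hedged usage sketch of the reworked get_request_file_for_model: the "empty string" comment survives, but the function now falls through the loop and returns whatever request_file ends up holding, so the caller-side None guard added in update_with_request_file is what keeps a missing file from reaching open(). The queue path and model name here are hypothetical:

import json
from src.leaderboard.read_evals import get_request_file_for_model

# Hypothetical queue directory and model; the precision string matches
# the "precision" field stored in the request files.
request_file = get_request_file_for_model("eval-queue", "org/model", "float16")
if request_file is None:
    print("no request file found")  # caller-side guard, as in update_with_request_file
else:
    with open(request_file, "r") as f:
        request = json.load(f)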