Alina Lozovskaia committed
Commit dadbd30 · Parent(s): 705a80c

improved logging
src/leaderboard/filter_models.py
CHANGED
@@ -1,6 +1,9 @@
+import logging
 from src.display.formatting import model_hyperlink
 from src.display.utils import AutoEvalColumn
 
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+
 # Models which have been flagged by users as being problematic for a reason or another
 # (Model name to forum discussion link)
 FLAGGED_MODELS = {

@@ -137,10 +140,8 @@ def flag_models(leaderboard_data: list[dict]):
             flag_key = "merged"
         else:
             flag_key = model_data[AutoEvalColumn.fullname.name]
-
-        print(f"model check: {flag_key}")
         if flag_key in FLAGGED_MODELS:
-
+            logging.info(f"Flagged model: {flag_key}")
             issue_num = FLAGGED_MODELS[flag_key].split("/")[-1]
             issue_link = model_hyperlink(
                 FLAGGED_MODELS[flag_key],
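For reference, a minimal sketch of what the new logging setup in filter_models.py emits. The configuration and the logging.info call are copied from the diff; the model key is hypothetical:

import logging

# Same root-logger configuration as the one added above
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

flag_key = "org/flagged-model"  # hypothetical flagged-model key
logging.info(f"Flagged model: {flag_key}")
# Output resembles: 2024-01-01 12:00:00,000 - INFO - Flagged model: org/flagged-model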
src/leaderboard/read_evals.py
CHANGED

@@ -1,5 +1,6 @@
 import json
 from pathlib import Path
+import dateutil.parser
 from json import JSONDecodeError
 import logging
 import math

@@ -96,6 +97,11 @@ class EvalResult:
         """Finds the relevant request file for the current model and updates info with it."""
         try:
             request_file = get_request_file_for_model(requests_path, self.full_model, self.precision.value.name)
+            if request_file is None:
+                logging.warning(f"No request file for {self.org}/{self.model}")
+                self.status = "FAILED"
+                return
+
             with open(request_file, "r") as f:
                 request = json.load(f)
 

@@ -108,7 +114,7 @@ class EvalResult:
 
         except FileNotFoundError:
             self.status = "FAILED"
-            logging.error(f"Request file not found for {self.org}/{self.model}")
+            logging.error(f"Request file: {request_file} not found for {self.org}/{self.model}")
         except JSONDecodeError:
             self.status = "FAILED"
             logging.error(f"Error decoding JSON from the request file for {self.org}/{self.model}")
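Taken together, the two hunks above make the failure path explicit. A standalone sketch of the same pattern, with a hypothetical read_request helper standing in for update_with_request_file (without the None check, open(None) raises a TypeError instead of marking the eval FAILED):

import json
import logging
from json import JSONDecodeError

def read_request(request_file, org="org", model="model"):
    # Hypothetical helper; the org/model defaults are placeholders.
    try:
        if request_file is None:
            logging.warning(f"No request file for {org}/{model}")
            return None
        with open(request_file, "r") as f:
            return json.load(f)
    except FileNotFoundError:
        logging.error(f"Request file: {request_file} not found for {org}/{model}")
    except JSONDecodeError:
        logging.error(f"Error decoding JSON from the request file for {org}/{model}")
    return None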

@@ -173,32 +179,40 @@ def get_request_file_for_model(requests_path, model_name, precision):
     request_files.sort(reverse=True)
 
     # Select the correct request file based on 'status' and 'precision'
+    request_file = None
     for request_file in request_files:
         with request_file.open("r") as f:
             req_content = json.load(f)
             if req_content["status"] == "FINISHED" and req_content["precision"] == precision.split(".")[-1]:
-                return str(request_file)
+                request_file = str(request_file)
 
     # Return empty string if no file found that matches criteria
-    return
+    return request_file
 
 def get_raw_eval_results(results_path: str, requests_path: str, dynamic_path: str) -> list[EvalResult]:
     """From the path of the results folder root, extract all needed info for results"""
     model_result_filepaths = []
 
-    for root, _, files in os.walk(results_path):
-        # We should only have json files in model results
-        if len(files) == 0 or any([not f.endswith(".json") for f in files]):
+    results_path = Path(results_path)
+
+    for root in results_path.rglob('*'):
+        # root is now a Path object representing directories
+        files = list(root.glob('*.json'))  # Collect all .json files directly
+
+        # Check if the directory is empty or contains no .json files
+        if not files:
             continue
 
-        # Sort the files by date
+        # Sort the files by extracting the datetime from filenames assumed to be of the form "results_YYYYMMDD.json"
         try:
-            files.sort(key=lambda x: x.removesuffix(".json").removeprefix("results_")[:-7])
+            files.sort(key=lambda x: x.stem.removeprefix("results_"))
         except dateutil.parser._parser.ParserError:
             files = [files[-1]]
 
         for file in files:
-            model_result_filepaths.append(os.path.join(root, file))
+            # Construct file path correctly, ensuring no duplication of path parts
+            model_result_filepath = file.resolve()
+            model_result_filepaths.append(model_result_filepath)
 
     with open(dynamic_path) as f:
         dynamic_data = json.load(f)
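A self-contained sketch of the new pathlib-based discovery in get_raw_eval_results (Python 3.9+ for str.removeprefix), assuming a hypothetical layout of results/<org>/<model>/results_<timestamp>.json:

from pathlib import Path

results_path = Path("results")  # hypothetical results root
model_result_filepaths = []
for root in results_path.rglob('*'):
    # rglob('*') yields files as well as directories; glob() on a
    # non-directory yields nothing, so those entries fail the check below
    files = list(root.glob('*.json'))
    if not files:
        continue
    # Lexicographic order of the timestamp suffix is chronological
    # as long as the timestamps are zero-padded
    files.sort(key=lambda x: x.stem.removeprefix("results_"))
    model_result_filepaths.extend(f.resolve() for f in files)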

@@ -208,8 +222,6 @@ def get_raw_eval_results(results_path: str, requests_path: str, dynamic_path: str) -> list[EvalResult]:
         # Creation of result
         eval_result = EvalResult.init_from_json_file(model_result_filepath)
         eval_result.update_with_request_file(requests_path)
-        if eval_result.full_model == "databricks/dbrx-base":
-            print("WE HERE")
         if eval_result.full_model in dynamic_data:
             eval_result.update_with_dynamic_file_dict(dynamic_data[eval_result.full_model])
         # Hardcoding because of gating problem
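Finally, a hedged usage sketch of the reworked get_request_file_for_model: the "empty string" comment survives, but the function now falls through the loop and returns whatever request_file ends up holding, so the caller-side None guard added in update_with_request_file is what keeps a missing file from reaching open(). The queue path and model name here are hypothetical:

import json
from src.leaderboard.read_evals import get_request_file_for_model

# Hypothetical queue directory and model; the precision string matches
# the "precision" field stored in the request files.
request_file = get_request_file_for_model("eval-queue", "org/model", "float16")
if request_file is None:
    print("no request file found")  # caller-side guard, as in update_with_request_file
else:
    with open(request_file, "r") as f:
        request = json.load(f)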