Fix handling of HTML files when loading results

Files changed (10) hide show

requests/HuggingFaceTB/SmolLM2-135M-Instruct_eval_request.json DELETED Viewed

@@ -1,9 +0,0 @@
-{
-  "model": "HuggingFaceTB/SmolLM2-135M-Instruct",
-  "model_sha": "e2c3f7557efbdec707ae3a336371d169783f1da1",
-  "status": "FINISHED",
-  "submitted_time": "2025-04-21T17:18:59Z",
-  "likes": 178,
-  "params": 0.135,
-  "license": "apache-2.0"
-}

requests/Qwen/Qwen2.5-0.5B-Instruct_eval_request.json DELETED Viewed

@@ -1,9 +0,0 @@
-{
-  "model": "Qwen/Qwen2.5-0.5B-Instruct",
-  "model_sha": "7ae557604adf67be50417f59c2c2f167def9a775",
-  "status": "FINISHED",
-  "submitted_time": "2025-04-21T14:43:01Z",
-  "likes": 310,
-  "params": 0.494,
-  "license": "apache-2.0"
-}

requests/open-ai/gpt-3.5-turbo_eval_request.json DELETED Viewed

@@ -1,9 +0,0 @@
-{
-  "model": "open-ai/gpt-3.5-turbo",
-  "model_sha": "NA",
-  "status": "FINISHED",
-  "submitted_time": "2025-04-26 16:56:14",
-  "likes": -1,
-  "params": 999,
-  "license": "closed"
-}

requests/openai-community/gpt2_eval_request.json DELETED Viewed

@@ -1,9 +0,0 @@
-{
-  "model": "openai-community/gpt2",
-  "model_sha": "607a30d783dfa663caf39e06633721c8d4cfcd7e",
-  "status": "FINISHED",
-  "submitted_time": "2025-04-21T14:50:23Z",
-  "likes": 2679,
-  "params": 0.137,
-  "license": "mit"
-}

results/HuggingFaceTB/SmolLM2-135M-Instruct_results_2025-04-21 17:27:52.203995.json DELETED Viewed

@@ -1,38 +0,0 @@
-{
-  "results": {
-    "average_score": 3.0,
-    "speed": 0.8272417944325482,
-    "contamination_score": 0,
-    "execution_time": 386.321918,
-    "errors": [],
-    "scores_by_category": [
-      {
-        "category": "Function Calling",
-        "average_score": 6.0,
-        "count": 2
-      },
-      {
-        "category": "Reasoning & Math",
-        "average_score": 1.0,
-        "count": 3
-      }
-    ],
-    "scores_by_format": [
-      {
-        "format": "Generation",
-        "average_score": 3.0,
-        "count": 5
-      }
-    ]
-  },
-  "config": {
-    "model": "HuggingFaceTB/SmolLM2-135M-Instruct",
-    "model_sha": "e2c3f7557efbdec707ae3a336371d169783f1da1",
-    "submitted_time": "2025-04-21T17:18:59Z",
-    "likes": 178,
-    "params": 0.135,
-    "license": "apache-2.0",
-    "model_source": "Hugging Face",
-    "model_category": "Nano"
-  }
-}

results/Qwen/Qwen2.5-0.5B-Instruct_results_2025-04-21 16:50:28.595317.json DELETED Viewed

@@ -1,38 +0,0 @@
-{
-  "results": {
-    "average_score":  6.0,
-    "speed": 5,
-    "contamination_score":  0,
-    "execution_time": 88.587424,
-    "errors": [],
-    "scores_by_category": [
-      {
-        "category": "Function Calling",
-        "average_score": 5.5,
-        "count": 2
-      },
-      {
-        "category": "Reasoning & Math",
-        "average_score": 1.6666666666666667,
-        "count": 3
-      }
-    ],
-    "scores_by_format": [
-      {
-        "format": "Generation",
-        "average_score": 3.2,
-        "count": 5
-      }
-    ]
-  },
-  "config": {
-    "model": "Qwen/Qwen2.5-0.5B-Instruct",
-    "model_sha": "7ae557604adf67be50417f59c2c2f167def9a775",
-    "model_source": "Hugging Face",
-    "model_category": "Nano",
-    "submitted_time": "2025-04-21T14:43:01Z",
-    "likes": 310,
-    "params": 0.494,
-    "license": "apache-2.0"
-  }
-}

results/open-ai/gpt-3.5-turbo_abb_benchmark_answers_2025-04-26 17:17:28.074158+00:00.html DELETED Viewed

The diff for this file is too large to render. See raw diff

results/open-ai/gpt-3.5-turbo_results_2025-04-26 17:17:26.272549+00:00.json DELETED Viewed

The diff for this file is too large to render. See raw diff

results/openai-community/gpt2_results_2025-04-21 16:59:47.547731.json DELETED Viewed

@@ -1,38 +0,0 @@
-{
-  "results": {
-    "average_score":  1.0,
-    "speed": 1.1064065631691649,
-    "contamination_score":  0,
-    "execution_time": 516.691865,
-    "errors": [],
-    "scores_by_category": [
-      {
-        "category": "Function Calling",
-        "average_score": 1.0,
-        "count": 2
-      },
-      {
-        "category": "Reasoning & Math",
-        "average_score": 1.0,
-        "count": 3
-      }
-    ],
-    "scores_by_format": [
-      {
-        "format": "Generation",
-        "average_score": 1.0,
-        "count": 5
-      }
-    ]
-  },
-  "config": {
-    "model": "openai-community/gpt2",
-    "model_sha": "607a30d783dfa663caf39e06633721c8d4cfcd7e",
-    "model_source": "Hugging Face",
-    "model_category": "Nano",
-    "submitted_time": "2025-04-21T14:50:23Z",
-    "likes": 2679,
-    "params": 0.137,
-    "license": "mit"
-  }
-}

src/leaderboard/read_evals.py CHANGED Viewed

@@ -168,16 +168,19 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResu
     model_result_filepaths = []
     for root, _, files in os.walk(results_path):
-        # We should only have json files in model results
-        if len(files) == 0 or any([not f.endswith(".json") for f in files]):
-            continue
         # Sort the files by date
         try:
             files.sort(key=lambda x: x.removesuffix(".json").removeprefix("results_")[:-7])
-        except dateutil.parser._parser.ParserError:
             files = [files[-1]]
         for file in files:
             model_result_filepaths.append(os.path.join(root, file))

     model_result_filepaths = []
     for root, _, files in os.walk(results_path):
+        print("HERE",files)
+        # We should only have json files in model results ##we allow HTML files
+        #if len(files) == 0 or any([not f.endswith(".json") for f in files]):
+        #    continue
+        files = [f for f in files if f.endswith(".json")]
         # Sort the files by date
         try:
             files.sort(key=lambda x: x.removesuffix(".json").removeprefix("results_")[:-7])
+        except dateutil.parser._parser.ParserError as e:
+            print("Error",e)
             files = [files[-1]]
+        print(files)
         for file in files:
             model_result_filepaths.append(os.path.join(root, file))