boatbomber committed on
Commit
bed23b0
·
1 Parent(s): 5a34fee

Add RobloxQA_OpenEnded

Browse files
src/about.py CHANGED
@@ -8,4 +8,6 @@ Tracking LLM capabilities regarding Roblox game development.
8
  Benchmarks:
9
 
10
  - [RobloxQA](https://huggingface.co/datasets/boatbomber/RobloxQA-v1.0): Multiple choice question answering about Roblox APIs and concepts.
 
 
11
  """
 
8
  Benchmarks:
9
 
10
  - [RobloxQA](https://huggingface.co/datasets/boatbomber/RobloxQA-v1.0): Multiple choice question answering about Roblox APIs and concepts.
11
+ - [RobloxQA_OpenEnded](https://huggingface.co/datasets/boatbomber/RobloxQA-v1.0): Question answering about Roblox APIs and concepts without giving the multiple choices. \
12
+ Correctness judged by an LLM by comparing the generated answer to the correct answer choice.
13
  """
src/leaderboard/populate.py CHANGED
@@ -42,7 +42,7 @@ def load_results() -> pd.DataFrame:
42
 
43
  for c in COLUMNS:
44
  if c.name not in evaluation:
45
- evaluation[c.name] = None
46
 
47
  data.append(evaluation)
48
 
 
42
 
43
  for c in COLUMNS:
44
  if c.name not in evaluation:
45
+ evaluation[c.name] = c.default
46
 
47
  data.append(evaluation)
48
 
src/leaderboard/utils.py CHANGED
@@ -1,4 +1,5 @@
1
  from dataclasses import dataclass
 
2
 
3
 
4
  @dataclass
@@ -9,6 +10,7 @@ class ColumnContent:
9
  hidden: bool = False
10
  never_hidden: bool = False
11
  searchable: bool = False
 
12
 
13
 
14
  ## Leaderboard columns
@@ -16,6 +18,7 @@ COLUMNS = [
16
  ColumnContent("Model", type="str", displayed_by_default=True, never_hidden=True, searchable=True),
17
  ColumnContent("Precision", type="str", displayed_by_default=False),
18
  ColumnContent("Params (B)", type="number", displayed_by_default=True),
19
- ColumnContent("Average", type="number", displayed_by_default=True),
20
- ColumnContent("RobloxQA", type="number", displayed_by_default=True),
 
21
  ]
 
1
  from dataclasses import dataclass
2
+ from typing import Any
3
 
4
 
5
  @dataclass
 
10
  hidden: bool = False
11
  never_hidden: bool = False
12
  searchable: bool = False
13
+ default: Any = None
14
 
15
 
16
  ## Leaderboard columns
 
18
  ColumnContent("Model", type="str", displayed_by_default=True, never_hidden=True, searchable=True),
19
  ColumnContent("Precision", type="str", displayed_by_default=False),
20
  ColumnContent("Params (B)", type="number", displayed_by_default=True),
21
+ ColumnContent("Average", type="number", displayed_by_default=True, default=0),
22
+ ColumnContent("RobloxQA", type="number", displayed_by_default=True, default=0),
23
+ ColumnContent("RobloxQA_OpenEnded", type="number", displayed_by_default=True, default=0),
24
  ]