boatbomber committed on
Commit
8fcbcdb
·
1 Parent(s): f6862c9

Update task description

Browse files
Files changed (1) hide show
  1. src/about.py +19 -6
src/about.py CHANGED
@@ -1,13 +1,26 @@
1
- # Your leaderboard name
2
  TITLE = """<h1 align="center" id="space-title">Roblox LLM Leaderboard</h1>"""
3
 
4
- # What does your leaderboard evaluate?
5
- INTRODUCTION_TEXT = """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  Tracking LLM capabilities regarding Roblox game development.
7
 
8
  ### Benchmarks:
9
 
10
- - [RobloxQA](https://huggingface.co/datasets/boatbomber/RobloxQA-v1.0): Multiple choice question answering about Roblox APIs and concepts.
11
- - [RobloxQA_OpenEnded](https://huggingface.co/datasets/boatbomber/RobloxQA-OpenEnded-v1.0): Question answering about Roblox APIs and concepts without multiple choices. \
12
- Correctness judged by an LLM by comparing the generated answer to the correct answer.
13
  """
 
 
# Static display text for the Roblox LLM Leaderboard.

# Page heading, as an HTML snippet.
TITLE = """<h1 align="center" id="space-title">Roblox LLM Leaderboard</h1>"""

# One entry per benchmark listed in the introduction. Each entry provides:
#   "name"        - label used as the Markdown link text
#   "link"        - URL the label points to
#   "description" - one-line summary placed after the link
TASK_DESCRIPTIONS = [
    {
        "name": "RobloxQA",
        "link": "https://huggingface.co/datasets/boatbomber/RobloxQA-v1.0",
        "description": "Multiple choice question answering about Roblox APIs and concepts.",
    },
    {
        "name": "RobloxQA_OpenEnded",
        "link": "https://huggingface.co/datasets/boatbomber/RobloxQA-OpenEnded-v1.0",
        "description": (
            "Question answering about Roblox APIs and concepts without multiple choices. "
            "Response correctness judged by an ensemble of reasoning LLMs by comparing the generated answer to the correct answer."
        ),
    },
]

# Markdown bullet list with one "- [name](link): description" line per task.
# Built outside the f-string below on purpose: a backslash ("\n") inside an
# f-string replacement field is a SyntaxError on Python versions before 3.12.
_BENCHMARK_LIST = "\n".join(
    f"- [{task['name']}]({task['link']}): {task['description']}"
    for task in TASK_DESCRIPTIONS
)

# Markdown introduction: a short summary followed by the benchmark list.
INTRODUCTION_TEXT = f"""
Tracking LLM capabilities regarding Roblox game development.

### Benchmarks:

{_BENCHMARK_LIST}
"""