Clémentine
committed on
Commit
·
6e9fc26
1
Parent(s):
2749dc3
updated tooltips with correct citations
Browse files
frontend/src/pages/LeaderboardPage/components/Leaderboard/constants/tooltips.js
CHANGED
|
@@ -103,7 +103,7 @@ export const COLUMN_TOOLTIPS = {
|
|
| 103 |
},
|
| 104 |
]),
|
| 105 |
|
| 106 |
-
MUSR: createTooltipContent("Multistep Soft Reasoning (
|
| 107 |
{
|
| 108 |
label: "Scope",
|
| 109 |
description: "Reasoning and understanding on/of long texts",
|
|
|
|
| 103 |
},
|
| 104 |
]),
|
| 105 |
|
| 106 |
+
MUSR: createTooltipContent("Multistep Soft Reasoning (MuSR):", [
|
| 107 |
{
|
| 108 |
label: "Scope",
|
| 109 |
description: "Reasoning and understanding on/of long texts",
|
frontend/src/pages/QuotePage/QuotePage.js
CHANGED
|
@@ -74,7 +74,7 @@ const benchmarks = [
|
|
| 74 |
url: "https://arxiv.org/abs/2311.07911",
|
| 75 |
},
|
| 76 |
{
|
| 77 |
-
title: "
|
| 78 |
authors: "Suzgun et al.",
|
| 79 |
citation: `@misc{suzgun2022challengingbigbenchtaskschainofthought,
|
| 80 |
title={Challenging BIG-Bench Tasks and Whether Chain-of-Thought Can Solve Them},
|
|
@@ -88,7 +88,7 @@ const benchmarks = [
|
|
| 88 |
url: "https://arxiv.org/abs/2210.09261",
|
| 89 |
},
|
| 90 |
{
|
| 91 |
-
title: "MATH:
|
| 92 |
authors: "Hendrycks et al.",
|
| 93 |
citation: `@misc{hendrycks2021measuringmathematicalproblemsolving,
|
| 94 |
title={Measuring Mathematical Problem Solving With the MATH Dataset},
|
|
@@ -130,7 +130,7 @@ const benchmarks = [
|
|
| 130 |
url: "https://arxiv.org/abs/2310.16049",
|
| 131 |
},
|
| 132 |
{
|
| 133 |
-
title: "MMLU-Pro:
|
| 134 |
authors: "Wang et al.",
|
| 135 |
citation: `@misc{wang2024mmluprorobustchallengingmultitask,
|
| 136 |
title={MMLU-Pro: A More Robust and Challenging Multi-Task Language Understanding Benchmark},
|
|
|
|
| 74 |
url: "https://arxiv.org/abs/2311.07911",
|
| 75 |
},
|
| 76 |
{
|
| 77 |
+
title: "BBH: Big-Bench Hard",
|
| 78 |
authors: "Suzgun et al.",
|
| 79 |
citation: `@misc{suzgun2022challengingbigbenchtaskschainofthought,
|
| 80 |
title={Challenging BIG-Bench Tasks and Whether Chain-of-Thought Can Solve Them},
|
|
|
|
| 88 |
url: "https://arxiv.org/abs/2210.09261",
|
| 89 |
},
|
| 90 |
{
|
| 91 |
+
title: "MATH: Mathematics Aptitude Test of Heuristics - Level 5",
|
| 92 |
authors: "Hendrycks et al.",
|
| 93 |
citation: `@misc{hendrycks2021measuringmathematicalproblemsolving,
|
| 94 |
title={Measuring Mathematical Problem Solving With the MATH Dataset},
|
|
|
|
| 130 |
url: "https://arxiv.org/abs/2310.16049",
|
| 131 |
},
|
| 132 |
{
|
| 133 |
+
title: "MMLU-Pro: Massive Multitask Language Understanding Professional",
|
| 134 |
authors: "Wang et al.",
|
| 135 |
citation: `@misc{wang2024mmluprorobustchallengingmultitask,
|
| 136 |
title={MMLU-Pro: A More Robust and Challenging Multi-Task Language Understanding Benchmark},
|