Update app.py
Browse files
app.py
CHANGED
@@ -14,19 +14,6 @@ CITATION_BUTTON_TEXT = r"""@misc{2023opencompass,
|
|
14 |
}"""
|
15 |
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
|
16 |
|
17 |
-
|
18 |
-
head_style = """
|
19 |
-
<style>
|
20 |
-
@media (min-width: 1536px)
|
21 |
-
{
|
22 |
-
.gradio-container {
|
23 |
-
min-width: var(--size-full) !important;
|
24 |
-
}
|
25 |
-
}
|
26 |
-
</style>
|
27 |
-
"""
|
28 |
-
|
29 |
-
|
30 |
DATA_URL_BASE = "http://opencompass.oss-cn-shanghai.aliyuncs.com/dev-assets/hf-research/"
|
31 |
|
32 |
def findfile():
|
@@ -44,14 +31,12 @@ def findfile():
|
|
44 |
return model_info, results
|
45 |
|
46 |
|
47 |
-
MAIN_LEADERBOARD_DESCRIPTION = """##
|
48 |
-
|
49 |
-
- The datasets selected so far include General Knowledge Reasoning (MMLU-Pro/GPQA-Diamond), Logical Reasoning (BBH), Mathematical Reasoning (MATH-500, AIME), Code Completion (LiveCodeBench, HumanEval), and Instruction Following (IFEval).
|
50 |
-
- Currently, the evaluation primarily targets chat models, with updates featuring the latest community models at irregular intervals.
|
51 |
-
- Prompts and reproduction scripts can be found in [**OpenCompass**: A Toolkit for Evaluation of LLMs](https://github.com/open-compass/opencompass)🏆.
|
52 |
-
"""
|
53 |
|
|
|
54 |
|
|
|
55 |
|
56 |
|
57 |
def create_interface():
|
|
|
14 |
}"""
|
15 |
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
|
16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
DATA_URL_BASE = "http://opencompass.oss-cn-shanghai.aliyuncs.com/dev-assets/hf-research/"
|
18 |
|
19 |
def findfile():
|
|
|
31 |
return model_info, results
|
32 |
|
33 |
|
34 |
+
MAIN_LEADERBOARD_DESCRIPTION = """## Compass Academic Leaderboard
|
35 |
+
--WIP--
|
|
|
|
|
|
|
|
|
36 |
|
37 |
+
"""
|
38 |
|
39 |
+
Initial_title = 'Compass Academic Leaderboard'
|
40 |
|
41 |
|
42 |
def create_interface():
|