Spaces:
Sleeping
Sleeping
Commit
·
6d8d412
1
Parent(s):
efa3bcc
update scripts
Browse files
- src/display/about.py +22 -22
src/display/about.py
CHANGED
|
@@ -12,26 +12,26 @@ class Tasks(Enum):
|
|
| 12 |
# task_key in the json file, metric_key in the json file, name to display in the leaderboard
|
| 13 |
Overall = Task("overall_js_divergence", "overall_js_divergence", "Overall Humanlike %")
|
| 14 |
Overall_ci = Task("overall_confidence_interval", "overall_confidence_interval", "Overall CI")
|
| 15 |
-
E1 = Task("E1", "E1", "
|
| 16 |
-
E1_ci = Task("E1_ci", "E1_ci", "
|
| 17 |
-
E2 = Task("E2", "E2", "
|
| 18 |
-
E2_ci = Task("E2_ci", "E2_ci", "
|
| 19 |
-
E3 = Task("E3", "E3", "
|
| 20 |
-
E3_ci = Task("E3_ci", "E3_ci", "
|
| 21 |
-
E4 = Task("E4", "E4", "
|
| 22 |
-
E4_ci = Task("E4_ci", "E4_ci", "
|
| 23 |
-
E5 = Task("E5", "E5", "
|
| 24 |
-
E5_ci = Task("E5_ci", "E5_ci", "
|
| 25 |
-
E6 = Task("E6", "E6", "
|
| 26 |
-
E6_ci = Task("E6_ci", "E6_ci", "
|
| 27 |
-
E7 = Task("E7", "E7", "
|
| 28 |
-
E7_ci = Task("E7_ci", "E7_ci", "
|
| 29 |
-
E8 = Task("E8", "E8", "
|
| 30 |
-
E8_ci = Task("E8_ci", "E8_ci", "
|
| 31 |
-
E9 = Task("E9", "E9", "
|
| 32 |
-
E9_ci = Task("E9_ci", "E9_ci", "
|
| 33 |
-
E10 = Task("E10", "E10", "
|
| 34 |
-
E10_ci = Task("E10_ci", "E10_ci", "
|
| 35 |
|
| 36 |
|
| 37 |
|
|
@@ -51,8 +51,8 @@ LLM_BENCHMARKS_TEXT = """
|
|
| 51 |
|
| 52 |
This study aims to compare the similarities between human and model responses in language use by employing ten psycholinguistic tasks:
|
| 53 |
|
| 54 |
-
1. **
|
| 55 |
-
2. **
|
| 56 |
3. **Word:** Word Length and Predictivity<br>
|
| 57 |
4. **Word:** Word Meaning Priming<br>
|
| 58 |
5. **Syntax:** Structural Priming<br>
|
|
|
|
| 12 |
# task_key in the json file, metric_key in the json file, name to display in the leaderboard
|
| 13 |
Overall = Task("overall_js_divergence", "overall_js_divergence", "Overall Humanlike %")
|
| 14 |
Overall_ci = Task("overall_confidence_interval", "overall_confidence_interval", "Overall CI")
|
| 15 |
+
E1 = Task("E1", "E1", "Sound-1")
|
| 16 |
+
E1_ci = Task("E1_ci", "E1_ci", "Sound-1 CI")
|
| 17 |
+
E2 = Task("E2", "E2", "Sound-2")
|
| 18 |
+
E2_ci = Task("E2_ci", "E2_ci", "Sound-2 CI")
|
| 19 |
+
E3 = Task("E3", "E3", "Word-1")
|
| 20 |
+
E3_ci = Task("E3_ci", "E3_ci", "Word-1 CI")
|
| 21 |
+
E4 = Task("E4", "E4", "Word-2")
|
| 22 |
+
E4_ci = Task("E4_ci", "E4_ci", "Word-2 CI")
|
| 23 |
+
E5 = Task("E5", "E5", "Syntax-1")
|
| 24 |
+
E5_ci = Task("E5_ci", "E5_ci", "Syntax-1 CI")
|
| 25 |
+
E6 = Task("E6", "E6", "Syntax-2")
|
| 26 |
+
E6_ci = Task("E6_ci", "E6_ci", "Syntax-2 CI")
|
| 27 |
+
E7 = Task("E7", "E7", "Meaning-1")
|
| 28 |
+
E7_ci = Task("E7_ci", "E7_ci", "Meaning-1 CI")
|
| 29 |
+
E8 = Task("E8", "E8", "Meaning-2")
|
| 30 |
+
E8_ci = Task("E8_ci", "E8_ci", "Meaning-2 CI")
|
| 31 |
+
E9 = Task("E9", "E9", "Discourse-1")
|
| 32 |
+
E9_ci = Task("E9_ci", "E9_ci", "Discourse-1 CI")
|
| 33 |
+
E10 = Task("E10", "E10", "Discourse-2")
|
| 34 |
+
E10_ci = Task("E10_ci", "E10_ci", "Discourse-2 CI")
|
| 35 |
|
| 36 |
|
| 37 |
|
|
|
|
| 51 |
|
| 52 |
This study aims to compare the similarities between human and model responses in language use by employing ten psycholinguistic tasks:
|
| 53 |
|
| 54 |
+
1. **Sound:** Sound Shape Association<br>
|
| 55 |
+
2. **Sound:** Sound Gender Association<br>
|
| 56 |
3. **Word:** Word Length and Predictivity<br>
|
| 57 |
4. **Word:** Word Meaning Priming<br>
|
| 58 |
5. **Syntax:** Structural Priming<br>
|