Spaces:
Running
Running
mehran
committed on
Commit
·
352d5dc
1
Parent(s):
91ff46d
update
Browse files
- about.py +1 -1
- app.py +1 -1
- leaderboard/leaderboard_config.yaml +2 -2
about.py
CHANGED
@@ -16,7 +16,7 @@ def render_about():
|
|
16 |
|
17 |
with gr.Accordion("1. PerCoR (Persian Commonsense Reasoning)", open=False):
|
18 |
gr.Markdown("""
|
19 |
-
|
20 |
""")
|
21 |
|
22 |
with gr.Accordion("2. Persian IFEval (Persian Instruction Following Evaluation)", open=False):
|
|
|
16 |
|
17 |
with gr.Accordion("1. PerCoR (Persian Commonsense Reasoning)", open=False):
|
18 |
gr.Markdown("""
|
19 |
+
PerCoR is the first large-scale Persian benchmark for evaluating models' ability in **commonsense reasoning** through multi-choice sentence completion. It includes over 106,000 samples from diverse domains such as news, religion, and lifestyle, extracted from more than 40 Persian websites. Innovative methods like "segmentation by conjunctions" were used to create coherent and diverse sentences and options, while the DRESS-AF technique helped generate challenging, human-solvable distractors.
|
20 |
""")
|
21 |
|
22 |
with gr.Accordion("2. Persian IFEval (Persian Instruction Following Evaluation)", open=False):
|
app.py
CHANGED
@@ -25,7 +25,7 @@ def create_app():
|
|
25 |
logger.info("Initializing MIZAN: A Persian LLM Leaderboard application...")
|
26 |
|
27 |
# Define the path to the leaderboard's configuration file
|
28 |
-
# This assumes app.py is in the project root, and
|
29 |
config_file_path = Path("leaderboard/leaderboard_config.yaml")
|
30 |
|
31 |
if not config_file_path.exists():
|
|
|
25 |
logger.info("Initializing MIZAN: A Persian LLM Leaderboard application...")
|
26 |
|
27 |
# Define the path to the leaderboard's configuration file
|
28 |
+
# This assumes app.py is in the project root, and leaderboard_config.yaml is inside the 'leaderboard' directory.
|
29 |
config_file_path = Path("leaderboard/leaderboard_config.yaml")
|
30 |
|
31 |
if not config_file_path.exists():
|
leaderboard/leaderboard_config.yaml
CHANGED
@@ -43,8 +43,8 @@ task_display_names:
|
|
43 |
translation-fa2en_fa2en: "Translation (fa2en)"
|
44 |
translation-ar2fa_ar2fa: "Translation (ar2fa)"
|
45 |
translation-fa2ar_fa2ar: "Translation (fa2ar)"
|
46 |
-
summarization_SamSUM-fa: "SamSum-Fa (
|
47 |
-
summarization_PnSummary: "PnSummary (
|
48 |
sentiment-analysis_deepsentipers: "DeepSentiPers (SA)"
|
49 |
sts_SynPerSTS: "SynPerSTS (STS)"
|
50 |
ner_arman: "Arman (NER)"
|
|
|
43 |
translation-fa2en_fa2en: "Translation (fa2en)"
|
44 |
translation-ar2fa_ar2fa: "Translation (ar2fa)"
|
45 |
translation-fa2ar_fa2ar: "Translation (fa2ar)"
|
46 |
+
summarization_SamSUM-fa: "SamSum-Fa (Summarization)"
|
47 |
+
summarization_PnSummary: "PnSummary (Summarization)"
|
48 |
sentiment-analysis_deepsentipers: "DeepSentiPers (SA)"
|
49 |
sts_SynPerSTS: "SynPerSTS (STS)"
|
50 |
ner_arman: "Arman (NER)"
|