mehran committed · Commit fe79a14 · 1 parent: 647131d

update

Files changed:
- README.md +4 -6
- __pycache__/about.cpython-310.pyc +0 -0
- __pycache__/submission.cpython-310.pyc +0 -0
- about.py +91 -0
- app.py +87 -0
- leaderboard/__init__.py +0 -0
- leaderboard/__pycache__/__init__.cpython-310.pyc +0 -0
- leaderboard/__pycache__/leaderboard.cpython-310.pyc +0 -0
- leaderboard/boards_data/MMLU.jsonl +29 -0
- leaderboard/boards_data/all.jsonl +29 -0
- leaderboard/boards_data/extractive-qa_PQuAD.jsonl +29 -0
- leaderboard/boards_data/ifeval.jsonl +29 -0
- leaderboard/boards_data/keyword-extraction_SynKeywords.jsonl +29 -0
- leaderboard/boards_data/mt_bench.jsonl +29 -0
- leaderboard/boards_data/ner_arman.jsonl +29 -0
- leaderboard/boards_data/nli_farstail.jsonl +29 -0
- leaderboard/boards_data/paraphrase-detection_FarsiParaphraseDetection.jsonl +29 -0
- leaderboard/boards_data/paraphrase-detection_parsinlu.jsonl +29 -0
- leaderboard/boards_data/persian_csr.jsonl +29 -0
- leaderboard/boards_data/persian_nlg.jsonl +29 -0
- leaderboard/boards_data/persian_nlu.jsonl +0 -0
- leaderboard/boards_data/question-generation_PersianQA.jsonl +29 -0
- leaderboard/boards_data/sentiment-analysis_deepsentipers.jsonl +29 -0
- leaderboard/boards_data/sts_FarSICK.jsonl +29 -0
- leaderboard/boards_data/sts_SynPerSTS.jsonl +29 -0
- leaderboard/boards_data/summarization_PnSummary.jsonl +29 -0
- leaderboard/boards_data/summarization_SamSUM-fa.jsonl +29 -0
- leaderboard/boards_data/tone-classification_SynTone.jsonl +29 -0
- leaderboard/boards_data/topic-classification_sid.jsonl +29 -0
- leaderboard/boards_data/translation-ar2fa_ar2fa.jsonl +29 -0
- leaderboard/boards_data/translation-en2fa_en2fa.jsonl +29 -0
- leaderboard/boards_data/translation-fa2ar_fa2ar.jsonl +29 -0
- leaderboard/boards_data/translation-fa2en_fa2en.jsonl +29 -0
- leaderboard/leaderboard.py +605 -0
- leaderboard/leaderboard_config.yaml +310 -0
- leaderboard/refresh.py +441 -0
- leaderboard/template_jsons/MMLU.json +21 -0
- leaderboard/template_jsons/MMLU_full.json +171 -0
- leaderboard/template_jsons/boolq.json +1 -0
- leaderboard/template_jsons/hamrah_mt_bench.json +8 -0
- leaderboard/template_jsons/ifeval.json +22 -0
- leaderboard/template_jsons/ifeval_full.json +66 -0
- leaderboard/template_jsons/mt_bench.json +24 -0
- leaderboard/template_jsons/mt_bench_full.json +60 -0
- leaderboard/template_jsons/persian_csr.json +47 -0
- leaderboard/template_jsons/persian_csr_full.json +117 -0
- leaderboard/template_jsons/persian_nlg.json +48 -0
- leaderboard/template_jsons/persian_nlu.json +93 -0
- leaderboard/template_jsons/piqa.json +1 -0
- submission.py +209 -0
README.md
CHANGED
@@ -1,14 +1,12 @@
 ---
-title:
-emoji:
-colorFrom:
+title: Leaderboard
+emoji: 🐨
+colorFrom: red
 colorTo: gray
 sdk: gradio
-sdk_version: 5.
+sdk_version: 5.21.0
 app_file: app.py
 pinned: false
-license: cc-by-nc-nd-4.0
-short_description: 'MIZAN: A Persian LLM Leaderboard'
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
__pycache__/about.cpython-310.pyc
ADDED
Binary file (6.88 kB).
__pycache__/submission.cpython-310.pyc
ADDED
Binary file (6.94 kB).
about.py
ADDED
@@ -0,0 +1,91 @@
# in about.py

import gradio as gr

def render_about():
    with gr.Blocks() as about_page:
        gr.Markdown("""
        # About MIZAN: A Persian LLM Leaderboard

        MIZAN: A Persian LLM Leaderboard is designed to establish a standard and comprehensive benchmark for evaluating Large Language Models (LLMs) in the Persian language. This project combines existing datasets, translates and localizes globally recognized benchmarks, and incorporates newly developed, Persian-specific datasets. MIZAN aims to provide a multi-dimensional assessment of models' capabilities across various linguistic, knowledge-based, and reasoning tasks. Its primary goal is to offer researchers, developers, and enthusiasts a transparent and reliable view of LLM performance in the Persian language landscape.

        MIZAN provides a holistic view of models' strengths and weaknesses by assessing them across a suite of key tasks, contributing to the advancement of AI research for the Persian language.
        """)

        with gr.Accordion("MIZAN Benchmark Components Details", open=True):

            with gr.Accordion("1. PerCoR (Persian Commonsense Reasoning)", open=False):
                gr.Markdown("""
                PerCoR is the first large-scale Persian benchmark for evaluating models' ability in **commonsense reasoning** through multiple-choice sentence completion. It includes over 106,000 samples from diverse domains such as news, religion, and lifestyle, extracted from more than 40 Persian websites. Innovative methods like "segmentation by conjunctions" were used to create coherent and diverse sentences and options, while the DRESS-AF technique helped generate challenging, human-solvable distractors.
                """)

            with gr.Accordion("2. IFEval-fa (Persian Instruction Following Evaluation)", open=False):
                gr.Markdown("""
                This dataset is a Persian-adapted and localized version of **IFEval**, assessing models' proficiency in **accurately executing complex, multi-step instructions (instruction following)**. The translation process involved a hybrid machine-human approach, with prompts unsuitable for the Persian language being rewritten or removed.
                """)

            with gr.Accordion("3. MMLU-Fa (Persian Massive Multitask Language Understanding)", open=False):
                gr.Markdown("""
                MMLU-Fa is an expanded and localized version of the renowned **MMLU** benchmark, designed to measure models' **general and specialized knowledge** in Persian. Tailored to cover knowledge at various levels and relevant to the Iranian cultural context, it comprises three main sub-datasets:
                <ul>
                <li><strong>SPK (School Persian Knowledge):</strong> Contains 5,581 multiple-choice questions from the official Iranian school curriculum (grades 4-12) across 78 diverse subjects. Data was collected from the "Paadars" educational website and subsequently cleaned.</li>
                <li><strong>UPK (University Persian Knowledge):</strong> Includes 7,793 multiple-choice questions from Master's and PhD entrance exams across 25 academic disciplines (e.g., medicine, engineering, humanities, arts). This data was extracted from exam booklets using OCR technology and cleaned by LLMs.</li>
                <li><strong>GPK (General Persian Knowledge):</strong> Consists of 1,003 multiple-choice questions on 15 topics related to general knowledge specific to Iranian society (e.g., city souvenirs, religious edicts, national laws, famous personalities, cultural idioms). This data was generated using LLMs with specific prompts and reviewed by humans.</li>
                </ul>
                """)

            with gr.Accordion("4. Persian MT-Bench (Persian Multi-Turn Benchmark)", open=False):
                gr.Markdown("""
                This is a localized version of the **MT-Bench** benchmark, evaluating models on **multi-turn question-answering and dialogue-based tasks**. Questions involve multi-step requests or require creative responses. In the Persian version, all samples were translated and rewritten by humans, and some were expanded to 3 or 4 turns. Two new topics were also added:
                <ul>
                <li><strong>Native Iranian Knowledge:</strong> Questions about cultural topics such as films, actors, and Iranian figures.</li>
                <li><strong>Chat-Retrieval:</strong> Involves a multi-turn dialogue where the model must extract a relevant question and answer based on the user's needs.</li>
                </ul>
                """)

            with gr.Accordion("5. Persian NLU (Persian Natural Language Understanding)", open=False):
                gr.Markdown("""
                This section comprises a collection of existing Persian benchmarks for evaluating various aspects of **Natural Language Understanding**. Key tasks and datasets include:
                <ul>
                <li><strong>Sentiment Analysis:</strong> DeepSentiPers</li>
                <li><strong>Text Classification:</strong> Synthetic Persian Tone, SID</li>
                <li><strong>Natural Language Inference (NLI):</strong> FarsTail</li>
                <li><strong>Semantic Textual Similarity (STS):</strong> Synthetic Persian STS, FarSICK</li>
                <li><strong>Named Entity Recognition (NER):</strong> Arman</li>
                <li><strong>Paraphrase Detection:</strong> FarsiParaphraseDetection, ParsiNLU</li>
                <li><strong>Extractive Question Answering (EQA):</strong> PQuAD</li>
                <li><strong>Keyword Extraction:</strong> Synthetic Persian Keywords</li>
                </ul>
                """)

            with gr.Accordion("6. Persian NLG (Persian Natural Language Generation)", open=False):
                gr.Markdown("""
                This section focuses on **Natural Language Generation**, covering tasks such as:
                <ul>
                <li><strong>Summarization:</strong> SamSUM-fa, PnSummary</li>
                <li><strong>Machine Translation:</strong> TEP, MIZAN, EPOQUE</li>
                <li><strong>Question Generation:</strong> PersianQA</li>
                </ul>
                The goal is to assess the generative capabilities of models.
                """)

            # with gr.Accordion("7. BoolQ-fa (Persian Boolean Question Answering)", open=False):
            #     gr.Markdown("""
            #     A Persian-adapted version of **BoolQ**, this benchmark evaluates a model's ability to answer **yes/no questions based on a given text**, testing common reasoning skills. Each instance includes a passage, a question about it, and a boolean answer.
            #     """)

            # with gr.Accordion("8. PIQA-fa (Persian Physical Interaction Question Answering)", open=False):
            #     gr.Markdown("""
            #     This is a Persian version of **PIQA**, focusing on **physical reasoning and commonsense understanding** of real-world interactions. Each instance presents a goal or question along with two potential solutions, requiring the model to choose the more physically plausible option.
            #     """)

        gr.Markdown("""
        ---
        MIZAN is a significant step towards the scientific and localized evaluation of language models for Persian, aiming to serve as a valuable assessment reference for researchers, developers, and anyone interested in practical language models.
        """)

    return about_page

# To test this function directly (if in a separate file):
# if __name__ == '__main__':
#     render_about().launch()
app.py
ADDED
@@ -0,0 +1,87 @@
import gradio as gr
from pathlib import Path
import logging

# Import LeaderboardApp from the correct location within the 'leaderboard' package
from leaderboard.leaderboard import LeaderboardApp

# Import UI rendering functions for other tabs
from about import render_about
from submission import render_submit

# --- Logging Setup (Optional but Recommended) ---
# You can centralize logging configuration here or ensure each module handles its own.
# For simplicity, if other modules already configure logging, this might not be strictly needed here.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(module)s - %(message)s"
)
logger = logging.getLogger(__name__)

def create_app():
    """
    Creates and configures the main Gradio application for MIZAN: A Persian LLM Leaderboard.
    """
    logger.info("Initializing MIZAN: A Persian LLM Leaderboard application...")

    # Define the path to the leaderboard's configuration file.
    # This assumes app.py is in the project root, and leaderboard_config.yaml is inside the 'leaderboard' directory.
    config_file_path = Path("leaderboard/leaderboard_config.yaml")

    if not config_file_path.exists():
        logger.error(f"CRITICAL: Leaderboard configuration file not found at {config_file_path}. The application may not function correctly.")
        # Optionally, you could raise an error here or return a Gradio interface indicating the error.

    # Initialize the LeaderboardApp with the configuration path
    leaderboard_processor = LeaderboardApp(config_path=config_file_path)

    # Load and process data for the leaderboard
    logger.info("Loading and processing leaderboard data...")
    leaderboard_processor.load_data()
    leaderboard_processor.handle_nulls_in_averages()
    leaderboard_processor.generate_model_rankings()
    # leaderboard_processor.apply_rankings_to_dataframes()  # Likely redundant if generate_model_rankings covers it
    # leaderboard_processor.format_dataframes()
    logger.info("Leaderboard data processing complete.")

    # Create the main Gradio interface using gr.Blocks
    with gr.Blocks(title="MIZAN: A Persian LLM Leaderboard") as demo:
        gr.Markdown("<h1 style='text-align: center; width: 100%; margin-bottom: 10px;'>🇮🇷 MIZAN: A Persian LLM Leaderboard</h1>")
        gr.Markdown("""<p style='font-size: 1.1em; text-align: center; max-width: 800px; margin: 0 auto 20px auto;'>
        MIZAN: A Persian LLM Leaderboard is a comprehensive benchmark for evaluating Large Language Models (LLMs) in Persian.
        It combines existing datasets, translated benchmarks, and new Persian-specific data to assess LLM capabilities in understanding,
        generation, reasoning, and knowledge relevant to the Persian language and culture.
        MIZAN provides a standardized tool for researchers and developers to measure Persian LLM performance.
        </p>""")

        with gr.Tabs():
            with gr.TabItem("LLM Benchmark"):
                logger.info("Creating 'LLM Benchmark' tab content...")
                # Embed the leaderboard interface generated by LeaderboardApp;
                # create_gradio_interface adds its components directly into the current gr.Blocks scope.
                leaderboard_processor.create_gradio_interface()
                logger.info("'LLM Benchmark' tab content created.")

            with gr.TabItem("About MIZAN"):
                logger.info("Creating 'About MIZAN' tab content...")
                render_about()  # Builds the 'About' page Blocks
                logger.info("'About MIZAN' tab content created.")

            with gr.TabItem("Request New Model"):
                logger.info("Creating 'Request New Model' tab content...")
                render_submit()  # Builds the 'Submit' page Blocks
                logger.info("'Request New Model' tab content created.")

    logger.info("MIZAN: A Persian LLM Leaderboard application interface created.")
    return demo

if __name__ == "__main__":
    logger.info("Launching MIZAN: A Persian LLM Leaderboard application...")
    mizan_app = create_app()
    mizan_app.launch(
        debug=True,  # Enable Gradio debug mode for more detailed error messages in development
        # share=True,  # Uncomment to create a public link (useful for temporary sharing)
        # server_name="0.0.0.0",  # Uncomment to make accessible on your local network
    )
    logger.info("MIZAN: A Persian LLM Leaderboard application has been launched.")
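Since the whole UI hangs off create_app(), the Space can also be smoke-tested outside Hugging Face. A minimal sketch, assuming it is run from the repo root so leaderboard/leaderboard_config.yaml resolves (the LeaderboardApp methods called above live in leaderboard/leaderboard.py, which this commit adds but this page does not show):

    # Hypothetical local smoke test for this Space (not part of the commit).
    from app import create_app

    demo = create_app()  # builds the Blocks tree and runs the data-processing pipeline
    demo.launch(server_name="0.0.0.0", server_port=7860)  # same entry point the Space itself uses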
leaderboard/__init__.py
ADDED
File without changes
leaderboard/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (190 Bytes).
leaderboard/__pycache__/leaderboard.cpython-310.pyc
ADDED
Binary file (21.4 kB).
leaderboard/boards_data/MMLU.jsonl
ADDED
@@ -0,0 +1,29 @@
{"Model Name":"deepseek-reasoner","model_url":"https_google.com","parameters_count":"671000000000","source_type":"Open-Source","acc":0.7808162132,"cinema_acc":0.425,"emergency_number_acc":0.6,"foods_acc":0.65,"games_acc":0.55,"herbal_drugs_acc":0.675,"places_acc":0.8476190476,"poetry_acc":0.625,"politicians_acc":0.9,"popular_people_acc":0.7897435897,"Government_law_acc":0.8913043478,"proverbs_acc":0.76,"religous_acc":0.9333333333,"social_manners_acc":0.8426966292,"souvenirs_acc":0.72,"sports_acc":0.4761904762,"GPK_acc":0.7268195414,"SPK_acc":0.7799677477,"UPK_acc":0.8758064516}
{"Model Name":"claude-3-7-sonnet-20250219","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.7214996174,"cinema_acc":0.6875,"emergency_number_acc":0.7,"foods_acc":0.74,"games_acc":0.5,"herbal_drugs_acc":0.675,"places_acc":0.8476190476,"poetry_acc":0.8,"politicians_acc":0.95,"popular_people_acc":0.8615384615,"Government_law_acc":0.8913043478,"proverbs_acc":0.77,"religous_acc":0.9333333333,"social_manners_acc":0.9101123596,"souvenirs_acc":0.72,"sports_acc":0.6031746032,"GPK_acc":0.7936191426,"SPK_acc":0.7588245834,"UPK_acc":0.6854869755}
{"Model Name":"gpt-4.1","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.7040411769,"cinema_acc":0.7625,"emergency_number_acc":0.9,"foods_acc":0.78,"games_acc":0.7,"herbal_drugs_acc":0.625,"places_acc":0.8666666667,"poetry_acc":0.875,"politicians_acc":0.85,"popular_people_acc":0.8461538462,"Government_law_acc":0.8913043478,"proverbs_acc":0.86,"religous_acc":0.8888888889,"social_manners_acc":0.8651685393,"souvenirs_acc":0.68,"sports_acc":0.4761904762,"GPK_acc":0.8005982054,"SPK_acc":0.7258555814,"UPK_acc":0.6759912742}
{"Model Name":"gemini-2.0-flash","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.6957640676,"cinema_acc":0.725,"emergency_number_acc":0.6,"foods_acc":0.79,"games_acc":0.5,"herbal_drugs_acc":0.75,"places_acc":0.8380952381,"poetry_acc":0.825,"politicians_acc":0.75,"popular_people_acc":0.7846153846,"Government_law_acc":0.9565217391,"proverbs_acc":0.78,"religous_acc":0.9111111111,"social_manners_acc":0.8539325843,"souvenirs_acc":0.74,"sports_acc":0.4761904762,"GPK_acc":0.7756729811,"SPK_acc":0.7263931195,"UPK_acc":0.6635442063}
{"Model Name":"gpt-4o","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.6884607359,"cinema_acc":0.75,"emergency_number_acc":0.7,"foods_acc":0.78,"games_acc":0.6,"herbal_drugs_acc":0.7,"places_acc":0.8380952381,"poetry_acc":0.9,"politicians_acc":0.95,"popular_people_acc":0.8615384615,"Government_law_acc":0.9347826087,"proverbs_acc":0.8,"religous_acc":0.9333333333,"social_manners_acc":0.8426966292,"souvenirs_acc":0.66,"sports_acc":0.5555555556,"GPK_acc":0.8015952144,"SPK_acc":0.720121842,"UPK_acc":0.6512254587}
{"Model Name":"gpt-4.1-mini","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.6128538638,"cinema_acc":0.525,"emergency_number_acc":0.7,"foods_acc":0.73,"games_acc":0.55,"herbal_drugs_acc":0.625,"places_acc":0.8380952381,"poetry_acc":0.575,"politicians_acc":0.6,"popular_people_acc":0.7076923077,"Government_law_acc":0.847826087,"proverbs_acc":0.71,"religous_acc":0.6666666667,"social_manners_acc":0.8202247191,"souvenirs_acc":0.68,"sports_acc":0.4920634921,"GPK_acc":0.6949152542,"SPK_acc":0.6265902168,"UPK_acc":0.5924547671}
{"Model Name":"c4ai-command-a-03-2025","model_url":"https_google.com","parameters_count":"111000000000","source_type":"Open-Source","acc":0.5980651448,"cinema_acc":0.6,"emergency_number_acc":0.5,"foods_acc":0.67,"games_acc":0.65,"herbal_drugs_acc":0.675,"places_acc":0.8476190476,"poetry_acc":0.775,"politicians_acc":0.95,"popular_people_acc":0.8092783505,"Government_law_acc":0.8913043478,"proverbs_acc":0.78,"religous_acc":0.8666666667,"social_manners_acc":0.8988764045,"souvenirs_acc":0.68,"sports_acc":0.5396825397,"GPK_acc":0.7604790419,"SPK_acc":0.6417428725,"UPK_acc":0.5458980614}
{"Model Name":"deepseek-chat","model_url":"https_google.com","parameters_count":"671000000000","source_type":"Open-Source","acc":0.5908047576,"cinema_acc":0.5875,"emergency_number_acc":0.4,"foods_acc":0.56,"games_acc":0.55,"herbal_drugs_acc":0.75,"places_acc":0.8285714286,"poetry_acc":0.75,"politicians_acc":0.7,"popular_people_acc":0.7794871795,"Government_law_acc":0.8695652174,"proverbs_acc":0.78,"religous_acc":0.8444444444,"social_manners_acc":0.808988764,"souvenirs_acc":0.74,"sports_acc":0.5555555556,"GPK_acc":0.7288135593,"SPK_acc":0.6348324673,"UPK_acc":0.541511613}
{"Model Name":"Llama-3.3-70B-Instruct","model_url":"https_google.com","parameters_count":"70600000000","source_type":"Open-Source","acc":0.5714086374,"cinema_acc":0.5625,"emergency_number_acc":0.3,"foods_acc":0.56,"games_acc":0.6,"herbal_drugs_acc":0.575,"places_acc":0.8095238095,"poetry_acc":0.6,"politicians_acc":0.85,"popular_people_acc":0.7282051282,"Government_law_acc":0.8913043478,"proverbs_acc":0.7,"religous_acc":0.8222222222,"social_manners_acc":0.8539325843,"souvenirs_acc":0.6,"sports_acc":0.5555555556,"GPK_acc":0.6939182453,"SPK_acc":0.605489774,"UPK_acc":0.5310727179}
{"Model Name":"gpt-4o-mini","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.56986854,"cinema_acc":0.6625,"emergency_number_acc":0.4,"foods_acc":0.71,"games_acc":0.45,"herbal_drugs_acc":0.675,"places_acc":0.7714285714,"poetry_acc":0.675,"politicians_acc":0.75,"popular_people_acc":0.6820512821,"Government_law_acc":0.8913043478,"proverbs_acc":0.75,"religous_acc":0.7777777778,"social_manners_acc":0.7865168539,"souvenirs_acc":0.68,"sports_acc":0.5555555556,"GPK_acc":0.7078763709,"SPK_acc":0.6075972048,"UPK_acc":0.5250866162}
{"Model Name":"Qwen3-32B","model_url":"https_google.com","parameters_count":"32800000000","source_type":"Open-Source","acc":0.5635086255,"cinema_acc":0.45,"emergency_number_acc":0.2,"foods_acc":0.49,"games_acc":0.45,"herbal_drugs_acc":0.45,"places_acc":0.6285714286,"poetry_acc":0.35,"politicians_acc":0.3,"popular_people_acc":0.4974358974,"Government_law_acc":0.7608695652,"proverbs_acc":0.64,"religous_acc":0.6888888889,"social_manners_acc":0.8202247191,"souvenirs_acc":0.56,"sports_acc":0.3968253968,"GPK_acc":0.5513459621,"SPK_acc":0.5967741935,"UPK_acc":0.5412549724}
{"Model Name":"gemma-3-27b-it","model_url":"https_google.com","parameters_count":"27400000000","source_type":"Open-Source","acc":0.5633303193,"cinema_acc":0.625,"emergency_number_acc":0.4,"foods_acc":0.68,"games_acc":0.35,"herbal_drugs_acc":0.6,"places_acc":0.7904761905,"poetry_acc":0.7,"politicians_acc":0.75,"popular_people_acc":0.641025641,"Government_law_acc":0.8913043478,"proverbs_acc":0.74,"religous_acc":0.7777777778,"social_manners_acc":0.8764044944,"souvenirs_acc":0.62,"sports_acc":0.6031746032,"GPK_acc":0.6989032901,"SPK_acc":0.5977423401,"UPK_acc":0.5212370076}
{"Model Name":"claude-3-5-haiku-20241022","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.5440356745,"cinema_acc":0.5263157895,"emergency_number_acc":0.4,"foods_acc":0.72,"games_acc":0.55,"herbal_drugs_acc":0.575,"places_acc":0.8095238095,"poetry_acc":0.625,"politicians_acc":0.75,"popular_people_acc":0.6717948718,"Government_law_acc":0.8043478261,"proverbs_acc":0.72,"religous_acc":0.8444444444,"social_manners_acc":0.8539325843,"souvenirs_acc":0.6,"sports_acc":0.4920634921,"GPK_acc":0.6906906907,"SPK_acc":0.5934420355,"UPK_acc":0.4897066392}
{"Model Name":"Qwen3-14B","model_url":"https_google.com","parameters_count":"14800000000","source_type":"Open-Source","acc":0.5139458858,"cinema_acc":0.55,"emergency_number_acc":0.4,"foods_acc":0.52,"games_acc":0.5,"herbal_drugs_acc":0.5,"places_acc":0.6285714286,"poetry_acc":0.5,"politicians_acc":0.6,"popular_people_acc":0.5641025641,"Government_law_acc":0.7173913043,"proverbs_acc":0.5,"religous_acc":0.6444444444,"social_manners_acc":0.7415730337,"souvenirs_acc":0.28,"sports_acc":0.5396825397,"GPK_acc":0.5623130608,"SPK_acc":0.5513348862,"UPK_acc":0.4809444373}
{"Model Name":"gemma-3-12b-it","model_url":"https_google.com","parameters_count":"12200000000","source_type":"Open-Source","acc":0.5105376643,"cinema_acc":0.5,"emergency_number_acc":0.5,"foods_acc":0.53,"games_acc":0.4,"herbal_drugs_acc":0.625,"places_acc":0.6857142857,"poetry_acc":0.575,"politicians_acc":0.65,"popular_people_acc":0.6205128205,"Government_law_acc":0.8043478261,"proverbs_acc":0.61,"religous_acc":0.6,"social_manners_acc":0.7078651685,"souvenirs_acc":0.54,"sports_acc":0.5714285714,"GPK_acc":0.6091724826,"SPK_acc":0.556710267,"UPK_acc":0.4647760811}
{"Model Name":"Qwen3-30B-A3B","model_url":"https_google.com","parameters_count":"30500000000","source_type":"Open-Source","acc":0.5097725534,"cinema_acc":0.4875,"emergency_number_acc":0.3,"foods_acc":0.5,"games_acc":0.5,"herbal_drugs_acc":0.4,"places_acc":0.5428571429,"poetry_acc":0.45,"politicians_acc":0.4,"popular_people_acc":0.6051282051,"Government_law_acc":0.7826086957,"proverbs_acc":0.63,"religous_acc":0.7333333333,"social_manners_acc":0.6853932584,"souvenirs_acc":0.34,"sports_acc":0.5079365079,"GPK_acc":0.5593220339,"SPK_acc":0.5384339724,"UPK_acc":0.4828692416}
{"Model Name":"c4ai-command-r-plus","model_url":"https_google.com","parameters_count":"104000000000","source_type":"Open-Source","acc":0.4800723378,"cinema_acc":0.675,"emergency_number_acc":0.5,"foods_acc":0.69,"games_acc":0.5,"herbal_drugs_acc":0.625,"places_acc":0.8,"poetry_acc":0.775,"politicians_acc":0.75,"popular_people_acc":0.7487179487,"Government_law_acc":0.847826087,"proverbs_acc":0.66,"religous_acc":0.5555555556,"social_manners_acc":0.7865168539,"souvenirs_acc":0.62,"sports_acc":0.5396825397,"GPK_acc":0.701894317,"SPK_acc":0.5158573732,"UPK_acc":0.425895034}
{"Model Name":"gpt-4.1-nano","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.4784030048,"cinema_acc":0.4875,"emergency_number_acc":0.5,"foods_acc":0.49,"games_acc":0.6,"herbal_drugs_acc":0.6,"places_acc":0.6857142857,"poetry_acc":0.5,"politicians_acc":0.4,"popular_people_acc":0.6,"Government_law_acc":0.8260869565,"proverbs_acc":0.62,"religous_acc":0.6888888889,"social_manners_acc":0.7528089888,"souvenirs_acc":0.68,"sports_acc":0.3333333333,"GPK_acc":0.5972083749,"SPK_acc":0.5077943021,"UPK_acc":0.4420633902}
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","model_url":"https_google.com","parameters_count":"24000000000","source_type":"Open-Source","acc":0.4763231198,"cinema_acc":0.45,"emergency_number_acc":0.2,"foods_acc":0.41,"games_acc":0.45,"herbal_drugs_acc":0.475,"places_acc":0.6952380952,"poetry_acc":0.325,"politicians_acc":0.45,"popular_people_acc":0.4974358974,"Government_law_acc":0.847826087,"proverbs_acc":0.55,"religous_acc":0.6666666667,"social_manners_acc":0.7078651685,"souvenirs_acc":0.42,"sports_acc":0.4603174603,"GPK_acc":0.5343968096,"SPK_acc":0.5164066703,"UPK_acc":0.4401028278}
{"Model Name":"Qwen3-8B","model_url":"https_google.com","parameters_count":"8190000000","source_type":"Open-Source","acc":0.4467552341,"cinema_acc":0.375,"emergency_number_acc":0.4,"foods_acc":0.29,"games_acc":0.35,"herbal_drugs_acc":0.4,"places_acc":0.5047619048,"poetry_acc":0.2,"politicians_acc":0.25,"popular_people_acc":0.3692307692,"Government_law_acc":0.7608695652,"proverbs_acc":0.44,"religous_acc":0.5555555556,"social_manners_acc":0.6741573034,"souvenirs_acc":0.18,"sports_acc":0.3968253968,"GPK_acc":0.4207377866,"SPK_acc":0.4819924745,"UPK_acc":0.4248684717}
{"Model Name":"aya-expanse-32b","model_url":"https_google.com","parameters_count":"32300000000","source_type":"Open-Source","acc":0.4140641302,"cinema_acc":0.4875,"emergency_number_acc":0.4,"foods_acc":0.6,"games_acc":0.5,"herbal_drugs_acc":0.675,"places_acc":0.7904761905,"poetry_acc":0.55,"politicians_acc":0.6,"popular_people_acc":0.6820512821,"Government_law_acc":0.847826087,"proverbs_acc":0.66,"religous_acc":0.6222222222,"social_manners_acc":0.7640449438,"souvenirs_acc":0.64,"sports_acc":0.4126984127,"GPK_acc":0.6470588235,"SPK_acc":0.4373768142,"UPK_acc":0.3673809829}
{"Model Name":"c4ai-command-r-v01","model_url":"https_google.com","parameters_count":"35000000000","source_type":"Open-Source","acc":0.4046741323,"cinema_acc":0.425,"emergency_number_acc":0.3,"foods_acc":0.52,"games_acc":0.5,"herbal_drugs_acc":0.5,"places_acc":0.7523809524,"poetry_acc":0.325,"politicians_acc":0.5,"popular_people_acc":0.6307692308,"Government_law_acc":0.8043478261,"proverbs_acc":0.59,"religous_acc":0.5111111111,"social_manners_acc":0.595505618,"souvenirs_acc":0.34,"sports_acc":0.4285714286,"GPK_acc":0.5583250249,"SPK_acc":0.4311055366,"UPK_acc":0.3659694598}
{"Model Name":"Qwen3-4B","model_url":"https_google.com","parameters_count":"4020000000","source_type":"Open-Source","acc":0.4025179106,"cinema_acc":0.425,"emergency_number_acc":0.2,"foods_acc":0.49,"games_acc":0.4,"herbal_drugs_acc":0.425,"places_acc":0.4095238095,"poetry_acc":0.4,"politicians_acc":0.5,"popular_people_acc":0.4974358974,"Government_law_acc":0.6739130435,"proverbs_acc":0.48,"religous_acc":0.5111111111,"social_manners_acc":0.5617977528,"souvenirs_acc":0.26,"sports_acc":0.4126984127,"GPK_acc":0.4656031904,"SPK_acc":0.4341515857,"UPK_acc":0.3717438727}
{"Model Name":"gemma-3-4b-it","model_url":"https_google.com","parameters_count":"4300000000","source_type":"Open-Source","acc":0.3825554705,"cinema_acc":0.4875,"emergency_number_acc":0.2,"foods_acc":0.46,"games_acc":0.7,"herbal_drugs_acc":0.475,"places_acc":0.5523809524,"poetry_acc":0.525,"politicians_acc":0.5,"popular_people_acc":0.5076923077,"Government_law_acc":0.7608695652,"proverbs_acc":0.54,"religous_acc":0.4444444444,"social_manners_acc":0.6292134831,"souvenirs_acc":0.48,"sports_acc":0.4285714286,"GPK_acc":0.5224327019,"SPK_acc":0.4135459595,"UPK_acc":0.3423585269}
{"Model Name":"gemma-3-1b-it","model_url":"https_google.com","parameters_count":"1000000000","source_type":"Open-Source","acc":0.2830214927,"cinema_acc":0.5125,"emergency_number_acc":0.2,"foods_acc":0.51,"games_acc":0.25,"herbal_drugs_acc":0.5,"places_acc":0.3904761905,"poetry_acc":0.525,"politicians_acc":0.8,"popular_people_acc":0.5179487179,"Government_law_acc":0.347826087,"proverbs_acc":0.34,"religous_acc":0.3333333333,"social_manners_acc":0.2696629213,"souvenirs_acc":0.24,"sports_acc":0.4603174603,"GPK_acc":0.4267198405,"SPK_acc":0.2777280057,"UPK_acc":0.268317721}
{"Model Name":"Llama-3.2-3B-Instruct","model_url":"https_google.com","parameters_count":"3210000000","source_type":"Open-Source","acc":0.278430827,"cinema_acc":0.25,"emergency_number_acc":0.0,"foods_acc":0.16,"games_acc":0.3,"herbal_drugs_acc":0.125,"places_acc":0.4380952381,"poetry_acc":0.075,"politicians_acc":0.0,"popular_people_acc":0.1948717949,"Government_law_acc":0.5652173913,"proverbs_acc":0.2,"religous_acc":0.2444444444,"social_manners_acc":0.3146067416,"souvenirs_acc":0.28,"sports_acc":0.2063492063,"GPK_acc":0.2452642074,"SPK_acc":0.2999462462,"UPK_acc":0.2672911587}
{"Model Name":"Mistral-7B-Instruct-v0.3","model_url":"https_google.com","parameters_count":"7250000000","source_type":"Open-Source","acc":0.2553383877,"cinema_acc":0.375,"emergency_number_acc":0.1,"foods_acc":0.47,"games_acc":0.15,"herbal_drugs_acc":0.425,"places_acc":0.4285714286,"poetry_acc":0.425,"politicians_acc":0.45,"popular_people_acc":0.4051282051,"Government_law_acc":0.4782608696,"proverbs_acc":0.13,"religous_acc":0.4,"social_manners_acc":0.3707865169,"souvenirs_acc":0.12,"sports_acc":0.3333333333,"GPK_acc":0.3599202393,"SPK_acc":0.2727109837,"UPK_acc":0.2294366739}
{"Model Name":"Llama-3.2-1B-Instruct","model_url":"https_google.com","parameters_count":"1240000000","source_type":"Open-Source","acc":0.1987201781,"cinema_acc":0.3375,"emergency_number_acc":0.0,"foods_acc":0.18,"games_acc":0.55,"herbal_drugs_acc":0.175,"places_acc":0.4,"poetry_acc":0.15,"politicians_acc":0.25,"popular_people_acc":0.2615384615,"Government_law_acc":0.2608695652,"proverbs_acc":0.31,"religous_acc":0.1555555556,"social_manners_acc":0.393258427,"souvenirs_acc":0.26,"sports_acc":0.2380952381,"GPK_acc":0.2791625125,"SPK_acc":0.2065938004,"UPK_acc":0.1827280893}
{"Model Name":"o4-mini","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":null,"cinema_acc":null,"emergency_number_acc":null,"foods_acc":null,"games_acc":null,"herbal_drugs_acc":null,"places_acc":null,"poetry_acc":null,"politicians_acc":null,"popular_people_acc":null,"Government_law_acc":null,"proverbs_acc":null,"religous_acc":null,"social_manners_acc":null,"souvenirs_acc":null,"sports_acc":null,"GPK_acc":null,"SPK_acc":null,"UPK_acc":null}
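Each boards_data file is JSON Lines: one object per model, with null scores where a model has not been evaluated (e.g., the o4-mini row above). A minimal reading sketch, assuming pandas (the leaderboard's actual loading code is in leaderboard/leaderboard.py, not shown on this page):

    # Sketch: load one boards_data file and rank the evaluated models.
    import pandas as pd

    df = pd.read_json("leaderboard/boards_data/MMLU.jsonl", lines=True)

    # null scores become NaN; rank the remaining models by overall accuracy
    ranked = df.dropna(subset=["acc"]).sort_values("acc", ascending=False)
    print(ranked[["Model Name", "acc"]].head())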
leaderboard/boards_data/all.jsonl
ADDED
@@ -0,0 +1,29 @@
{"Model Name":"claude-3-7-sonnet-20250219","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","Average":0.7127,"Persian IFEval":0.8810572687,"Persian MT-Bench":0.8695,"PerMMLU":0.7214996174,"PerCoR":0.9117647059,"Persian NLU":0.7143086066,"Persian NLG":0.1779340777}
{"Model Name":"gpt-4.1","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","Average":0.6992,"Persian IFEval":0.8634361233,"Persian MT-Bench":0.87325,"PerMMLU":0.7040411769,"PerCoR":0.8839,"Persian NLU":0.6758278127,"Persian NLG":0.194675133}
{"Model Name":"gemini-2.0-flash","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","Average":0.6886,"Persian IFEval":0.8497790869,"Persian MT-Bench":0.838973064,"PerMMLU":0.6957640676,"PerCoR":0.8637863786,"Persian NLU":0.7050532433,"Persian NLG":0.178231145}
{"Model Name":"gpt-4o","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","Average":0.6877,"Persian IFEval":0.8296622614,"Persian MT-Bench":0.8371666667,"PerMMLU":0.6884607359,"PerCoR":0.8665,"Persian NLU":0.7146808531,"Persian NLG":0.18964968}
{"Model Name":"deepseek-reasoner","model_url":"https_google.com","parameters_count":"671000000000","source_type":"Open-Source","Average":0.6715,"Persian IFEval":0.8370044053,"Persian MT-Bench":0.86175,"PerMMLU":0.7808162132,"PerCoR":0.825165033,"Persian NLU":0.6361186163,"Persian NLG":0.0880621978}
{"Model Name":"gpt-4.1-mini","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","Average":0.6556,"Persian IFEval":0.8340675477,"Persian MT-Bench":0.8418333333,"PerMMLU":0.6128538638,"PerCoR":0.7712,"Persian NLU":0.6833497104,"Persian NLG":0.1901206806}
{"Model Name":"deepseek-chat","model_url":"https_google.com","parameters_count":"671000000000","source_type":"Open-Source","Average":0.6458,"Persian IFEval":0.8311306902,"Persian MT-Bench":0.8600833333,"PerMMLU":0.5908047576,"PerCoR":0.8241,"Persian NLU":0.6752949557,"Persian NLG":0.0934094344}
{"Model Name":"gemma-3-27b-it","model_url":"https_google.com","parameters_count":"27400000000","source_type":"Open-Source","Average":0.6247,"Persian IFEval":0.8296622614,"Persian MT-Bench":0.796,"PerMMLU":0.5633303193,"PerCoR":0.7628,"Persian NLU":0.6898261633,"Persian NLG":0.1067134448}
{"Model Name":"gpt-4o-mini","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","Average":0.6246,"Persian IFEval":0.8017621145,"Persian MT-Bench":0.7891666667,"PerMMLU":0.56986854,"PerCoR":0.7598,"Persian NLU":0.6459120734,"Persian NLG":0.1810678527}
{"Model Name":"Qwen3-32B","model_url":"https_google.com","parameters_count":"32800000000","source_type":"Open-Source","Average":0.6224,"Persian IFEval":0.803030303,"Persian MT-Bench":0.7632996633,"PerMMLU":0.5635086255,"PerCoR":0.7654,"Persian NLU":0.6714091535,"Persian NLG":0.1679338638}
{"Model Name":"Llama-3.3-70B-Instruct","model_url":"https_google.com","parameters_count":"70600000000","source_type":"Open-Source","Average":0.613,"Persian IFEval":0.7125925926,"Persian MT-Bench":0.7172558923,"PerMMLU":0.5714086374,"PerCoR":0.7956,"Persian NLU":0.6800109206,"Persian NLG":0.2010896964}
{"Model Name":"gemma-3-12b-it","model_url":"https_google.com","parameters_count":"12200000000","source_type":"Open-Source","Average":0.6008,"Persian IFEval":0.8149779736,"Persian MT-Bench":0.75125,"PerMMLU":0.5105376643,"PerCoR":0.7094,"Persian NLU":0.699116864,"Persian NLG":0.1196804312}
{"Model Name":"Qwen3-30B-A3B","model_url":"https_google.com","parameters_count":"30500000000","source_type":"Open-Source","Average":0.5939,"Persian IFEval":0.8325508607,"Persian MT-Bench":0.7431271478,"PerMMLU":0.5097725534,"PerCoR":0.688,"Persian NLU":0.6255818412,"Persian NLG":0.164118288}
{"Model Name":"Qwen3-14B","model_url":"https_google.com","parameters_count":"14800000000","source_type":"Open-Source","Average":0.5912,"Persian IFEval":0.8105726872,"Persian MT-Bench":0.7204545455,"PerMMLU":0.5139458858,"PerCoR":0.6958,"Persian NLU":0.6460328733,"Persian NLG":0.16056333}
{"Model Name":"c4ai-command-r-plus","model_url":"https_google.com","parameters_count":"104000000000","source_type":"Open-Source","Average":0.5705,"Persian IFEval":0.7007407407,"Persian MT-Bench":0.688,"PerMMLU":0.4800723378,"PerCoR":0.7364,"Persian NLU":0.6297634971,"Persian NLG":0.1880477876}
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","model_url":"https_google.com","parameters_count":"24000000000","source_type":"Open-Source","Average":0.5576,"Persian IFEval":0.7526555387,"Persian MT-Bench":0.7290833333,"PerMMLU":0.4763231198,"PerCoR":0.6894,"Persian NLU":0.5661558794,"Persian NLG":0.1319091735}
{"Model Name":"claude-3-5-haiku-20241022","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","Average":0.5546,"Persian IFEval":0.825256975,"Persian MT-Bench":0.7585,"PerMMLU":0.5440356745,"PerCoR":0.7160432086,"Persian NLU":0.3749414991,"Persian NLG":0.1089333827}
{"Model Name":"gpt-4.1-nano","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","Average":0.5524,"Persian IFEval":0.7577092511,"Persian MT-Bench":0.7363333333,"PerMMLU":0.4784030048,"PerCoR":0.5494,"Persian NLU":0.6262096694,"Persian NLG":0.1665903777}
{"Model Name":"Qwen3-8B","model_url":"https_google.com","parameters_count":"8190000000","source_type":"Open-Source","Average":0.5252,"Persian IFEval":0.7474302496,"Persian MT-Bench":0.6607526882,"PerMMLU":0.4467552341,"PerCoR":0.5437,"Persian NLU":0.5968415875,"Persian NLG":0.1557270864}
{"Model Name":"gemma-3-4b-it","model_url":"https_google.com","parameters_count":"4300000000","source_type":"Open-Source","Average":0.4996,"Persian IFEval":0.7444933921,"Persian MT-Bench":0.66825,"PerMMLU":0.3825554705,"PerCoR":0.4832,"Persian NLU":0.6241793507,"Persian NLG":0.0949943578}
{"Model Name":"aya-expanse-32b","model_url":"https_google.com","parameters_count":"32300000000","source_type":"Open-Source","Average":0.4945,"Persian IFEval":0.6989720999,"Persian MT-Bench":0.7085833333,"PerMMLU":0.4140641302,"PerCoR":0.6327,"Persian NLU":0.3928685253,"Persian NLG":0.1196400535}
{"Model Name":"c4ai-command-r-v01","model_url":"https_google.com","parameters_count":"35000000000","source_type":"Open-Source","Average":0.4813,"Persian IFEval":0.5790251108,"Persian MT-Bench":0.6090833333,"PerMMLU":0.4046741323,"PerCoR":0.6,"Persian NLU":0.531045981,"Persian NLG":0.1641995602}
{"Model Name":"Qwen3-4B","model_url":"https_google.com","parameters_count":"4020000000","source_type":"Open-Source","Average":0.4791,"Persian IFEval":0.7577092511,"Persian MT-Bench":0.5599462366,"PerMMLU":0.4025179106,"PerCoR":0.5033,"Persian NLU":0.5121418762,"Persian NLG":0.1389297212}
{"Model Name":"gemma-3-1b-it","model_url":"https_google.com","parameters_count":"1000000000","source_type":"Open-Source","Average":0.3252,"Persian IFEval":0.5447870778,"Persian MT-Bench":0.4333333333,"PerMMLU":0.2830214927,"PerCoR":0.2599,"Persian NLU":0.3619547874,"Persian NLG":0.0682994522}
{"Model Name":"Mistral-7B-Instruct-v0.3","model_url":"https_google.com","parameters_count":"7250000000","source_type":"Open-Source","Average":0.3039,"Persian IFEval":0.4405286344,"Persian MT-Bench":0.3398268398,"PerMMLU":0.2553383877,"PerCoR":0.3015,"Persian NLU":0.3916645306,"Persian NLG":0.0944140383}
{"Model Name":"Llama-3.2-3B-Instruct","model_url":"https_google.com","parameters_count":"3210000000","source_type":"Open-Source","Average":0.2815,"Persian IFEval":0.5330396476,"Persian MT-Bench":0.3756410256,"PerMMLU":0.278430827,"PerCoR":0.2521,"Persian NLU":0.1368924446,"Persian NLG":0.1129755187}
{"Model Name":"Llama-3.2-1B-Instruct","model_url":"https_google.com","parameters_count":"1240000000","source_type":"Open-Source","Average":0.205,"Persian IFEval":0.3656387665,"Persian MT-Bench":0.2952160494,"PerMMLU":0.1987201781,"PerCoR":0.2412,"Persian NLU":0.046805056,"Persian NLG":0.0823387318}
{"Model Name":"o4-mini","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","Average":null,"Persian IFEval":null,"Persian MT-Bench":null,"PerMMLU":null,"PerCoR":0.8551,"Persian NLU":null,"Persian NLG":null}
{"Model Name":"c4ai-command-a-03-2025","model_url":"https_google.com","parameters_count":"111000000000","source_type":"Open-Source","Average":null,"Persian IFEval":0.8438880707,"Persian MT-Bench":0.8219166667,"PerMMLU":0.5980651448,"PerCoR":0.798859772,"Persian NLU":null,"Persian NLG":null}
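all.jsonl is the aggregate board: one column per benchmark plus a precomputed "Average". Models missing any benchmark (o4-mini, c4ai-command-a-03-2025) carry a null Average, which is presumably what handle_nulls_in_averages() in app.py deals with. One possible policy, sketched under the assumption of pandas; the real implementation lives in leaderboard/leaderboard.py and may differ:

    # Sketch of a null-tolerant average over the benchmark columns (assumption, not the shipped logic).
    import pandas as pd

    df = pd.read_json("leaderboard/boards_data/all.jsonl", lines=True)
    benchmarks = ["Persian IFEval", "Persian MT-Bench", "PerMMLU",
                  "PerCoR", "Persian NLU", "Persian NLG"]

    # keep the published Average where present; for rows with missing benchmarks,
    # a partial mean over the available scores is one way to avoid dropping them
    df["partial_average"] = df[benchmarks].mean(axis=1, skipna=True)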
leaderboard/boards_data/extractive-qa_PQuAD.jsonl
ADDED
@@ -0,0 +1,29 @@
{"Model Name":"claude-3-7-sonnet-20250219","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":21.8957345972,"extractive-qa_PQuAD_f1":0.5899280585,"nlu_score":0.7143086066}
{"Model Name":"gemma-3-4b-it","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":29.8578199052,"extractive-qa_PQuAD_f1":0.6483891649,"nlu_score":0.6241793507}
{"Model Name":"c4ai-command-r-plus","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":51.6587677725,"extractive-qa_PQuAD_f1":0.7997294818,"nlu_score":0.6297634971}
{"Model Name":"gpt-4.1","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":16.2085308057,"extractive-qa_PQuAD_f1":0.5540542726,"nlu_score":0.6758278127}
{"Model Name":"o4-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":null,"extractive-qa_PQuAD_f1":null,"nlu_score":null}
{"Model Name":"gemma-3-12b-it","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":37.4407582938,"extractive-qa_PQuAD_f1":0.7121215175,"nlu_score":0.699116864}
{"Model Name":"gemma-3-27b-it","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":24.9289099526,"extractive-qa_PQuAD_f1":0.5952537387,"nlu_score":0.6898261633}
{"Model Name":"Qwen3-14B","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":11.9431279621,"extractive-qa_PQuAD_f1":0.5054306037,"nlu_score":0.6460328733}
{"Model Name":"Qwen3-32B","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":12.0379146919,"extractive-qa_PQuAD_f1":0.5152644082,"nlu_score":0.6714091535}
{"Model Name":"claude-3-5-haiku-20241022","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":1.5165876777,"extractive-qa_PQuAD_f1":0.3221621809,"nlu_score":0.3749414991}
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":13.0805687204,"extractive-qa_PQuAD_f1":0.5111951184,"nlu_score":0.5661558794}
{"Model Name":"deepseek-chat","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":7.0142180095,"extractive-qa_PQuAD_f1":0.4986764425,"nlu_score":0.6752949557}
{"Model Name":"Qwen3-4B","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":20.4739336493,"extractive-qa_PQuAD_f1":0.5660677645,"nlu_score":0.5121418762}
{"Model Name":"gemma-3-1b-it","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":15.6398104265,"extractive-qa_PQuAD_f1":0.4797901431,"nlu_score":0.3619547874}
{"Model Name":"aya-expanse-32b","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":0.8530805687,"extractive-qa_PQuAD_f1":0.3570972648,"nlu_score":0.3928685253}
{"Model Name":"Llama-3.3-70B-Instruct","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":38.2938388626,"extractive-qa_PQuAD_f1":0.7091014157,"nlu_score":0.6800109206}
{"Model Name":"gpt-4.1-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":29.0995260664,"extractive-qa_PQuAD_f1":0.6500014945,"nlu_score":0.6833497104}
{"Model Name":"gpt-4o-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":7.2037914692,"extractive-qa_PQuAD_f1":0.4722142546,"nlu_score":0.6459120734}
{"Model Name":"c4ai-command-a-03-2025","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":null,"extractive-qa_PQuAD_f1":null,"nlu_score":null}
{"Model Name":"gemini-2.0-flash","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":37.4407582938,"extractive-qa_PQuAD_f1":0.6861140935,"nlu_score":0.7050532433}
{"Model Name":"c4ai-command-r-v01","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":41.990521327,"extractive-qa_PQuAD_f1":0.7401025641,"nlu_score":0.531045981}
{"Model Name":"gpt-4.1-nano","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":21.5165876777,"extractive-qa_PQuAD_f1":0.6052090568,"nlu_score":0.6262096694}
{"Model Name":"Qwen3-8B","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":1.8957345972,"extractive-qa_PQuAD_f1":0.4954484984,"nlu_score":0.5968415875}
{"Model Name":"Mistral-7B-Instruct-v0.3","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":0.4739336493,"extractive-qa_PQuAD_f1":0.3440209421,"nlu_score":0.3916645306}
{"Model Name":"gpt-4o","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":17.5355450237,"extractive-qa_PQuAD_f1":0.5641459437,"nlu_score":0.7146808531}
{"Model Name":"deepseek-reasoner","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":36.7772511848,"extractive-qa_PQuAD_f1":0.7059801524,"nlu_score":0.6361186163}
{"Model Name":"Qwen3-30B-A3B","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":1.4218009479,"extractive-qa_PQuAD_f1":0.6109462131,"nlu_score":0.6255818412}
{"Model Name":"Llama-3.2-3B-Instruct","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":2.3696682464,"extractive-qa_PQuAD_f1":0.4003473594,"nlu_score":0.1368924446}
{"Model Name":"Llama-3.2-1B-Instruct","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":0.663507109,"extractive-qa_PQuAD_f1":0.3378125221,"nlu_score":0.046805056}
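Note the mixed scales in this file: extractive-qa_PQuAD_exact_match is reported on a 0-100 scale while extractive-qa_PQuAD_f1 and nlu_score are 0-1 fractions, so the columns need normalizing before any combined score. A short sketch, assuming pandas and a simple rescale:

    # Sketch: bring exact match onto the 0-1 scale used by f1/nlu_score (assumption: plain division).
    import pandas as pd

    df = pd.read_json("leaderboard/boards_data/extractive-qa_PQuAD.jsonl", lines=True)
    df["extractive-qa_PQuAD_exact_match"] /= 100.0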
leaderboard/boards_data/ifeval.jsonl
ADDED
@@ -0,0 +1,29 @@
|
{"Model Name":"claude-3-7-sonnet-20250219","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","strict_prompt_accuracy":0.8336980306,"strict_instruction_accuracy":0.8810572687,"loose_prompt_accuracy":0.8774617068,"loose_instruction_accuracy":0.9148311307,"strict_combination_category":0.8307692308,"strict_detectable_content_category":0.9782608696,"strict_detectable_format_category":0.8775510204,"strict_keywords_category":0.8965517241,"strict_language_category":1.0,"strict_length_constraints_category":0.756097561,"strict_punctuation_category":0.9508196721,"strict_startend_category":0.9523809524,"loose_combination_category":0.8461538462,"loose_detectable_content_category":0.9782608696,"loose_detectable_format_category":0.8775510204,"loose_keywords_category":0.9448275862,"loose_language_category":1.0,"loose_length_constraints_category":0.8536585366,"loose_punctuation_category":0.9836065574,"loose_startend_category":0.9682539683}
{"Model Name":"gpt-4.1","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","strict_prompt_accuracy":0.8140043764,"strict_instruction_accuracy":0.8634361233,"loose_prompt_accuracy":0.8512035011,"loose_instruction_accuracy":0.8942731278,"strict_combination_category":0.7846153846,"strict_detectable_content_category":0.9347826087,"strict_detectable_format_category":0.8911564626,"strict_keywords_category":0.8482758621,"strict_language_category":1.0,"strict_length_constraints_category":0.7804878049,"strict_punctuation_category":0.868852459,"strict_startend_category":0.9523809524,"loose_combination_category":0.8153846154,"loose_detectable_content_category":0.9347826087,"loose_detectable_format_category":0.9047619048,"loose_keywords_category":0.9103448276,"loose_language_category":1.0,"loose_length_constraints_category":0.837398374,"loose_punctuation_category":0.8852459016,"loose_startend_category":0.9523809524}
{"Model Name":"gemini-2.0-flash","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","strict_prompt_accuracy":0.7916666667,"strict_instruction_accuracy":0.8497790869,"loose_prompt_accuracy":0.8245614035,"loose_instruction_accuracy":0.8777614138,"strict_combination_category":0.6875,"strict_detectable_content_category":0.9130434783,"strict_detectable_format_category":0.9047619048,"strict_keywords_category":0.7916666667,"strict_language_category":1.0,"strict_length_constraints_category":0.7642276423,"strict_punctuation_category":0.9672131148,"strict_startend_category":0.9523809524,"loose_combination_category":0.703125,"loose_detectable_content_category":0.9130434783,"loose_detectable_format_category":0.9047619048,"loose_keywords_category":0.8541666667,"loose_language_category":1.0,"loose_length_constraints_category":0.837398374,"loose_punctuation_category":0.9672131148,"loose_startend_category":0.9523809524}
{"Model Name":"c4ai-command-a-03-2025","model_url":"https_google.com","parameters_count":"111000000000","source_type":"Open-Source","strict_prompt_accuracy":0.7802197802,"strict_instruction_accuracy":0.8438880707,"loose_prompt_accuracy":0.832967033,"loose_instruction_accuracy":0.88365243,"strict_combination_category":0.8461538462,"strict_detectable_content_category":0.9347826087,"strict_detectable_format_category":0.8979591837,"strict_keywords_category":0.7793103448,"strict_language_category":1.0,"strict_length_constraints_category":0.7685950413,"strict_punctuation_category":0.8032786885,"strict_startend_category":0.9047619048,"loose_combination_category":0.8461538462,"loose_detectable_content_category":0.9347826087,"loose_detectable_format_category":0.9115646259,"loose_keywords_category":0.8482758621,"loose_language_category":1.0,"loose_length_constraints_category":0.8429752066,"loose_punctuation_category":0.868852459,"loose_startend_category":0.9365079365}
{"Model Name":"deepseek-reasoner","model_url":"https_google.com","parameters_count":"671000000000","source_type":"Open-Source","strict_prompt_accuracy":0.7702407002,"strict_instruction_accuracy":0.8370044053,"loose_prompt_accuracy":0.8140043764,"loose_instruction_accuracy":0.8707782673,"strict_combination_category":0.7384615385,"strict_detectable_content_category":0.9782608696,"strict_detectable_format_category":0.8503401361,"strict_keywords_category":0.7862068966,"strict_language_category":1.0,"strict_length_constraints_category":0.756097561,"strict_punctuation_category":0.9836065574,"strict_startend_category":0.8571428571,"loose_combination_category":0.7538461538,"loose_detectable_content_category":0.9782608696,"loose_detectable_format_category":0.8571428571,"loose_keywords_category":0.8551724138,"loose_language_category":1.0,"loose_length_constraints_category":0.8048780488,"loose_punctuation_category":1.0,"loose_startend_category":0.9206349206}
{"Model Name":"gpt-4.1-mini","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","strict_prompt_accuracy":0.772428884,"strict_instruction_accuracy":0.8340675477,"loose_prompt_accuracy":0.7986870897,"loose_instruction_accuracy":0.8575624082,"strict_combination_category":0.6923076923,"strict_detectable_content_category":0.847826087,"strict_detectable_format_category":0.8775510204,"strict_keywords_category":0.8344827586,"strict_language_category":0.9677419355,"strict_length_constraints_category":0.7154471545,"strict_punctuation_category":0.9180327869,"strict_startend_category":0.9523809524,"loose_combination_category":0.7230769231,"loose_detectable_content_category":0.847826087,"loose_detectable_format_category":0.8843537415,"loose_keywords_category":0.8827586207,"loose_language_category":0.9677419355,"loose_length_constraints_category":0.756097561,"loose_punctuation_category":0.9344262295,"loose_startend_category":0.9523809524}
{"Model Name":"Qwen3-30B-A3B","model_url":"https_google.com","parameters_count":"30500000000","source_type":"Open-Source","strict_prompt_accuracy":0.7662037037,"strict_instruction_accuracy":0.8325508607,"loose_prompt_accuracy":0.8078703704,"loose_instruction_accuracy":0.8638497653,"strict_combination_category":0.6349206349,"strict_detectable_content_category":0.8837209302,"strict_detectable_format_category":0.9136690647,"strict_keywords_category":0.7954545455,"strict_language_category":0.9655172414,"strict_length_constraints_category":0.7192982456,"strict_punctuation_category":0.9655172414,"strict_startend_category":0.9180327869,"loose_combination_category":0.7936507937,"loose_detectable_content_category":0.8837209302,"loose_detectable_format_category":0.928057554,"loose_keywords_category":0.8257575758,"loose_language_category":0.9655172414,"loose_length_constraints_category":0.7543859649,"loose_punctuation_category":0.9655172414,"loose_startend_category":0.9180327869}
{"Model Name":"deepseek-chat","model_url":"https_google.com","parameters_count":"671000000000","source_type":"Open-Source","strict_prompt_accuracy":0.7702407002,"strict_instruction_accuracy":0.8311306902,"loose_prompt_accuracy":0.8205689278,"loose_instruction_accuracy":0.8693098385,"strict_combination_category":0.7846153846,"strict_detectable_content_category":1.0,"strict_detectable_format_category":0.8503401361,"strict_keywords_category":0.8,"strict_language_category":1.0,"strict_length_constraints_category":0.6666666667,"strict_punctuation_category":0.9672131148,"strict_startend_category":0.8888888889,"loose_combination_category":0.8153846154,"loose_detectable_content_category":1.0,"loose_detectable_format_category":0.8639455782,"loose_keywords_category":0.8551724138,"loose_language_category":1.0,"loose_length_constraints_category":0.7479674797,"loose_punctuation_category":0.9836065574,"loose_startend_category":0.9365079365}
{"Model Name":"gpt-4o","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","strict_prompt_accuracy":0.772428884,"strict_instruction_accuracy":0.8296622614,"loose_prompt_accuracy":0.8140043764,"loose_instruction_accuracy":0.8649045521,"strict_combination_category":0.8,"strict_detectable_content_category":0.9782608696,"strict_detectable_format_category":0.8911564626,"strict_keywords_category":0.7793103448,"strict_language_category":1.0,"strict_length_constraints_category":0.6829268293,"strict_punctuation_category":0.8360655738,"strict_startend_category":0.9206349206,"loose_combination_category":0.8153846154,"loose_detectable_content_category":0.9782608696,"loose_detectable_format_category":0.9115646259,"loose_keywords_category":0.8344827586,"loose_language_category":1.0,"loose_length_constraints_category":0.7479674797,"loose_punctuation_category":0.8852459016,"loose_startend_category":0.9365079365}
{"Model Name":"gemma-3-27b-it","model_url":"https_google.com","parameters_count":"27400000000","source_type":"Open-Source","strict_prompt_accuracy":0.7636761488,"strict_instruction_accuracy":0.8296622614,"loose_prompt_accuracy":0.8052516411,"loose_instruction_accuracy":0.8634361233,"strict_combination_category":0.7230769231,"strict_detectable_content_category":0.8913043478,"strict_detectable_format_category":0.8095238095,"strict_keywords_category":0.7931034483,"strict_language_category":1.0,"strict_length_constraints_category":0.7886178862,"strict_punctuation_category":0.9836065574,"strict_startend_category":0.873015873,"loose_combination_category":0.7230769231,"loose_detectable_content_category":0.8913043478,"loose_detectable_format_category":0.8095238095,"loose_keywords_category":0.875862069,"loose_language_category":1.0,"loose_length_constraints_category":0.8780487805,"loose_punctuation_category":0.9836065574,"loose_startend_category":0.873015873}
{"Model Name":"claude-3-5-haiku-20241022","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","strict_prompt_accuracy":0.761487965,"strict_instruction_accuracy":0.825256975,"loose_prompt_accuracy":0.7877461707,"loose_instruction_accuracy":0.8516886931,"strict_combination_category":0.7384615385,"strict_detectable_content_category":0.9565217391,"strict_detectable_format_category":0.8571428571,"strict_keywords_category":0.8413793103,"strict_language_category":0.9677419355,"strict_length_constraints_category":0.6097560976,"strict_punctuation_category":1.0,"strict_startend_category":0.8888888889,"loose_combination_category":0.7538461538,"loose_detectable_content_category":0.9565217391,"loose_detectable_format_category":0.8775510204,"loose_keywords_category":0.875862069,"loose_language_category":0.9677419355,"loose_length_constraints_category":0.6666666667,"loose_punctuation_category":1.0,"loose_startend_category":0.9206349206}
{"Model Name":"gemma-3-12b-it","model_url":"https_google.com","parameters_count":"12200000000","source_type":"Open-Source","strict_prompt_accuracy":0.7396061269,"strict_instruction_accuracy":0.8149779736,"loose_prompt_accuracy":0.7877461707,"loose_instruction_accuracy":0.8516886931,"strict_combination_category":0.6923076923,"strict_detectable_content_category":0.9130434783,"strict_detectable_format_category":0.8911564626,"strict_keywords_category":0.7586206897,"strict_language_category":0.9677419355,"strict_length_constraints_category":0.7479674797,"strict_punctuation_category":0.8032786885,"strict_startend_category":0.8888888889,"loose_combination_category":0.6923076923,"loose_detectable_content_category":0.9130434783,"loose_detectable_format_category":0.8911564626,"loose_keywords_category":0.8344827586,"loose_language_category":1.0,"loose_length_constraints_category":0.8292682927,"loose_punctuation_category":0.8360655738,"loose_startend_category":0.9047619048}
{"Model Name":"Qwen3-14B","model_url":"https_google.com","parameters_count":"14800000000","source_type":"Open-Source","strict_prompt_accuracy":0.7483588621,"strict_instruction_accuracy":0.8105726872,"loose_prompt_accuracy":0.7899343545,"loose_instruction_accuracy":0.845814978,"strict_combination_category":0.7230769231,"strict_detectable_content_category":0.8913043478,"strict_detectable_format_category":0.8979591837,"strict_keywords_category":0.7724137931,"strict_language_category":0.935483871,"strict_length_constraints_category":0.7479674797,"strict_punctuation_category":0.7049180328,"strict_startend_category":0.8888888889,"loose_combination_category":0.7692307692,"loose_detectable_content_category":0.8913043478,"loose_detectable_format_category":0.9115646259,"loose_keywords_category":0.8413793103,"loose_language_category":0.935483871,"loose_length_constraints_category":0.7642276423,"loose_punctuation_category":0.7540983607,"loose_startend_category":0.9523809524}
{"Model Name":"Qwen3-32B","model_url":"https_google.com","parameters_count":"32800000000","source_type":"Open-Source","strict_prompt_accuracy":0.7342342342,"strict_instruction_accuracy":0.803030303,"loose_prompt_accuracy":0.786036036,"loose_instruction_accuracy":0.846969697,"strict_combination_category":0.625,"strict_detectable_content_category":0.9111111111,"strict_detectable_format_category":0.8951048951,"strict_keywords_category":0.7785714286,"strict_language_category":0.9677419355,"strict_length_constraints_category":0.6140350877,"strict_punctuation_category":0.9333333333,"strict_startend_category":0.8888888889,"loose_combination_category":0.6875,"loose_detectable_content_category":0.9111111111,"loose_detectable_format_category":0.9090909091,"loose_keywords_category":0.85,"loose_language_category":1.0,"loose_length_constraints_category":0.6666666667,"loose_punctuation_category":0.95,"loose_startend_category":0.9682539683}
{"Model Name":"gpt-4o-mini","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","strict_prompt_accuracy":0.7308533917,"strict_instruction_accuracy":0.8017621145,"loose_prompt_accuracy":0.772428884,"loose_instruction_accuracy":0.8355359765,"strict_combination_category":0.7384615385,"strict_detectable_content_category":0.8913043478,"strict_detectable_format_category":0.843537415,"strict_keywords_category":0.7379310345,"strict_language_category":0.9677419355,"strict_length_constraints_category":0.6666666667,"strict_punctuation_category":0.9016393443,"strict_startend_category":0.9365079365,"loose_combination_category":0.7538461538,"loose_detectable_content_category":0.8913043478,"loose_detectable_format_category":0.8775510204,"loose_keywords_category":0.7793103448,"loose_language_category":0.9677419355,"loose_length_constraints_category":0.7398373984,"loose_punctuation_category":0.9344262295,"loose_startend_category":0.9365079365}
{"Model Name":"Qwen3-4B","model_url":"https_google.com","parameters_count":"4020000000","source_type":"Open-Source","strict_prompt_accuracy":0.6717724289,"strict_instruction_accuracy":0.7577092511,"loose_prompt_accuracy":0.7264770241,"loose_instruction_accuracy":0.798825257,"strict_combination_category":0.5384615385,"strict_detectable_content_category":0.8913043478,"strict_detectable_format_category":0.9047619048,"strict_keywords_category":0.7172413793,"strict_language_category":0.8387096774,"strict_length_constraints_category":0.6666666667,"strict_punctuation_category":0.8196721311,"strict_startend_category":0.7142857143,"loose_combination_category":0.5692307692,"loose_detectable_content_category":0.8913043478,"loose_detectable_format_category":0.9115646259,"loose_keywords_category":0.7931034483,"loose_language_category":0.8709677419,"loose_length_constraints_category":0.7235772358,"loose_punctuation_category":0.8852459016,"loose_startend_category":0.746031746}
{"Model Name":"gpt-4.1-nano","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","strict_prompt_accuracy":0.6827133479,"strict_instruction_accuracy":0.7577092511,"loose_prompt_accuracy":0.7199124726,"loose_instruction_accuracy":0.7885462555,"strict_combination_category":0.5846153846,"strict_detectable_content_category":0.7608695652,"strict_detectable_format_category":0.8027210884,"strict_keywords_category":0.7793103448,"strict_language_category":0.9677419355,"strict_length_constraints_category":0.6829268293,"strict_punctuation_category":0.7704918033,"strict_startend_category":0.8095238095,"loose_combination_category":0.6153846154,"loose_detectable_content_category":0.7608695652,"loose_detectable_format_category":0.8095238095,"loose_keywords_category":0.8344827586,"loose_language_category":0.9677419355,"loose_length_constraints_category":0.7154471545,"loose_punctuation_category":0.8360655738,"loose_startend_category":0.8412698413}
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","model_url":"https_google.com","parameters_count":"24000000000","source_type":"Open-Source","strict_prompt_accuracy":0.6644144144,"strict_instruction_accuracy":0.7526555387,"loose_prompt_accuracy":0.7274774775,"loose_instruction_accuracy":0.7996965099,"strict_combination_category":0.6153846154,"strict_detectable_content_category":0.8444444444,"strict_detectable_format_category":0.8014184397,"strict_keywords_category":0.7642857143,"strict_language_category":0.9677419355,"strict_length_constraints_category":0.641025641,"strict_punctuation_category":0.7333333333,"strict_startend_category":0.8166666667,"loose_combination_category":0.6615384615,"loose_detectable_content_category":0.8444444444,"loose_detectable_format_category":0.8156028369,"loose_keywords_category":0.8071428571,"loose_language_category":0.9677419355,"loose_length_constraints_category":0.7606837607,"loose_punctuation_category":0.8,"loose_startend_category":0.85}
{"Model Name":"Qwen3-8B","model_url":"https_google.com","parameters_count":"8190000000","source_type":"Open-Source","strict_prompt_accuracy":0.6542669584,"strict_instruction_accuracy":0.7474302496,"loose_prompt_accuracy":0.7089715536,"loose_instruction_accuracy":0.7900146843,"strict_combination_category":0.5846153846,"strict_detectable_content_category":0.8260869565,"strict_detectable_format_category":0.8775510204,"strict_keywords_category":0.6689655172,"strict_language_category":0.9032258065,"strict_length_constraints_category":0.6666666667,"strict_punctuation_category":0.7213114754,"strict_startend_category":0.8412698413,"loose_combination_category":0.6461538462,"loose_detectable_content_category":0.8260869565,"loose_detectable_format_category":0.8843537415,"loose_keywords_category":0.7655172414,"loose_language_category":0.9032258065,"loose_length_constraints_category":0.6991869919,"loose_punctuation_category":0.7868852459,"loose_startend_category":0.873015873}
{"Model Name":"gemma-3-4b-it","model_url":"https_google.com","parameters_count":"4300000000","source_type":"Open-Source","strict_prompt_accuracy":0.6652078775,"strict_instruction_accuracy":0.7444933921,"loose_prompt_accuracy":0.7177242888,"loose_instruction_accuracy":0.7914831131,"strict_combination_category":0.5076923077,"strict_detectable_content_category":0.8913043478,"strict_detectable_format_category":0.8299319728,"strict_keywords_category":0.7172413793,"strict_language_category":1.0,"strict_length_constraints_category":0.6829268293,"strict_punctuation_category":0.7540983607,"strict_startend_category":0.7301587302,"loose_combination_category":0.5538461538,"loose_detectable_content_category":0.8913043478,"loose_detectable_format_category":0.8367346939,"loose_keywords_category":0.7931034483,"loose_language_category":1.0,"loose_length_constraints_category":0.7804878049,"loose_punctuation_category":0.8032786885,"loose_startend_category":0.7619047619}
{"Model Name":"Llama-3.3-70B-Instruct","model_url":"https_google.com","parameters_count":"70600000000","source_type":"Open-Source","strict_prompt_accuracy":0.6208425721,"strict_instruction_accuracy":0.7125925926,"loose_prompt_accuracy":0.6518847007,"loose_instruction_accuracy":0.7392592593,"strict_combination_category":0.65625,"strict_detectable_content_category":0.8043478261,"strict_detectable_format_category":0.8333333333,"strict_keywords_category":0.7342657343,"strict_language_category":0.9677419355,"strict_length_constraints_category":0.5365853659,"strict_punctuation_category":0.7213114754,"strict_startend_category":0.5873015873,"loose_combination_category":0.65625,"loose_detectable_content_category":0.8043478261,"loose_detectable_format_category":0.8402777778,"loose_keywords_category":0.7692307692,"loose_language_category":0.9677419355,"loose_length_constraints_category":0.593495935,"loose_punctuation_category":0.7540983607,"loose_startend_category":0.6349206349}
{"Model Name":"c4ai-command-r-plus","model_url":"https_google.com","parameters_count":"104000000000","source_type":"Open-Source","strict_prompt_accuracy":0.6114790287,"strict_instruction_accuracy":0.7007407407,"loose_prompt_accuracy":0.6379690949,"loose_instruction_accuracy":0.7274074074,"strict_combination_category":0.5846153846,"strict_detectable_content_category":0.8,"strict_detectable_format_category":0.801369863,"strict_keywords_category":0.7872340426,"strict_language_category":0.6774193548,"strict_length_constraints_category":0.6422764228,"strict_punctuation_category":0.6229508197,"strict_startend_category":0.5238095238,"loose_combination_category":0.5846153846,"loose_detectable_content_category":0.8,"loose_detectable_format_category":0.8287671233,"loose_keywords_category":0.8156028369,"loose_language_category":0.6774193548,"loose_length_constraints_category":0.7154471545,"loose_punctuation_category":0.6393442623,"loose_startend_category":0.5238095238}
{"Model Name":"aya-expanse-32b","model_url":"https_google.com","parameters_count":"32300000000","source_type":"Open-Source","strict_prompt_accuracy":0.6126914661,"strict_instruction_accuracy":0.6989720999,"loose_prompt_accuracy":0.6805251641,"loose_instruction_accuracy":0.7547723935,"strict_combination_category":0.5230769231,"strict_detectable_content_category":0.7608695652,"strict_detectable_format_category":0.8571428571,"strict_keywords_category":0.7172413793,"strict_language_category":1.0,"strict_length_constraints_category":0.593495935,"strict_punctuation_category":0.2950819672,"strict_startend_category":0.873015873,"loose_combination_category":0.6307692308,"loose_detectable_content_category":0.7608695652,"loose_detectable_format_category":0.8979591837,"loose_keywords_category":0.7379310345,"loose_language_category":1.0,"loose_length_constraints_category":0.7317073171,"loose_punctuation_category":0.3606557377,"loose_startend_category":0.8888888889}
{"Model Name":"c4ai-command-r-v01","model_url":"https_google.com","parameters_count":"35000000000","source_type":"Open-Source","strict_prompt_accuracy":0.4835164835,"strict_instruction_accuracy":0.5790251108,"loose_prompt_accuracy":0.5384615385,"loose_instruction_accuracy":0.6203840473,"strict_combination_category":0.3384615385,"strict_detectable_content_category":0.6444444444,"strict_detectable_format_category":0.7414965986,"strict_keywords_category":0.7062937063,"strict_language_category":0.6451612903,"strict_length_constraints_category":0.6260162602,"strict_punctuation_category":0.1333333333,"strict_startend_category":0.4126984127,"loose_combination_category":0.4,"loose_detectable_content_category":0.6444444444,"loose_detectable_format_category":0.7551020408,"loose_keywords_category":0.7412587413,"loose_language_category":0.6451612903,"loose_length_constraints_category":0.6910569106,"loose_punctuation_category":0.2166666667,"loose_startend_category":0.4761904762}
{"Model Name":"gemma-3-1b-it","model_url":"https_google.com","parameters_count":"1000000000","source_type":"Open-Source","strict_prompt_accuracy":0.4376367615,"strict_instruction_accuracy":0.5447870778,"loose_prompt_accuracy":0.4748358862,"loose_instruction_accuracy":0.5814977974,"strict_combination_category":0.1384615385,"strict_detectable_content_category":0.7608695652,"strict_detectable_format_category":0.768707483,"strict_keywords_category":0.5862068966,"strict_language_category":0.8709677419,"strict_length_constraints_category":0.5853658537,"strict_punctuation_category":0.1803278689,"strict_startend_category":0.3015873016,"loose_combination_category":0.1384615385,"loose_detectable_content_category":0.7608695652,"loose_detectable_format_category":0.768707483,"loose_keywords_category":0.6413793103,"loose_language_category":0.9032258065,"loose_length_constraints_category":0.6829268293,"loose_punctuation_category":0.2295081967,"loose_startend_category":0.3174603175}
{"Model Name":"Llama-3.2-3B-Instruct","model_url":"https_google.com","parameters_count":"3210000000","source_type":"Open-Source","strict_prompt_accuracy":0.4245076586,"strict_instruction_accuracy":0.5330396476,"loose_prompt_accuracy":0.4792122538,"loose_instruction_accuracy":0.5814977974,"strict_combination_category":0.2461538462,"strict_detectable_content_category":0.5217391304,"strict_detectable_format_category":0.6258503401,"strict_keywords_category":0.6137931034,"strict_language_category":0.5806451613,"strict_length_constraints_category":0.5772357724,"strict_punctuation_category":0.5409836066,"strict_startend_category":0.3174603175,"loose_combination_category":0.3230769231,"loose_detectable_content_category":0.5217391304,"loose_detectable_format_category":0.6462585034,"loose_keywords_category":0.6551724138,"loose_language_category":0.5806451613,"loose_length_constraints_category":0.674796748,"loose_punctuation_category":0.606557377,"loose_startend_category":0.3650793651}
{"Model Name":"Mistral-7B-Instruct-v0.3","model_url":"https_google.com","parameters_count":"7250000000","source_type":"Open-Source","strict_prompt_accuracy":0.3369803063,"strict_instruction_accuracy":0.4405286344,"loose_prompt_accuracy":0.398249453,"loose_instruction_accuracy":0.4948604993,"strict_combination_category":0.0307692308,"strict_detectable_content_category":0.5217391304,"strict_detectable_format_category":0.5034013605,"strict_keywords_category":0.6068965517,"strict_language_category":0.9032258065,"strict_length_constraints_category":0.4146341463,"strict_punctuation_category":0.2459016393,"strict_startend_category":0.2857142857,"loose_combination_category":0.0923076923,"loose_detectable_content_category":0.5217391304,"loose_detectable_format_category":0.5170068027,"loose_keywords_category":0.6482758621,"loose_language_category":0.9032258065,"loose_length_constraints_category":0.5447154472,"loose_punctuation_category":0.3278688525,"loose_startend_category":0.3492063492}
{"Model Name":"Llama-3.2-1B-Instruct","model_url":"https_google.com","parameters_count":"1240000000","source_type":"Open-Source","strict_prompt_accuracy":0.2516411379,"strict_instruction_accuracy":0.3656387665,"loose_prompt_accuracy":0.284463895,"loose_instruction_accuracy":0.4111600587,"strict_combination_category":0.0615384615,"strict_detectable_content_category":0.3913043478,"strict_detectable_format_category":0.3945578231,"strict_keywords_category":0.4,"strict_language_category":0.5161290323,"strict_length_constraints_category":0.4959349593,"strict_punctuation_category":0.3606557377,"strict_startend_category":0.1904761905,"loose_combination_category":0.0615384615,"loose_detectable_content_category":0.3913043478,"loose_detectable_format_category":0.4013605442,"loose_keywords_category":0.4551724138,"loose_language_category":0.5161290323,"loose_length_constraints_category":0.5691056911,"loose_punctuation_category":0.4918032787,"loose_startend_category":0.2698412698}
{"Model Name":"o4-mini","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","strict_prompt_accuracy":null,"strict_instruction_accuracy":null,"loose_prompt_accuracy":null,"loose_instruction_accuracy":null,"strict_combination_category":null,"strict_detectable_content_category":null,"strict_detectable_format_category":null,"strict_keywords_category":null,"strict_language_category":null,"strict_length_constraints_category":null,"strict_punctuation_category":null,"strict_startend_category":null,"loose_combination_category":null,"loose_detectable_content_category":null,"loose_detectable_format_category":null,"loose_keywords_category":null,"loose_language_category":null,"loose_length_constraints_category":null,"loose_punctuation_category":null,"loose_startend_category":null}
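Each record in this board follows the same schema: strict and loose prompt-level and instruction-level accuracies plus per-category scores, with null for models that have not been evaluated yet. As a minimal sketch of how such a board file can be consumed (the path and metric name are taken from the records above; this is not the app's actual loading code):

import json

def load_board(path):
    # Read one boards_data JSONL file into a list of dicts (one per model).
    with open(path, encoding="utf-8") as f:
        return [json.loads(line) for line in f if line.strip()]

# Rank models by strict prompt accuracy, skipping not-yet-evaluated rows.
rows = load_board("leaderboard/boards_data/ifeval.jsonl")
scored = [r for r in rows if r["strict_prompt_accuracy"] is not None]
scored.sort(key=lambda r: r["strict_prompt_accuracy"], reverse=True)
for rank, row in enumerate(scored, 1):
    print(rank, row["Model Name"], round(row["strict_prompt_accuracy"], 4))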
leaderboard/boards_data/keyword-extraction_SynKeywords.jsonl
ADDED
@@ -0,0 +1,29 @@
{"Model Name":"claude-3-7-sonnet-20250219","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.2115068728,"keyword-extraction_SynKeywords_precision_mean":0.1912410205,"keyword-extraction_SynKeywords_recall_mean":0.2483695652,"nlu_score":0.7143086066}
{"Model Name":"gemma-3-4b-it","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.229921048,"keyword-extraction_SynKeywords_precision_mean":0.21147343,"keyword-extraction_SynKeywords_recall_mean":0.2634963768,"nlu_score":0.6241793507}
{"Model Name":"c4ai-command-r-plus","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.0860842686,"keyword-extraction_SynKeywords_precision_mean":0.0757882818,"keyword-extraction_SynKeywords_recall_mean":0.1065217391,"nlu_score":0.6297634971}
{"Model Name":"gpt-4.1","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.1217550899,"keyword-extraction_SynKeywords_precision_mean":0.1020894964,"keyword-extraction_SynKeywords_recall_mean":0.1608695652,"nlu_score":0.6758278127}
{"Model Name":"o4-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":null,"keyword-extraction_SynKeywords_precision_mean":null,"keyword-extraction_SynKeywords_recall_mean":null,"nlu_score":null}
{"Model Name":"gemma-3-12b-it","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.3352048805,"keyword-extraction_SynKeywords_precision_mean":0.2914121808,"keyword-extraction_SynKeywords_recall_mean":0.4166666667,"nlu_score":0.699116864}
{"Model Name":"gemma-3-27b-it","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.2384077673,"keyword-extraction_SynKeywords_precision_mean":0.2041836259,"keyword-extraction_SynKeywords_recall_mean":0.3015398551,"nlu_score":0.6898261633}
{"Model Name":"Qwen3-14B","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.233766167,"keyword-extraction_SynKeywords_precision_mean":0.1893302534,"keyword-extraction_SynKeywords_recall_mean":0.3297101449,"nlu_score":0.6460328733}
{"Model Name":"Qwen3-32B","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.1369232983,"keyword-extraction_SynKeywords_precision_mean":0.1117212542,"keyword-extraction_SynKeywords_recall_mean":0.1863224638,"nlu_score":0.6714091535}
{"Model Name":"claude-3-5-haiku-20241022","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.2568096145,"keyword-extraction_SynKeywords_precision_mean":0.2483731877,"keyword-extraction_SynKeywords_recall_mean":0.2765873016,"nlu_score":0.3749414991}
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.1942845429,"keyword-extraction_SynKeywords_precision_mean":0.168197784,"keyword-extraction_SynKeywords_recall_mean":0.2451992754,"nlu_score":0.5661558794}
{"Model Name":"deepseek-chat","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.261926093,"keyword-extraction_SynKeywords_precision_mean":0.2173028298,"keyword-extraction_SynKeywords_recall_mean":0.3492753623,"nlu_score":0.6752949557}
{"Model Name":"Qwen3-4B","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.2175605056,"keyword-extraction_SynKeywords_precision_mean":0.1768294437,"keyword-extraction_SynKeywords_recall_mean":0.3029891304,"nlu_score":0.5121418762}
{"Model Name":"gemma-3-1b-it","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.2097414246,"keyword-extraction_SynKeywords_precision_mean":0.1802822781,"keyword-extraction_SynKeywords_recall_mean":0.2621376812,"nlu_score":0.3619547874}
{"Model Name":"aya-expanse-32b","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.2199224821,"keyword-extraction_SynKeywords_precision_mean":0.1924904051,"keyword-extraction_SynKeywords_recall_mean":0.2695652174,"nlu_score":0.3928685253}
{"Model Name":"Llama-3.3-70B-Instruct","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.2876907753,"keyword-extraction_SynKeywords_precision_mean":0.2733133111,"keyword-extraction_SynKeywords_recall_mean":0.322192029,"nlu_score":0.6800109206}
{"Model Name":"gpt-4.1-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.1856116909,"keyword-extraction_SynKeywords_precision_mean":0.157770465,"keyword-extraction_SynKeywords_recall_mean":0.2412137681,"nlu_score":0.6833497104}
{"Model Name":"gpt-4o-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.2783400189,"keyword-extraction_SynKeywords_precision_mean":0.2250927598,"keyword-extraction_SynKeywords_recall_mean":0.3842391304,"nlu_score":0.6459120734}
{"Model Name":"c4ai-command-a-03-2025","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":null,"keyword-extraction_SynKeywords_precision_mean":null,"keyword-extraction_SynKeywords_recall_mean":null,"nlu_score":null}
{"Model Name":"gemini-2.0-flash","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.205802897,"keyword-extraction_SynKeywords_precision_mean":0.1860666658,"keyword-extraction_SynKeywords_recall_mean":0.2421195652,"nlu_score":0.7050532433}
{"Model Name":"c4ai-command-r-v01","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.0012077295,"keyword-extraction_SynKeywords_precision_mean":0.0013586957,"keyword-extraction_SynKeywords_recall_mean":0.0010869565,"nlu_score":0.531045981}
{"Model Name":"gpt-4.1-nano","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.1690961,"keyword-extraction_SynKeywords_precision_mean":0.1495665943,"keyword-extraction_SynKeywords_recall_mean":0.2049818841,"nlu_score":0.6262096694}
{"Model Name":"Qwen3-8B","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.2080704644,"keyword-extraction_SynKeywords_precision_mean":0.1673321849,"keyword-extraction_SynKeywords_recall_mean":0.294384058,"nlu_score":0.5968415875}
{"Model Name":"Mistral-7B-Instruct-v0.3","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.2510623051,"keyword-extraction_SynKeywords_precision_mean":0.1899292026,"keyword-extraction_SynKeywords_recall_mean":0.4099637681,"nlu_score":0.3916645306}
{"Model Name":"gpt-4o","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.2160808904,"keyword-extraction_SynKeywords_precision_mean":0.1901842722,"keyword-extraction_SynKeywords_recall_mean":0.2683876812,"nlu_score":0.7146808531}
{"Model Name":"deepseek-reasoner","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.3267802104,"keyword-extraction_SynKeywords_precision_mean":0.2985915051,"keyword-extraction_SynKeywords_recall_mean":0.3825181159,"nlu_score":0.6361186163}
{"Model Name":"Qwen3-30B-A3B","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.2600489896,"keyword-extraction_SynKeywords_precision_mean":0.2150796745,"keyword-extraction_SynKeywords_recall_mean":0.3497282609,"nlu_score":0.6255818412}
{"Model Name":"Llama-3.2-3B-Instruct","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.0224485659,"keyword-extraction_SynKeywords_precision_mean":0.0230331263,"keyword-extraction_SynKeywords_recall_mean":0.022826087,"nlu_score":0.1368924446}
{"Model Name":"Llama-3.2-1B-Instruct","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.0,"keyword-extraction_SynKeywords_precision_mean":0.0,"keyword-extraction_SynKeywords_recall_mean":0.0,"nlu_score":0.046805056}
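Each SynKeywords record stores mean per-example precision, recall, and F1; since keyword-extraction_SynKeywords_f1_mean averages per-example F1 values, it is not in general the harmonic mean of the precision and recall means. A hypothetical per-example scorer consistent with these columns (the exact set-overlap matching rule is an assumption, not this repo's evaluation code):

def keyword_prf(predicted, gold):
    # Exact set-overlap matching; a real evaluator may normalize tokens first.
    pred, ref = set(predicted), set(gold)
    tp = len(pred & ref)
    precision = tp / len(pred) if pred else 0.0
    recall = tp / len(ref) if ref else 0.0
    f1 = 2 * precision * recall / (precision + recall) if tp else 0.0
    return precision, recall, f1

# Two of three predicted keywords match the gold set -> P = R = F1 = 2/3.
print(keyword_prf(["tehran", "oil", "economy"], ["tehran", "oil", "inflation"]))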
leaderboard/boards_data/mt_bench.jsonl
ADDED
@@ -0,0 +1,29 @@
{"Model Name":"gpt-4.1","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","score_w_mean":8.7428571429,"score_mean":8.7325,"writing_score_w_mean":8.4074074074,"writing_score_mean":8.3833333333,"roleplay_score_w_mean":8.6071428571,"roleplay_score_mean":8.625,"reasoning_score_w_mean":9.0666666667,"reasoning_score_mean":8.9666666667,"math_score_w_mean":9.7826086957,"math_score_mean":9.75,"coding_score_w_mean":8.15,"coding_score_mean":8.15,"extraction_score_w_mean":8.45,"extraction_score_mean":8.45,"stem_score_w_mean":8.7,"stem_score_mean":8.7,"humanities_score_w_mean":9.1,"humanities_score_mean":9.1,"persian_general_knowledge_score_w_mean":7.7777777778,"persian_general_knowledge_score_mean":7.9333333333,"chatbot_rag_score_w_mean":9.3,"chatbot_rag_score_mean":9.2666666667}
{"Model Name":"claude-3-7-sonnet-20250219","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","score_w_mean":8.7510204082,"score_mean":8.695,"writing_score_w_mean":8.5925925926,"writing_score_mean":8.625,"roleplay_score_w_mean":8.2142857143,"roleplay_score_mean":8.2166666667,"reasoning_score_w_mean":9.0666666667,"reasoning_score_mean":8.7833333333,"math_score_w_mean":9.7826086957,"math_score_mean":9.75,"coding_score_w_mean":7.85,"coding_score_mean":7.85,"extraction_score_w_mean":8.6,"extraction_score_mean":8.6,"stem_score_w_mean":8.55,"stem_score_mean":8.55,"humanities_score_w_mean":9.2,"humanities_score_mean":9.2,"persian_general_knowledge_score_w_mean":8.1481481481,"persian_general_knowledge_score_mean":8.0,"chatbot_rag_score_w_mean":9.3666666667,"chatbot_rag_score_mean":9.375}
{"Model Name":"deepseek-reasoner","model_url":"https_google.com","parameters_count":"671000000000","source_type":"Open-Source","score_w_mean":8.5795918367,"score_mean":8.6175,"writing_score_w_mean":8.6296296296,"writing_score_mean":8.65,"roleplay_score_w_mean":8.1785714286,"roleplay_score_mean":8.225,"reasoning_score_w_mean":8.9,"reasoning_score_mean":8.7416666667,"math_score_w_mean":9.3043478261,"math_score_mean":9.2,"coding_score_w_mean":8.75,"coding_score_mean":8.75,"extraction_score_w_mean":8.5,"extraction_score_mean":8.5,"stem_score_w_mean":8.55,"stem_score_mean":8.55,"humanities_score_w_mean":9.15,"humanities_score_mean":9.15,"persian_general_knowledge_score_w_mean":6.8148148148,"persian_general_knowledge_score_mean":7.2416666667,"chatbot_rag_score_w_mean":9.2,"chatbot_rag_score_mean":9.1666666667}
{"Model Name":"deepseek-chat","model_url":"https_google.com","parameters_count":"671000000000","source_type":"Open-Source","score_w_mean":8.5102040816,"score_mean":8.6008333333,"writing_score_w_mean":8.4444444444,"writing_score_mean":8.4916666667,"roleplay_score_w_mean":8.9285714286,"roleplay_score_mean":8.9666666667,"reasoning_score_w_mean":8.3666666667,"reasoning_score_mean":8.3083333333,"math_score_w_mean":9.1304347826,"math_score_mean":9.0,"coding_score_w_mean":9.35,"coding_score_mean":9.35,"extraction_score_w_mean":8.65,"extraction_score_mean":8.65,"stem_score_w_mean":9.05,"stem_score_mean":9.05,"humanities_score_w_mean":9.25,"humanities_score_mean":9.25,"persian_general_knowledge_score_w_mean":5.0740740741,"persian_general_knowledge_score_mean":5.4916666667,"chatbot_rag_score_w_mean":9.4333333333,"chatbot_rag_score_mean":9.45}
{"Model Name":"gpt-4.1-mini","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","score_w_mean":8.3183673469,"score_mean":8.4183333333,"writing_score_w_mean":8.1111111111,"writing_score_mean":8.125,"roleplay_score_w_mean":8.0714285714,"roleplay_score_mean":8.0333333333,"reasoning_score_w_mean":8.1333333333,"reasoning_score_mean":8.0833333333,"math_score_w_mean":9.4347826087,"math_score_mean":9.35,"coding_score_w_mean":8.85,"coding_score_mean":8.85,"extraction_score_w_mean":8.6,"extraction_score_mean":8.6,"stem_score_w_mean":8.9,"stem_score_mean":8.9,"humanities_score_w_mean":9.0,"humanities_score_mean":9.0,"persian_general_knowledge_score_w_mean":5.2222222222,"persian_general_knowledge_score_mean":5.8083333333,"chatbot_rag_score_w_mean":9.4666666667,"chatbot_rag_score_mean":9.4333333333}
{"Model Name":"gemini-2.0-flash","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","score_w_mean":8.4522821577,"score_mean":8.3897306397,"writing_score_w_mean":8.4444444444,"writing_score_mean":8.475,"roleplay_score_w_mean":8.6785714286,"roleplay_score_mean":8.65,"reasoning_score_w_mean":8.4,"reasoning_score_mean":8.3333333333,"math_score_w_mean":9.0434782609,"math_score_mean":8.9,"coding_score_w_mean":7.05,"coding_score_mean":7.05,"extraction_score_w_mean":7.6,"extraction_score_mean":7.6,"stem_score_w_mean":8.4,"stem_score_mean":8.4,"humanities_score_w_mean":9.0,"humanities_score_mean":9.0,"persian_general_knowledge_score_w_mean":8.4074074074,"persian_general_knowledge_score_mean":8.4166666667,"chatbot_rag_score_w_mean":9.1538461538,"chatbot_rag_score_mean":9.1481481481}
{"Model Name":"gpt-4o","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","score_w_mean":8.3551020408,"score_mean":8.3716666667,"writing_score_w_mean":8.1481481481,"writing_score_mean":8.175,"roleplay_score_w_mean":7.5,"roleplay_score_mean":7.45,"reasoning_score_w_mean":8.4666666667,"reasoning_score_mean":8.4833333333,"math_score_w_mean":8.7391304348,"math_score_mean":8.8416666667,"coding_score_w_mean":7.85,"coding_score_mean":7.85,"extraction_score_w_mean":8.25,"extraction_score_mean":8.25,"stem_score_w_mean":8.6,"stem_score_mean":8.6,"humanities_score_w_mean":9.05,"humanities_score_mean":9.05,"persian_general_knowledge_score_w_mean":7.8888888889,"persian_general_knowledge_score_mean":7.9,"chatbot_rag_score_w_mean":9.1333333333,"chatbot_rag_score_mean":9.1166666667}
{"Model Name":"c4ai-command-a-03-2025","model_url":"https_google.com","parameters_count":"111000000000","source_type":"Open-Source","score_w_mean":8.1795918367,"score_mean":8.2191666667,"writing_score_w_mean":8.1851851852,"writing_score_mean":8.25,"roleplay_score_w_mean":8.2857142857,"roleplay_score_mean":8.3333333333,"reasoning_score_w_mean":7.5333333333,"reasoning_score_mean":7.5416666667,"math_score_w_mean":9.0869565217,"math_score_mean":8.95,"coding_score_w_mean":8.3,"coding_score_mean":8.3,"extraction_score_w_mean":7.7,"extraction_score_mean":7.7,"stem_score_w_mean":8.4,"stem_score_mean":8.4,"humanities_score_w_mean":8.9,"humanities_score_mean":8.9,"persian_general_knowledge_score_w_mean":6.2962962963,"persian_general_knowledge_score_mean":6.4916666667,"chatbot_rag_score_w_mean":9.3333333333,"chatbot_rag_score_mean":9.325}
{"Model Name":"gemma-3-27b-it","model_url":"https_google.com","parameters_count":"27400000000","source_type":"Open-Source","score_w_mean":7.8653061224,"score_mean":7.96,"writing_score_w_mean":8.3333333333,"writing_score_mean":8.3083333333,"roleplay_score_w_mean":7.6785714286,"roleplay_score_mean":7.5916666667,"reasoning_score_w_mean":7.0666666667,"reasoning_score_mean":7.2083333333,"math_score_w_mean":8.7826086957,"math_score_mean":8.65,"coding_score_w_mean":7.5,"coding_score_mean":7.5,"extraction_score_w_mean":8.4,"extraction_score_mean":8.4,"stem_score_w_mean":8.65,"stem_score_mean":8.65,"humanities_score_w_mean":8.85,"humanities_score_mean":8.85,"persian_general_knowledge_score_w_mean":4.8518518519,"persian_general_knowledge_score_mean":5.3083333333,"chatbot_rag_score_w_mean":9.1333333333,"chatbot_rag_score_mean":9.1333333333}
{"Model Name":"gpt-4o-mini","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","score_w_mean":7.8081632653,"score_mean":7.8916666667,"writing_score_w_mean":7.962962963,"writing_score_mean":7.9666666667,"roleplay_score_w_mean":7.6071428571,"roleplay_score_mean":7.5083333333,"reasoning_score_w_mean":6.7666666667,"reasoning_score_mean":6.7166666667,"math_score_w_mean":8.2608695652,"math_score_mean":8.225,"coding_score_w_mean":7.85,"coding_score_mean":7.85,"extraction_score_w_mean":8.3,"extraction_score_mean":8.3,"stem_score_w_mean":8.5,"stem_score_mean":8.5,"humanities_score_w_mean":8.8,"humanities_score_mean":8.8,"persian_general_knowledge_score_w_mean":5.3703703704,"persian_general_knowledge_score_mean":5.7666666667,"chatbot_rag_score_w_mean":9.2666666667,"chatbot_rag_score_mean":9.2833333333}
{"Model Name":"Qwen3-32B","model_url":"https_google.com","parameters_count":"32800000000","source_type":"Open-Source","score_w_mean":7.5371900826,"score_mean":7.632996633,"writing_score_w_mean":7.4074074074,"writing_score_mean":7.4083333333,"roleplay_score_w_mean":7.4642857143,"roleplay_score_mean":7.2833333333,"reasoning_score_w_mean":7.5185185185,"reasoning_score_mean":7.6296296296,"math_score_w_mean":9.0,"math_score_mean":8.85,"coding_score_w_mean":8.1,"coding_score_mean":8.1,"extraction_score_w_mean":7.75,"extraction_score_mean":7.75,"stem_score_w_mean":9.0,"stem_score_mean":9.0,"humanities_score_w_mean":9.05,"humanities_score_mean":9.05,"persian_general_knowledge_score_w_mean":1.8148148148,"persian_general_knowledge_score_mean":2.0083333333,"chatbot_rag_score_w_mean":9.2666666667,"chatbot_rag_score_mean":9.25}
{"Model Name":"claude-3-5-haiku-20241022","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","score_w_mean":7.4857142857,"score_mean":7.585,"writing_score_w_mean":8.1111111111,"writing_score_mean":8.0416666667,"roleplay_score_w_mean":7.0357142857,"roleplay_score_mean":7.1833333333,"reasoning_score_w_mean":5.8,"reasoning_score_mean":5.65,"math_score_w_mean":8.4782608696,"math_score_mean":8.25,"coding_score_w_mean":8.05,"coding_score_mean":8.05,"extraction_score_w_mean":7.9,"extraction_score_mean":7.9,"stem_score_w_mean":7.7,"stem_score_mean":7.7,"humanities_score_w_mean":8.5,"humanities_score_mean":8.5,"persian_general_knowledge_score_w_mean":4.8518518519,"persian_general_knowledge_score_mean":5.375,"chatbot_rag_score_w_mean":9.1666666667,"chatbot_rag_score_mean":9.2}
{"Model Name":"gemma-3-12b-it","model_url":"https_google.com","parameters_count":"12200000000","source_type":"Open-Source","score_w_mean":7.3918367347,"score_mean":7.5125,"writing_score_w_mean":7.8888888889,"writing_score_mean":7.95,"roleplay_score_w_mean":7.7142857143,"roleplay_score_mean":7.6416666667,"reasoning_score_w_mean":5.4333333333,"reasoning_score_mean":5.3166666667,"math_score_w_mean":8.7391304348,"math_score_mean":8.7,"coding_score_w_mean":7.55,"coding_score_mean":7.55,"extraction_score_w_mean":7.7,"extraction_score_mean":7.7,"stem_score_w_mean":8.6,"stem_score_mean":8.6,"humanities_score_w_mean":8.9,"humanities_score_mean":8.9,"persian_general_knowledge_score_w_mean":3.1851851852,"persian_general_knowledge_score_mean":3.5166666667,"chatbot_rag_score_w_mean":9.2333333333,"chatbot_rag_score_mean":9.25}
{"Model Name":"Qwen3-30B-A3B","model_url":"https_google.com","parameters_count":"30500000000","source_type":"Open-Source","score_w_mean":7.3613445378,"score_mean":7.4312714777,"writing_score_w_mean":7.0,"writing_score_mean":6.9083333333,"roleplay_score_w_mean":7.2307692308,"roleplay_score_mean":7.0185185185,"reasoning_score_w_mean":6.8518518519,"reasoning_score_mean":6.8981481481,"math_score_w_mean":9.5652173913,"math_score_mean":9.5,"coding_score_w_mean":8.6,"coding_score_mean":8.6,"extraction_score_w_mean":6.75,"extraction_score_mean":6.75,"stem_score_w_mean":8.2,"stem_score_mean":8.2,"humanities_score_w_mean":8.3,"humanities_score_mean":8.3,"persian_general_knowledge_score_w_mean":2.16,"persian_general_knowledge_score_mean":2.1851851852,"chatbot_rag_score_w_mean":9.3,"chatbot_rag_score_mean":9.3333333333}
{"Model Name":"gpt-4.1-nano","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","score_w_mean":7.2367346939,"score_mean":7.3633333333,"writing_score_w_mean":7.7407407407,"writing_score_mean":7.6666666667,"roleplay_score_w_mean":7.6428571429,"roleplay_score_mean":7.5416666667,"reasoning_score_w_mean":5.7,"reasoning_score_mean":6.0416666667,"math_score_w_mean":8.1304347826,"math_score_mean":8.25,"coding_score_w_mean":8.05,"coding_score_mean":8.05,"extraction_score_w_mean":5.95,"extraction_score_mean":5.95,"stem_score_w_mean":7.85,"stem_score_mean":7.85,"humanities_score_w_mean":8.7,"humanities_score_mean":8.7,"persian_general_knowledge_score_w_mean":3.9259259259,"persian_general_knowledge_score_mean":4.3833333333,"chatbot_rag_score_w_mean":9.1666666667,"chatbot_rag_score_mean":9.2}
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","model_url":"https_google.com","parameters_count":"24000000000","source_type":"Open-Source","score_w_mean":7.2448979592,"score_mean":7.2908333333,"writing_score_w_mean":7.2592592593,"writing_score_mean":7.2333333333,"roleplay_score_w_mean":6.8214285714,"roleplay_score_mean":6.6666666667,"reasoning_score_w_mean":6.3333333333,"reasoning_score_mean":6.2,"math_score_w_mean":8.5217391304,"math_score_mean":8.3,"coding_score_w_mean":7.6,"coding_score_mean":7.6,"extraction_score_w_mean":7.95,"extraction_score_mean":7.95,"stem_score_w_mean":7.3,"stem_score_mean":7.3,"humanities_score_w_mean":8.7,"humanities_score_mean":8.7,"persian_general_knowledge_score_w_mean":3.1481481481,"persian_general_knowledge_score_mean":3.4583333333,"chatbot_rag_score_w_mean":9.5333333333,"chatbot_rag_score_mean":9.5}
{"Model Name":"Qwen3-14B","model_url":"https_google.com","parameters_count":"14800000000","source_type":"Open-Source","score_w_mean":7.1769547325,"score_mean":7.2045454545,"writing_score_w_mean":7.2222222222,"writing_score_mean":7.2,"roleplay_score_w_mean":7.3214285714,"roleplay_score_mean":7.2583333333,"reasoning_score_w_mean":7.3666666667,"reasoning_score_mean":7.2833333333,"math_score_w_mean":8.3913043478,"math_score_mean":8.325,"coding_score_w_mean":6.7,"coding_score_mean":6.7,"extraction_score_w_mean":7.05,"extraction_score_mean":7.05,"stem_score_w_mean":8.5,"stem_score_mean":8.5,"humanities_score_w_mean":8.7222222222,"humanities_score_mean":8.7222222222,"persian_general_knowledge_score_w_mean":1.6666666667,"persian_general_knowledge_score_mean":1.7333333333,"chatbot_rag_score_w_mean":9.4333333333,"chatbot_rag_score_mean":9.425}
{"Model Name":"Llama-3.3-70B-Instruct","model_url":"https_google.com","parameters_count":"70600000000","source_type":"Open-Source","score_w_mean":7.132231405,"score_mean":7.1725589226,"writing_score_w_mean":7.2962962963,"writing_score_mean":7.35,"roleplay_score_w_mean":7.1428571429,"roleplay_score_mean":6.9416666667,"reasoning_score_w_mean":6.8148148148,"reasoning_score_mean":6.962962963,"math_score_w_mean":8.0434782609,"math_score_mean":7.925,"coding_score_w_mean":5.9,"coding_score_mean":5.9,"extraction_score_w_mean":7.4,"extraction_score_mean":7.4,"stem_score_w_mean":7.25,"stem_score_mean":7.25,"humanities_score_w_mean":8.35,"humanities_score_mean":8.35,"persian_general_knowledge_score_w_mean":4.2962962963,"persian_general_knowledge_score_mean":4.75,"chatbot_rag_score_w_mean":8.8666666667,"chatbot_rag_score_mean":8.875}
{"Model Name":"aya-expanse-32b","model_url":"https_google.com","parameters_count":"32300000000","source_type":"Open-Source","score_w_mean":7.0367346939,"score_mean":7.0858333333,"writing_score_w_mean":8.1481481481,"writing_score_mean":8.1416666667,"roleplay_score_w_mean":6.8214285714,"roleplay_score_mean":6.6083333333,"reasoning_score_w_mean":5.3,"reasoning_score_mean":5.2416666667,"math_score_w_mean":7.0434782609,"math_score_mean":7.0166666667,"coding_score_w_mean":6.0,"coding_score_mean":6.0,"extraction_score_w_mean":7.2,"extraction_score_mean":7.2,"stem_score_w_mean":7.95,"stem_score_mean":7.95,"humanities_score_w_mean":8.7,"humanities_score_mean":8.7,"persian_general_knowledge_score_w_mean":4.1851851852,"persian_general_knowledge_score_mean":4.6333333333,"chatbot_rag_score_w_mean":9.4,"chatbot_rag_score_mean":9.3666666667}
{"Model Name":"c4ai-command-r-plus","model_url":"https_google.com","parameters_count":"104000000000","source_type":"Open-Source","score_w_mean":6.8285714286,"score_mean":6.88,"writing_score_w_mean":8.1481481481,"writing_score_mean":8.1833333333,"roleplay_score_w_mean":6.8571428571,"roleplay_score_mean":6.6083333333,"reasoning_score_w_mean":4.5666666667,"reasoning_score_mean":4.5416666667,"math_score_w_mean":5.4347826087,"math_score_mean":5.2833333333,"coding_score_w_mean":5.9,"coding_score_mean":5.9,"extraction_score_w_mean":7.35,"extraction_score_mean":7.35,"stem_score_w_mean":7.7,"stem_score_mean":7.7,"humanities_score_w_mean":8.6,"humanities_score_mean":8.6,"persian_general_knowledge_score_w_mean":4.7407407407,"persian_general_knowledge_score_mean":5.3,"chatbot_rag_score_w_mean":9.3333333333,"chatbot_rag_score_mean":9.3333333333}
{"Model Name":"gemma-3-4b-it","model_url":"https_google.com","parameters_count":"4300000000","source_type":"Open-Source","score_w_mean":6.612244898,"score_mean":6.6825,"writing_score_w_mean":8.1481481481,"writing_score_mean":8.075,"roleplay_score_w_mean":7.0357142857,"roleplay_score_mean":6.8666666667,"reasoning_score_w_mean":4.1666666667,"reasoning_score_mean":4.0333333333,"math_score_w_mean":8.3913043478,"math_score_mean":8.4916666667,"coding_score_w_mean":6.65,"coding_score_mean":6.65,"extraction_score_w_mean":6.0,"extraction_score_mean":6.0,"stem_score_w_mean":7.75,"stem_score_mean":7.75,"humanities_score_w_mean":8.4,"humanities_score_mean":8.4,"persian_general_knowledge_score_w_mean":1.2962962963,"persian_general_knowledge_score_mean":1.4,"chatbot_rag_score_w_mean":9.1333333333,"chatbot_rag_score_mean":9.1583333333}
{"Model Name":"Qwen3-8B","model_url":"https_google.com","parameters_count":"8190000000","source_type":"Open-Source","score_w_mean":6.5991189427,"score_mean":6.6075268817,"writing_score_w_mean":6.375,"writing_score_mean":6.2037037037,"roleplay_score_w_mean":6.5263157895,"roleplay_score_mean":6.2738095238,"reasoning_score_w_mean":6.2142857143,"reasoning_score_mean":6.1944444444,"math_score_w_mean":8.652173913,"math_score_mean":8.45,"coding_score_w_mean":5.95,"coding_score_mean":5.95,"extraction_score_w_mean":7.15,"extraction_score_mean":7.15,"stem_score_w_mean":7.3,"stem_score_mean":7.3,"humanities_score_w_mean":7.5555555556,"humanities_score_mean":7.5555555556,"persian_general_knowledge_score_w_mean":1.08,"persian_general_knowledge_score_mean":1.1111111111,"chatbot_rag_score_w_mean":9.2333333333,"chatbot_rag_score_mean":9.25}
{"Model Name":"c4ai-command-r-v01","model_url":"https_google.com","parameters_count":"35000000000","source_type":"Open-Source","score_w_mean":6.0816326531,"score_mean":6.0908333333,"writing_score_w_mean":7.8148148148,"writing_score_mean":7.925,"roleplay_score_w_mean":6.6071428571,"roleplay_score_mean":6.275,"reasoning_score_w_mean":3.8666666667,"reasoning_score_mean":3.7416666667,"math_score_w_mean":3.652173913,"math_score_mean":3.475,"coding_score_w_mean":5.2,"coding_score_mean":5.2,"extraction_score_w_mean":5.95,"extraction_score_mean":5.95,"stem_score_w_mean":6.85,"stem_score_mean":6.85,"humanities_score_w_mean":8.45,"humanities_score_mean":8.45,"persian_general_knowledge_score_w_mean":3.2962962963,"persian_general_knowledge_score_mean":3.8583333333,"chatbot_rag_score_w_mean":9.2,"chatbot_rag_score_mean":9.1833333333}
{"Model Name":"Qwen3-4B","model_url":"https_google.com","parameters_count":"4020000000","source_type":"Open-Source","score_w_mean":5.6592920354,"score_mean":5.5994623656,"writing_score_w_mean":5.5416666667,"writing_score_mean":5.1944444444,"roleplay_score_w_mean":5.4230769231,"roleplay_score_mean":4.962962963,"reasoning_score_w_mean":3.6923076923,"reasoning_score_mean":3.9351851852,"math_score_w_mean":7.9565217391,"math_score_mean":7.9416666667,"coding_score_w_mean":6.3888888889,"coding_score_mean":6.3888888889,"extraction_score_w_mean":5.85,"extraction_score_mean":5.85,"stem_score_w_mean":4.85,"stem_score_mean":4.85,"humanities_score_w_mean":5.6111111111,"humanities_score_mean":5.6111111111,"persian_general_knowledge_score_w_mean":1.0476190476,"persian_general_knowledge_score_mean":1.0416666667,"chatbot_rag_score_w_mean":9.1333333333,"chatbot_rag_score_mean":9.1166666667}
{"Model Name":"gemma-3-1b-it","model_url":"https_google.com","parameters_count":"1000000000","source_type":"Open-Source","score_w_mean":4.3829787234,"score_mean":4.3333333333,"writing_score_w_mean":5.5185185185,"writing_score_mean":5.3083333333,"roleplay_score_w_mean":5.6428571429,"roleplay_score_mean":5.2916666667,"reasoning_score_w_mean":2.7666666667,"reasoning_score_mean":2.925,"math_score_w_mean":4.2857142857,"math_score_mean":4.4444444444,"coding_score_w_mean":3.35,"coding_score_mean":3.35,"extraction_score_w_mean":2.3333333333,"extraction_score_mean":2.3333333333,"stem_score_w_mean":4.6875,"stem_score_mean":4.6875,"humanities_score_w_mean":7.1111111111,"humanities_score_mean":7.1111111111,"persian_general_knowledge_score_w_mean":1.2222222222,"persian_general_knowledge_score_mean":1.2166666667,"chatbot_rag_score_w_mean":6.8333333333,"chatbot_rag_score_mean":6.825}
{"Model Name":"Llama-3.2-3B-Instruct","model_url":"https_google.com","parameters_count":"3210000000","source_type":"Open-Source","score_w_mean":3.9224806202,"score_mean":3.7564102564,"writing_score_w_mean":3.8333333333,"writing_score_mean":3.7261904762,"roleplay_score_w_mean":2.8571428571,"roleplay_score_mean":2.7333333333,"reasoning_score_w_mean":null,"reasoning_score_mean":null,"math_score_w_mean":null,"math_score_mean":null,"coding_score_w_mean":null,"coding_score_mean":null,"extraction_score_w_mean":4.1,"extraction_score_mean":4.1,"stem_score_w_mean":3.8125,"stem_score_mean":3.8125,"humanities_score_w_mean":2.6428571429,"humanities_score_mean":2.6428571429,"persian_general_knowledge_score_w_mean":1.1111111111,"persian_general_knowledge_score_mean":1.15,"chatbot_rag_score_w_mean":7.6,"chatbot_rag_score_mean":7.4583333333}
{"Model Name":"Mistral-7B-Instruct-v0.3","model_url":"https_google.com","parameters_count":"7250000000","source_type":"Open-Source","score_w_mean":3.6885245902,"score_mean":3.3982683983,"writing_score_w_mean":3.8461538462,"writing_score_mean":3.1166666667,"roleplay_score_w_mean":1.9375,"roleplay_score_mean":1.8333333333,"reasoning_score_w_mean":2.2173913043,"reasoning_score_mean":2.125,"math_score_w_mean":2.8260869565,"math_score_mean":2.675,"coding_score_w_mean":4.1666666667,"coding_score_mean":4.1666666667,"extraction_score_w_mean":3.2777777778,"extraction_score_mean":3.2777777778,"stem_score_w_mean":1.6666666667,"stem_score_mean":1.6666666667,"humanities_score_w_mean":2.125,"humanities_score_mean":2.125,"persian_general_knowledge_score_w_mean":1.0,"persian_general_knowledge_score_mean":1.0,"chatbot_rag_score_w_mean":9.2,"chatbot_rag_score_mean":9.1333333333}
{"Model Name":"Llama-3.2-1B-Instruct","model_url":"https_google.com","parameters_count":"1240000000","source_type":"Open-Source","score_w_mean":3.1510791367,"score_mean":2.9521604938,"writing_score_w_mean":1.6666666667,"writing_score_mean":1.6666666667,"roleplay_score_w_mean":1.7272727273,"roleplay_score_mean":1.625,"reasoning_score_w_mean":2.6086956522,"reasoning_score_mean":2.6145833333,"math_score_w_mean":1.7333333333,"math_score_mean":1.7857142857,"coding_score_w_mean":2.5,"coding_score_mean":2.5,"extraction_score_w_mean":null,"extraction_score_mean":null,"stem_score_w_mean":null,"stem_score_mean":null,"humanities_score_w_mean":1.0,"humanities_score_mean":1.0,"persian_general_knowledge_score_w_mean":1.0,"persian_general_knowledge_score_mean":1.0,"chatbot_rag_score_w_mean":8.1666666667,"chatbot_rag_score_mean":8.15}
{"Model Name":"o4-mini","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","score_w_mean":null,"score_mean":null,"writing_score_w_mean":null,"writing_score_mean":null,"roleplay_score_w_mean":null,"roleplay_score_mean":null,"reasoning_score_w_mean":null,"reasoning_score_mean":null,"math_score_w_mean":null,"math_score_mean":null,"coding_score_w_mean":null,"coding_score_mean":null,"extraction_score_w_mean":null,"extraction_score_mean":null,"stem_score_w_mean":null,"stem_score_mean":null,"humanities_score_w_mean":null,"humanities_score_mean":null,"persian_general_knowledge_score_w_mean":null,"persian_general_knowledge_score_mean":null,"chatbot_rag_score_w_mean":null,"chatbot_rag_score_mean":null}
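The mt_bench board keeps two aggregates per category: an unweighted *_score_mean and a weighted *_score_w_mean. A small sketch of the difference between the two (the weights below are hypothetical; the file exposes only the resulting score pairs, not the weighting scheme):

def plain_mean(scores):
    return sum(scores) / len(scores)

def weighted_mean(scores, weights):
    # Weighted average; weights need not sum to 1.
    return sum(s * w for s, w in zip(scores, weights)) / sum(weights)

judge_scores = [8.0, 9.0, 7.5]
print(plain_mean(judge_scores))                # 8.1666...
print(weighted_mean(judge_scores, [1, 2, 1]))  # 8.375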
leaderboard/boards_data/ner_arman.jsonl
ADDED
@@ -0,0 +1,29 @@
{"Model Name":"claude-3-7-sonnet-20250219","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.578306047,"ner_arman_precision_mean":0.5583631307,"ner_arman_recall_mean":0.6250099325,"nlu_score":0.7143086066}
{"Model Name":"gemma-3-4b-it","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","ner_arman_f1_mean":0.3839211973,"ner_arman_precision_mean":0.3292326466,"ner_arman_recall_mean":0.5049662296,"nlu_score":0.6241793507}
{"Model Name":"c4ai-command-r-plus","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","ner_arman_f1_mean":0.0031613599,"ner_arman_precision_mean":0.0024235201,"ner_arman_recall_mean":0.0047675805,"nlu_score":0.6297634971}
{"Model Name":"gpt-4.1","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.3097820535,"ner_arman_precision_mean":0.2833333333,"ner_arman_recall_mean":0.3710568137,"nlu_score":0.6758278127}
{"Model Name":"o4-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":null,"ner_arman_precision_mean":null,"ner_arman_recall_mean":null,"nlu_score":null}
{"Model Name":"gemma-3-12b-it","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","ner_arman_f1_mean":0.4764396046,"ner_arman_precision_mean":0.4205999205,"ner_arman_recall_mean":0.5997417561,"nlu_score":0.699116864}
{"Model Name":"gemma-3-27b-it","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","ner_arman_f1_mean":0.5091463761,"ner_arman_precision_mean":0.4719705999,"ner_arman_recall_mean":0.5898887565,"nlu_score":0.6898261633}
{"Model Name":"Qwen3-14B","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","ner_arman_f1_mean":0.492138652,"ner_arman_precision_mean":0.4553833929,"ner_arman_recall_mean":0.5783671037,"nlu_score":0.6460328733}
{"Model Name":"Qwen3-32B","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","ner_arman_f1_mean":0.4408498401,"ner_arman_precision_mean":0.4206197855,"ner_arman_recall_mean":0.487067938,"nlu_score":0.6714091535}
{"Model Name":"claude-3-5-haiku-20241022","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.0134154417,"ner_arman_precision_mean":0.0131505761,"ner_arman_recall_mean":0.0147993643,"nlu_score":0.3749414991}
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","ner_arman_f1_mean":0.028185021,"ner_arman_precision_mean":0.0278440732,"ner_arman_recall_mean":0.0304295943,"nlu_score":0.5661558794}
{"Model Name":"deepseek-chat","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","ner_arman_f1_mean":0.4737820913,"ner_arman_precision_mean":0.4382598331,"ner_arman_recall_mean":0.5517481128,"nlu_score":0.6752949557}
{"Model Name":"Qwen3-4B","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","ner_arman_f1_mean":0.3426542402,"ner_arman_precision_mean":0.3283122387,"ner_arman_recall_mean":0.3950735002,"nlu_score":0.5121418762}
{"Model Name":"gemma-3-1b-it","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","ner_arman_f1_mean":0.0,"ner_arman_precision_mean":0.0,"ner_arman_recall_mean":0.0,"nlu_score":0.3619547874}
{"Model Name":"aya-expanse-32b","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","ner_arman_f1_mean":0.369949366,"ner_arman_precision_mean":0.3251050003,"ner_arman_recall_mean":0.4785061581,"nlu_score":0.3928685253}
{"Model Name":"Llama-3.3-70B-Instruct","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","ner_arman_f1_mean":0.46241695,"ner_arman_precision_mean":0.4338001589,"ner_arman_recall_mean":0.5298768375,"nlu_score":0.6800109206}
{"Model Name":"gpt-4.1-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.3636093611,"ner_arman_precision_mean":0.3377433453,"ner_arman_recall_mean":0.4240365515,"nlu_score":0.6833497104}
{"Model Name":"gpt-4o-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.0374396958,"ner_arman_precision_mean":0.0342669845,"ner_arman_recall_mean":0.0448549861,"nlu_score":0.6459120734}
{"Model Name":"c4ai-command-a-03-2025","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","ner_arman_f1_mean":null,"ner_arman_precision_mean":null,"ner_arman_recall_mean":null,"nlu_score":null}
{"Model Name":"gemini-2.0-flash","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.5838038137,"ner_arman_precision_mean":0.5621374652,"ner_arman_recall_mean":0.6348629321,"nlu_score":0.7050532433}
{"Model Name":"c4ai-command-r-v01","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","ner_arman_f1_mean":0.0,"ner_arman_precision_mean":0.0,"ner_arman_recall_mean":0.0,"nlu_score":0.531045981}
{"Model Name":"gpt-4.1-nano","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.4520824626,"ner_arman_precision_mean":0.4047789318,"ner_arman_recall_mean":0.5640246325,"nlu_score":0.6262096694}
{"Model Name":"Qwen3-8B","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","ner_arman_f1_mean":0.1587859697,"ner_arman_precision_mean":0.1553465009,"ner_arman_recall_mean":0.1764799364,"nlu_score":0.5968415875}
{"Model Name":"Mistral-7B-Instruct-v0.3","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","ner_arman_f1_mean":0.1625858448,"ner_arman_precision_mean":0.158174414,"ner_arman_recall_mean":0.1884982122,"nlu_score":0.3916645306}
{"Model Name":"gpt-4o","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.5492720496,"ner_arman_precision_mean":0.5296185936,"ner_arman_recall_mean":0.5959078268,"nlu_score":0.7146808531}
{"Model Name":"deepseek-reasoner","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","ner_arman_f1_mean":0.247080201,"ner_arman_precision_mean":0.2176003178,"ner_arman_recall_mean":0.3168653159,"nlu_score":0.6361186163}
{"Model Name":"Qwen3-30B-A3B","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","ner_arman_f1_mean":0.5000495531,"ner_arman_precision_mean":0.4607965832,"ner_arman_recall_mean":0.5927493047,"nlu_score":0.6255818412}
{"Model Name":"Llama-3.2-3B-Instruct","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","ner_arman_f1_mean":0.0638846321,"ner_arman_precision_mean":0.0494466201,"ner_arman_recall_mean":0.1084425904,"nlu_score":0.1368924446}
{"Model Name":"Llama-3.2-1B-Instruct","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","ner_arman_f1_mean":0.0,"ner_arman_precision_mean":0.0,"ner_arman_recall_mean":0.0,"nlu_score":0.046805056}
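Every NLU task board also repeats the model's aggregate nlu_score next to the task-specific metrics (claude-3-7-sonnet carries 0.7143086066 both here and in the nli_farstail file below), so per-task files can be joined directly on "Model Name". A hedged sketch, again illustrative rather than the app's own code:

```python
import pandas as pd

# Sketch only: join two task boards on "Model Name" to compare a model's
# NER F1 with its NLI F-score. Columns present in both files (nlu_score,
# source_type, ...) receive the suffixes below; the task-specific metric
# columns are unique to each file and keep their names.
ner = pd.read_json("leaderboard/boards_data/ner_arman.jsonl", lines=True)
nli = pd.read_json("leaderboard/boards_data/nli_farstail.jsonl", lines=True)
merged = ner.merge(nli, on="Model Name", suffixes=("_ner", "_nli"))
print(merged[["Model Name", "ner_arman_f1_mean", "nli_farstail_fscore_modified"]])
```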
leaderboard/boards_data/nli_farstail.jsonl
ADDED
@@ -0,0 +1,29 @@
{"Model Name":"claude-3-7-sonnet-20250219","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.868286445,"nli_farstail_precision_modified":0.8795611895,"nli_farstail_recall_modified":0.8694171245,"nli_farstail_fscore_modified":0.8680818161,"nli_farstail_acc":0.868286445,"nli_farstail_precision":0.8795611895,"nli_farstail_recall":0.8694171245,"nli_farstail_fscore":0.8680818161,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.7143086066}
{"Model Name":"gemma-3-4b-it","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","nli_farstail_acc_modified":0.6361892583,"nli_farstail_precision_modified":0.6743240456,"nli_farstail_recall_modified":0.6374538968,"nli_farstail_fscore_modified":0.621131875,"nli_farstail_acc":0.6370038412,"nli_farstail_precision":0.6751874567,"nli_farstail_recall":0.638270099,"nli_farstail_fscore":0.6219271782,"nli_farstail_valid_output_ratio":0.9987212276,"nlu_score":0.6241793507}
{"Model Name":"c4ai-command-r-plus","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","nli_farstail_acc_modified":0.6898976982,"nli_farstail_precision_modified":0.752223151,"nli_farstail_recall_modified":0.691698665,"nli_farstail_fscore_modified":0.6834607357,"nli_farstail_acc":0.6898976982,"nli_farstail_precision":0.752223151,"nli_farstail_recall":0.691698665,"nli_farstail_fscore":0.6834607357,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6297634971}
{"Model Name":"gpt-4.1","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.8069053708,"nli_farstail_precision_modified":0.8274480721,"nli_farstail_recall_modified":0.8078020735,"nli_farstail_fscore_modified":0.8055860349,"nli_farstail_acc":0.8069053708,"nli_farstail_precision":0.8274480721,"nli_farstail_recall":0.8078020735,"nli_farstail_fscore":0.8055860349,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6758278127}
{"Model Name":"o4-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":null,"nli_farstail_precision_modified":null,"nli_farstail_recall_modified":null,"nli_farstail_fscore_modified":null,"nli_farstail_acc":null,"nli_farstail_precision":null,"nli_farstail_recall":null,"nli_farstail_fscore":null,"nli_farstail_valid_output_ratio":null,"nlu_score":null}
{"Model Name":"gemma-3-12b-it","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","nli_farstail_acc_modified":0.6534526854,"nli_farstail_precision_modified":0.7279477253,"nli_farstail_recall_modified":0.6559403118,"nli_farstail_fscore_modified":0.6402480245,"nli_farstail_acc":0.6534526854,"nli_farstail_precision":0.7279477253,"nli_farstail_recall":0.6559403118,"nli_farstail_fscore":0.6402480245,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.699116864}
{"Model Name":"gemma-3-27b-it","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","nli_farstail_acc_modified":0.6918158568,"nli_farstail_precision_modified":0.76120773,"nli_farstail_recall_modified":0.696633339,"nli_farstail_fscore_modified":0.6637995215,"nli_farstail_acc":0.6918158568,"nli_farstail_precision":0.76120773,"nli_farstail_recall":0.696633339,"nli_farstail_fscore":0.6637995215,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6898261633}
{"Model Name":"Qwen3-14B","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","nli_farstail_acc_modified":0.7007672634,"nli_farstail_precision_modified":0.7596784307,"nli_farstail_recall_modified":0.7039816989,"nli_farstail_fscore_modified":0.6834876952,"nli_farstail_acc":0.7007672634,"nli_farstail_precision":0.7596784307,"nli_farstail_recall":0.7039816989,"nli_farstail_fscore":0.6834876952,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6460328733}
{"Model Name":"Qwen3-32B","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","nli_farstail_acc_modified":0.716112532,"nli_farstail_precision_modified":0.7794942647,"nli_farstail_recall_modified":0.7185458002,"nli_farstail_fscore_modified":0.7094139725,"nli_farstail_acc":0.716112532,"nli_farstail_precision":0.7794942647,"nli_farstail_recall":0.7185458002,"nli_farstail_fscore":0.7094139725,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6714091535}
{"Model Name":"claude-3-5-haiku-20241022","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.0249360614,"nli_farstail_precision_modified":0.0160900081,"nli_farstail_recall_modified":0.0174126172,"nli_farstail_fscore_modified":0.0167242212,"nli_farstail_acc":0.9069767442,"nli_farstail_precision":0.5852272727,"nli_farstail_recall":0.6333333333,"nli_farstail_fscore":0.6082949309,"nli_farstail_valid_output_ratio":0.0274936061,"nlu_score":0.3749414991}
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","nli_farstail_acc_modified":0.7461636829,"nli_farstail_precision_modified":0.8279044878,"nli_farstail_recall_modified":0.7431719278,"nli_farstail_fscore_modified":0.7484099134,"nli_farstail_acc":0.7461636829,"nli_farstail_precision":0.8279044878,"nli_farstail_recall":0.7431719278,"nli_farstail_fscore":0.7484099134,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.5661558794}
{"Model Name":"deepseek-chat","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","nli_farstail_acc_modified":0.7405509289,"nli_farstail_precision_modified":0.79378989,"nli_farstail_recall_modified":0.7441180803,"nli_farstail_fscore_modified":0.7266455427,"nli_farstail_acc":0.7405509289,"nli_farstail_precision":0.79378989,"nli_farstail_recall":0.7441180803,"nli_farstail_fscore":0.7266455427,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6752949557}
{"Model Name":"Qwen3-4B","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","nli_farstail_acc_modified":0.1726342711,"nli_farstail_precision_modified":0.1856398147,"nli_farstail_recall_modified":0.156398243,"nli_farstail_fscore_modified":0.1549950666,"nli_farstail_acc":0.7277628032,"nli_farstail_precision":0.7825894076,"nli_farstail_recall":0.6593176606,"nli_farstail_fscore":0.6534023831,"nli_farstail_valid_output_ratio":0.2372122762,"nlu_score":0.5121418762}
{"Model Name":"gemma-3-1b-it","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","nli_farstail_acc_modified":0.3433503836,"nli_farstail_precision_modified":0.5618320225,"nli_farstail_recall_modified":0.3440157631,"nli_farstail_fscore_modified":0.279029917,"nli_farstail_acc":0.3435700576,"nli_farstail_precision":0.56219148,"nli_farstail_recall":0.3442358627,"nli_farstail_fscore":0.279208439,"nli_farstail_valid_output_ratio":0.9993606138,"nlu_score":0.3619547874}
{"Model Name":"aya-expanse-32b","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","nli_farstail_acc_modified":0.378516624,"nli_farstail_precision_modified":0.4433198503,"nli_farstail_recall_modified":0.3422920715,"nli_farstail_fscore_modified":0.347492956,"nli_farstail_acc":0.6932084309,"nli_farstail_precision":0.8118878757,"nli_farstail_recall":0.626867447,"nli_farstail_fscore":0.636392252,"nli_farstail_valid_output_ratio":0.5460358056,"nlu_score":0.3928685253}
{"Model Name":"Llama-3.3-70B-Instruct","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","nli_farstail_acc_modified":0.726342711,"nli_farstail_precision_modified":0.8062451443,"nli_farstail_recall_modified":0.7314466615,"nli_farstail_fscore_modified":0.6980605986,"nli_farstail_acc":0.726342711,"nli_farstail_precision":0.8062451443,"nli_farstail_recall":0.7314466615,"nli_farstail_fscore":0.6980605986,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6800109206}
{"Model Name":"gpt-4.1-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.7410485934,"nli_farstail_precision_modified":0.7633275849,"nli_farstail_recall_modified":0.7423464162,"nli_farstail_fscore_modified":0.7375659033,"nli_farstail_acc":0.7410485934,"nli_farstail_precision":0.7633275849,"nli_farstail_recall":0.7423464162,"nli_farstail_fscore":0.7375659033,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6833497104}
{"Model Name":"gpt-4o-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.7647058824,"nli_farstail_precision_modified":0.7814499507,"nli_farstail_recall_modified":0.7670439826,"nli_farstail_fscore_modified":0.7573199649,"nli_farstail_acc":0.7656850192,"nli_farstail_precision":0.7824505269,"nli_farstail_recall":0.7680261132,"nli_farstail_fscore":0.7582896447,"nli_farstail_valid_output_ratio":0.9987212276,"nlu_score":0.6459120734}
{"Model Name":"c4ai-command-a-03-2025","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","nli_farstail_acc_modified":null,"nli_farstail_precision_modified":null,"nli_farstail_recall_modified":null,"nli_farstail_fscore_modified":null,"nli_farstail_acc":null,"nli_farstail_precision":null,"nli_farstail_recall":null,"nli_farstail_fscore":null,"nli_farstail_valid_output_ratio":null,"nlu_score":null}
{"Model Name":"gemini-2.0-flash","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.6943734015,"nli_farstail_precision_modified":0.7566862174,"nli_farstail_recall_modified":0.698049667,"nli_farstail_fscore_modified":0.679445114,"nli_farstail_acc":0.6943734015,"nli_farstail_precision":0.7566862174,"nli_farstail_recall":0.698049667,"nli_farstail_fscore":0.679445114,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.7050532433}
{"Model Name":"c4ai-command-r-v01","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","nli_farstail_acc_modified":0.537084399,"nli_farstail_precision_modified":0.7429511025,"nli_farstail_recall_modified":0.5428343437,"nli_farstail_fscore_modified":0.4522202373,"nli_farstail_acc":0.537084399,"nli_farstail_precision":0.7429511025,"nli_farstail_recall":0.5428343437,"nli_farstail_fscore":0.4522202373,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.531045981}
{"Model Name":"gpt-4.1-nano","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.7001278772,"nli_farstail_precision_modified":0.7089877668,"nli_farstail_recall_modified":0.701635311,"nli_farstail_fscore_modified":0.6963810855,"nli_farstail_acc":0.7001278772,"nli_farstail_precision":0.7089877668,"nli_farstail_recall":0.701635311,"nli_farstail_fscore":0.6963810855,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6262096694}
{"Model Name":"Qwen3-8B","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","nli_farstail_acc_modified":0.6592071611,"nli_farstail_precision_modified":0.7292371837,"nli_farstail_recall_modified":0.6555663858,"nli_farstail_fscore_modified":0.6172863539,"nli_farstail_acc":0.6621708414,"nli_farstail_precision":0.7325157067,"nli_farstail_recall":0.6585136977,"nli_farstail_fscore":0.6200615655,"nli_farstail_valid_output_ratio":0.9955242967,"nlu_score":0.5968415875}
{"Model Name":"Mistral-7B-Instruct-v0.3","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","nli_farstail_acc_modified":0.5421994885,"nli_farstail_precision_modified":0.5647711826,"nli_farstail_recall_modified":0.5444660793,"nli_farstail_fscore_modified":0.5273172992,"nli_farstail_acc":0.5449871465,"nli_farstail_precision":0.5676748904,"nli_farstail_recall":0.5472653908,"nli_farstail_fscore":0.5300284421,"nli_farstail_valid_output_ratio":0.9948849105,"nlu_score":0.3916645306}
{"Model Name":"gpt-4o","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.8254475703,"nli_farstail_precision_modified":0.8384109819,"nli_farstail_recall_modified":0.8264814456,"nli_farstail_fscore_modified":0.8238714462,"nli_farstail_acc":0.8254475703,"nli_farstail_precision":0.8384109819,"nli_farstail_recall":0.8264814456,"nli_farstail_fscore":0.8238714462,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.7146808531}
{"Model Name":"deepseek-reasoner","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","nli_farstail_acc_modified":0.6854219949,"nli_farstail_precision_modified":0.7452254514,"nli_farstail_recall_modified":0.6884495258,"nli_farstail_fscore_modified":0.6690112082,"nli_farstail_acc":0.6858605246,"nli_farstail_precision":0.7457022432,"nli_farstail_recall":0.6888899926,"nli_farstail_fscore":0.6694392384,"nli_farstail_valid_output_ratio":0.9993606138,"nlu_score":0.6361186163}
{"Model Name":"Qwen3-30B-A3B","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","nli_farstail_acc_modified":0.6086956522,"nli_farstail_precision_modified":0.6940003558,"nli_farstail_recall_modified":0.6092669096,"nli_farstail_fscore_modified":0.5908473619,"nli_farstail_acc":0.6110397946,"nli_farstail_precision":0.6966730144,"nli_farstail_recall":0.611613252,"nli_farstail_fscore":0.593122769,"nli_farstail_valid_output_ratio":0.9961636829,"nlu_score":0.6255818412}
{"Model Name":"Llama-3.2-3B-Instruct","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","nli_farstail_acc_modified":0.0,"nli_farstail_precision_modified":0.0,"nli_farstail_recall_modified":0.0,"nli_farstail_fscore_modified":0.0,"nli_farstail_acc":0.0,"nli_farstail_precision":0.0,"nli_farstail_recall":0.0,"nli_farstail_fscore":0.0,"nli_farstail_valid_output_ratio":0.0,"nlu_score":0.1368924446}
{"Model Name":"Llama-3.2-1B-Instruct","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","nli_farstail_acc_modified":0.0,"nli_farstail_precision_modified":0.0,"nli_farstail_recall_modified":0.0,"nli_farstail_fscore_modified":0.0,"nli_farstail_acc":0.0,"nli_farstail_precision":0.0,"nli_farstail_recall":0.0,"nli_farstail_fscore":0.0,"nli_farstail_valid_output_ratio":0.0,"nlu_score":0.046805056}
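In these records the `_modified` metrics appear to be the raw metrics scaled by the valid-output ratio, i.e. invalid outputs are counted as failures: for claude-3-5-haiku above, 0.9069767442 * 0.0274936061 ≈ 0.0249360614. A small consistency check under that assumption (inferred from the rows, not confirmed against the scoring code):

```python
import pandas as pd

# Assumption (inferred from the records above, not from the scoring code):
# metric_modified == metric * valid_output_ratio.
df = pd.read_json("leaderboard/boards_data/nli_farstail.jsonl", lines=True)
df = df.dropna(subset=["nli_farstail_acc"])  # skip unevaluated models
residual = (
    df["nli_farstail_acc"] * df["nli_farstail_valid_output_ratio"]
    - df["nli_farstail_acc_modified"]
)
print(residual.abs().max())  # expected to be ~0 if the assumption holds
```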
leaderboard/boards_data/paraphrase-detection_FarsiParaphraseDetection.jsonl
ADDED
@@ -0,0 +1,29 @@
{"Model Name":"claude-3-7-sonnet-20250219","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.877394636,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9117647059,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8567164179,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8679892098,"paraphrase-detection_FarsiParaphraseDetection_acc":0.877394636,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9117647059,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8567164179,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8679892098,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.7143086066}
{"Model Name":"gemma-3-4b-it","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8697318008,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9057190558,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8481376599,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8593214965,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8697318008,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9057190558,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8481376599,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8593214965,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6241793507}
{"Model Name":"c4ai-command-r-plus","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8901660281,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9194756554,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.871641791,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8825816756,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8901660281,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9194756554,"paraphrase-detection_FarsiParaphraseDetection_recall":0.871641791,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8825816756,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6297634971}
{"Model Name":"gpt-4.1","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8595146871,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9014336918,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8358208955,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8471137461,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8595146871,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9014336918,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8358208955,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8471137461,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6758278127}
{"Model Name":"o4-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":null,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":null,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":null,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":null,"paraphrase-detection_FarsiParaphraseDetection_acc":null,"paraphrase-detection_FarsiParaphraseDetection_precision":null,"paraphrase-detection_FarsiParaphraseDetection_recall":null,"paraphrase-detection_FarsiParaphraseDetection_fscore":null,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":null,"nlu_score":null}
{"Model Name":"gemma-3-12b-it","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8812260536,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9126075915,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8615704957,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8725766572,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8812260536,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9126075915,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8615704957,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8725766572,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.699116864}
{"Model Name":"gemma-3-27b-it","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.9029374202,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9262452107,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8869436301,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.897066353,"paraphrase-detection_FarsiParaphraseDetection_acc":0.9029374202,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9262452107,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8869436301,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.897066353,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6898261633}
{"Model Name":"Qwen3-14B","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8390804598,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.8902439024,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8119402985,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8225473409,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8390804598,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8902439024,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8119402985,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8225473409,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6460328733}
{"Model Name":"Qwen3-32B","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8952745849,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9226415094,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8776119403,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8883498185,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8952745849,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9226415094,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8776119403,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8883498185,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6714091535}
{"Model Name":"claude-3-5-haiku-20241022","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8556832695,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.8960759725,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8314059789,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8433777185,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8634020619,"paraphrase-detection_FarsiParaphraseDetection_precision":0.904159132,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8389057751,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8509855072,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":0.9910600255,"nlu_score":0.3749414991}
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.680715198,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.8209169054,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.6268656716,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.5933059088,"paraphrase-detection_FarsiParaphraseDetection_acc":0.680715198,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8209169054,"paraphrase-detection_FarsiParaphraseDetection_recall":0.6268656716,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.5933059088,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.5661558794}
{"Model Name":"deepseek-chat","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.846743295,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.8943661972,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8208955224,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8318539728,"paraphrase-detection_FarsiParaphraseDetection_acc":0.846743295,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8943661972,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8208955224,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8318539728,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6752949557}
{"Model Name":"Qwen3-4B","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.5823754789,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.6860358387,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.5643977685,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.5413041169,"paraphrase-detection_FarsiParaphraseDetection_acc":0.6940639269,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8176043557,"paraphrase-detection_FarsiParaphraseDetection_recall":0.6726384365,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.6451158653,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":0.8390804598,"nlu_score":0.5121418762}
{"Model Name":"gemma-3-1b-it","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.5095785441,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.6043173519,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.5590051972,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.4734880854,"paraphrase-detection_FarsiParaphraseDetection_acc":0.5095785441,"paraphrase-detection_FarsiParaphraseDetection_precision":0.6043173519,"paraphrase-detection_FarsiParaphraseDetection_recall":0.5590051972,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.4734880854,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.3619547874}
{"Model Name":"aya-expanse-32b","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.908045977,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9307692308,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8925373134,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.9026092485,"paraphrase-detection_FarsiParaphraseDetection_acc":0.908045977,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9307692308,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8925373134,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.9026092485,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.3928685253}
{"Model Name":"Llama-3.3-70B-Instruct","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.877394636,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9117647059,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8567164179,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8679892098,"paraphrase-detection_FarsiParaphraseDetection_acc":0.877394636,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9117647059,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8567164179,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8679892098,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6800109206}
{"Model Name":"gpt-4.1-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8326947637,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.8868739206,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8044776119,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.81470067,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8326947637,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8868739206,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8044776119,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.81470067,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6833497104}
{"Model Name":"gpt-4o-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8607918263,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9021543986,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8373134328,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8486231942,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8607918263,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9021543986,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8373134328,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8486231942,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6459120734}
{"Model Name":"c4ai-command-a-03-2025","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":null,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":null,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":null,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":null,"paraphrase-detection_FarsiParaphraseDetection_acc":null,"paraphrase-detection_FarsiParaphraseDetection_precision":null,"paraphrase-detection_FarsiParaphraseDetection_recall":null,"paraphrase-detection_FarsiParaphraseDetection_fscore":null,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":null,"nlu_score":null}
{"Model Name":"gemini-2.0-flash","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8888888889,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9186915888,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8701492537,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8811336459,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8888888889,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9186915888,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8701492537,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8811336459,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.7050532433}
{"Model Name":"c4ai-command-r-v01","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.9169859515,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9233165065,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.9095332885,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.9144938271,"paraphrase-detection_FarsiParaphraseDetection_acc":0.925257732,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9316453926,"paraphrase-detection_FarsiParaphraseDetection_recall":0.9177378414,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.9227431271,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":0.9910600255,"nlu_score":0.531045981}
{"Model Name":"gpt-4.1-nano","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8045977011,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.8727121464,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.771641791,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.779104351,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8045977011,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8727121464,"paraphrase-detection_FarsiParaphraseDetection_recall":0.771641791,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.779104351,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6262096694}
{"Model Name":"Qwen3-8B","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.7982120051,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.8696369637,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.7641791045,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.7707505633,"paraphrase-detection_FarsiParaphraseDetection_acc":0.7982120051,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8696369637,"paraphrase-detection_FarsiParaphraseDetection_recall":0.7641791045,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.7707505633,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.5968415875}
{"Model Name":"Mistral-7B-Instruct-v0.3","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8722860792,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9057423702,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8514992004,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8624887603,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8722860792,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9057423702,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8514992004,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8624887603,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.3916645306}
{"Model Name":"gpt-4o","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8799489144,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9132841328,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8597014925,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8709280303,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8799489144,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9132841328,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8597014925,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8709280303,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.7146808531}
{"Model Name":"deepseek-reasoner","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.9131545338,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9329661667,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8988839286,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.9083655739,"paraphrase-detection_FarsiParaphraseDetection_acc":0.9131545338,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9329661667,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8988839286,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.9083655739,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6361186163}
{"Model Name":"Qwen3-30B-A3B","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.7777777778,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.8343364681,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.7575227312,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.7618590799,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8152610442,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8745454545,"paraphrase-detection_FarsiParaphraseDetection_recall":0.7940298507,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.7985751802,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":0.9540229885,"nlu_score":0.6255818412}
{"Model Name":"Llama-3.2-3B-Instruct","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.5210727969,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.2605363985,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.4559386973,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.3315917799,"paraphrase-detection_FarsiParaphraseDetection_acc":0.5714285714,"paraphrase-detection_FarsiParaphraseDetection_precision":0.2857142857,"paraphrase-detection_FarsiParaphraseDetection_recall":0.5,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.3636363636,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":0.9118773946,"nlu_score":0.1368924446}
{"Model Name":"Llama-3.2-1B-Instruct","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.091954023,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.0459770115,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.0791826309,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.0581749941,"paraphrase-detection_FarsiParaphraseDetection_acc":0.5806451613,"paraphrase-detection_FarsiParaphraseDetection_precision":0.2903225806,"paraphrase-detection_FarsiParaphraseDetection_recall":0.5,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.3673469388,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":0.1583652618,"nlu_score":0.046805056}
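Since every record carries source_type, a board can be split into open- and closed-source cohorts directly. A sketch, with the metric column name copied from the records above:

```python
import pandas as pd

# Sketch: mean modified F-score per source_type cohort on this board.
col = "paraphrase-detection_FarsiParaphraseDetection_fscore_modified"
df = pd.read_json(
    "leaderboard/boards_data/paraphrase-detection_FarsiParaphraseDetection.jsonl",
    lines=True,
)
print(df.groupby("source_type")[col].mean().round(3))
```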
leaderboard/boards_data/paraphrase-detection_parsinlu.jsonl
ADDED
@@ -0,0 +1,29 @@
{"Model Name":"claude-3-7-sonnet-20250219","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.844,"paraphrase-detection_parsinlu_precision_modified":0.8671683358,"paraphrase-detection_parsinlu_recall_modified":0.8248878009,"paraphrase-detection_parsinlu_fscore_modified":0.8334216056,"paraphrase-detection_parsinlu_acc":0.844,"paraphrase-detection_parsinlu_precision":0.8671683358,"paraphrase-detection_parsinlu_recall":0.8248878009,"paraphrase-detection_parsinlu_fscore":0.8334216056,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.7143086066}
{"Model Name":"gemma-3-4b-it","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.838,"paraphrase-detection_parsinlu_precision_modified":0.8416530278,"paraphrase-detection_parsinlu_recall_modified":0.8270501836,"paraphrase-detection_parsinlu_fscore_modified":0.8316645261,"paraphrase-detection_parsinlu_acc":0.838,"paraphrase-detection_parsinlu_precision":0.8416530278,"paraphrase-detection_parsinlu_recall":0.8270501836,"paraphrase-detection_parsinlu_fscore":0.8316645261,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6241793507}
{"Model Name":"c4ai-command-r-plus","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.872,"paraphrase-detection_parsinlu_precision_modified":0.872593554,"paraphrase-detection_parsinlu_recall_modified":0.8654426765,"paraphrase-detection_parsinlu_fscore_modified":0.8682824025,"paraphrase-detection_parsinlu_acc":0.872,"paraphrase-detection_parsinlu_precision":0.872593554,"paraphrase-detection_parsinlu_recall":0.8654426765,"paraphrase-detection_parsinlu_fscore":0.8682824025,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6297634971}
{"Model Name":"gpt-4.1","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.826,"paraphrase-detection_parsinlu_precision_modified":0.861267166,"paraphrase-detection_parsinlu_recall_modified":0.802243982,"paraphrase-detection_parsinlu_fscore_modified":0.8109673691,"paraphrase-detection_parsinlu_acc":0.826,"paraphrase-detection_parsinlu_precision":0.861267166,"paraphrase-detection_parsinlu_recall":0.802243982,"paraphrase-detection_parsinlu_fscore":0.8109673691,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6758278127}
{"Model Name":"o4-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":null,"paraphrase-detection_parsinlu_precision_modified":null,"paraphrase-detection_parsinlu_recall_modified":null,"paraphrase-detection_parsinlu_fscore_modified":null,"paraphrase-detection_parsinlu_acc":null,"paraphrase-detection_parsinlu_precision":null,"paraphrase-detection_parsinlu_recall":null,"paraphrase-detection_parsinlu_fscore":null,"paraphrase-detection_parsinlu_valid_output_ratio":null,"nlu_score":null}
{"Model Name":"gemma-3-12b-it","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.878,"paraphrase-detection_parsinlu_precision_modified":0.8823390152,"paraphrase-detection_parsinlu_recall_modified":0.8689922481,"paraphrase-detection_parsinlu_fscore_modified":0.8736280355,"paraphrase-detection_parsinlu_acc":0.878,"paraphrase-detection_parsinlu_precision":0.8823390152,"paraphrase-detection_parsinlu_recall":0.8689922481,"paraphrase-detection_parsinlu_fscore":0.8736280355,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.699116864}
{"Model Name":"gemma-3-27b-it","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.876,"paraphrase-detection_parsinlu_precision_modified":0.8806824921,"paraphrase-detection_parsinlu_recall_modified":0.8666666667,"paraphrase-detection_parsinlu_fscore_modified":0.871456768,"paraphrase-detection_parsinlu_acc":0.876,"paraphrase-detection_parsinlu_precision":0.8806824921,"paraphrase-detection_parsinlu_recall":0.8666666667,"paraphrase-detection_parsinlu_fscore":0.871456768,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6898261633}
{"Model Name":"Qwen3-14B","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.854,"paraphrase-detection_parsinlu_precision_modified":0.8742015099,"paraphrase-detection_parsinlu_recall_modified":0.8365157079,"paraphrase-detection_parsinlu_fscore_modified":0.8449177639,"paraphrase-detection_parsinlu_acc":0.854,"paraphrase-detection_parsinlu_precision":0.8742015099,"paraphrase-detection_parsinlu_recall":0.8365157079,"paraphrase-detection_parsinlu_fscore":0.8449177639,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6460328733}
{"Model Name":"Qwen3-32B","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.876,"paraphrase-detection_parsinlu_precision_modified":0.8735210118,"paraphrase-detection_parsinlu_recall_modified":0.8735210118,"paraphrase-detection_parsinlu_fscore_modified":0.8735210118,"paraphrase-detection_parsinlu_acc":0.876,"paraphrase-detection_parsinlu_precision":0.8735210118,"paraphrase-detection_parsinlu_recall":0.8735210118,"paraphrase-detection_parsinlu_fscore":0.8735210118,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6714091535}
{"Model Name":"claude-3-5-haiku-20241022","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.744,"paraphrase-detection_parsinlu_precision_modified":0.7517357255,"paraphrase-detection_parsinlu_recall_modified":0.7357334824,"paraphrase-detection_parsinlu_fscore_modified":0.7395,"paraphrase-detection_parsinlu_acc":0.8416289593,"paraphrase-detection_parsinlu_precision":0.85037978,"paraphrase-detection_parsinlu_recall":0.832277695,"paraphrase-detection_parsinlu_fscore":0.8365384615,"paraphrase-detection_parsinlu_valid_output_ratio":0.884,"nlu_score":0.3749414991}
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.66,"paraphrase-detection_parsinlu_precision_modified":0.7933390651,"paraphrase-detection_parsinlu_recall_modified":0.6057935537,"paraphrase-detection_parsinlu_fscore_modified":0.5625411726,"paraphrase-detection_parsinlu_acc":0.66,"paraphrase-detection_parsinlu_precision":0.7933390651,"paraphrase-detection_parsinlu_recall":0.6057935537,"paraphrase-detection_parsinlu_fscore":0.5625411726,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.5661558794}
{"Model Name":"deepseek-chat","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.764,"paraphrase-detection_parsinlu_precision_modified":0.8406006212,"paraphrase-detection_parsinlu_recall_modified":0.7272949816,"paraphrase-detection_parsinlu_fscore_modified":0.7279551449,"paraphrase-detection_parsinlu_acc":0.764,"paraphrase-detection_parsinlu_precision":0.8406006212,"paraphrase-detection_parsinlu_recall":0.7272949816,"paraphrase-detection_parsinlu_fscore":0.7279551449,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6752949557}
{"Model Name":"Qwen3-4B","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.672,"paraphrase-detection_parsinlu_precision_modified":0.7868195779,"paraphrase-detection_parsinlu_recall_modified":0.6265486372,"paraphrase-detection_parsinlu_fscore_modified":0.5973050157,"paraphrase-detection_parsinlu_acc":0.6801619433,"paraphrase-detection_parsinlu_precision":0.796376091,"paraphrase-detection_parsinlu_recall":0.6341585396,"paraphrase-detection_parsinlu_fscore":0.6045597325,"paraphrase-detection_parsinlu_valid_output_ratio":0.988,"nlu_score":0.5121418762}
{"Model Name":"gemma-3-1b-it","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.576,"paraphrase-detection_parsinlu_precision_modified":0.6352777945,"paraphrase-detection_parsinlu_recall_modified":0.6097919217,"paraphrase-detection_parsinlu_fscore_modified":0.5654241624,"paraphrase-detection_parsinlu_acc":0.576,"paraphrase-detection_parsinlu_precision":0.6352777945,"paraphrase-detection_parsinlu_recall":0.6097919217,"paraphrase-detection_parsinlu_fscore":0.5654241624,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.3619547874}
{"Model Name":"aya-expanse-32b","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.864,"paraphrase-detection_parsinlu_precision_modified":0.8615596015,"paraphrase-detection_parsinlu_recall_modified":0.8607099143,"paraphrase-detection_parsinlu_fscore_modified":0.8611201882,"paraphrase-detection_parsinlu_acc":0.864,"paraphrase-detection_parsinlu_precision":0.8615596015,"paraphrase-detection_parsinlu_recall":0.8607099143,"paraphrase-detection_parsinlu_fscore":0.8611201882,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.3928685253}
{"Model Name":"Llama-3.3-70B-Instruct","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.828,"paraphrase-detection_parsinlu_precision_modified":0.8504577445,"paraphrase-detection_parsinlu_recall_modified":0.807996736,"paraphrase-detection_parsinlu_fscore_modified":0.8159372646,"paraphrase-detection_parsinlu_acc":0.828,"paraphrase-detection_parsinlu_precision":0.8504577445,"paraphrase-detection_parsinlu_recall":0.807996736,"paraphrase-detection_parsinlu_fscore":0.8159372646,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6800109206}
{"Model Name":"gpt-4.1-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.804,"paraphrase-detection_parsinlu_precision_modified":0.8556500813,"paraphrase-detection_parsinlu_recall_modified":0.7749490004,"paraphrase-detection_parsinlu_fscore_modified":0.7822570611,"paraphrase-detection_parsinlu_acc":0.804,"paraphrase-detection_parsinlu_precision":0.8556500813,"paraphrase-detection_parsinlu_recall":0.7749490004,"paraphrase-detection_parsinlu_fscore":0.7822570611,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6833497104}
{"Model Name":"gpt-4o-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.838,"paraphrase-detection_parsinlu_precision_modified":0.8576776974,"paraphrase-detection_parsinlu_recall_modified":0.819624643,"paraphrase-detection_parsinlu_fscore_modified":0.8275649186,"paraphrase-detection_parsinlu_acc":0.838,"paraphrase-detection_parsinlu_precision":0.8576776974,"paraphrase-detection_parsinlu_recall":0.819624643,"paraphrase-detection_parsinlu_fscore":0.8275649186,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6459120734}
{"Model Name":"c4ai-command-a-03-2025","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":null,"paraphrase-detection_parsinlu_precision_modified":null,"paraphrase-detection_parsinlu_recall_modified":null,"paraphrase-detection_parsinlu_fscore_modified":null,"paraphrase-detection_parsinlu_acc":null,"paraphrase-detection_parsinlu_precision":null,"paraphrase-detection_parsinlu_recall":null,"paraphrase-detection_parsinlu_fscore":null,"paraphrase-detection_parsinlu_valid_output_ratio":null,"nlu_score":null}
{"Model Name":"gemini-2.0-flash","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.868,"paraphrase-detection_parsinlu_precision_modified":0.8786613063,"paraphrase-detection_parsinlu_recall_modified":0.8550795594,"paraphrase-detection_parsinlu_fscore_modified":0.8617882093,"paraphrase-detection_parsinlu_acc":0.868,"paraphrase-detection_parsinlu_precision":0.8786613063,"paraphrase-detection_parsinlu_recall":0.8550795594,"paraphrase-detection_parsinlu_fscore":0.8617882093,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.7050532433}
{"Model Name":"c4ai-command-r-v01","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.778,"paraphrase-detection_parsinlu_precision_modified":0.8054432653,"paraphrase-detection_parsinlu_recall_modified":0.7971712985,"paraphrase-detection_parsinlu_fscore_modified":0.7776855183,"paraphrase-detection_parsinlu_acc":0.7842741935,"paraphrase-detection_parsinlu_precision":0.8119387755,"paraphrase-detection_parsinlu_recall":0.8036000993,"paraphrase-detection_parsinlu_fscore":0.7839571757,"paraphrase-detection_parsinlu_valid_output_ratio":0.992,"nlu_score":0.531045981}
{"Model Name":"gpt-4.1-nano","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.792,"paraphrase-detection_parsinlu_precision_modified":0.8426666667,"paraphrase-detection_parsinlu_recall_modified":0.7621379029,"paraphrase-detection_parsinlu_fscore_modified":0.7682709447,"paraphrase-detection_parsinlu_acc":0.792,"paraphrase-detection_parsinlu_precision":0.8426666667,"paraphrase-detection_parsinlu_recall":0.7621379029,"paraphrase-detection_parsinlu_fscore":0.7682709447,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6262096694}
{"Model Name":"Qwen3-8B","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.798,"paraphrase-detection_parsinlu_precision_modified":0.8383696273,"paraphrase-detection_parsinlu_recall_modified":0.7708282334,"paraphrase-detection_parsinlu_fscore_modified":0.7777278949,"paraphrase-detection_parsinlu_acc":0.798,"paraphrase-detection_parsinlu_precision":0.8383696273,"paraphrase-detection_parsinlu_recall":0.7708282334,"paraphrase-detection_parsinlu_fscore":0.7777278949,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.5968415875}
{"Model Name":"Mistral-7B-Instruct-v0.3","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.634,"paraphrase-detection_parsinlu_precision_modified":0.7237713267,"paraphrase-detection_parsinlu_recall_modified":0.6720930233,"paraphrase-detection_parsinlu_fscore_modified":0.6220242152,"paraphrase-detection_parsinlu_acc":0.634,"paraphrase-detection_parsinlu_precision":0.7237713267,"paraphrase-detection_parsinlu_recall":0.6720930233,"paraphrase-detection_parsinlu_fscore":0.6220242152,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.3916645306}
{"Model Name":"gpt-4o","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.848,"paraphrase-detection_parsinlu_precision_modified":0.8717792656,"paraphrase-detection_parsinlu_recall_modified":0.8289677683,"paraphrase-detection_parsinlu_fscore_modified":0.8376928465,"paraphrase-detection_parsinlu_acc":0.848,"paraphrase-detection_parsinlu_precision":0.8717792656,"paraphrase-detection_parsinlu_recall":0.8289677683,"paraphrase-detection_parsinlu_fscore":0.8376928465,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.7146808531}
{"Model Name":"deepseek-reasoner","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.826,"paraphrase-detection_parsinlu_precision_modified":0.8267131595,"paraphrase-detection_parsinlu_recall_modified":0.8165238678,"paraphrase-detection_parsinlu_fscore_modified":0.8200389709,"paraphrase-detection_parsinlu_acc":0.826,"paraphrase-detection_parsinlu_precision":0.8267131595,"paraphrase-detection_parsinlu_recall":0.8165238678,"paraphrase-detection_parsinlu_fscore":0.8200389709,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6361186163}
{"Model Name":"Qwen3-30B-A3B","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.824,"paraphrase-detection_parsinlu_precision_modified":0.8599831541,"paraphrase-detection_parsinlu_recall_modified":0.7999184007,"paraphrase-detection_parsinlu_fscore_modified":0.8085591465,"paraphrase-detection_parsinlu_acc":0.824,"paraphrase-detection_parsinlu_precision":0.8599831541,"paraphrase-detection_parsinlu_recall":0.7999184007,"paraphrase-detection_parsinlu_fscore":0.8085591465,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6255818412}
{"Model Name":"Llama-3.2-3B-Instruct","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.57,"paraphrase-detection_parsinlu_precision_modified":0.285,"paraphrase-detection_parsinlu_recall_modified":0.5,"paraphrase-detection_parsinlu_fscore_modified":0.3630573248,"paraphrase-detection_parsinlu_acc":0.57,"paraphrase-detection_parsinlu_precision":0.285,"paraphrase-detection_parsinlu_recall":0.5,"paraphrase-detection_parsinlu_fscore":0.3630573248,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.1368924446}
{"Model Name":"Llama-3.2-1B-Instruct","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.072,"paraphrase-detection_parsinlu_precision_modified":0.036,"paraphrase-detection_parsinlu_recall_modified":0.066,"paraphrase-detection_parsinlu_fscore_modified":0.0465882353,"paraphrase-detection_parsinlu_acc":0.5454545455,"paraphrase-detection_parsinlu_precision":0.2727272727,"paraphrase-detection_parsinlu_recall":0.5,"paraphrase-detection_parsinlu_fscore":0.3529411765,"paraphrase-detection_parsinlu_valid_output_ratio":0.132,"nlu_score":0.046805056}
leaderboard/boards_data/persian_csr.jsonl
ADDED
@@ -0,0 +1,29 @@
{"Model Name":"claude-3-7-sonnet-20250219","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.9117647059,"acc_strict":0.9117647059,"donyaeeqtesad_acc":0.918429003,"isna_acc":0.90625,"ninisite_article_acc":0.9027027027,"virgool_4_acc":0.9244712991,"khabaronline_acc":0.936,"digiato_acc":0.9184100418,"doctoreto_acc":0.89,"sarzamindownload_acc":0.8758169935,"hamgardi_acc":0.8702064897,"bigbangpage_acc":0.9363057325,"wiki_ahlolbait_acc":0.9671052632,"virgool_3_acc":0.9223880597,"virgool_2_acc":0.9235474006,"virgool_1_acc":0.914556962,"hamshahrionline_acc":0.9440789474,"tabnak_acc":0.9253112033,"alibaba_acc":0.9117647059,"digikala_mag_acc":0.9198396794,"yjc_acc":0.908045977,"beytoote_acc":0.9168975069,"asriran_acc":0.8888888889,"ecoiran_acc":0.8825396825,"hawzah_acc":0.9382022472,"zoomit_acc":0.9341176471,"wikipedia_acc":0.9476190476,"namnak_acc":0.8882833787,"khodro45_acc":0.9117647059,"fidibo_acc":0.9162995595,"newmiind_acc":0.8257839721,"taaghche_acc":0.9358974359,"motamem_acc":0.9684210526,"varzesh3_acc":0.9397993311,"mehrnews_acc":0.9233870968,"tasnim_acc":0.9115384615,"magerta_acc":0.8571428571,"radiokodak_book_acc":0.8695652174,"vipofilm_acc":1.0,"wikishia_acc":1.0,"voolak_acc":0.8604651163,"farsroid_acc":0.7894736842,"parsiday_acc":0.8333333333,"soft98_acc":1.0,"ninisite_discussion_acc":0.8}
{"Model Name":"gpt-4.1","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.8839,"acc_strict":0.8839,"donyaeeqtesad_acc":0.8610271903,"isna_acc":0.8359375,"ninisite_article_acc":0.8702702703,"virgool_4_acc":0.9033232628,"khabaronline_acc":0.884,"digiato_acc":0.8705636743,"doctoreto_acc":0.885,"sarzamindownload_acc":0.8562091503,"hamgardi_acc":0.8377581121,"bigbangpage_acc":0.923566879,"wiki_ahlolbait_acc":0.9276315789,"virgool_3_acc":0.9104477612,"virgool_2_acc":0.8837920489,"virgool_1_acc":0.8607594937,"hamshahrionline_acc":0.9114754098,"tabnak_acc":0.887966805,"alibaba_acc":0.9117647059,"digikala_mag_acc":0.9118236473,"yjc_acc":0.867816092,"beytoote_acc":0.91966759,"asriran_acc":0.8792270531,"ecoiran_acc":0.8634920635,"hawzah_acc":0.9269662921,"zoomit_acc":0.9154929577,"wikipedia_acc":0.9428571429,"namnak_acc":0.8583106267,"khodro45_acc":0.8602941176,"fidibo_acc":0.9074889868,"newmiind_acc":0.8020833333,"taaghche_acc":0.8974358974,"motamem_acc":0.9263157895,"varzesh3_acc":0.9096989967,"mehrnews_acc":0.8508064516,"tasnim_acc":0.8307692308,"magerta_acc":0.8403361345,"radiokodak_book_acc":0.8695652174,"vipofilm_acc":1.0,"wikishia_acc":1.0,"voolak_acc":0.8837209302,"farsroid_acc":0.8421052632,"parsiday_acc":0.8833333333,"soft98_acc":0.9,"ninisite_discussion_acc":0.9}
{"Model Name":"gpt-4o","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.8665,"acc_strict":0.7832,"donyaeeqtesad_acc":0.8459214502,"isna_acc":0.8359375,"ninisite_article_acc":0.8594594595,"virgool_4_acc":0.8882175227,"khabaronline_acc":0.896,"digiato_acc":0.8496868476,"doctoreto_acc":0.855,"sarzamindownload_acc":0.8235294118,"hamgardi_acc":0.8200589971,"bigbangpage_acc":0.9171974522,"wiki_ahlolbait_acc":0.9342105263,"virgool_3_acc":0.8656716418,"virgool_2_acc":0.8837920489,"virgool_1_acc":0.8892405063,"hamshahrionline_acc":0.9409836066,"tabnak_acc":0.887966805,"alibaba_acc":0.8529411765,"digikala_mag_acc":0.8817635271,"yjc_acc":0.8103448276,"beytoote_acc":0.8891966759,"asriran_acc":0.8550724638,"ecoiran_acc":0.8126984127,"hawzah_acc":0.9129213483,"zoomit_acc":0.9084507042,"wikipedia_acc":0.9285714286,"namnak_acc":0.8446866485,"khodro45_acc":0.8455882353,"fidibo_acc":0.872246696,"newmiind_acc":0.8125,"taaghche_acc":0.9038461538,"motamem_acc":0.9157894737,"varzesh3_acc":0.8996655518,"mehrnews_acc":0.814516129,"tasnim_acc":0.8461538462,"magerta_acc":0.781512605,"radiokodak_book_acc":0.7826086957,"vipofilm_acc":0.9230769231,"wikishia_acc":0.9696969697,"voolak_acc":0.7674418605,"farsroid_acc":0.7105263158,"parsiday_acc":0.85,"soft98_acc":0.8,"ninisite_discussion_acc":0.9}
{"Model Name":"gemini-2.0-flash","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.8637863786,"acc_strict":0.8637863786,"donyaeeqtesad_acc":0.8398791541,"isna_acc":0.828125,"ninisite_article_acc":0.8243243243,"virgool_4_acc":0.8851963746,"khabaronline_acc":0.84,"digiato_acc":0.8747390397,"doctoreto_acc":0.895,"sarzamindownload_acc":0.8366013072,"hamgardi_acc":0.802359882,"bigbangpage_acc":0.898089172,"wiki_ahlolbait_acc":0.9407894737,"virgool_3_acc":0.8955223881,"virgool_2_acc":0.8899082569,"virgool_1_acc":0.8892405063,"hamshahrionline_acc":0.8754098361,"tabnak_acc":0.8755186722,"alibaba_acc":0.8426229508,"digikala_mag_acc":0.877755511,"yjc_acc":0.816091954,"beytoote_acc":0.8836565097,"asriran_acc":0.8888888889,"ecoiran_acc":0.8126984127,"hawzah_acc":0.8904494382,"zoomit_acc":0.9084507042,"wikipedia_acc":0.919047619,"namnak_acc":0.8392370572,"khodro45_acc":0.8823529412,"fidibo_acc":0.9030837004,"newmiind_acc":0.8125,"taaghche_acc":0.9423076923,"motamem_acc":0.9157894737,"varzesh3_acc":0.8929765886,"mehrnews_acc":0.8427419355,"tasnim_acc":0.8153846154,"magerta_acc":0.7773109244,"radiokodak_book_acc":0.6956521739,"vipofilm_acc":1.0,"wikishia_acc":0.9696969697,"voolak_acc":0.7441860465,"farsroid_acc":0.7894736842,"parsiday_acc":0.8166666667,"soft98_acc":1.0,"ninisite_discussion_acc":0.9}
{"Model Name":"o4-mini","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.8551,"acc_strict":0.8551,"donyaeeqtesad_acc":0.8429003021,"isna_acc":0.828125,"ninisite_article_acc":0.8540540541,"virgool_4_acc":0.8610271903,"khabaronline_acc":0.84,"digiato_acc":0.8830897704,"doctoreto_acc":0.87,"sarzamindownload_acc":0.8758169935,"hamgardi_acc":0.796460177,"bigbangpage_acc":0.8853503185,"wiki_ahlolbait_acc":0.9013157895,"virgool_3_acc":0.871641791,"virgool_2_acc":0.9051987768,"virgool_1_acc":0.8481012658,"hamshahrionline_acc":0.8786885246,"tabnak_acc":0.8713692946,"alibaba_acc":0.8758169935,"digikala_mag_acc":0.879759519,"yjc_acc":0.7988505747,"beytoote_acc":0.8753462604,"asriran_acc":0.8260869565,"ecoiran_acc":0.8031746032,"hawzah_acc":0.8511235955,"zoomit_acc":0.8849765258,"wikipedia_acc":0.9285714286,"namnak_acc":0.8310626703,"khodro45_acc":0.8897058824,"fidibo_acc":0.872246696,"newmiind_acc":0.7881944444,"taaghche_acc":0.8974358974,"motamem_acc":0.9157894737,"varzesh3_acc":0.8762541806,"mehrnews_acc":0.8346774194,"tasnim_acc":0.8269230769,"magerta_acc":0.7941176471,"radiokodak_book_acc":0.6956521739,"vipofilm_acc":0.7692307692,"wikishia_acc":0.9393939394,"voolak_acc":0.8372093023,"farsroid_acc":0.8157894737,"parsiday_acc":0.7583333333,"soft98_acc":0.8,"ninisite_discussion_acc":0.3}
{"Model Name":"deepseek-reasoner","model_url":"https_google.com","parameters_count":"671000000000","source_type":"Open-Source","acc":0.825165033,"acc_strict":0.825165033,"donyaeeqtesad_acc":0.8247734139,"isna_acc":0.8046875,"ninisite_article_acc":0.8594594595,"virgool_4_acc":0.8580060423,"khabaronline_acc":0.78,"digiato_acc":0.8267223382,"doctoreto_acc":0.835,"sarzamindownload_acc":0.7908496732,"hamgardi_acc":0.808259587,"bigbangpage_acc":0.9044585987,"wiki_ahlolbait_acc":0.9006622517,"virgool_3_acc":0.8268656716,"virgool_2_acc":0.8348623853,"virgool_1_acc":0.7911392405,"hamshahrionline_acc":0.8651315789,"tabnak_acc":0.8215767635,"alibaba_acc":0.8169934641,"digikala_mag_acc":0.8336673347,"yjc_acc":0.7873563218,"beytoote_acc":0.8310249307,"asriran_acc":0.8212560386,"ecoiran_acc":0.7523809524,"hawzah_acc":0.8735955056,"zoomit_acc":0.8450704225,"wikipedia_acc":0.9,"namnak_acc":0.8337874659,"khodro45_acc":0.8088235294,"fidibo_acc":0.845814978,"newmiind_acc":0.7604166667,"taaghche_acc":0.891025641,"motamem_acc":0.8947368421,"varzesh3_acc":0.8093645485,"mehrnews_acc":0.7782258065,"tasnim_acc":0.8115384615,"magerta_acc":0.7647058824,"radiokodak_book_acc":0.6086956522,"vipofilm_acc":0.9230769231,"wikishia_acc":0.9393939394,"voolak_acc":0.8372093023,"farsroid_acc":0.7368421053,"parsiday_acc":0.7583333333,"soft98_acc":0.9,"ninisite_discussion_acc":0.7}
{"Model Name":"deepseek-chat","model_url":"https_google.com","parameters_count":"671000000000","source_type":"Open-Source","acc":0.8241,"acc_strict":0.5115,"donyaeeqtesad_acc":0.8096676737,"isna_acc":0.7734375,"ninisite_article_acc":0.8216216216,"virgool_4_acc":0.8580060423,"khabaronline_acc":0.812,"digiato_acc":0.8141962422,"doctoreto_acc":0.845,"sarzamindownload_acc":0.7843137255,"hamgardi_acc":0.7669616519,"bigbangpage_acc":0.8598726115,"wiki_ahlolbait_acc":0.8947368421,"virgool_3_acc":0.8298507463,"virgool_2_acc":0.8532110092,"virgool_1_acc":0.8164556962,"hamshahrionline_acc":0.868852459,"tabnak_acc":0.8174273859,"alibaba_acc":0.8464052288,"digikala_mag_acc":0.8236472946,"yjc_acc":0.7931034483,"beytoote_acc":0.8282548476,"asriran_acc":0.8019323671,"ecoiran_acc":0.7523809524,"hawzah_acc":0.8651685393,"zoomit_acc":0.8568075117,"wikipedia_acc":0.9,"namnak_acc":0.8310626703,"khodro45_acc":0.8161764706,"fidibo_acc":0.8810572687,"newmiind_acc":0.7569444444,"taaghche_acc":0.9166666667,"motamem_acc":0.9052631579,"varzesh3_acc":0.8327759197,"mehrnews_acc":0.7822580645,"tasnim_acc":0.8038461538,"magerta_acc":0.768907563,"radiokodak_book_acc":0.652173913,"vipofilm_acc":0.8461538462,"wikishia_acc":0.9696969697,"voolak_acc":0.7906976744,"farsroid_acc":0.7368421053,"parsiday_acc":0.7666666667,"soft98_acc":0.8,"ninisite_discussion_acc":0.7}
{"Model Name":"c4ai-command-a-03-2025","model_url":"https_google.com","parameters_count":"111000000000","source_type":"Open-Source","acc":0.798859772,"acc_strict":0.7983596719,"donyaeeqtesad_acc":0.7673716012,"isna_acc":0.76953125,"ninisite_article_acc":0.7696476965,"virgool_4_acc":0.8398791541,"khabaronline_acc":0.78,"digiato_acc":0.7870563674,"doctoreto_acc":0.82,"sarzamindownload_acc":0.7712418301,"hamgardi_acc":0.7610619469,"bigbangpage_acc":0.8789808917,"wiki_ahlolbait_acc":0.8486842105,"virgool_3_acc":0.8119402985,"virgool_2_acc":0.8226299694,"virgool_1_acc":0.8037974684,"hamshahrionline_acc":0.8557377049,"tabnak_acc":0.8298755187,"alibaba_acc":0.8562091503,"digikala_mag_acc":0.8152610442,"yjc_acc":0.7471264368,"beytoote_acc":0.8005540166,"asriran_acc":0.7922705314,"ecoiran_acc":0.7333333333,"hawzah_acc":0.8342696629,"zoomit_acc":0.8427230047,"wikipedia_acc":0.9095238095,"namnak_acc":0.7738419619,"khodro45_acc":0.8088235294,"fidibo_acc":0.845814978,"newmiind_acc":0.7222222222,"taaghche_acc":0.8397435897,"motamem_acc":0.8947368421,"varzesh3_acc":0.7993311037,"mehrnews_acc":0.7338709677,"tasnim_acc":0.7730769231,"magerta_acc":0.6974789916,"radiokodak_book_acc":0.652173913,"vipofilm_acc":0.9230769231,"wikishia_acc":0.8484848485,"voolak_acc":0.7441860465,"farsroid_acc":0.6578947368,"parsiday_acc":0.7166666667,"soft98_acc":0.8,"ninisite_discussion_acc":0.6}
{"Model Name":"Llama-3.3-70B-Instruct","model_url":"https_google.com","parameters_count":"70600000000","source_type":"Open-Source","acc":0.7956,"acc_strict":0.1123,"donyaeeqtesad_acc":0.7764350453,"isna_acc":0.76171875,"ninisite_article_acc":0.772972973,"virgool_4_acc":0.8549848943,"khabaronline_acc":0.78,"digiato_acc":0.7954070981,"doctoreto_acc":0.755,"sarzamindownload_acc":0.7385620915,"hamgardi_acc":0.7492625369,"bigbangpage_acc":0.8407643312,"wiki_ahlolbait_acc":0.8421052632,"virgool_3_acc":0.8029850746,"virgool_2_acc":0.8287461774,"virgool_1_acc":0.8259493671,"hamshahrionline_acc":0.862295082,"tabnak_acc":0.8257261411,"alibaba_acc":0.8366013072,"digikala_mag_acc":0.8076152305,"yjc_acc":0.7356321839,"beytoote_acc":0.7922437673,"asriran_acc":0.7874396135,"ecoiran_acc":0.7142857143,"hawzah_acc":0.845505618,"zoomit_acc":0.8403755869,"wikipedia_acc":0.9047619048,"namnak_acc":0.7874659401,"khodro45_acc":0.7941176471,"fidibo_acc":0.8414096916,"newmiind_acc":0.7465277778,"taaghche_acc":0.8076923077,"motamem_acc":0.8947368421,"varzesh3_acc":0.7959866221,"mehrnews_acc":0.7419354839,"tasnim_acc":0.7346153846,"magerta_acc":0.6848739496,"radiokodak_book_acc":0.6086956522,"vipofilm_acc":0.8461538462,"wikishia_acc":0.8787878788,"voolak_acc":0.7906976744,"farsroid_acc":0.6578947368,"parsiday_acc":0.7583333333,"soft98_acc":0.9,"ninisite_discussion_acc":0.5}
{"Model Name":"gpt-4.1-mini","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.7712,"acc_strict":0.7712,"donyaeeqtesad_acc":0.7703927492,"isna_acc":0.75390625,"ninisite_article_acc":0.7648648649,"virgool_4_acc":0.8247734139,"khabaronline_acc":0.76,"digiato_acc":0.7745302714,"doctoreto_acc":0.785,"sarzamindownload_acc":0.7581699346,"hamgardi_acc":0.6784660767,"bigbangpage_acc":0.8407643312,"wiki_ahlolbait_acc":0.8223684211,"virgool_3_acc":0.7910447761,"virgool_2_acc":0.7920489297,"virgool_1_acc":0.7943037975,"hamshahrionline_acc":0.8295081967,"tabnak_acc":0.7634854772,"alibaba_acc":0.7973856209,"digikala_mag_acc":0.8056112224,"yjc_acc":0.724137931,"beytoote_acc":0.7783933518,"asriran_acc":0.7777777778,"ecoiran_acc":0.7079365079,"hawzah_acc":0.7724719101,"zoomit_acc":0.8098591549,"wikipedia_acc":0.8761904762,"namnak_acc":0.7547683924,"khodro45_acc":0.7941176471,"fidibo_acc":0.7841409692,"newmiind_acc":0.6875,"taaghche_acc":0.8269230769,"motamem_acc":0.8631578947,"varzesh3_acc":0.7926421405,"mehrnews_acc":0.7056451613,"tasnim_acc":0.7076923077,"magerta_acc":0.6890756303,"radiokodak_book_acc":0.652173913,"vipofilm_acc":0.8461538462,"wikishia_acc":0.8181818182,"voolak_acc":0.6279069767,"farsroid_acc":0.6578947368,"parsiday_acc":0.7083333333,"soft98_acc":0.7,"ninisite_discussion_acc":0.5}
{"Model Name":"Qwen3-32B","model_url":"https_google.com","parameters_count":"32800000000","source_type":"Open-Source","acc":0.7654,"acc_strict":0.7653,"donyaeeqtesad_acc":0.749244713,"isna_acc":0.75,"ninisite_article_acc":0.7621621622,"virgool_4_acc":0.7824773414,"khabaronline_acc":0.724,"digiato_acc":0.8037578288,"doctoreto_acc":0.8,"sarzamindownload_acc":0.7450980392,"hamgardi_acc":0.6991150442,"bigbangpage_acc":0.8025477707,"wiki_ahlolbait_acc":0.8157894737,"virgool_3_acc":0.8029850746,"virgool_2_acc":0.8073394495,"virgool_1_acc":0.7943037975,"hamshahrionline_acc":0.8,"tabnak_acc":0.7634854772,"alibaba_acc":0.8039215686,"digikala_mag_acc":0.7875751503,"yjc_acc":0.6896551724,"beytoote_acc":0.7783933518,"asriran_acc":0.7632850242,"ecoiran_acc":0.6793650794,"hawzah_acc":0.7724719101,"zoomit_acc":0.8215962441,"wikipedia_acc":0.8523809524,"namnak_acc":0.7520435967,"khodro45_acc":0.8088235294,"fidibo_acc":0.7665198238,"newmiind_acc":0.6909722222,"taaghche_acc":0.7564102564,"motamem_acc":0.8736842105,"varzesh3_acc":0.762541806,"mehrnews_acc":0.689516129,"tasnim_acc":0.7192307692,"magerta_acc":0.7268907563,"radiokodak_book_acc":0.5217391304,"vipofilm_acc":0.7692307692,"wikishia_acc":0.8484848485,"voolak_acc":0.6744186047,"farsroid_acc":0.6578947368,"parsiday_acc":0.675,"soft98_acc":0.7,"ninisite_discussion_acc":0.7}
{"Model Name":"gemma-3-27b-it","model_url":"https_google.com","parameters_count":"27400000000","source_type":"Open-Source","acc":0.7628,"acc_strict":0.7628,"donyaeeqtesad_acc":0.6978851964,"isna_acc":0.7265625,"ninisite_article_acc":0.7621621622,"virgool_4_acc":0.8187311178,"khabaronline_acc":0.74,"digiato_acc":0.7661795407,"doctoreto_acc":0.78,"sarzamindownload_acc":0.6993464052,"hamgardi_acc":0.7109144543,"bigbangpage_acc":0.821656051,"wiki_ahlolbait_acc":0.8026315789,"virgool_3_acc":0.7940298507,"virgool_2_acc":0.755351682,"virgool_1_acc":0.7784810127,"hamshahrionline_acc":0.8229508197,"tabnak_acc":0.8174273859,"alibaba_acc":0.7843137255,"digikala_mag_acc":0.7975951904,"yjc_acc":0.7126436782,"beytoote_acc":0.7534626039,"asriran_acc":0.7391304348,"ecoiran_acc":0.7079365079,"hawzah_acc":0.7752808989,"zoomit_acc":0.7957746479,"wikipedia_acc":0.8428571429,"namnak_acc":0.7493188011,"khodro45_acc":0.7867647059,"fidibo_acc":0.8237885463,"newmiind_acc":0.6909722222,"taaghche_acc":0.7820512821,"motamem_acc":0.8315789474,"varzesh3_acc":0.7993311037,"mehrnews_acc":0.6975806452,"tasnim_acc":0.7307692308,"magerta_acc":0.6722689076,"radiokodak_book_acc":0.6956521739,"vipofilm_acc":0.8461538462,"wikishia_acc":0.9393939394,"voolak_acc":0.6976744186,"farsroid_acc":0.6315789474,"parsiday_acc":0.7,"soft98_acc":0.8,"ninisite_discussion_acc":0.7}
{"Model Name":"gpt-4o-mini","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.7598,"acc_strict":0.7598,"donyaeeqtesad_acc":0.749244713,"isna_acc":0.71484375,"ninisite_article_acc":0.7351351351,"virgool_4_acc":0.8006042296,"khabaronline_acc":0.736,"digiato_acc":0.7599164927,"doctoreto_acc":0.775,"sarzamindownload_acc":0.6535947712,"hamgardi_acc":0.7197640118,"bigbangpage_acc":0.7961783439,"wiki_ahlolbait_acc":0.8289473684,"virgool_3_acc":0.7492537313,"virgool_2_acc":0.7828746177,"virgool_1_acc":0.8006329114,"hamshahrionline_acc":0.8131147541,"tabnak_acc":0.7427385892,"alibaba_acc":0.7810457516,"digikala_mag_acc":0.7615230461,"yjc_acc":0.7643678161,"beytoote_acc":0.7783933518,"asriran_acc":0.7536231884,"ecoiran_acc":0.6952380952,"hawzah_acc":0.7668539326,"zoomit_acc":0.7957746479,"wikipedia_acc":0.8761904762,"namnak_acc":0.7765667575,"khodro45_acc":0.7573529412,"fidibo_acc":0.7621145374,"newmiind_acc":0.6909722222,"taaghche_acc":0.7820512821,"motamem_acc":0.8736842105,"varzesh3_acc":0.8060200669,"mehrnews_acc":0.6733870968,"tasnim_acc":0.75,"magerta_acc":0.6764705882,"radiokodak_book_acc":0.652173913,"vipofilm_acc":0.8461538462,"wikishia_acc":0.8484848485,"voolak_acc":0.7441860465,"farsroid_acc":0.7631578947,"parsiday_acc":0.7083333333,"soft98_acc":0.8,"ninisite_discussion_acc":0.4}
{"Model Name":"c4ai-command-r-plus","model_url":"https_google.com","parameters_count":"104000000000","source_type":"Open-Source","acc":0.7364,"acc_strict":0.7364,"donyaeeqtesad_acc":0.752265861,"isna_acc":0.67578125,"ninisite_article_acc":0.7054054054,"virgool_4_acc":0.746223565,"khabaronline_acc":0.724,"digiato_acc":0.7223382046,"doctoreto_acc":0.7,"sarzamindownload_acc":0.6993464052,"hamgardi_acc":0.7020648968,"bigbangpage_acc":0.8089171975,"wiki_ahlolbait_acc":0.8486842105,"virgool_3_acc":0.7194029851,"virgool_2_acc":0.7339449541,"virgool_1_acc":0.7246835443,"hamshahrionline_acc":0.8262295082,"tabnak_acc":0.7178423237,"alibaba_acc":0.7712418301,"digikala_mag_acc":0.7715430862,"yjc_acc":0.7183908046,"beytoote_acc":0.7479224377,"asriran_acc":0.768115942,"ecoiran_acc":0.6698412698,"hawzah_acc":0.7415730337,"zoomit_acc":0.79342723,"wikipedia_acc":0.819047619,"namnak_acc":0.7220708447,"khodro45_acc":0.75,"fidibo_acc":0.7665198238,"newmiind_acc":0.6631944444,"taaghche_acc":0.7820512821,"motamem_acc":0.8631578947,"varzesh3_acc":0.7525083612,"mehrnews_acc":0.6653225806,"tasnim_acc":0.75,"magerta_acc":0.6134453782,"radiokodak_book_acc":0.652173913,"vipofilm_acc":0.9230769231,"wikishia_acc":0.8787878788,"voolak_acc":0.6511627907,"farsroid_acc":0.5,"parsiday_acc":0.7083333333,"soft98_acc":0.9,"ninisite_discussion_acc":0.7}
{"Model Name":"claude-3-5-haiku-20241022","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.7160432086,"acc_strict":0.7160432086,"donyaeeqtesad_acc":0.6888217523,"isna_acc":0.69140625,"ninisite_article_acc":0.7,"virgool_4_acc":0.7311178248,"khabaronline_acc":0.712,"digiato_acc":0.7181628392,"doctoreto_acc":0.745,"sarzamindownload_acc":0.6013071895,"hamgardi_acc":0.6755162242,"bigbangpage_acc":0.7770700637,"wiki_ahlolbait_acc":0.8684210526,"virgool_3_acc":0.7194029851,"virgool_2_acc":0.7064220183,"virgool_1_acc":0.7056962025,"hamshahrionline_acc":0.7803278689,"tabnak_acc":0.6887966805,"alibaba_acc":0.7483660131,"digikala_mag_acc":0.7434869739,"yjc_acc":0.6724137931,"beytoote_acc":0.728531856,"asriran_acc":0.7487922705,"ecoiran_acc":0.6761904762,"hawzah_acc":0.7584269663,"zoomit_acc":0.7558685446,"wikipedia_acc":0.780952381,"namnak_acc":0.7002724796,"khodro45_acc":0.7279411765,"fidibo_acc":0.7665198238,"newmiind_acc":0.6202090592,"taaghche_acc":0.7628205128,"motamem_acc":0.8404255319,"varzesh3_acc":0.7324414716,"mehrnews_acc":0.6169354839,"tasnim_acc":0.6923076923,"magerta_acc":0.6680672269,"radiokodak_book_acc":0.5652173913,"vipofilm_acc":0.9230769231,"wikishia_acc":0.7878787879,"voolak_acc":0.6511627907,"farsroid_acc":0.7105263158,"parsiday_acc":0.575,"soft98_acc":0.9,"ninisite_discussion_acc":0.3}
{"Model Name":"gemma-3-12b-it","model_url":"https_google.com","parameters_count":"12200000000","source_type":"Open-Source","acc":0.7094,"acc_strict":0.7094,"donyaeeqtesad_acc":0.6586102719,"isna_acc":0.65625,"ninisite_article_acc":0.7243243243,"virgool_4_acc":0.7643504532,"khabaronline_acc":0.704,"digiato_acc":0.7369519833,"doctoreto_acc":0.76,"sarzamindownload_acc":0.6797385621,"hamgardi_acc":0.6666666667,"bigbangpage_acc":0.7515923567,"wiki_ahlolbait_acc":0.7631578947,"virgool_3_acc":0.7223880597,"virgool_2_acc":0.7584097859,"virgool_1_acc":0.7183544304,"hamshahrionline_acc":0.7213114754,"tabnak_acc":0.7219917012,"alibaba_acc":0.6830065359,"digikala_mag_acc":0.7354709419,"yjc_acc":0.6206896552,"beytoote_acc":0.7146814404,"asriran_acc":0.7198067633,"ecoiran_acc":0.6603174603,"hawzah_acc":0.702247191,"zoomit_acc":0.7323943662,"wikipedia_acc":0.7714285714,"namnak_acc":0.7329700272,"khodro45_acc":0.7352941176,"fidibo_acc":0.718061674,"newmiind_acc":0.6493055556,"taaghche_acc":0.7564102564,"motamem_acc":0.8210526316,"varzesh3_acc":0.7157190635,"mehrnews_acc":0.6088709677,"tasnim_acc":0.6576923077,"magerta_acc":0.6302521008,"radiokodak_book_acc":0.652173913,"vipofilm_acc":0.7692307692,"wikishia_acc":0.8787878788,"voolak_acc":0.6976744186,"farsroid_acc":0.7368421053,"parsiday_acc":0.6583333333,"soft98_acc":0.8,"ninisite_discussion_acc":0.8}
{"Model Name":"Qwen3-14B","model_url":"https_google.com","parameters_count":"14800000000","source_type":"Open-Source","acc":0.6958,"acc_strict":0.6958,"donyaeeqtesad_acc":0.6495468278,"isna_acc":0.62890625,"ninisite_article_acc":0.6972972973,"virgool_4_acc":0.7069486405,"khabaronline_acc":0.652,"digiato_acc":0.7202505219,"doctoreto_acc":0.77,"sarzamindownload_acc":0.614379085,"hamgardi_acc":0.6430678466,"bigbangpage_acc":0.7579617834,"wiki_ahlolbait_acc":0.7631578947,"virgool_3_acc":0.7373134328,"virgool_2_acc":0.7155963303,"virgool_1_acc":0.7278481013,"hamshahrionline_acc":0.7278688525,"tabnak_acc":0.6970954357,"alibaba_acc":0.7254901961,"digikala_mag_acc":0.7074148297,"yjc_acc":0.6379310345,"beytoote_acc":0.6842105263,"asriran_acc":0.6859903382,"ecoiran_acc":0.653968254,"hawzah_acc":0.7078651685,"zoomit_acc":0.7676056338,"wikipedia_acc":0.8142857143,"namnak_acc":0.6621253406,"khodro45_acc":0.7647058824,"fidibo_acc":0.731277533,"newmiind_acc":0.6597222222,"taaghche_acc":0.6987179487,"motamem_acc":0.8105263158,"varzesh3_acc":0.6220735786,"mehrnews_acc":0.625,"tasnim_acc":0.6692307692,"magerta_acc":0.6596638655,"radiokodak_book_acc":0.5652173913,"vipofilm_acc":0.9230769231,"wikishia_acc":0.8787878788,"voolak_acc":0.6279069767,"farsroid_acc":0.6052631579,"parsiday_acc":0.5666666667,"soft98_acc":0.9,"ninisite_discussion_acc":0.7}
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","model_url":"https_google.com","parameters_count":"24000000000","source_type":"Open-Source","acc":0.6894,"acc_strict":0.6894,"donyaeeqtesad_acc":0.670694864,"isna_acc":0.63671875,"ninisite_article_acc":0.6945945946,"virgool_4_acc":0.7039274924,"khabaronline_acc":0.664,"digiato_acc":0.6826722338,"doctoreto_acc":0.755,"sarzamindownload_acc":0.6339869281,"hamgardi_acc":0.6342182891,"bigbangpage_acc":0.7452229299,"wiki_ahlolbait_acc":0.7697368421,"virgool_3_acc":0.7014925373,"virgool_2_acc":0.7125382263,"virgool_1_acc":0.7341772152,"hamshahrionline_acc":0.7278688525,"tabnak_acc":0.6307053942,"alibaba_acc":0.7647058824,"digikala_mag_acc":0.7174348697,"yjc_acc":0.5804597701,"beytoote_acc":0.6814404432,"asriran_acc":0.6811594203,"ecoiran_acc":0.6158730159,"hawzah_acc":0.6994382022,"zoomit_acc":0.7441314554,"wikipedia_acc":0.8333333333,"namnak_acc":0.659400545,"khodro45_acc":0.7058823529,"fidibo_acc":0.7268722467,"newmiind_acc":0.6527777778,"taaghche_acc":0.7051282051,"motamem_acc":0.8526315789,"varzesh3_acc":0.6789297659,"mehrnews_acc":0.5887096774,"tasnim_acc":0.6692307692,"magerta_acc":0.6680672269,"radiokodak_book_acc":0.4347826087,"vipofilm_acc":0.6923076923,"wikishia_acc":0.7575757576,"voolak_acc":0.6511627907,"farsroid_acc":0.6842105263,"parsiday_acc":0.55,"soft98_acc":0.8,"ninisite_discussion_acc":0.4}
{"Model Name":"Qwen3-30B-A3B","model_url":"https_google.com","parameters_count":"30500000000","source_type":"Open-Source","acc":0.688,"acc_strict":0.688,"donyaeeqtesad_acc":0.6465256798,"isna_acc":0.6640625,"ninisite_article_acc":0.7243243243,"virgool_4_acc":0.7311178248,"khabaronline_acc":0.668,"digiato_acc":0.6764091858,"doctoreto_acc":0.765,"sarzamindownload_acc":0.7058823529,"hamgardi_acc":0.6519174041,"bigbangpage_acc":0.8025477707,"wiki_ahlolbait_acc":0.7368421053,"virgool_3_acc":0.7134328358,"virgool_2_acc":0.7003058104,"virgool_1_acc":0.7025316456,"hamshahrionline_acc":0.6819672131,"tabnak_acc":0.7012448133,"alibaba_acc":0.7189542484,"digikala_mag_acc":0.6753507014,"yjc_acc":0.632183908,"beytoote_acc":0.6703601108,"asriran_acc":0.652173913,"ecoiran_acc":0.6126984127,"hawzah_acc":0.7387640449,"zoomit_acc":0.7300469484,"wikipedia_acc":0.7904761905,"namnak_acc":0.6920980926,"khodro45_acc":0.7279411765,"fidibo_acc":0.6872246696,"newmiind_acc":0.6631944444,"taaghche_acc":0.6858974359,"motamem_acc":0.8,"varzesh3_acc":0.6120401338,"mehrnews_acc":0.6129032258,"tasnim_acc":0.65,"magerta_acc":0.6596638655,"radiokodak_book_acc":0.4782608696,"vipofilm_acc":0.7692307692,"wikishia_acc":0.8787878788,"voolak_acc":0.6511627907,"farsroid_acc":0.6578947368,"parsiday_acc":0.6,"soft98_acc":0.8,"ninisite_discussion_acc":0.4}
{"Model Name":"aya-expanse-32b","model_url":"https_google.com","parameters_count":"32300000000","source_type":"Open-Source","acc":0.6327,"acc_strict":0.0585,"donyaeeqtesad_acc":0.6223564955,"isna_acc":0.5703125,"ninisite_article_acc":0.6621621622,"virgool_4_acc":0.6435045317,"khabaronline_acc":0.632,"digiato_acc":0.6346555324,"doctoreto_acc":0.65,"sarzamindownload_acc":0.5620915033,"hamgardi_acc":0.6194690265,"bigbangpage_acc":0.7070063694,"wiki_ahlolbait_acc":0.6776315789,"virgool_3_acc":0.6208955224,"virgool_2_acc":0.626911315,"virgool_1_acc":0.6518987342,"hamshahrionline_acc":0.6557377049,"tabnak_acc":0.6639004149,"alibaba_acc":0.6666666667,"digikala_mag_acc":0.627254509,"yjc_acc":0.6206896552,"beytoote_acc":0.6675900277,"asriran_acc":0.6231884058,"ecoiran_acc":0.5904761905,"hawzah_acc":0.6797752809,"zoomit_acc":0.5915492958,"wikipedia_acc":0.7333333333,"namnak_acc":0.6403269755,"khodro45_acc":0.6102941176,"fidibo_acc":0.704845815,"newmiind_acc":0.5416666667,"taaghche_acc":0.6217948718,"motamem_acc":0.7684210526,"varzesh3_acc":0.6254180602,"mehrnews_acc":0.5927419355,"tasnim_acc":0.6230769231,"magerta_acc":0.5672268908,"radiokodak_book_acc":0.5217391304,"vipofilm_acc":0.6153846154,"wikishia_acc":0.7575757576,"voolak_acc":0.6279069767,"farsroid_acc":0.6578947368,"parsiday_acc":0.5083333333,"soft98_acc":0.8,"ninisite_discussion_acc":0.6}
{"Model Name":"c4ai-command-r-v01","model_url":"https_google.com","parameters_count":"35000000000","source_type":"Open-Source","acc":0.6,"acc_strict":0.6,"donyaeeqtesad_acc":0.6042296073,"isna_acc":0.56640625,"ninisite_article_acc":0.572972973,"virgool_4_acc":0.5951661631,"khabaronline_acc":0.6,"digiato_acc":0.5908141962,"doctoreto_acc":0.605,"sarzamindownload_acc":0.5882352941,"hamgardi_acc":0.5722713864,"bigbangpage_acc":0.6369426752,"wiki_ahlolbait_acc":0.6578947368,"virgool_3_acc":0.5731343284,"virgool_2_acc":0.623853211,"virgool_1_acc":0.6139240506,"hamshahrionline_acc":0.6295081967,"tabnak_acc":0.6514522822,"alibaba_acc":0.6307189542,"digikala_mag_acc":0.6152304609,"yjc_acc":0.5747126437,"beytoote_acc":0.5900277008,"asriran_acc":0.5314009662,"ecoiran_acc":0.5619047619,"hawzah_acc":0.6292134831,"zoomit_acc":0.5915492958,"wikipedia_acc":0.6571428571,"namnak_acc":0.6267029973,"khodro45_acc":0.6397058824,"fidibo_acc":0.6872246696,"newmiind_acc":0.4895833333,"taaghche_acc":0.6217948718,"motamem_acc":0.6736842105,"varzesh3_acc":0.635451505,"mehrnews_acc":0.5725806452,"tasnim_acc":0.6115384615,"magerta_acc":0.5672268908,"radiokodak_book_acc":0.3043478261,"vipofilm_acc":0.6923076923,"wikishia_acc":0.6060606061,"voolak_acc":0.6046511628,"farsroid_acc":0.4736842105,"parsiday_acc":0.5,"soft98_acc":0.8,"ninisite_discussion_acc":0.6}
{"Model Name":"gpt-4.1-nano","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.5494,"acc_strict":0.5494,"donyaeeqtesad_acc":0.5347432024,"isna_acc":0.4921875,"ninisite_article_acc":0.5864864865,"virgool_4_acc":0.5921450151,"khabaronline_acc":0.556,"digiato_acc":0.5469728601,"doctoreto_acc":0.6,"sarzamindownload_acc":0.522875817,"hamgardi_acc":0.5044247788,"bigbangpage_acc":0.6305732484,"wiki_ahlolbait_acc":0.6644736842,"virgool_3_acc":0.5582089552,"virgool_2_acc":0.5107033639,"virgool_1_acc":0.5949367089,"hamshahrionline_acc":0.5639344262,"tabnak_acc":0.510373444,"alibaba_acc":0.6078431373,"digikala_mag_acc":0.5611222445,"yjc_acc":0.591954023,"beytoote_acc":0.5567867036,"asriran_acc":0.5265700483,"ecoiran_acc":0.4920634921,"hawzah_acc":0.547752809,"zoomit_acc":0.5821596244,"wikipedia_acc":0.6380952381,"namnak_acc":0.5449591281,"khodro45_acc":0.6102941176,"fidibo_acc":0.5726872247,"newmiind_acc":0.5,"taaghche_acc":0.5,"motamem_acc":0.6210526316,"varzesh3_acc":0.4816053512,"mehrnews_acc":0.4838709677,"tasnim_acc":0.5692307692,"magerta_acc":0.5042016807,"radiokodak_book_acc":0.347826087,"vipofilm_acc":0.5384615385,"wikishia_acc":0.5757575758,"voolak_acc":0.488372093,"farsroid_acc":0.4736842105,"parsiday_acc":0.4,"soft98_acc":0.7,"ninisite_discussion_acc":0.4}
{"Model Name":"Qwen3-8B","model_url":"https_google.com","parameters_count":"8190000000","source_type":"Open-Source","acc":0.5437,"acc_strict":0.5437,"donyaeeqtesad_acc":0.4954682779,"isna_acc":0.515625,"ninisite_article_acc":0.527027027,"virgool_4_acc":0.5649546828,"khabaronline_acc":0.508,"digiato_acc":0.5469728601,"doctoreto_acc":0.605,"sarzamindownload_acc":0.4836601307,"hamgardi_acc":0.5014749263,"bigbangpage_acc":0.6560509554,"wiki_ahlolbait_acc":0.5723684211,"virgool_3_acc":0.6119402985,"virgool_2_acc":0.5626911315,"virgool_1_acc":0.5696202532,"hamshahrionline_acc":0.5540983607,"tabnak_acc":0.5643153527,"alibaba_acc":0.6045751634,"digikala_mag_acc":0.5711422846,"yjc_acc":0.5172413793,"beytoote_acc":0.5152354571,"asriran_acc":0.5555555556,"ecoiran_acc":0.4761904762,"hawzah_acc":0.5926966292,"zoomit_acc":0.5938967136,"wikipedia_acc":0.6761904762,"namnak_acc":0.4741144414,"khodro45_acc":0.5441176471,"fidibo_acc":0.5682819383,"newmiind_acc":0.5104166667,"taaghche_acc":0.5320512821,"motamem_acc":0.6526315789,"varzesh3_acc":0.4648829431,"mehrnews_acc":0.4475806452,"tasnim_acc":0.5153846154,"magerta_acc":0.5630252101,"radiokodak_book_acc":0.3043478261,"vipofilm_acc":0.6923076923,"wikishia_acc":0.5757575758,"voolak_acc":0.511627907,"farsroid_acc":0.5789473684,"parsiday_acc":0.3916666667,"soft98_acc":0.7,"ninisite_discussion_acc":0.4}
{"Model Name":"Qwen3-4B","model_url":"https_google.com","parameters_count":"4020000000","source_type":"Open-Source","acc":0.5033,"acc_strict":0.5033,"donyaeeqtesad_acc":0.4954682779,"isna_acc":0.48046875,"ninisite_article_acc":0.4810810811,"virgool_4_acc":0.5256797583,"khabaronline_acc":0.504,"digiato_acc":0.5073068894,"doctoreto_acc":0.615,"sarzamindownload_acc":0.4901960784,"hamgardi_acc":0.4601769912,"bigbangpage_acc":0.5414012739,"wiki_ahlolbait_acc":0.5197368421,"virgool_3_acc":0.5731343284,"virgool_2_acc":0.5565749235,"virgool_1_acc":0.5094936709,"hamshahrionline_acc":0.4655737705,"tabnak_acc":0.5145228216,"alibaba_acc":0.5098039216,"digikala_mag_acc":0.5230460922,"yjc_acc":0.5114942529,"beytoote_acc":0.4764542936,"asriran_acc":0.4782608696,"ecoiran_acc":0.4253968254,"hawzah_acc":0.5028089888,"zoomit_acc":0.5328638498,"wikipedia_acc":0.6047619048,"namnak_acc":0.4795640327,"khodro45_acc":0.6102941176,"fidibo_acc":0.550660793,"newmiind_acc":0.4895833333,"taaghche_acc":0.5064102564,"motamem_acc":0.5894736842,"varzesh3_acc":0.3913043478,"mehrnews_acc":0.439516129,"tasnim_acc":0.4807692308,"magerta_acc":0.5546218487,"radiokodak_book_acc":0.347826087,"vipofilm_acc":0.5384615385,"wikishia_acc":0.5454545455,"voolak_acc":0.488372093,"farsroid_acc":0.5263157895,"parsiday_acc":0.3083333333,"soft98_acc":0.7,"ninisite_discussion_acc":0.1}
{"Model Name":"gemma-3-4b-it","model_url":"https_google.com","parameters_count":"4300000000","source_type":"Open-Source","acc":0.4832,"acc_strict":0.4832,"donyaeeqtesad_acc":0.498489426,"isna_acc":0.44140625,"ninisite_article_acc":0.4486486486,"virgool_4_acc":0.5075528701,"khabaronline_acc":0.504,"digiato_acc":0.4822546973,"doctoreto_acc":0.5,"sarzamindownload_acc":0.4117647059,"hamgardi_acc":0.5250737463,"bigbangpage_acc":0.5031847134,"wiki_ahlolbait_acc":0.5197368421,"virgool_3_acc":0.4985074627,"virgool_2_acc":0.4495412844,"virgool_1_acc":0.5063291139,"hamshahrionline_acc":0.5344262295,"tabnak_acc":0.4605809129,"alibaba_acc":0.5032679739,"digikala_mag_acc":0.4729458918,"yjc_acc":0.4482758621,"beytoote_acc":0.4903047091,"asriran_acc":0.4734299517,"ecoiran_acc":0.419047619,"hawzah_acc":0.4831460674,"zoomit_acc":0.5305164319,"wikipedia_acc":0.5666666667,"namnak_acc":0.4931880109,"khodro45_acc":0.5294117647,"fidibo_acc":0.4801762115,"newmiind_acc":0.4479166667,"taaghche_acc":0.4230769231,"motamem_acc":0.6421052632,"varzesh3_acc":0.4515050167,"mehrnews_acc":0.4072580645,"tasnim_acc":0.5,"magerta_acc":0.4453781513,"radiokodak_book_acc":0.5217391304,"vipofilm_acc":0.5384615385,"wikishia_acc":0.6363636364,"voolak_acc":0.4651162791,"farsroid_acc":0.4736842105,"parsiday_acc":0.35,"soft98_acc":0.9,"ninisite_discussion_acc":0.5}
{"Model Name":"Mistral-7B-Instruct-v0.3","model_url":"https_google.com","parameters_count":"7250000000","source_type":"Open-Source","acc":0.3015,"acc_strict":0.3011,"donyaeeqtesad_acc":0.2990936556,"isna_acc":0.29296875,"ninisite_article_acc":0.2864864865,"virgool_4_acc":0.2839879154,"khabaronline_acc":0.276,"digiato_acc":0.2922755741,"doctoreto_acc":0.345,"sarzamindownload_acc":0.3267973856,"hamgardi_acc":0.3215339233,"bigbangpage_acc":0.2547770701,"wiki_ahlolbait_acc":0.2894736842,"virgool_3_acc":0.3313432836,"virgool_2_acc":0.3058103976,"virgool_1_acc":0.3512658228,"hamshahrionline_acc":0.262295082,"tabnak_acc":0.3278008299,"alibaba_acc":0.3235294118,"digikala_mag_acc":0.3206412826,"yjc_acc":0.2816091954,"beytoote_acc":0.2991689751,"asriran_acc":0.3188405797,"ecoiran_acc":0.2698412698,"hawzah_acc":0.3174157303,"zoomit_acc":0.3028169014,"wikipedia_acc":0.3380952381,"namnak_acc":0.2888283379,"khodro45_acc":0.3308823529,"fidibo_acc":0.3259911894,"newmiind_acc":0.2916666667,"taaghche_acc":0.2371794872,"motamem_acc":0.4,"varzesh3_acc":0.2240802676,"mehrnews_acc":0.2459677419,"tasnim_acc":0.3346153846,"magerta_acc":0.3361344538,"radiokodak_book_acc":0.3043478261,"vipofilm_acc":0.4615384615,"wikishia_acc":0.2727272727,"voolak_acc":0.4418604651,"farsroid_acc":0.1578947368,"parsiday_acc":0.2083333333,"soft98_acc":0.7,"ninisite_discussion_acc":0.1}
{"Model Name":"gemma-3-1b-it","model_url":"https_google.com","parameters_count":"1000000000","source_type":"Open-Source","acc":0.2599,"acc_strict":0.2599,"donyaeeqtesad_acc":0.2719033233,"isna_acc":0.25,"ninisite_article_acc":0.2567567568,"virgool_4_acc":0.3202416918,"khabaronline_acc":0.276,"digiato_acc":0.2630480167,"doctoreto_acc":0.245,"sarzamindownload_acc":0.2418300654,"hamgardi_acc":0.2979351032,"bigbangpage_acc":0.2484076433,"wiki_ahlolbait_acc":0.2631578947,"virgool_3_acc":0.2507462687,"virgool_2_acc":0.247706422,"virgool_1_acc":0.2594936709,"hamshahrionline_acc":0.2852459016,"tabnak_acc":0.2489626556,"alibaba_acc":0.2712418301,"digikala_mag_acc":0.2705410822,"yjc_acc":0.275862069,"beytoote_acc":0.2603878116,"asriran_acc":0.2608695652,"ecoiran_acc":0.2634920635,"hawzah_acc":0.2724719101,"zoomit_acc":0.2511737089,"wikipedia_acc":0.2857142857,"namnak_acc":0.2098092643,"khodro45_acc":0.2720588235,"fidibo_acc":0.2466960352,"newmiind_acc":0.2222222222,"taaghche_acc":0.25,"motamem_acc":0.2947368421,"varzesh3_acc":0.2441471572,"mehrnews_acc":0.25,"tasnim_acc":0.2692307692,"magerta_acc":0.2352941176,"radiokodak_book_acc":0.3043478261,"vipofilm_acc":0.3846153846,"wikishia_acc":0.1515151515,"voolak_acc":0.2558139535,"farsroid_acc":0.2631578947,"parsiday_acc":0.1916666667,"soft98_acc":0.1,"ninisite_discussion_acc":0.4}
{"Model Name":"Llama-3.2-3B-Instruct","model_url":"https_google.com","parameters_count":"3210000000","source_type":"Open-Source","acc":0.2521,"acc_strict":0.2517,"donyaeeqtesad_acc":0.2779456193,"isna_acc":0.29296875,"ninisite_article_acc":0.2594594595,"virgool_4_acc":0.2235649547,"khabaronline_acc":0.2,"digiato_acc":0.24217119,"doctoreto_acc":0.24,"sarzamindownload_acc":0.2352941176,"hamgardi_acc":0.2684365782,"bigbangpage_acc":0.2802547771,"wiki_ahlolbait_acc":0.2368421053,"virgool_3_acc":0.2298507463,"virgool_2_acc":0.2599388379,"virgool_1_acc":0.2689873418,"hamshahrionline_acc":0.2327868852,"tabnak_acc":0.2697095436,"alibaba_acc":0.2124183007,"digikala_mag_acc":0.246492986,"yjc_acc":0.2586206897,"beytoote_acc":0.2631578947,"asriran_acc":0.2898550725,"ecoiran_acc":0.2603174603,"hawzah_acc":0.2556179775,"zoomit_acc":0.2887323944,"wikipedia_acc":0.2238095238,"namnak_acc":0.2561307902,"khodro45_acc":0.25,"fidibo_acc":0.2202643172,"newmiind_acc":0.25,"taaghche_acc":0.2692307692,"motamem_acc":0.2842105263,"varzesh3_acc":0.2107023411,"mehrnews_acc":0.2338709677,"tasnim_acc":0.2307692308,"magerta_acc":0.3235294118,"radiokodak_book_acc":0.1739130435,"vipofilm_acc":0.4615384615,"wikishia_acc":0.3333333333,"voolak_acc":0.2790697674,"farsroid_acc":0.2368421053,"parsiday_acc":0.1833333333,"soft98_acc":0.3,"ninisite_discussion_acc":0.5}
{"Model Name":"Llama-3.2-1B-Instruct","model_url":"https_google.com","parameters_count":"1240000000","source_type":"Open-Source","acc":0.2412,"acc_strict":0.0079,"donyaeeqtesad_acc":0.253776435,"isna_acc":0.25390625,"ninisite_article_acc":0.2486486486,"virgool_4_acc":0.2809667674,"khabaronline_acc":0.248,"digiato_acc":0.2192066806,"doctoreto_acc":0.245,"sarzamindownload_acc":0.2483660131,"hamgardi_acc":0.2507374631,"bigbangpage_acc":0.2802547771,"wiki_ahlolbait_acc":0.2434210526,"virgool_3_acc":0.2208955224,"virgool_2_acc":0.2140672783,"virgool_1_acc":0.2373417722,"hamshahrionline_acc":0.2983606557,"tabnak_acc":0.2282157676,"alibaba_acc":0.2581699346,"digikala_mag_acc":0.2224448898,"yjc_acc":0.2701149425,"beytoote_acc":0.2520775623,"asriran_acc":0.1884057971,"ecoiran_acc":0.2349206349,"hawzah_acc":0.2696629213,"zoomit_acc":0.2558685446,"wikipedia_acc":0.1761904762,"namnak_acc":0.2343324251,"khodro45_acc":0.2279411765,"fidibo_acc":0.2907488987,"newmiind_acc":0.28125,"taaghche_acc":0.1987179487,"motamem_acc":0.2736842105,"varzesh3_acc":0.2307692308,"mehrnews_acc":0.2096774194,"tasnim_acc":0.2269230769,"magerta_acc":0.1848739496,"radiokodak_book_acc":0.2173913043,"vipofilm_acc":0.1538461538,"wikishia_acc":0.1515151515,"voolak_acc":0.2790697674,"farsroid_acc":0.2105263158,"parsiday_acc":0.225,"soft98_acc":0.2,"ninisite_discussion_acc":0.4}
leaderboard/boards_data/persian_nlg.jsonl
ADDED
@@ -0,0 +1,29 @@
{"Model Name":"Llama-3.3-70B-Instruct","model_url":"https_google.com","parameters_count":"70600000000","source_type":"Open-Source","question-generation_PersianQA":{"question-generation_PersianQA_rougeL_precision":0.2447184183,"question-generation_PersianQA_rougeL_recall":0.3388367288,"question-generation_PersianQA_rougeL_f1_score":0.269297654},"translation-en2fa_en2fa":{"translation-en2fa_en2fa_bleu":0.2019260724,"translation-en2fa_en2fa_epoque_bleu":0.4752747269,"translation-en2fa_en2fa_mizan_bleu":0.165706346,"translation-en2fa_en2fa_quran_bleu":0.1194336982,"translation-en2fa_en2fa_sahife_bleu":0.0819129449,"translation-en2fa_en2fa_nahj_bleu":0.0545857968,"translation-en2fa_en2fa_tep_bleu":0.0782996247},"summarization_SamSUM-fa":{"summarization_SamSUM-fa_rougeL_precision":0.1632163927,"summarization_SamSUM-fa_rougeL_recall":0.387510969,"summarization_SamSUM-fa_rougeL_f1_score":0.2157634129},"translation-fa2en_fa2en":{"translation-fa2en_fa2en_bleu":0.2559078555,"translation-fa2en_fa2en_tep_bleu":0.1687480056,"translation-fa2en_fa2en_mizan_bleu":0.2113676707,"translation-fa2en_fa2en_quran_bleu":0.2008290856,"translation-fa2en_fa2en_epoque_bleu":0.5099219192,"translation-fa2en_fa2en_nahj_bleu":0.0984185664,"translation-fa2en_fa2en_sahife_bleu":0.1125739279},"translation-ar2fa_ar2fa":{"translation-ar2fa_ar2fa_bleu":0.1302111402,"translation-ar2fa_ar2fa_sahife_bleu":0.1104606951,"translation-ar2fa_ar2fa_nahj_bleu":0.0742081609,"translation-ar2fa_ar2fa_quran_bleu":0.2031644157},"summarization_PnSummary":{"summarization_PnSummary_rougeL_precision":0.2237919051,"summarization_PnSummary_rougeL_recall":0.3532978852,"summarization_PnSummary_rougeL_f1_score":0.2484855426},"translation-fa2ar_fa2ar":{"translation-fa2ar_fa2ar_bleu":0.0860361971,"translation-fa2ar_fa2ar_nahj_bleu":0.0440530096,"translation-fa2ar_fa2ar_sahife_bleu":0.0833828112,"translation-fa2ar_fa2ar_quran_bleu":0.1306727704},"nlg_score":0.2010896964}
{"Model Name":"gpt-4.1","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","question-generation_PersianQA":{"question-generation_PersianQA_rougeL_precision":0.2576021626,"question-generation_PersianQA_rougeL_recall":0.3924501003,"question-generation_PersianQA_rougeL_f1_score":0.2985826349},"translation-en2fa_en2fa":{"translation-en2fa_en2fa_bleu":0.1974288311,"translation-en2fa_en2fa_epoque_bleu":0.4102902123,"translation-en2fa_en2fa_mizan_bleu":0.1898606624,"translation-en2fa_en2fa_quran_bleu":0.1638084791,"translation-en2fa_en2fa_sahife_bleu":0.1095493859,"translation-en2fa_en2fa_nahj_bleu":0.0487097316,"translation-en2fa_en2fa_tep_bleu":0.0737497745},"summarization_SamSUM-fa":{"summarization_SamSUM-fa_rougeL_precision":0.1681357159,"summarization_SamSUM-fa_rougeL_recall":0.3567938895,"summarization_SamSUM-fa_rougeL_f1_score":0.2189693454},"translation-fa2en_fa2en":{"translation-fa2en_fa2en_bleu":0.2307102128,"translation-fa2en_fa2en_tep_bleu":0.1527807458,"translation-fa2en_fa2en_mizan_bleu":0.1927067243,"translation-fa2en_fa2en_quran_bleu":0.1628198329,"translation-fa2en_fa2en_epoque_bleu":0.4676472481,"translation-fa2en_fa2en_nahj_bleu":0.0810494281,"translation-fa2en_fa2en_sahife_bleu":0.1009417344},"translation-ar2fa_ar2fa":{"translation-ar2fa_ar2fa_bleu":0.1534130086,"translation-ar2fa_ar2fa_sahife_bleu":0.1250461134,"translation-ar2fa_ar2fa_nahj_bleu":0.0624466634,"translation-ar2fa_ar2fa_quran_bleu":0.2681979318},"summarization_PnSummary":{"summarization_PnSummary_rougeL_precision":0.1234743619,"summarization_PnSummary_rougeL_recall":0.376111826,"summarization_PnSummary_rougeL_f1_score":0.1808600563},"translation-fa2ar_fa2ar":{"translation-fa2ar_fa2ar_bleu":0.0827618418,"translation-fa2ar_fa2ar_nahj_bleu":0.038434531,"translation-fa2ar_fa2ar_sahife_bleu":0.0781455938,"translation-fa2ar_fa2ar_quran_bleu":0.1317054007},"nlg_score":0.194675133}
{"Model Name":"gpt-4.1-mini","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","question-generation_PersianQA":{"question-generation_PersianQA_rougeL_precision":0.2438951227,"question-generation_PersianQA_rougeL_recall":0.3687301621,"question-generation_PersianQA_rougeL_f1_score":0.2816187853},"translation-en2fa_en2fa":{"translation-en2fa_en2fa_bleu":0.1979467916,"translation-en2fa_en2fa_epoque_bleu":0.4460981632,"translation-en2fa_en2fa_mizan_bleu":0.1745376389,"translation-en2fa_en2fa_quran_bleu":0.137406774,"translation-en2fa_en2fa_sahife_bleu":0.091586235,"translation-en2fa_en2fa_nahj_bleu":0.0490159552,"translation-en2fa_en2fa_tep_bleu":0.072776086},"summarization_SamSUM-fa":{"summarization_SamSUM-fa_rougeL_precision":0.171454009,"summarization_SamSUM-fa_rougeL_recall":0.3692597258,"summarization_SamSUM-fa_rougeL_f1_score":0.2248722593},"translation-fa2en_fa2en":{"translation-fa2en_fa2en_bleu":0.2389011537,"translation-fa2en_fa2en_tep_bleu":0.1431825698,"translation-fa2en_fa2en_mizan_bleu":0.2056729072,"translation-fa2en_fa2en_quran_bleu":0.1776018574,"translation-fa2en_fa2en_epoque_bleu":0.4842161688,"translation-fa2en_fa2en_nahj_bleu":0.0886384727,"translation-fa2en_fa2en_sahife_bleu":0.1045044839},"translation-ar2fa_ar2fa":{"translation-ar2fa_ar2fa_bleu":0.143500711,"translation-ar2fa_ar2fa_sahife_bleu":0.1221294429,"translation-ar2fa_ar2fa_nahj_bleu":0.069521493,"translation-ar2fa_ar2fa_quran_bleu":0.235152236},"summarization_PnSummary":{"summarization_PnSummary_rougeL_precision":0.1156493376,"summarization_PnSummary_rougeL_recall":0.403347998,"summarization_PnSummary_rougeL_f1_score":0.1750055649},"translation-fa2ar_fa2ar":{"translation-fa2ar_fa2ar_bleu":0.0689994984,"translation-fa2ar_fa2ar_nahj_bleu":0.0397020785,"translation-fa2ar_fa2ar_sahife_bleu":0.0751264317,"translation-fa2ar_fa2ar_quran_bleu":0.092169985},"nlg_score":0.1901206806}
{"Model Name":"gpt-4o","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","question-generation_PersianQA":{"question-generation_PersianQA_rougeL_precision":0.2275858051,"question-generation_PersianQA_rougeL_recall":0.3654754607,"question-generation_PersianQA_rougeL_f1_score":0.2679025722},"translation-en2fa_en2fa":{"translation-en2fa_en2fa_bleu":0.2099911906,"translation-en2fa_en2fa_epoque_bleu":0.4805793807,"translation-en2fa_en2fa_mizan_bleu":0.1904867707,"translation-en2fa_en2fa_quran_bleu":0.1412389522,"translation-en2fa_en2fa_sahife_bleu":0.0861059288,"translation-en2fa_en2fa_nahj_bleu":0.0528683421,"translation-en2fa_en2fa_tep_bleu":0.0688528109},"summarization_SamSUM-fa":{"summarization_SamSUM-fa_rougeL_precision":0.165108522,"summarization_SamSUM-fa_rougeL_recall":0.3982318891,"summarization_SamSUM-fa_rougeL_f1_score":0.2240082992},"translation-fa2en_fa2en":{"translation-fa2en_fa2en_bleu":0.234039473,"translation-fa2en_fa2en_tep_bleu":0.1597644653,"translation-fa2en_fa2en_mizan_bleu":0.1946759365,"translation-fa2en_fa2en_quran_bleu":0.1638938233,"translation-fa2en_fa2en_epoque_bleu":0.474760879,"translation-fa2en_fa2en_nahj_bleu":0.0825458621,"translation-fa2en_fa2en_sahife_bleu":0.0952634494},"translation-ar2fa_ar2fa":{"translation-ar2fa_ar2fa_bleu":0.1542520457,"translation-ar2fa_ar2fa_sahife_bleu":0.1283925803,"translation-ar2fa_ar2fa_nahj_bleu":0.0660434951,"translation-ar2fa_ar2fa_quran_bleu":0.2639096342},"summarization_PnSummary":{"summarization_PnSummary_rougeL_precision":0.1134979628,"summarization_PnSummary_rougeL_recall":0.3909794734,"summarization_PnSummary_rougeL_f1_score":0.1716841943},"translation-fa2ar_fa2ar":{"translation-fa2ar_fa2ar_bleu":0.0656699851,"translation-fa2ar_fa2ar_nahj_bleu":0.0347167128,"translation-fa2ar_fa2ar_sahife_bleu":0.0732417084,"translation-fa2ar_fa2ar_quran_bleu":0.0890515341},"nlg_score":0.18964968}
{"Model Name":"c4ai-command-r-plus","model_url":"https_google.com","parameters_count":"104000000000","source_type":"Open-Source","question-generation_PersianQA":{"question-generation_PersianQA_rougeL_precision":0.2149535143,"question-generation_PersianQA_rougeL_recall":0.3019561885,"question-generation_PersianQA_rougeL_f1_score":0.2405115465},"translation-en2fa_en2fa":{"translation-en2fa_en2fa_bleu":0.2018158808,"translation-en2fa_en2fa_epoque_bleu":0.4332944681,"translation-en2fa_en2fa_mizan_bleu":0.1925182751,"translation-en2fa_en2fa_quran_bleu":0.1530925462,"translation-en2fa_en2fa_sahife_bleu":0.1026499453,"translation-en2fa_en2fa_nahj_bleu":0.051968827,"translation-en2fa_en2fa_tep_bleu":0.0708487287},"summarization_SamSUM-fa":{"summarization_SamSUM-fa_rougeL_precision":0.1591262985,"summarization_SamSUM-fa_rougeL_recall":0.4163090512,"summarization_SamSUM-fa_rougeL_f1_score":0.2208876443},"translation-fa2en_fa2en":{"translation-fa2en_fa2en_bleu":0.2337569687,"translation-fa2en_fa2en_tep_bleu":0.1386371644,"translation-fa2en_fa2en_mizan_bleu":0.2129637469,"translation-fa2en_fa2en_quran_bleu":0.1702102457,"translation-fa2en_fa2en_epoque_bleu":0.478211182,"translation-fa2en_fa2en_nahj_bleu":0.083013513,"translation-fa2en_fa2en_sahife_bleu":0.072000292},"translation-ar2fa_ar2fa":{"translation-ar2fa_ar2fa_bleu":0.1414109272,"translation-ar2fa_ar2fa_sahife_bleu":0.136408042,"translation-ar2fa_ar2fa_nahj_bleu":0.0653197648,"translation-ar2fa_ar2fa_quran_bleu":0.2187004167},"summarization_PnSummary":{"summarization_PnSummary_rougeL_precision":0.1227039295,"summarization_PnSummary_rougeL_recall":0.4315497639,"summarization_PnSummary_rougeL_f1_score":0.1856517383},"translation-fa2ar_fa2ar":{"translation-fa2ar_fa2ar_bleu":0.0922998074,"translation-fa2ar_fa2ar_nahj_bleu":0.0511154919,"translation-fa2ar_fa2ar_sahife_bleu":0.0589808221,"translation-fa2ar_fa2ar_quran_bleu":0.1668031083},"nlg_score":0.1880477876}
{"Model Name":"gpt-4o-mini","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","question-generation_PersianQA":{"question-generation_PersianQA_rougeL_precision":0.1942536013,"question-generation_PersianQA_rougeL_recall":0.3435531442,"question-generation_PersianQA_rougeL_f1_score":0.2369359061},"translation-en2fa_en2fa":{"translation-en2fa_en2fa_bleu":0.2014428857,"translation-en2fa_en2fa_epoque_bleu":0.4710672433,"translation-en2fa_en2fa_mizan_bleu":0.1830885263,"translation-en2fa_en2fa_quran_bleu":0.1141518863,"translation-en2fa_en2fa_sahife_bleu":0.0806159411,"translation-en2fa_en2fa_nahj_bleu":0.0504089542,"translation-en2fa_en2fa_tep_bleu":0.0648627292},"summarization_SamSUM-fa":{"summarization_SamSUM-fa_rougeL_precision":0.1578034675,"summarization_SamSUM-fa_rougeL_recall":0.3902121243,"summarization_SamSUM-fa_rougeL_f1_score":0.2156396673},"translation-fa2en_fa2en":{"translation-fa2en_fa2en_bleu":0.2332592983,"translation-fa2en_fa2en_tep_bleu":0.1497847918,"translation-fa2en_fa2en_mizan_bleu":0.1972270386,"translation-fa2en_fa2en_quran_bleu":0.1725699648,"translation-fa2en_fa2en_epoque_bleu":0.4678973942,"translation-fa2en_fa2en_nahj_bleu":0.090543674,"translation-fa2en_fa2en_sahife_bleu":0.1008380909},"translation-ar2fa_ar2fa":{"translation-ar2fa_ar2fa_bleu":0.1397574972,"translation-ar2fa_ar2fa_sahife_bleu":0.1273211367,"translation-ar2fa_ar2fa_nahj_bleu":0.0658485892,"translation-ar2fa_ar2fa_quran_bleu":0.2224073202},"summarization_PnSummary":{"summarization_PnSummary_rougeL_precision":0.1160048958,"summarization_PnSummary_rougeL_recall":0.3980422927,"summarization_PnSummary_rougeL_f1_score":0.1751797476},"translation-fa2ar_fa2ar":{"translation-fa2ar_fa2ar_bleu":0.0652599666,"translation-fa2ar_fa2ar_nahj_bleu":0.0373134355,"translation-fa2ar_fa2ar_sahife_bleu":0.0688517527,"translation-fa2ar_fa2ar_quran_bleu":0.0896147118},"nlg_score":0.1810678527}
{"Model Name":"gemini-2.0-flash","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","question-generation_PersianQA":{"question-generation_PersianQA_rougeL_precision":0.3141052553,"question-generation_PersianQA_rougeL_recall":0.4102615831,"question-generation_PersianQA_rougeL_f1_score":0.3441804021},"translation-en2fa_en2fa":{"translation-en2fa_en2fa_bleu":0.1718324934,"translation-en2fa_en2fa_epoque_bleu":0.364783925,"translation-en2fa_en2fa_mizan_bleu":0.1532613543,"translation-en2fa_en2fa_quran_bleu":0.1620975016,"translation-en2fa_en2fa_sahife_bleu":0.0967871625,"translation-en2fa_en2fa_nahj_bleu":0.0457580774,"translation-en2fa_en2fa_tep_bleu":0.05756103},"summarization_SamSUM-fa":{"summarization_SamSUM-fa_rougeL_precision":0.1808561992,"summarization_SamSUM-fa_rougeL_recall":0.414509553,"summarization_SamSUM-fa_rougeL_f1_score":0.2406998552},"translation-fa2en_fa2en":{"translation-fa2en_fa2en_bleu":0.0757086487,"translation-fa2en_fa2en_tep_bleu":0.0316922994,"translation-fa2en_fa2en_mizan_bleu":0.0530331645,"translation-fa2en_fa2en_quran_bleu":0.1028139165,"translation-fa2en_fa2en_epoque_bleu":0.157367237,"translation-fa2en_fa2en_nahj_bleu":0.0336372263,"translation-fa2en_fa2en_sahife_bleu":0.0279485156},"translation-ar2fa_ar2fa":{"translation-ar2fa_ar2fa_bleu":0.15661924,"translation-ar2fa_ar2fa_sahife_bleu":0.1122809429,"translation-ar2fa_ar2fa_nahj_bleu":0.0629397909,"translation-ar2fa_ar2fa_quran_bleu":0.2899530138},"summarization_PnSummary":{"summarization_PnSummary_rougeL_precision":0.1228424623,"summarization_PnSummary_rougeL_recall":0.3750771332,"summarization_PnSummary_rougeL_f1_score":0.1793201723},"translation-fa2ar_fa2ar":{"translation-fa2ar_fa2ar_bleu":0.079257203,"translation-fa2ar_fa2ar_nahj_bleu":0.0338415847,"translation-fa2ar_fa2ar_sahife_bleu":0.0570744002,"translation-fa2ar_fa2ar_quran_bleu":0.146855624},"nlg_score":0.178231145}
{"Model Name":"claude-3-7-sonnet-20250219","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","question-generation_PersianQA":{"question-generation_PersianQA_rougeL_precision":0.1693490122,"question-generation_PersianQA_rougeL_recall":0.3886090827,"question-generation_PersianQA_rougeL_f1_score":0.227277052},"translation-en2fa_en2fa":{"translation-en2fa_en2fa_bleu":0.1799534349,"translation-en2fa_en2fa_epoque_bleu":0.4004213933,"translation-en2fa_en2fa_mizan_bleu":0.1703393716,"translation-en2fa_en2fa_quran_bleu":0.1225698669,"translation-en2fa_en2fa_sahife_bleu":0.0832764011,"translation-en2fa_en2fa_nahj_bleu":0.0439108113,"translation-en2fa_en2fa_tep_bleu":0.0595417592},"summarization_SamSUM-fa":{"summarization_SamSUM-fa_rougeL_precision":0.1538512444,"summarization_SamSUM-fa_rougeL_recall":0.3849531288,"summarization_SamSUM-fa_rougeL_f1_score":0.2115502707},"translation-fa2en_fa2en":{"translation-fa2en_fa2en_bleu":0.2247897554,"translation-fa2en_fa2en_tep_bleu":0.1341840946,"translation-fa2en_fa2en_mizan_bleu":0.1909021288,"translation-fa2en_fa2en_quran_bleu":0.1740971535,"translation-fa2en_fa2en_epoque_bleu":0.4544315204,"translation-fa2en_fa2en_nahj_bleu":0.0877235615,"translation-fa2en_fa2en_sahife_bleu":0.0975791022},"translation-ar2fa_ar2fa":{"translation-ar2fa_ar2fa_bleu":0.1471879954,"translation-ar2fa_ar2fa_sahife_bleu":0.1294214814,"translation-ar2fa_ar2fa_nahj_bleu":0.0642841927,"translation-ar2fa_ar2fa_quran_bleu":0.2437131219},"summarization_PnSummary":{"summarization_PnSummary_rougeL_precision":0.1188323392,"summarization_PnSummary_rougeL_recall":0.3948447809,"summarization_PnSummary_rougeL_f1_score":0.1786530476},"translation-fa2ar_fa2ar":{"translation-fa2ar_fa2ar_bleu":0.0761269879,"translation-fa2ar_fa2ar_nahj_bleu":0.0321440801,"translation-fa2ar_fa2ar_sahife_bleu":0.0613632957,"translation-fa2ar_fa2ar_quran_bleu":0.134873588},"nlg_score":0.1779340777}
{"Model Name":"Qwen3-32B","model_url":"https_google.com","parameters_count":"32800000000","source_type":"Open-Source","question-generation_PersianQA":{"question-generation_PersianQA_rougeL_precision":0.2281053588,"question-generation_PersianQA_rougeL_recall":0.370933314,"question-generation_PersianQA_rougeL_f1_score":0.273363418},"translation-en2fa_en2fa":{"translation-en2fa_en2fa_bleu":0.1623218856,"translation-en2fa_en2fa_epoque_bleu":0.3677879105,"translation-en2fa_en2fa_mizan_bleu":0.147599732,"translation-en2fa_en2fa_quran_bleu":0.0938457658,"translation-en2fa_en2fa_sahife_bleu":0.0698903005,"translation-en2fa_en2fa_nahj_bleu":0.0435129812,"translation-en2fa_en2fa_tep_bleu":0.0620337306},"summarization_SamSUM-fa":{"summarization_SamSUM-fa_rougeL_precision":0.1514618398,"summarization_SamSUM-fa_rougeL_recall":0.3683020708,"summarization_SamSUM-fa_rougeL_f1_score":0.2063212948},"translation-fa2en_fa2en":{"translation-fa2en_fa2en_bleu":0.217991447,"translation-fa2en_fa2en_tep_bleu":0.1271542443,"translation-fa2en_fa2en_mizan_bleu":0.1728081337,"translation-fa2en_fa2en_quran_bleu":0.158860515,"translation-fa2en_fa2en_epoque_bleu":0.4572670962,"translation-fa2en_fa2en_nahj_bleu":0.0902445729,"translation-fa2en_fa2en_sahife_bleu":0.0945000287},"translation-ar2fa_ar2fa":{"translation-ar2fa_ar2fa_bleu":0.0985860814,"translation-ar2fa_ar2fa_sahife_bleu":0.0857687109,"translation-ar2fa_ar2fa_nahj_bleu":0.0622600203,"translation-ar2fa_ar2fa_quran_bleu":0.1459132099},"summarization_PnSummary":{"summarization_PnSummary_rougeL_precision":0.1089978489,"summarization_PnSummary_rougeL_recall":0.3936021933,"summarization_PnSummary_rougeL_f1_score":0.1662525669},"translation-fa2ar_fa2ar":{"translation-fa2ar_fa2ar_bleu":0.0507003531,"translation-fa2ar_fa2ar_nahj_bleu":0.0316047659,"translation-fa2ar_fa2ar_sahife_bleu":0.0534488007,"translation-fa2ar_fa2ar_quran_bleu":0.0670474926},"nlg_score":0.1679338638}
{"Model Name":"gpt-4.1-nano","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","question-generation_PersianQA":{"question-generation_PersianQA_rougeL_precision":0.1622159789,"question-generation_PersianQA_rougeL_recall":0.302597472,"question-generation_PersianQA_rougeL_f1_score":0.2021048057},"translation-en2fa_en2fa":{"translation-en2fa_en2fa_bleu":0.1831593088,"translation-en2fa_en2fa_epoque_bleu":0.4052150706,"translation-en2fa_en2fa_mizan_bleu":0.1692823494,"translation-en2fa_en2fa_quran_bleu":0.1400476579,"translation-en2fa_en2fa_sahife_bleu":0.0812805634,"translation-en2fa_en2fa_nahj_bleu":0.048146149,"translation-en2fa_en2fa_tep_bleu":0.0610881446},"summarization_SamSUM-fa":{"summarization_SamSUM-fa_rougeL_precision":0.16175156,"summarization_SamSUM-fa_rougeL_recall":0.3477483743,"summarization_SamSUM-fa_rougeL_f1_score":0.209834706},"translation-fa2en_fa2en":{"translation-fa2en_fa2en_bleu":0.2165819036,"translation-fa2en_fa2en_tep_bleu":0.13491043,"translation-fa2en_fa2en_mizan_bleu":0.1810957829,"translation-fa2en_fa2en_quran_bleu":0.164168601,"translation-fa2en_fa2en_epoque_bleu":0.4383628208,"translation-fa2en_fa2en_nahj_bleu":0.0942939662,"translation-fa2en_fa2en_sahife_bleu":0.0827637394},"translation-ar2fa_ar2fa":{"translation-ar2fa_ar2fa_bleu":0.1315367808,"translation-ar2fa_ar2fa_sahife_bleu":0.1063921688,"translation-ar2fa_ar2fa_nahj_bleu":0.0642188893,"translation-ar2fa_ar2fa_quran_bleu":0.2206333896},"summarization_PnSummary":{"summarization_PnSummary_rougeL_precision":0.1120916238,"summarization_PnSummary_rougeL_recall":0.3610411286,"summarization_PnSummary_rougeL_f1_score":0.1660826543},"translation-fa2ar_fa2ar":{"translation-fa2ar_fa2ar_bleu":0.0568324844,"translation-fa2ar_fa2ar_nahj_bleu":0.03267488,"translation-fa2ar_fa2ar_sahife_bleu":0.0579381183,"translation-fa2ar_fa2ar_quran_bleu":0.0798844549},"nlg_score":0.1665903777}
{"Model Name":"c4ai-command-r-v01","model_url":"https_google.com","parameters_count":"35000000000","source_type":"Open-Source","question-generation_PersianQA":{"question-generation_PersianQA_rougeL_precision":0.1546246184,"question-generation_PersianQA_rougeL_recall":0.253394795,"question-generation_PersianQA_rougeL_f1_score":0.1829113647},"translation-en2fa_en2fa":{"translation-en2fa_en2fa_bleu":0.1506934102,"translation-en2fa_en2fa_epoque_bleu":0.2951032905,"translation-en2fa_en2fa_mizan_bleu":0.1500681264,"translation-en2fa_en2fa_quran_bleu":0.1104277702,"translation-en2fa_en2fa_sahife_bleu":0.092222972,"translation-en2fa_en2fa_nahj_bleu":0.0497623005,"translation-en2fa_en2fa_tep_bleu":0.0692905167},"summarization_SamSUM-fa":{"summarization_SamSUM-fa_rougeL_precision":0.1944265929,"summarization_SamSUM-fa_rougeL_recall":0.3761499249,"summarization_SamSUM-fa_rougeL_f1_score":0.242617187},"translation-fa2en_fa2en":{"translation-fa2en_fa2en_bleu":0.1892370035,"translation-fa2en_fa2en_tep_bleu":0.1290684643,"translation-fa2en_fa2en_mizan_bleu":0.1721408901,"translation-fa2en_fa2en_quran_bleu":0.1736791408,"translation-fa2en_fa2en_epoque_bleu":0.346100597,"translation-fa2en_fa2en_nahj_bleu":0.0776400174,"translation-fa2en_fa2en_sahife_bleu":0.08279759},"translation-ar2fa_ar2fa":{"translation-ar2fa_ar2fa_bleu":0.1144863268,"translation-ar2fa_ar2fa_sahife_bleu":0.1190971594,"translation-ar2fa_ar2fa_nahj_bleu":0.0648109303,"translation-ar2fa_ar2fa_quran_bleu":0.157067121},"summarization_PnSummary":{"summarization_PnSummary_rougeL_precision":0.1298447221,"summarization_PnSummary_rougeL_recall":0.3548911672,"summarization_PnSummary_rougeL_f1_score":0.1841564462},"translation-fa2ar_fa2ar":{"translation-fa2ar_fa2ar_bleu":0.0852951832,"translation-fa2ar_fa2ar_nahj_bleu":0.0464072569,"translation-fa2ar_fa2ar_sahife_bleu":0.0713426227,"translation-fa2ar_fa2ar_quran_bleu":0.1381356701},"nlg_score":0.1641995602}
{"Model Name":"Qwen3-30B-A3B","model_url":"https_google.com","parameters_count":"30500000000","source_type":"Open-Source","question-generation_PersianQA":{"question-generation_PersianQA_rougeL_precision":0.1996840686,"question-generation_PersianQA_rougeL_recall":0.3393114266,"question-generation_PersianQA_rougeL_f1_score":0.2417040176},"translation-en2fa_en2fa":{"translation-en2fa_en2fa_bleu":0.1617787549,"translation-en2fa_en2fa_epoque_bleu":0.3821529147,"translation-en2fa_en2fa_mizan_bleu":0.1337537913,"translation-en2fa_en2fa_quran_bleu":0.0860909143,"translation-en2fa_en2fa_sahife_bleu":0.0770506908,"translation-en2fa_en2fa_nahj_bleu":0.0441728515,"translation-en2fa_en2fa_tep_bleu":0.0587014819},"summarization_SamSUM-fa":{"summarization_SamSUM-fa_rougeL_precision":0.1523824175,"summarization_SamSUM-fa_rougeL_recall":0.3838683519,"summarization_SamSUM-fa_rougeL_f1_score":0.2083553767},"translation-fa2en_fa2en":{"translation-fa2en_fa2en_bleu":0.2177785793,"translation-fa2en_fa2en_tep_bleu":0.1189948472,"translation-fa2en_fa2en_mizan_bleu":0.1793626928,"translation-fa2en_fa2en_quran_bleu":0.1718006478,"translation-fa2en_fa2en_epoque_bleu":0.4500382308,"translation-fa2en_fa2en_nahj_bleu":0.0836776138,"translation-fa2en_fa2en_sahife_bleu":0.1034067477},"translation-ar2fa_ar2fa":{"translation-ar2fa_ar2fa_bleu":0.090408759,"translation-ar2fa_ar2fa_sahife_bleu":0.0778953352,"translation-ar2fa_ar2fa_nahj_bleu":0.0610049198,"translation-ar2fa_ar2fa_quran_bleu":0.13085583},"summarization_PnSummary":{"summarization_PnSummary_rougeL_precision":0.1212751301,"summarization_PnSummary_rougeL_recall":0.3923323141,"summarization_PnSummary_rougeL_f1_score":0.1804727387},"translation-fa2ar_fa2ar":{"translation-fa2ar_fa2ar_bleu":0.0483297895,"translation-fa2ar_fa2ar_nahj_bleu":0.0310247441,"translation-fa2ar_fa2ar_sahife_bleu":0.0512375201,"translation-fa2ar_fa2ar_quran_bleu":0.0627271043},"nlg_score":0.164118288}
{"Model Name":"Qwen3-14B","model_url":"https_google.com","parameters_count":"14800000000","source_type":"Open-Source","question-generation_PersianQA":{"question-generation_PersianQA_rougeL_precision":0.1987198912,"question-generation_PersianQA_rougeL_recall":0.3431437262,"question-generation_PersianQA_rougeL_f1_score":0.2419384398},"translation-en2fa_en2fa":{"translation-en2fa_en2fa_bleu":0.1535253787,"translation-en2fa_en2fa_epoque_bleu":0.3553678809,"translation-en2fa_en2fa_mizan_bleu":0.1285441922,"translation-en2fa_en2fa_quran_bleu":0.0857809616,"translation-en2fa_en2fa_sahife_bleu":0.0787025343,"translation-en2fa_en2fa_nahj_bleu":0.0404850935,"translation-en2fa_en2fa_tep_bleu":0.0586129062},"summarization_SamSUM-fa":{"summarization_SamSUM-fa_rougeL_precision":0.1469468837,"summarization_SamSUM-fa_rougeL_recall":0.3743807014,"summarization_SamSUM-fa_rougeL_f1_score":0.2022859929},"translation-fa2en_fa2en":{"translation-fa2en_fa2en_bleu":0.2145488085,"translation-fa2en_fa2en_tep_bleu":0.1307272464,"translation-fa2en_fa2en_mizan_bleu":0.1697754862,"translation-fa2en_fa2en_quran_bleu":0.1552415558,"translation-fa2en_fa2en_epoque_bleu":0.4513682579,"translation-fa2en_fa2en_nahj_bleu":0.0842673472,"translation-fa2en_fa2en_sahife_bleu":0.0853787118},"translation-ar2fa_ar2fa":{"translation-ar2fa_ar2fa_bleu":0.0910450298,"translation-ar2fa_ar2fa_sahife_bleu":0.0862679894,"translation-ar2fa_ar2fa_nahj_bleu":0.0558129824,"translation-ar2fa_ar2fa_quran_bleu":0.1292925153},"summarization_PnSummary":{"summarization_PnSummary_rougeL_precision":0.1123870374,"summarization_PnSummary_rougeL_recall":0.4032007327,"summarization_PnSummary_rougeL_f1_score":0.17115848},"translation-fa2ar_fa2ar":{"translation-fa2ar_fa2ar_bleu":0.0494411806,"translation-fa2ar_fa2ar_nahj_bleu":0.0369805868,"translation-fa2ar_fa2ar_sahife_bleu":0.0567654991,"translation-fa2ar_fa2ar_quran_bleu":0.0545774559},"nlg_score":0.16056333}
{"Model Name":"Qwen3-8B","model_url":"https_google.com","parameters_count":"8190000000","source_type":"Open-Source","question-generation_PersianQA":{"question-generation_PersianQA_rougeL_precision":0.1965366702,"question-generation_PersianQA_rougeL_recall":0.340760284,"question-generation_PersianQA_rougeL_f1_score":0.2388923895},"translation-en2fa_en2fa":{"translation-en2fa_en2fa_bleu":0.1550276898,"translation-en2fa_en2fa_epoque_bleu":0.3721582216,"translation-en2fa_en2fa_mizan_bleu":0.1231599039,"translation-en2fa_en2fa_quran_bleu":0.0882213453,"translation-en2fa_en2fa_sahife_bleu":0.0725213197,"translation-en2fa_en2fa_nahj_bleu":0.0424186358,"translation-en2fa_en2fa_tep_bleu":0.0528718634},"summarization_SamSUM-fa":{"summarization_SamSUM-fa_rougeL_precision":0.1463365551,"summarization_SamSUM-fa_rougeL_recall":0.3856017289,"summarization_SamSUM-fa_rougeL_f1_score":0.2024070197},"translation-fa2en_fa2en":{"translation-fa2en_fa2en_bleu":0.2024225184,"translation-fa2en_fa2en_tep_bleu":0.1163127945,"translation-fa2en_fa2en_mizan_bleu":0.1649009947,"translation-fa2en_fa2en_quran_bleu":0.1513328968,"translation-fa2en_fa2en_epoque_bleu":0.4171232399,"translation-fa2en_fa2en_nahj_bleu":0.0857999462,"translation-fa2en_fa2en_sahife_bleu":0.0929479364},"translation-ar2fa_ar2fa":{"translation-ar2fa_ar2fa_bleu":0.0821020713,"translation-ar2fa_ar2fa_sahife_bleu":0.0730469461,"translation-ar2fa_ar2fa_nahj_bleu":0.0579031327,"translation-ar2fa_ar2fa_quran_bleu":0.1141461882},"summarization_PnSummary":{"summarization_PnSummary_rougeL_precision":0.109255822,"summarization_PnSummary_rougeL_recall":0.3979273385,"summarization_PnSummary_rougeL_f1_score":0.1669061111},"translation-fa2ar_fa2ar":{"translation-fa2ar_fa2ar_bleu":0.0423318046,"translation-fa2ar_fa2ar_nahj_bleu":0.0329089717,"translation-fa2ar_fa2ar_sahife_bleu":0.0445101244,"translation-fa2ar_fa2ar_quran_bleu":0.0495763178},"nlg_score":0.1557270864}
{"Model Name":"Qwen3-4B","model_url":"https_google.com","parameters_count":"4020000000","source_type":"Open-Source","question-generation_PersianQA":{"question-generation_PersianQA_rougeL_precision":0.1744197112,"question-generation_PersianQA_rougeL_recall":0.2697024508,"question-generation_PersianQA_rougeL_f1_score":0.2017710943},"translation-en2fa_en2fa":{"translation-en2fa_en2fa_bleu":0.1217211215,"translation-en2fa_en2fa_epoque_bleu":0.2916268514,"translation-en2fa_en2fa_mizan_bleu":0.091925603,"translation-en2fa_en2fa_quran_bleu":0.065498518,"translation-en2fa_en2fa_sahife_bleu":0.0612237455,"translation-en2fa_en2fa_nahj_bleu":0.0385824628,"translation-en2fa_en2fa_tep_bleu":0.0453883692},"summarization_SamSUM-fa":{"summarization_SamSUM-fa_rougeL_precision":0.1429609514,"summarization_SamSUM-fa_rougeL_recall":0.397717388,"summarization_SamSUM-fa_rougeL_f1_score":0.2013136641},"translation-fa2en_fa2en":{"translation-fa2en_fa2en_bleu":0.1840809218,"translation-fa2en_fa2en_tep_bleu":0.1011436783,"translation-fa2en_fa2en_mizan_bleu":0.149157222,"translation-fa2en_fa2en_quran_bleu":0.1377761662,"translation-fa2en_fa2en_epoque_bleu":0.3802946233,"translation-fa2en_fa2en_nahj_bleu":0.0851756367,"translation-fa2en_fa2en_sahife_bleu":0.0857201524},"translation-ar2fa_ar2fa":{"translation-ar2fa_ar2fa_bleu":0.0636385541,"translation-ar2fa_ar2fa_sahife_bleu":0.0557180428,"translation-ar2fa_ar2fa_nahj_bleu":0.0539968488,"translation-ar2fa_ar2fa_quran_bleu":0.0807186853},"summarization_PnSummary":{"summarization_PnSummary_rougeL_precision":0.1067208324,"summarization_PnSummary_rougeL_recall":0.4109136551,"summarization_PnSummary_rougeL_f1_score":0.1648475797},"translation-fa2ar_fa2ar":{"translation-fa2ar_fa2ar_bleu":0.0351351131,"translation-fa2ar_fa2ar_nahj_bleu":0.0313503027,"translation-fa2ar_fa2ar_sahife_bleu":0.042075565,"translation-fa2ar_fa2ar_quran_bleu":0.0319794715},"nlg_score":0.1389297212}
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","model_url":"https_google.com","parameters_count":"24000000000","source_type":"Open-Source","question-generation_PersianQA":{"question-generation_PersianQA_rougeL_precision":0.1520819517,"question-generation_PersianQA_rougeL_recall":0.26324767,"question-generation_PersianQA_rougeL_f1_score":0.1843401988},"translation-en2fa_en2fa":{"translation-en2fa_en2fa_bleu":0.1458447378,"translation-en2fa_en2fa_epoque_bleu":0.3541508677,"translation-en2fa_en2fa_mizan_bleu":0.1259468635,"translation-en2fa_en2fa_quran_bleu":0.0887225632,"translation-en2fa_en2fa_sahife_bleu":0.0672732746,"translation-en2fa_en2fa_nahj_bleu":0.0407327793,"translation-en2fa_en2fa_tep_bleu":0.0293172873},"summarization_SamSUM-fa":{"summarization_SamSUM-fa_rougeL_precision":0.147286408,"summarization_SamSUM-fa_rougeL_recall":0.4066657958,"summarization_SamSUM-fa_rougeL_f1_score":0.2072278176},"translation-fa2en_fa2en":{"translation-fa2en_fa2en_bleu":0.1451163884,"translation-fa2en_fa2en_tep_bleu":0.0393307601,"translation-fa2en_fa2en_mizan_bleu":0.1009347025,"translation-fa2en_fa2en_quran_bleu":0.0929688918,"translation-fa2en_fa2en_epoque_bleu":0.3660914464,"translation-fa2en_fa2en_nahj_bleu":0.0536507876,"translation-fa2en_fa2en_sahife_bleu":0.05038339},"translation-ar2fa_ar2fa":{"translation-ar2fa_ar2fa_bleu":0.0594554675,"translation-ar2fa_ar2fa_sahife_bleu":0.0539986603,"translation-ar2fa_ar2fa_nahj_bleu":0.035240584,"translation-ar2fa_ar2fa_quran_bleu":0.0879164142},"summarization_PnSummary":{"summarization_PnSummary_rougeL_precision":0.1029257145,"summarization_PnSummary_rougeL_recall":0.4347811424,"summarization_PnSummary_rougeL_f1_score":0.1621438757},"translation-fa2ar_fa2ar":{"translation-fa2ar_fa2ar_bleu":0.0192357288,"translation-fa2ar_fa2ar_nahj_bleu":0.0151369319,"translation-fa2ar_fa2ar_sahife_bleu":0.0245784397,"translation-fa2ar_fa2ar_quran_bleu":0.0179918148},"nlg_score":0.1319091735}
{"Model Name":"gemma-3-12b-it","model_url":"https_google.com","parameters_count":"12200000000","source_type":"Open-Source","question-generation_PersianQA":{"question-generation_PersianQA_rougeL_precision":0.2572991833,"question-generation_PersianQA_rougeL_recall":0.3740225235,"question-generation_PersianQA_rougeL_f1_score":0.2927586837},"translation-en2fa_en2fa":{"translation-en2fa_en2fa_bleu":0.08817485,"translation-en2fa_en2fa_epoque_bleu":0.1886801725,"translation-en2fa_en2fa_mizan_bleu":0.0879987558,"translation-en2fa_en2fa_quran_bleu":0.0657922023,"translation-en2fa_en2fa_sahife_bleu":0.0296141618,"translation-en2fa_en2fa_nahj_bleu":0.0192266597,"translation-en2fa_en2fa_tep_bleu":0.0366296874},"summarization_SamSUM-fa":{"summarization_SamSUM-fa_rougeL_precision":0.1658145118,"summarization_SamSUM-fa_rougeL_recall":0.3677760479,"summarization_SamSUM-fa_rougeL_f1_score":0.2189237562},"translation-fa2en_fa2en":{"translation-fa2en_fa2en_bleu":0.0257184881,"translation-fa2en_fa2en_tep_bleu":0.011593122,"translation-fa2en_fa2en_mizan_bleu":0.0215328963,"translation-fa2en_fa2en_quran_bleu":0.0262056878,"translation-fa2en_fa2en_epoque_bleu":0.047221295,"translation-fa2en_fa2en_nahj_bleu":0.0178557856,"translation-fa2en_fa2en_sahife_bleu":0.0169922826},"translation-ar2fa_ar2fa":{"translation-ar2fa_ar2fa_bleu":0.032619077,"translation-ar2fa_ar2fa_sahife_bleu":0.0333185867,"translation-ar2fa_ar2fa_nahj_bleu":0.0106299838,"translation-ar2fa_ar2fa_quran_bleu":0.0528092057},"summarization_PnSummary":{"summarization_PnSummary_rougeL_precision":0.1156871575,"summarization_PnSummary_rougeL_recall":0.3630716995,"summarization_PnSummary_rougeL_f1_score":0.1697348346},"translation-fa2ar_fa2ar":{"translation-fa2ar_fa2ar_bleu":0.0098333287,"translation-fa2ar_fa2ar_nahj_bleu":0.0072190824,"translation-fa2ar_fa2ar_sahife_bleu":0.0110570977,"translation-fa2ar_fa2ar_quran_bleu":0.0112238061},"nlg_score":0.1196804312}
{"Model Name":"aya-expanse-32b","model_url":"https_google.com","parameters_count":"32300000000","source_type":"Open-Source","question-generation_PersianQA":{"question-generation_PersianQA_rougeL_precision":0.0980160864,"question-generation_PersianQA_rougeL_recall":0.347983913,"question-generation_PersianQA_rougeL_f1_score":0.1443872083},"translation-en2fa_en2fa":{"translation-en2fa_en2fa_bleu":0.0951102411,"translation-en2fa_en2fa_epoque_bleu":0.2204131973,"translation-en2fa_en2fa_mizan_bleu":0.0772021612,"translation-en2fa_en2fa_quran_bleu":0.0914129011,"translation-en2fa_en2fa_sahife_bleu":0.0555605793,"translation-en2fa_en2fa_nahj_bleu":0.0296371925,"translation-en2fa_en2fa_tep_bleu":0.0145962694},"summarization_SamSUM-fa":{"summarization_SamSUM-fa_rougeL_precision":0.1338082958,"summarization_SamSUM-fa_rougeL_recall":0.397938928,"summarization_SamSUM-fa_rougeL_f1_score":0.1933390916},"translation-fa2en_fa2en":{"translation-fa2en_fa2en_bleu":0.14443262,"translation-fa2en_fa2en_tep_bleu":0.0636878051,"translation-fa2en_fa2en_mizan_bleu":0.1045784226,"translation-fa2en_fa2en_quran_bleu":0.1065169191,"translation-fa2en_fa2en_epoque_bleu":0.3331896819,"translation-fa2en_fa2en_nahj_bleu":0.0573420672,"translation-fa2en_fa2en_sahife_bleu":0.0526154809},"translation-ar2fa_ar2fa":{"translation-ar2fa_ar2fa_bleu":0.0608470749,"translation-ar2fa_ar2fa_sahife_bleu":0.0636783644,"translation-ar2fa_ar2fa_nahj_bleu":0.0258604511,"translation-ar2fa_ar2fa_quran_bleu":0.091253078},"summarization_PnSummary":{"summarization_PnSummary_rougeL_precision":0.1094933227,"summarization_PnSummary_rougeL_recall":0.3979476995,"summarization_PnSummary_rougeL_f1_score":0.1674664883},"translation-fa2ar_fa2ar":{"translation-fa2ar_fa2ar_bleu":0.0318976507,"translation-fa2ar_fa2ar_nahj_bleu":0.0222927973,"translation-fa2ar_fa2ar_sahife_bleu":0.0296757253,"translation-fa2ar_fa2ar_quran_bleu":0.0437244293},"nlg_score":0.1196400535}
{"Model Name":"Llama-3.2-3B-Instruct","model_url":"https_google.com","parameters_count":"3210000000","source_type":"Open-Source","question-generation_PersianQA":{"question-generation_PersianQA_rougeL_precision":0.1264186031,"question-generation_PersianQA_rougeL_recall":0.2582953109,"question-generation_PersianQA_rougeL_f1_score":0.1600835412},"translation-en2fa_en2fa":{"translation-en2fa_en2fa_bleu":0.1074044673,"translation-en2fa_en2fa_epoque_bleu":0.2689676347,"translation-en2fa_en2fa_mizan_bleu":0.0784179406,"translation-en2fa_en2fa_quran_bleu":0.0573255404,"translation-en2fa_en2fa_sahife_bleu":0.0534655564,"translation-en2fa_en2fa_nahj_bleu":0.0373749355,"translation-en2fa_en2fa_tep_bleu":0.0279497965},"summarization_SamSUM-fa":{"summarization_SamSUM-fa_rougeL_precision":0.1385750553,"summarization_SamSUM-fa_rougeL_recall":0.3133561002,"summarization_SamSUM-fa_rougeL_f1_score":0.1819150852},"translation-fa2en_fa2en":{"translation-fa2en_fa2en_bleu":0.0862123314,"translation-fa2en_fa2en_tep_bleu":0.0334491641,"translation-fa2en_fa2en_mizan_bleu":0.0758837027,"translation-fa2en_fa2en_quran_bleu":0.0892296624,"translation-fa2en_fa2en_epoque_bleu":0.1688644918,"translation-fa2en_fa2en_nahj_bleu":0.042819328,"translation-fa2en_fa2en_sahife_bleu":0.0473482715},"translation-ar2fa_ar2fa":{"translation-ar2fa_ar2fa_bleu":0.0381647164,"translation-ar2fa_ar2fa_sahife_bleu":0.0517672982,"translation-ar2fa_ar2fa_nahj_bleu":0.0235396776,"translation-ar2fa_ar2fa_quran_bleu":0.0384559215},"summarization_PnSummary":{"summarization_PnSummary_rougeL_precision":0.151465786,"summarization_PnSummary_rougeL_recall":0.3775823327,"summarization_PnSummary_rougeL_f1_score":0.203395452},"translation-fa2ar_fa2ar":{"translation-fa2ar_fa2ar_bleu":0.0136530376,"translation-fa2ar_fa2ar_nahj_bleu":0.0110489285,"translation-fa2ar_fa2ar_sahife_bleu":0.0135009036,"translation-fa2ar_fa2ar_quran_bleu":0.0164092807},"nlg_score":0.1129755187}
{"Model Name":"claude-3-5-haiku-20241022","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","question-generation_PersianQA":{"question-generation_PersianQA_rougeL_precision":0.0870939736,"question-generation_PersianQA_rougeL_recall":0.3600941065,"question-generation_PersianQA_rougeL_f1_score":0.1336375958},"translation-en2fa_en2fa":{"translation-en2fa_en2fa_bleu":0.056370201,"translation-en2fa_en2fa_epoque_bleu":0.1154392548,"translation-en2fa_en2fa_mizan_bleu":0.0484324583,"translation-en2fa_en2fa_quran_bleu":0.0612465488,"translation-en2fa_en2fa_sahife_bleu":0.0466818991,"translation-en2fa_en2fa_nahj_bleu":0.0218444477,"translation-en2fa_en2fa_tep_bleu":0.0118186665},"summarization_SamSUM-fa":{"summarization_SamSUM-fa_rougeL_precision":0.1772724525,"summarization_SamSUM-fa_rougeL_recall":0.341583677,"summarization_SamSUM-fa_rougeL_f1_score":0.2233271064},"translation-fa2en_fa2en":{"translation-fa2en_fa2en_bleu":0.0691353117,"translation-fa2en_fa2en_tep_bleu":0.0320908261,"translation-fa2en_fa2en_mizan_bleu":0.0535229905,"translation-fa2en_fa2en_quran_bleu":0.0800143919,"translation-fa2en_fa2en_epoque_bleu":0.133977443,"translation-fa2en_fa2en_nahj_bleu":0.0362958954,"translation-fa2en_fa2en_sahife_bleu":0.0393317574},"translation-ar2fa_ar2fa":{"translation-ar2fa_ar2fa_bleu":0.0581992714,"translation-ar2fa_ar2fa_sahife_bleu":0.0540221076,"translation-ar2fa_ar2fa_nahj_bleu":0.0233017704,"translation-ar2fa_ar2fa_quran_bleu":0.095529061},"summarization_PnSummary":{"summarization_PnSummary_rougeL_precision":0.132916538,"summarization_PnSummary_rougeL_recall":0.3579358655,"summarization_PnSummary_rougeL_f1_score":0.1887379797},"translation-fa2ar_fa2ar":{"translation-fa2ar_fa2ar_bleu":0.0331262129,"translation-fa2ar_fa2ar_nahj_bleu":0.0202107323,"translation-fa2ar_fa2ar_sahife_bleu":0.0280883311,"translation-fa2ar_fa2ar_quran_bleu":0.0510795752},"nlg_score":0.1089333827}
{"Model Name":"gemma-3-27b-it","model_url":"https_google.com","parameters_count":"27400000000","source_type":"Open-Source","question-generation_PersianQA":{"question-generation_PersianQA_rougeL_precision":0.2041596361,"question-generation_PersianQA_rougeL_recall":0.3456815337,"question-generation_PersianQA_rougeL_f1_score":0.2459732807},"translation-en2fa_en2fa":{"translation-en2fa_en2fa_bleu":0.0439502467,"translation-en2fa_en2fa_epoque_bleu":0.0932804064,"translation-en2fa_en2fa_mizan_bleu":0.0446467932,"translation-en2fa_en2fa_quran_bleu":0.0435800727,"translation-en2fa_en2fa_sahife_bleu":0.0197005921,"translation-en2fa_en2fa_nahj_bleu":0.0132822652,"translation-en2fa_en2fa_tep_bleu":0.0087342692},"summarization_SamSUM-fa":{"summarization_SamSUM-fa_rougeL_precision":0.1588367988,"summarization_SamSUM-fa_rougeL_recall":0.3735722635,"summarization_SamSUM-fa_rougeL_f1_score":0.2131671502},"translation-fa2en_fa2en":{"translation-fa2en_fa2en_bleu":0.0199585579,"translation-fa2en_fa2en_tep_bleu":0.0097804397,"translation-fa2en_fa2en_mizan_bleu":0.0144809896,"translation-fa2en_fa2en_quran_bleu":0.0259691427,"translation-fa2en_fa2en_epoque_bleu":0.0345304173,"translation-fa2en_fa2en_nahj_bleu":0.0150589625,"translation-fa2en_fa2en_sahife_bleu":0.0157047184},"translation-ar2fa_ar2fa":{"translation-ar2fa_ar2fa_bleu":0.0465792583,"translation-ar2fa_ar2fa_sahife_bleu":0.023795336,"translation-ar2fa_ar2fa_nahj_bleu":0.0121091058,"translation-ar2fa_ar2fa_quran_bleu":0.1021098256},"summarization_PnSummary":{"summarization_PnSummary_rougeL_precision":0.1124574222,"summarization_PnSummary_rougeL_recall":0.3717393409,"summarization_PnSummary_rougeL_f1_score":0.1673025553},"translation-fa2ar_fa2ar":{"translation-fa2ar_fa2ar_bleu":0.0100630648,"translation-fa2ar_fa2ar_nahj_bleu":0.0071647909,"translation-fa2ar_fa2ar_sahife_bleu":0.0101185743,"translation-fa2ar_fa2ar_quran_bleu":0.0129058292},"nlg_score":0.1067134448}
{"Model Name":"gemma-3-4b-it","model_url":"https_google.com","parameters_count":"4300000000","source_type":"Open-Source","question-generation_PersianQA":{"question-generation_PersianQA_rougeL_precision":0.1704020873,"question-generation_PersianQA_rougeL_recall":0.3000756202,"question-generation_PersianQA_rougeL_f1_score":0.2079039891},"translation-en2fa_en2fa":{"translation-en2fa_en2fa_bleu":0.0366912467,"translation-en2fa_en2fa_epoque_bleu":0.0623359898,"translation-en2fa_en2fa_mizan_bleu":0.0442763597,"translation-en2fa_en2fa_quran_bleu":0.0309309044,"translation-en2fa_en2fa_sahife_bleu":0.0330663757,"translation-en2fa_en2fa_nahj_bleu":0.0124767847,"translation-en2fa_en2fa_tep_bleu":0.0116612774},"summarization_SamSUM-fa":{"summarization_SamSUM-fa_rougeL_precision":0.1638274716,"summarization_SamSUM-fa_rougeL_recall":0.3535878882,"summarization_SamSUM-fa_rougeL_f1_score":0.2134854664},"translation-fa2en_fa2en":{"translation-fa2en_fa2en_bleu":0.016856047,"translation-fa2en_fa2en_tep_bleu":0.0085125001,"translation-fa2en_fa2en_mizan_bleu":0.013661635,"translation-fa2en_fa2en_quran_bleu":0.0181666202,"translation-fa2en_fa2en_epoque_bleu":0.0301282339,"translation-fa2en_fa2en_nahj_bleu":0.0122360126,"translation-fa2en_fa2en_sahife_bleu":0.0110323989},"translation-ar2fa_ar2fa":{"translation-ar2fa_ar2fa_bleu":0.0186923531,"translation-ar2fa_ar2fa_sahife_bleu":0.0174521967,"translation-ar2fa_ar2fa_nahj_bleu":0.0097734226,"translation-ar2fa_ar2fa_quran_bleu":0.0284054936},"summarization_PnSummary":{"summarization_PnSummary_rougeL_precision":0.1095844839,"summarization_PnSummary_rougeL_recall":0.3735331299,"summarization_PnSummary_rougeL_f1_score":0.1645385252},"translation-fa2ar_fa2ar":{"translation-fa2ar_fa2ar_bleu":0.0067928767,"translation-fa2ar_fa2ar_nahj_bleu":0.0056689454,"translation-fa2ar_fa2ar_sahife_bleu":0.009024465,"translation-fa2ar_fa2ar_quran_bleu":0.0056852198},"nlg_score":0.0949943578}
{"Model Name":"Mistral-7B-Instruct-v0.3","model_url":"https_google.com","parameters_count":"7250000000","source_type":"Open-Source","question-generation_PersianQA":{"question-generation_PersianQA_rougeL_precision":0.1127092702,"question-generation_PersianQA_rougeL_recall":0.2982763168,"question-generation_PersianQA_rougeL_f1_score":0.1525970768},"translation-en2fa_en2fa":{"translation-en2fa_en2fa_bleu":0.0472831089,"translation-en2fa_en2fa_epoque_bleu":0.0950858392,"translation-en2fa_en2fa_mizan_bleu":0.0348348322,"translation-en2fa_en2fa_quran_bleu":0.0417444578,"translation-en2fa_en2fa_sahife_bleu":0.044168541,"translation-en2fa_en2fa_nahj_bleu":0.0239185439,"translation-en2fa_en2fa_tep_bleu":0.0188699837},"summarization_SamSUM-fa":{"summarization_SamSUM-fa_rougeL_precision":0.1180795687,"summarization_SamSUM-fa_rougeL_recall":0.3922712004,"summarization_SamSUM-fa_rougeL_f1_score":0.170765794},"translation-fa2en_fa2en":{"translation-fa2en_fa2en_bleu":0.0901939948,"translation-fa2en_fa2en_tep_bleu":0.0521908916,"translation-fa2en_fa2en_mizan_bleu":0.0828690879,"translation-fa2en_fa2en_quran_bleu":0.0756298248,"translation-fa2en_fa2en_epoque_bleu":0.1645619674,"translation-fa2en_fa2en_nahj_bleu":0.048616237,"translation-fa2en_fa2en_sahife_bleu":0.0518842318},"translation-ar2fa_ar2fa":{"translation-ar2fa_ar2fa_bleu":0.0352516229,"translation-ar2fa_ar2fa_sahife_bleu":0.031818336,"translation-ar2fa_ar2fa_nahj_bleu":0.0219225394,"translation-ar2fa_ar2fa_quran_bleu":0.0513475391},"summarization_PnSummary":{"summarization_PnSummary_rougeL_precision":0.0921640152,"summarization_PnSummary_rougeL_recall":0.4401953868,"summarization_PnSummary_rougeL_f1_score":0.1480945013},"translation-fa2ar_fa2ar":{"translation-fa2ar_fa2ar_bleu":0.0167121698,"translation-fa2ar_fa2ar_nahj_bleu":0.0182214992,"translation-fa2ar_fa2ar_sahife_bleu":0.0203567578,"translation-fa2ar_fa2ar_quran_bleu":0.0115582526},"nlg_score":0.0944140383}
{"Model Name":"deepseek-chat","model_url":"https_google.com","parameters_count":"671000000000","source_type":"Open-Source","question-generation_PersianQA":{"question-generation_PersianQA_rougeL_precision":0.1939037413,"question-generation_PersianQA_rougeL_recall":0.4070822245,"question-generation_PersianQA_rougeL_f1_score":0.2439578999},"translation-en2fa_en2fa":{"translation-en2fa_en2fa_bleu":0.0438887609,"translation-en2fa_en2fa_epoque_bleu":0.0714603918,"translation-en2fa_en2fa_mizan_bleu":0.0595250407,"translation-en2fa_en2fa_quran_bleu":0.0428487402,"translation-en2fa_en2fa_sahife_bleu":0.0258372032,"translation-en2fa_en2fa_nahj_bleu":0.0133722454,"translation-en2fa_en2fa_tep_bleu":0.0142899909},"summarization_SamSUM-fa":{"summarization_SamSUM-fa_rougeL_precision":0.0893319419,"summarization_SamSUM-fa_rougeL_recall":0.3701712252,"summarization_SamSUM-fa_rougeL_f1_score":0.1392333016},"translation-fa2en_fa2en":{"translation-fa2en_fa2en_bleu":0.0450244679,"translation-fa2en_fa2en_tep_bleu":0.0166138322,"translation-fa2en_fa2en_mizan_bleu":0.0478141187,"translation-fa2en_fa2en_quran_bleu":0.0426202225,"translation-fa2en_fa2en_epoque_bleu":0.0802277942,"translation-fa2en_fa2en_nahj_bleu":0.0252662094,"translation-fa2en_fa2en_sahife_bleu":0.0268950031},"translation-ar2fa_ar2fa":{"translation-ar2fa_ar2fa_bleu":0.0302818033,"translation-ar2fa_ar2fa_sahife_bleu":0.0272381325,"translation-ar2fa_ar2fa_nahj_bleu":0.0129029913,"translation-ar2fa_ar2fa_quran_bleu":0.0498353456},"summarization_PnSummary":{"summarization_PnSummary_rougeL_precision":0.0811186656,"summarization_PnSummary_rougeL_recall":0.3940089293,"summarization_PnSummary_rougeL_f1_score":0.1316106196},"translation-fa2ar_fa2ar":{"translation-fa2ar_fa2ar_bleu":0.0198691873,"translation-fa2ar_fa2ar_nahj_bleu":0.0113771734,"translation-fa2ar_fa2ar_sahife_bleu":0.0154846482,"translation-fa2ar_fa2ar_quran_bleu":0.0327457404},"nlg_score":0.0934094344}
{"Model Name":"deepseek-reasoner","model_url":"https_google.com","parameters_count":"671000000000","source_type":"Open-Source","question-generation_PersianQA":{"question-generation_PersianQA_rougeL_precision":0.1342253144,"question-generation_PersianQA_rougeL_recall":0.4100317735,"question-generation_PersianQA_rougeL_f1_score":0.18410589},"translation-en2fa_en2fa":{"translation-en2fa_en2fa_bleu":0.0409401575,"translation-en2fa_en2fa_epoque_bleu":0.0902479461,"translation-en2fa_en2fa_mizan_bleu":0.0327725294,"translation-en2fa_en2fa_quran_bleu":0.0443958388,"translation-en2fa_en2fa_sahife_bleu":0.0278897851,"translation-en2fa_en2fa_nahj_bleu":0.0148027555,"translation-en2fa_en2fa_tep_bleu":0.0071499459},"summarization_SamSUM-fa":{"summarization_SamSUM-fa_rougeL_precision":0.1081719649,"summarization_SamSUM-fa_rougeL_recall":0.3726803698,"summarization_SamSUM-fa_rougeL_f1_score":0.1606804283},"translation-fa2en_fa2en":{"translation-fa2en_fa2en_bleu":0.0414094379,"translation-fa2en_fa2en_tep_bleu":0.019539618,"translation-fa2en_fa2en_mizan_bleu":0.0346087447,"translation-fa2en_fa2en_quran_bleu":0.0396858881,"translation-fa2en_fa2en_epoque_bleu":0.0798341141,"translation-fa2en_fa2en_nahj_bleu":0.0244191809,"translation-fa2en_fa2en_sahife_bleu":0.0231626908},"translation-ar2fa_ar2fa":{"translation-ar2fa_ar2fa_bleu":0.0334933514,"translation-ar2fa_ar2fa_sahife_bleu":0.0313812328,"translation-ar2fa_ar2fa_nahj_bleu":0.013862611,"translation-ar2fa_ar2fa_quran_bleu":0.0542546733},"summarization_PnSummary":{"summarization_PnSummary_rougeL_precision":0.0849469928,"summarization_PnSummary_rougeL_recall":0.3820724231,"summarization_PnSummary_rougeL_f1_score":0.1359575611},"translation-fa2ar_fa2ar":{"translation-fa2ar_fa2ar_bleu":0.0198485582,"translation-fa2ar_fa2ar_nahj_bleu":0.0111873845,"translation-fa2ar_fa2ar_sahife_bleu":0.015856468,"translation-fa2ar_fa2ar_quran_bleu":0.032501822},"nlg_score":0.0880621978}
{"Model Name":"Llama-3.2-1B-Instruct","model_url":"https_google.com","parameters_count":"1240000000","source_type":"Open-Source","question-generation_PersianQA":{"question-generation_PersianQA_rougeL_precision":0.0567952998,"question-generation_PersianQA_rougeL_recall":0.2105979358,"question-generation_PersianQA_rougeL_f1_score":0.0793499521},"translation-en2fa_en2fa":{"translation-en2fa_en2fa_bleu":0.0373710986,"translation-en2fa_en2fa_epoque_bleu":0.0773774592,"translation-en2fa_en2fa_mizan_bleu":0.034234366,"translation-en2fa_en2fa_quran_bleu":0.0258474786,"translation-en2fa_en2fa_sahife_bleu":0.0240302635,"translation-en2fa_en2fa_nahj_bleu":0.0149718554,"translation-en2fa_en2fa_tep_bleu":0.0146400693},"summarization_SamSUM-fa":{"summarization_SamSUM-fa_rougeL_precision":0.1565749742,"summarization_SamSUM-fa_rougeL_recall":0.2642298658,"summarization_SamSUM-fa_rougeL_f1_score":0.1759907012},"translation-fa2en_fa2en":{"translation-fa2en_fa2en_bleu":0.0423299736,"translation-fa2en_fa2en_tep_bleu":0.0124774953,"translation-fa2en_fa2en_mizan_bleu":0.0314077643,"translation-fa2en_fa2en_quran_bleu":0.0294898862,"translation-fa2en_fa2en_epoque_bleu":0.1006673489,"translation-fa2en_fa2en_nahj_bleu":0.0117672852,"translation-fa2en_fa2en_sahife_bleu":0.0246608556},"translation-ar2fa_ar2fa":{"translation-ar2fa_ar2fa_bleu":0.0056487667,"translation-ar2fa_ar2fa_sahife_bleu":0.0084650778,"translation-ar2fa_ar2fa_nahj_bleu":0.0073044047,"translation-ar2fa_ar2fa_quran_bleu":0.0012595996},"summarization_PnSummary":{"summarization_PnSummary_rougeL_precision":0.2061378815,"summarization_PnSummary_rougeL_recall":0.2755376589,"summarization_PnSummary_rougeL_f1_score":0.2192316506},"translation-fa2ar_fa2ar":{"translation-fa2ar_fa2ar_bleu":0.0164489799,"translation-fa2ar_fa2ar_nahj_bleu":0.0152537955,"translation-fa2ar_fa2ar_sahife_bleu":0.0220286512,"translation-fa2ar_fa2ar_quran_bleu":0.012064493},"nlg_score":0.0823387318}
{"Model Name":"gemma-3-1b-it","model_url":"https_google.com","parameters_count":"1000000000","source_type":"Open-Source","question-generation_PersianQA":{"question-generation_PersianQA_rougeL_precision":0.045673941,"question-generation_PersianQA_rougeL_recall":0.0991932753,"question-generation_PersianQA_rougeL_f1_score":0.0576169145},"translation-en2fa_en2fa":{"translation-en2fa_en2fa_bleu":0.0460704645,"translation-en2fa_en2fa_epoque_bleu":0.1309820272,"translation-en2fa_en2fa_mizan_bleu":0.0316650952,"translation-en2fa_en2fa_quran_bleu":0.0134401079,"translation-en2fa_en2fa_sahife_bleu":0.0141114981,"translation-en2fa_en2fa_nahj_bleu":0.0127654414,"translation-en2fa_en2fa_tep_bleu":0.0065463218},"summarization_SamSUM-fa":{"summarization_SamSUM-fa_rougeL_precision":0.1340334866,"summarization_SamSUM-fa_rougeL_recall":0.3184206946,"summarization_SamSUM-fa_rougeL_f1_score":0.179098961},"translation-fa2en_fa2en":{"translation-fa2en_fa2en_bleu":0.0146059874,"translation-fa2en_fa2en_tep_bleu":0.0065306354,"translation-fa2en_fa2en_mizan_bleu":0.0119363121,"translation-fa2en_fa2en_quran_bleu":0.0152281808,"translation-fa2en_fa2en_epoque_bleu":0.0274143056,"translation-fa2en_fa2en_nahj_bleu":0.0094070307,"translation-fa2en_fa2en_sahife_bleu":0.0093811964},"translation-ar2fa_ar2fa":{"translation-ar2fa_ar2fa_bleu":0.01007549,"translation-ar2fa_ar2fa_sahife_bleu":0.0116017776,"translation-ar2fa_ar2fa_nahj_bleu":0.0067782437,"translation-ar2fa_ar2fa_quran_bleu":0.0116815864},"summarization_PnSummary":{"summarization_PnSummary_rougeL_precision":0.111370713,"summarization_PnSummary_rougeL_recall":0.3732014316,"summarization_PnSummary_rougeL_f1_score":0.1661125342},"translation-fa2ar_fa2ar":{"translation-fa2ar_fa2ar_bleu":0.0045158138,"translation-fa2ar_fa2ar_nahj_bleu":0.004600061,"translation-fa2ar_fa2ar_sahife_bleu":0.0052362431,"translation-fa2ar_fa2ar_quran_bleu":0.0037111373},"nlg_score":0.0682994522}
{"Model Name":"o4-mini","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","question-generation_PersianQA":{"question-generation_PersianQA_rougeL_precision":null,"question-generation_PersianQA_rougeL_recall":null,"question-generation_PersianQA_rougeL_f1_score":null},"translation-en2fa_en2fa":{"translation-en2fa_en2fa_bleu":null,"translation-en2fa_en2fa_epoque_bleu":null,"translation-en2fa_en2fa_mizan_bleu":null,"translation-en2fa_en2fa_quran_bleu":null,"translation-en2fa_en2fa_sahife_bleu":null,"translation-en2fa_en2fa_nahj_bleu":null,"translation-en2fa_en2fa_tep_bleu":null},"summarization_SamSUM-fa":{"summarization_SamSUM-fa_rougeL_precision":null,"summarization_SamSUM-fa_rougeL_recall":null,"summarization_SamSUM-fa_rougeL_f1_score":null},"translation-fa2en_fa2en":{"translation-fa2en_fa2en_bleu":null,"translation-fa2en_fa2en_tep_bleu":null,"translation-fa2en_fa2en_mizan_bleu":null,"translation-fa2en_fa2en_quran_bleu":null,"translation-fa2en_fa2en_epoque_bleu":null,"translation-fa2en_fa2en_nahj_bleu":null,"translation-fa2en_fa2en_sahife_bleu":null},"translation-ar2fa_ar2fa":{"translation-ar2fa_ar2fa_bleu":null,"translation-ar2fa_ar2fa_sahife_bleu":null,"translation-ar2fa_ar2fa_nahj_bleu":null,"translation-ar2fa_ar2fa_quran_bleu":null},"summarization_PnSummary":{"summarization_PnSummary_rougeL_precision":null,"summarization_PnSummary_rougeL_recall":null,"summarization_PnSummary_rougeL_f1_score":null},"translation-fa2ar_fa2ar":{"translation-fa2ar_fa2ar_bleu":null,"translation-fa2ar_fa2ar_nahj_bleu":null,"translation-fa2ar_fa2ar_sahife_bleu":null,"translation-fa2ar_fa2ar_quran_bleu":null},"nlg_score":null}
{"Model Name":"c4ai-command-a-03-2025","model_url":"https_google.com","parameters_count":"111000000000","source_type":"Open-Source","question-generation_PersianQA":{"question-generation_PersianQA_rougeL_precision":null,"question-generation_PersianQA_rougeL_recall":null,"question-generation_PersianQA_rougeL_f1_score":null},"translation-en2fa_en2fa":{"translation-en2fa_en2fa_bleu":null,"translation-en2fa_en2fa_epoque_bleu":null,"translation-en2fa_en2fa_mizan_bleu":null,"translation-en2fa_en2fa_quran_bleu":null,"translation-en2fa_en2fa_sahife_bleu":null,"translation-en2fa_en2fa_nahj_bleu":null,"translation-en2fa_en2fa_tep_bleu":null},"summarization_SamSUM-fa":{"summarization_SamSUM-fa_rougeL_precision":null,"summarization_SamSUM-fa_rougeL_recall":null,"summarization_SamSUM-fa_rougeL_f1_score":null},"translation-fa2en_fa2en":{"translation-fa2en_fa2en_bleu":null,"translation-fa2en_fa2en_tep_bleu":null,"translation-fa2en_fa2en_mizan_bleu":null,"translation-fa2en_fa2en_quran_bleu":null,"translation-fa2en_fa2en_epoque_bleu":null,"translation-fa2en_fa2en_nahj_bleu":null,"translation-fa2en_fa2en_sahife_bleu":null},"translation-ar2fa_ar2fa":{"translation-ar2fa_ar2fa_bleu":null,"translation-ar2fa_ar2fa_sahife_bleu":null,"translation-ar2fa_ar2fa_nahj_bleu":null,"translation-ar2fa_ar2fa_quran_bleu":null},"summarization_PnSummary":{"summarization_PnSummary_rougeL_precision":null,"summarization_PnSummary_rougeL_recall":null,"summarization_PnSummary_rougeL_f1_score":null},"translation-fa2ar_fa2ar":{"translation-fa2ar_fa2ar_bleu":null,"translation-fa2ar_fa2ar_nahj_bleu":null,"translation-fa2ar_fa2ar_sahife_bleu":null,"translation-fa2ar_fa2ar_quran_bleu":null},"nlg_score":null}
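The records above form the NLG board (`persian_nlg.jsonl`): one JSON object per model, with each task's metrics nested under its task key and a single aggregate `nlg_score` (null for models not yet evaluated). Below is a minimal sketch of how one of these rows could be flattened for tabular display, assuming only the record layout visible above; the `flatten_record` helper and the hard-coded file path are illustrative, not part of this repo's API:

```python
import json

def flatten_record(record: dict) -> dict:
    """Merge nested per-task metric dicts into the top level,
    so every metric becomes a plain column."""
    flat = {}
    for key, value in record.items():
        if isinstance(value, dict):
            flat.update(value)  # e.g. the *_bleu sub-metrics of a translation task
        else:
            flat[key] = value
    return flat

with open("leaderboard/boards_data/persian_nlg.jsonl", encoding="utf-8") as f:
    rows = [flatten_record(json.loads(line)) for line in f if line.strip()]

# Rank by the aggregate NLG score; models with a null score
# (unevaluated, e.g. o4-mini above) sort to the bottom.
rows.sort(key=lambda r: (r["nlg_score"] is None, -(r["nlg_score"] or 0.0)))
for r in rows[:5]:
    print(r["Model Name"], r["nlg_score"])
```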
leaderboard/boards_data/persian_nlu.jsonl
ADDED
The diff for this file is too large to render.
See raw diff
leaderboard/boards_data/question-generation_PersianQA.jsonl
ADDED
@@ -0,0 +1,29 @@
{"Model Name":"claude-3-7-sonnet-20250219","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.1693490122,"question-generation_PersianQA_rougeL_recall":0.3886090827,"question-generation_PersianQA_rougeL_f1_score":0.227277052,"nlg_score":0.1779340777}
{"Model Name":"gemma-3-4b-it","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1704020873,"question-generation_PersianQA_rougeL_recall":0.3000756202,"question-generation_PersianQA_rougeL_f1_score":0.2079039891,"nlg_score":0.0949943578}
{"Model Name":"c4ai-command-r-plus","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.2149535143,"question-generation_PersianQA_rougeL_recall":0.3019561885,"question-generation_PersianQA_rougeL_f1_score":0.2405115465,"nlg_score":0.1880477876}
{"Model Name":"gpt-4.1","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.2576021626,"question-generation_PersianQA_rougeL_recall":0.3924501003,"question-generation_PersianQA_rougeL_f1_score":0.2985826349,"nlg_score":0.194675133}
{"Model Name":"o4-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":null,"question-generation_PersianQA_rougeL_recall":null,"question-generation_PersianQA_rougeL_f1_score":null,"nlg_score":null}
{"Model Name":"gemma-3-12b-it","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.2572991833,"question-generation_PersianQA_rougeL_recall":0.3740225235,"question-generation_PersianQA_rougeL_f1_score":0.2927586837,"nlg_score":0.1196804312}
{"Model Name":"gemma-3-27b-it","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.2041596361,"question-generation_PersianQA_rougeL_recall":0.3456815337,"question-generation_PersianQA_rougeL_f1_score":0.2459732807,"nlg_score":0.1067134448}
{"Model Name":"Qwen3-14B","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1987198912,"question-generation_PersianQA_rougeL_recall":0.3431437262,"question-generation_PersianQA_rougeL_f1_score":0.2419384398,"nlg_score":0.16056333}
{"Model Name":"Qwen3-32B","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.2281053588,"question-generation_PersianQA_rougeL_recall":0.370933314,"question-generation_PersianQA_rougeL_f1_score":0.273363418,"nlg_score":0.1679338638}
{"Model Name":"claude-3-5-haiku-20241022","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.0870939736,"question-generation_PersianQA_rougeL_recall":0.3600941065,"question-generation_PersianQA_rougeL_f1_score":0.1336375958,"nlg_score":0.1089333827}
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1520819517,"question-generation_PersianQA_rougeL_recall":0.26324767,"question-generation_PersianQA_rougeL_f1_score":0.1843401988,"nlg_score":0.1319091735}
{"Model Name":"deepseek-chat","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1939037413,"question-generation_PersianQA_rougeL_recall":0.4070822245,"question-generation_PersianQA_rougeL_f1_score":0.2439578999,"nlg_score":0.0934094344}
{"Model Name":"Qwen3-4B","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1744197112,"question-generation_PersianQA_rougeL_recall":0.2697024508,"question-generation_PersianQA_rougeL_f1_score":0.2017710943,"nlg_score":0.1389297212}
{"Model Name":"gemma-3-1b-it","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.045673941,"question-generation_PersianQA_rougeL_recall":0.0991932753,"question-generation_PersianQA_rougeL_f1_score":0.0576169145,"nlg_score":0.0682994522}
{"Model Name":"aya-expanse-32b","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.0980160864,"question-generation_PersianQA_rougeL_recall":0.347983913,"question-generation_PersianQA_rougeL_f1_score":0.1443872083,"nlg_score":0.1196400535}
{"Model Name":"Llama-3.3-70B-Instruct","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.2447184183,"question-generation_PersianQA_rougeL_recall":0.3388367288,"question-generation_PersianQA_rougeL_f1_score":0.269297654,"nlg_score":0.2010896964}
{"Model Name":"gpt-4.1-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.2438951227,"question-generation_PersianQA_rougeL_recall":0.3687301621,"question-generation_PersianQA_rougeL_f1_score":0.2816187853,"nlg_score":0.1901206806}
{"Model Name":"gpt-4o-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.1942536013,"question-generation_PersianQA_rougeL_recall":0.3435531442,"question-generation_PersianQA_rougeL_f1_score":0.2369359061,"nlg_score":0.1810678527}
{"Model Name":"c4ai-command-a-03-2025","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":null,"question-generation_PersianQA_rougeL_recall":null,"question-generation_PersianQA_rougeL_f1_score":null,"nlg_score":null}
{"Model Name":"gemini-2.0-flash","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.3141052553,"question-generation_PersianQA_rougeL_recall":0.4102615831,"question-generation_PersianQA_rougeL_f1_score":0.3441804021,"nlg_score":0.178231145}
{"Model Name":"c4ai-command-r-v01","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1546246184,"question-generation_PersianQA_rougeL_recall":0.253394795,"question-generation_PersianQA_rougeL_f1_score":0.1829113647,"nlg_score":0.1641995602}
{"Model Name":"gpt-4.1-nano","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.1622159789,"question-generation_PersianQA_rougeL_recall":0.302597472,"question-generation_PersianQA_rougeL_f1_score":0.2021048057,"nlg_score":0.1665903777}
{"Model Name":"Qwen3-8B","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1965366702,"question-generation_PersianQA_rougeL_recall":0.340760284,"question-generation_PersianQA_rougeL_f1_score":0.2388923895,"nlg_score":0.1557270864}
{"Model Name":"Mistral-7B-Instruct-v0.3","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1127092702,"question-generation_PersianQA_rougeL_recall":0.2982763168,"question-generation_PersianQA_rougeL_f1_score":0.1525970768,"nlg_score":0.0944140383}
{"Model Name":"gpt-4o","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.2275858051,"question-generation_PersianQA_rougeL_recall":0.3654754607,"question-generation_PersianQA_rougeL_f1_score":0.2679025722,"nlg_score":0.18964968}
{"Model Name":"deepseek-reasoner","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1342253144,"question-generation_PersianQA_rougeL_recall":0.4100317735,"question-generation_PersianQA_rougeL_f1_score":0.18410589,"nlg_score":0.0880621978}
{"Model Name":"Qwen3-30B-A3B","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1996840686,"question-generation_PersianQA_rougeL_recall":0.3393114266,"question-generation_PersianQA_rougeL_f1_score":0.2417040176,"nlg_score":0.164118288}
{"Model Name":"Llama-3.2-3B-Instruct","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1264186031,"question-generation_PersianQA_rougeL_recall":0.2582953109,"question-generation_PersianQA_rougeL_f1_score":0.1600835412,"nlg_score":0.1129755187}
{"Model Name":"Llama-3.2-1B-Instruct","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.0567952998,"question-generation_PersianQA_rougeL_recall":0.2105979358,"question-generation_PersianQA_rougeL_f1_score":0.0793499521,"nlg_score":0.0823387318}
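Each per-task board, like `question-generation_PersianQA.jsonl` above, repeats the model metadata and carries only that task's metrics plus the parent aggregate score. A minimal sketch of ranking this board by ROUGE-L F1, assuming only the record layout shown above (the file path and variable names are illustrative):

```python
import json

# Load the single-task board and rank models by ROUGE-L F1 on PersianQA.
with open("leaderboard/boards_data/question-generation_PersianQA.jsonl",
          encoding="utf-8") as f:
    rows = [json.loads(line) for line in f if line.strip()]

metric = "question-generation_PersianQA_rougeL_f1_score"
scored = [r for r in rows if r[metric] is not None]  # skip unevaluated models
scored.sort(key=lambda r: r[metric], reverse=True)

for rank, row in enumerate(scored[:3], start=1):
    print(f"{rank}. {row['Model Name']}: {row[metric]:.4f}")
```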
leaderboard/boards_data/sentiment-analysis_deepsentipers.jsonl
ADDED
@@ -0,0 +1,29 @@
{"Model Name":"claude-3-7-sonnet-20250219","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.8058409951,"sentiment-analysis_deepsentipers_precision_modified":0.7717795715,"sentiment-analysis_deepsentipers_recall_modified":0.8211827366,"sentiment-analysis_deepsentipers_fscore_modified":0.7889064935,"sentiment-analysis_deepsentipers_acc":0.8062770563,"sentiment-analysis_deepsentipers_precision":0.7721972011,"sentiment-analysis_deepsentipers_recall":0.8216270995,"sentiment-analysis_deepsentipers_fscore":0.7893333909,"sentiment-analysis_deepsentipers_valid_output_ratio":0.9994591671,"nlu_score":0.7143086066}
{"Model Name":"gemma-3-4b-it","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.6817691478,"sentiment-analysis_deepsentipers_precision_modified":0.6546744642,"sentiment-analysis_deepsentipers_recall_modified":0.7378694789,"sentiment-analysis_deepsentipers_fscore_modified":0.6356142977,"sentiment-analysis_deepsentipers_acc":0.6817691478,"sentiment-analysis_deepsentipers_precision":0.6546744642,"sentiment-analysis_deepsentipers_recall":0.7378694789,"sentiment-analysis_deepsentipers_fscore":0.6356142977,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6241793507}
{"Model Name":"c4ai-command-r-plus","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.738403452,"sentiment-analysis_deepsentipers_precision_modified":0.706763853,"sentiment-analysis_deepsentipers_recall_modified":0.7658510846,"sentiment-analysis_deepsentipers_fscore_modified":0.726373242,"sentiment-analysis_deepsentipers_acc":0.738403452,"sentiment-analysis_deepsentipers_precision":0.706763853,"sentiment-analysis_deepsentipers_recall":0.7658510846,"sentiment-analysis_deepsentipers_fscore":0.726373242,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6297634971}
{"Model Name":"gpt-4.1","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.8047464941,"sentiment-analysis_deepsentipers_precision_modified":0.7661826532,"sentiment-analysis_deepsentipers_recall_modified":0.8089861144,"sentiment-analysis_deepsentipers_fscore_modified":0.7830417049,"sentiment-analysis_deepsentipers_acc":0.8047464941,"sentiment-analysis_deepsentipers_precision":0.7661826532,"sentiment-analysis_deepsentipers_recall":0.8089861144,"sentiment-analysis_deepsentipers_fscore":0.7830417049,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6758278127}
{"Model Name":"o4-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":null,"sentiment-analysis_deepsentipers_precision_modified":null,"sentiment-analysis_deepsentipers_recall_modified":null,"sentiment-analysis_deepsentipers_fscore_modified":null,"sentiment-analysis_deepsentipers_acc":null,"sentiment-analysis_deepsentipers_precision":null,"sentiment-analysis_deepsentipers_recall":null,"sentiment-analysis_deepsentipers_fscore":null,"sentiment-analysis_deepsentipers_valid_output_ratio":null,"nlu_score":null}
{"Model Name":"gemma-3-12b-it","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.7664509169,"sentiment-analysis_deepsentipers_precision_modified":0.7235774595,"sentiment-analysis_deepsentipers_recall_modified":0.785720049,"sentiment-analysis_deepsentipers_fscore_modified":0.7440236575,"sentiment-analysis_deepsentipers_acc":0.766864544,"sentiment-analysis_deepsentipers_precision":0.7239679492,"sentiment-analysis_deepsentipers_recall":0.786144075,"sentiment-analysis_deepsentipers_fscore":0.7444251813,"sentiment-analysis_deepsentipers_valid_output_ratio":0.9994606257,"nlu_score":0.699116864}
{"Model Name":"gemma-3-27b-it","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.7556634304,"sentiment-analysis_deepsentipers_precision_modified":0.7100962569,"sentiment-analysis_deepsentipers_recall_modified":0.796296032,"sentiment-analysis_deepsentipers_fscore_modified":0.7198160026,"sentiment-analysis_deepsentipers_acc":0.7556634304,"sentiment-analysis_deepsentipers_precision":0.7100962569,"sentiment-analysis_deepsentipers_recall":0.796296032,"sentiment-analysis_deepsentipers_fscore":0.7198160026,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6898261633}
{"Model Name":"Qwen3-14B","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.7459546926,"sentiment-analysis_deepsentipers_precision_modified":0.696002467,"sentiment-analysis_deepsentipers_recall_modified":0.7725731976,"sentiment-analysis_deepsentipers_fscore_modified":0.7160207999,"sentiment-analysis_deepsentipers_acc":0.7459546926,"sentiment-analysis_deepsentipers_precision":0.696002467,"sentiment-analysis_deepsentipers_recall":0.7725731976,"sentiment-analysis_deepsentipers_fscore":0.7160207999,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6460328733}
{"Model Name":"Qwen3-32B","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.7820927724,"sentiment-analysis_deepsentipers_precision_modified":0.7592820571,"sentiment-analysis_deepsentipers_recall_modified":0.7768252647,"sentiment-analysis_deepsentipers_fscore_modified":0.7562669975,"sentiment-analysis_deepsentipers_acc":0.7820927724,"sentiment-analysis_deepsentipers_precision":0.7592820571,"sentiment-analysis_deepsentipers_recall":0.7768252647,"sentiment-analysis_deepsentipers_fscore":0.7562669975,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6714091535}
{"Model Name":"claude-3-5-haiku-20241022","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.7521691974,"sentiment-analysis_deepsentipers_precision_modified":0.7149147283,"sentiment-analysis_deepsentipers_recall_modified":0.7661218172,"sentiment-analysis_deepsentipers_fscore_modified":0.7340307684,"sentiment-analysis_deepsentipers_acc":0.7550353838,"sentiment-analysis_deepsentipers_precision":0.7176389542,"sentiment-analysis_deepsentipers_recall":0.7690411709,"sentiment-analysis_deepsentipers_fscore":0.7368278372,"sentiment-analysis_deepsentipers_valid_output_ratio":0.9962039046,"nlu_score":0.3749414991}
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.5949298813,"sentiment-analysis_deepsentipers_precision_modified":0.6633283768,"sentiment-analysis_deepsentipers_recall_modified":0.6739827327,"sentiment-analysis_deepsentipers_fscore_modified":0.597099001,"sentiment-analysis_deepsentipers_acc":0.5958941113,"sentiment-analysis_deepsentipers_precision":0.6644034633,"sentiment-analysis_deepsentipers_recall":0.6750750872,"sentiment-analysis_deepsentipers_fscore":0.5980667466,"sentiment-analysis_deepsentipers_valid_output_ratio":0.998381877,"nlu_score":0.5661558794}
{"Model Name":"deepseek-chat","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.7669902913,"sentiment-analysis_deepsentipers_precision_modified":0.7411642414,"sentiment-analysis_deepsentipers_recall_modified":0.7990679398,"sentiment-analysis_deepsentipers_fscore_modified":0.7346216275,"sentiment-analysis_deepsentipers_acc":0.7669902913,"sentiment-analysis_deepsentipers_precision":0.7411642414,"sentiment-analysis_deepsentipers_recall":0.7990679398,"sentiment-analysis_deepsentipers_fscore":0.7346216275,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6752949557}
{"Model Name":"Qwen3-4B","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.6855447681,"sentiment-analysis_deepsentipers_precision_modified":0.6408552737,"sentiment-analysis_deepsentipers_recall_modified":0.7180772523,"sentiment-analysis_deepsentipers_fscore_modified":0.6446920024,"sentiment-analysis_deepsentipers_acc":0.6855447681,"sentiment-analysis_deepsentipers_precision":0.6408552737,"sentiment-analysis_deepsentipers_recall":0.7180772523,"sentiment-analysis_deepsentipers_fscore":0.6446920024,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.5121418762}
{"Model Name":"gemma-3-1b-it","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.5199568501,"sentiment-analysis_deepsentipers_precision_modified":0.4907692439,"sentiment-analysis_deepsentipers_recall_modified":0.5047701764,"sentiment-analysis_deepsentipers_fscore_modified":0.4457895794,"sentiment-analysis_deepsentipers_acc":0.571767497,"sentiment-analysis_deepsentipers_precision":0.5396715174,"sentiment-analysis_deepsentipers_recall":0.5550675605,"sentiment-analysis_deepsentipers_fscore":0.4902098934,"sentiment-analysis_deepsentipers_valid_output_ratio":0.9093851133,"nlu_score":0.3619547874}
{"Model Name":"aya-expanse-32b","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.7642934196,"sentiment-analysis_deepsentipers_precision_modified":0.7287131406,"sentiment-analysis_deepsentipers_recall_modified":0.7801104156,"sentiment-analysis_deepsentipers_fscore_modified":0.7434018552,"sentiment-analysis_deepsentipers_acc":0.7951739618,"sentiment-analysis_deepsentipers_precision":0.7581560958,"sentiment-analysis_deepsentipers_recall":0.8116300284,"sentiment-analysis_deepsentipers_fscore":0.7734382938,"sentiment-analysis_deepsentipers_valid_output_ratio":0.9611650485,"nlu_score":0.3928685253}
{"Model Name":"Llama-3.3-70B-Instruct","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.7324703344,"sentiment-analysis_deepsentipers_precision_modified":0.7023773257,"sentiment-analysis_deepsentipers_recall_modified":0.7824931708,"sentiment-analysis_deepsentipers_fscore_modified":0.6905624385,"sentiment-analysis_deepsentipers_acc":0.7324703344,"sentiment-analysis_deepsentipers_precision":0.7023773257,"sentiment-analysis_deepsentipers_recall":0.7824931708,"sentiment-analysis_deepsentipers_fscore":0.6905624385,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6800109206}
{"Model Name":"gpt-4.1-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.7869471413,"sentiment-analysis_deepsentipers_precision_modified":0.7486325068,"sentiment-analysis_deepsentipers_recall_modified":0.811119619,"sentiment-analysis_deepsentipers_fscore_modified":0.7669134988,"sentiment-analysis_deepsentipers_acc":0.7869471413,"sentiment-analysis_deepsentipers_precision":0.7486325068,"sentiment-analysis_deepsentipers_recall":0.811119619,"sentiment-analysis_deepsentipers_fscore":0.7669134988,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6833497104}
{"Model Name":"gpt-4o-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.7713052859,"sentiment-analysis_deepsentipers_precision_modified":0.7288724929,"sentiment-analysis_deepsentipers_recall_modified":0.803675275,"sentiment-analysis_deepsentipers_fscore_modified":0.753174206,"sentiment-analysis_deepsentipers_acc":0.7713052859,"sentiment-analysis_deepsentipers_precision":0.7288724929,"sentiment-analysis_deepsentipers_recall":0.803675275,"sentiment-analysis_deepsentipers_fscore":0.753174206,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6459120734}
{"Model Name":"c4ai-command-a-03-2025","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":null,"sentiment-analysis_deepsentipers_precision_modified":null,"sentiment-analysis_deepsentipers_recall_modified":null,"sentiment-analysis_deepsentipers_fscore_modified":null,"sentiment-analysis_deepsentipers_acc":null,"sentiment-analysis_deepsentipers_precision":null,"sentiment-analysis_deepsentipers_recall":null,"sentiment-analysis_deepsentipers_fscore":null,"sentiment-analysis_deepsentipers_valid_output_ratio":null,"nlu_score":null}
{"Model Name":"gemini-2.0-flash","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.7740021575,"sentiment-analysis_deepsentipers_precision_modified":0.7295627073,"sentiment-analysis_deepsentipers_recall_modified":0.7969121455,"sentiment-analysis_deepsentipers_fscore_modified":0.7492633779,"sentiment-analysis_deepsentipers_acc":0.7744198597,"sentiment-analysis_deepsentipers_precision":0.729956427,"sentiment-analysis_deepsentipers_recall":0.7973422114,"sentiment-analysis_deepsentipers_fscore":0.7496677294,"sentiment-analysis_deepsentipers_valid_output_ratio":0.9994606257,"nlu_score":0.7050532433}
{"Model Name":"c4ai-command-r-v01","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.6154261057,"sentiment-analysis_deepsentipers_precision_modified":0.6519864557,"sentiment-analysis_deepsentipers_recall_modified":0.6762525877,"sentiment-analysis_deepsentipers_fscore_modified":0.5290317996,"sentiment-analysis_deepsentipers_acc":0.6154261057,"sentiment-analysis_deepsentipers_precision":0.6519864557,"sentiment-analysis_deepsentipers_recall":0.6762525877,"sentiment-analysis_deepsentipers_fscore":0.5290317996,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.531045981}
{"Model Name":"gpt-4.1-nano","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.7448759439,"sentiment-analysis_deepsentipers_precision_modified":0.7197594162,"sentiment-analysis_deepsentipers_recall_modified":0.7773395601,"sentiment-analysis_deepsentipers_fscore_modified":0.7035572334,"sentiment-analysis_deepsentipers_acc":0.7448759439,"sentiment-analysis_deepsentipers_precision":0.7197594162,"sentiment-analysis_deepsentipers_recall":0.7773395601,"sentiment-analysis_deepsentipers_fscore":0.7035572334,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6262096694}
{"Model Name":"Qwen3-8B","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.7411003236,"sentiment-analysis_deepsentipers_precision_modified":0.7010084925,"sentiment-analysis_deepsentipers_recall_modified":0.7529009939,"sentiment-analysis_deepsentipers_fscore_modified":0.7172781226,"sentiment-analysis_deepsentipers_acc":0.7411003236,"sentiment-analysis_deepsentipers_precision":0.7010084925,"sentiment-analysis_deepsentipers_recall":0.7529009939,"sentiment-analysis_deepsentipers_fscore":0.7172781226,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.5968415875}
{"Model Name":"Mistral-7B-Instruct-v0.3","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.3737864078,"sentiment-analysis_deepsentipers_precision_modified":0.4303148768,"sentiment-analysis_deepsentipers_recall_modified":0.3053254234,"sentiment-analysis_deepsentipers_fscore_modified":0.2934454786,"sentiment-analysis_deepsentipers_acc":0.6209677419,"sentiment-analysis_deepsentipers_precision":0.7148779405,"sentiment-analysis_deepsentipers_recall":0.5072341711,"sentiment-analysis_deepsentipers_fscore":0.4874981338,"sentiment-analysis_deepsentipers_valid_output_ratio":0.6019417476,"nlu_score":0.3916645306}
{"Model Name":"gpt-4o","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.7988133765,"sentiment-analysis_deepsentipers_precision_modified":0.7580375513,"sentiment-analysis_deepsentipers_recall_modified":0.8108044611,"sentiment-analysis_deepsentipers_fscore_modified":0.7757714496,"sentiment-analysis_deepsentipers_acc":0.7988133765,"sentiment-analysis_deepsentipers_precision":0.7580375513,"sentiment-analysis_deepsentipers_recall":0.8108044611,"sentiment-analysis_deepsentipers_fscore":0.7757714496,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.7146808531}
{"Model Name":"deepseek-reasoner","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.6278317152,"sentiment-analysis_deepsentipers_precision_modified":0.5954545705,"sentiment-analysis_deepsentipers_recall_modified":0.6239967818,"sentiment-analysis_deepsentipers_fscore_modified":0.6073033689,"sentiment-analysis_deepsentipers_acc":0.7288666249,"sentiment-analysis_deepsentipers_precision":0.691279132,"sentiment-analysis_deepsentipers_recall":0.7244145482,"sentiment-analysis_deepsentipers_fscore":0.7050347188,"sentiment-analysis_deepsentipers_valid_output_ratio":0.8613807983,"nlu_score":0.6361186163}
{"Model Name":"Qwen3-30B-A3B","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.7502696872,"sentiment-analysis_deepsentipers_precision_modified":0.7162099301,"sentiment-analysis_deepsentipers_recall_modified":0.7649004728,"sentiment-analysis_deepsentipers_fscore_modified":0.716460892,"sentiment-analysis_deepsentipers_acc":0.7502696872,"sentiment-analysis_deepsentipers_precision":0.7162099301,"sentiment-analysis_deepsentipers_recall":0.7649004728,"sentiment-analysis_deepsentipers_fscore":0.716460892,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6255818412}
{"Model Name":"Llama-3.2-3B-Instruct","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.3149946063,"sentiment-analysis_deepsentipers_precision_modified":0.6011059335,"sentiment-analysis_deepsentipers_recall_modified":0.4700288555,"sentiment-analysis_deepsentipers_fscore_modified":0.3135968578,"sentiment-analysis_deepsentipers_acc":0.3155051324,"sentiment-analysis_deepsentipers_precision":0.6020801732,"sentiment-analysis_deepsentipers_recall":0.4707906527,"sentiment-analysis_deepsentipers_fscore":0.3141051185,"sentiment-analysis_deepsentipers_valid_output_ratio":0.998381877,"nlu_score":0.1368924446}
{"Model Name":"Llama-3.2-1B-Instruct","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.0787486516,"sentiment-analysis_deepsentipers_precision_modified":0.1370950606,"sentiment-analysis_deepsentipers_recall_modified":0.1281381117,"sentiment-analysis_deepsentipers_fscore_modified":0.0722798642,"sentiment-analysis_deepsentipers_acc":0.2106782107,"sentiment-analysis_deepsentipers_precision":0.3667737986,"sentiment-analysis_deepsentipers_recall":0.3428110522,"sentiment-analysis_deepsentipers_fscore":0.1933721042,"sentiment-analysis_deepsentipers_valid_output_ratio":0.3737864078,"nlu_score":0.046805056}
leaderboard/boards_data/sts_FarSICK.jsonl
ADDED
@@ -0,0 +1,29 @@
{"Model Name":"claude-3-7-sonnet-20250219","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8606070195,"sts_FarSICK_corrcoef":0.8606070195,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.7143086066}
{"Model Name":"gemma-3-4b-it","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8471466571,"sts_FarSICK_corrcoef":0.8471466571,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6241793507}
{"Model Name":"c4ai-command-r-plus","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8332013424,"sts_FarSICK_corrcoef":0.8332013424,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6297634971}
{"Model Name":"gpt-4.1","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8624442565,"sts_FarSICK_corrcoef":0.8624442565,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6758278127}
{"Model Name":"o4-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":null,"sts_FarSICK_corrcoef":null,"sts_FarSICK_valid_output_ratio":null,"nlu_score":null}
{"Model Name":"gemma-3-12b-it","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8767598269,"sts_FarSICK_corrcoef":0.8767598269,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.699116864}
{"Model Name":"gemma-3-27b-it","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8796836219,"sts_FarSICK_corrcoef":0.8796836219,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6898261633}
{"Model Name":"Qwen3-14B","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8509006434,"sts_FarSICK_corrcoef":0.8509006434,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6460328733}
{"Model Name":"Qwen3-32B","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8550824218,"sts_FarSICK_corrcoef":0.8550824218,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6714091535}
{"Model Name":"claude-3-5-haiku-20241022","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.2533632205,"sts_FarSICK_corrcoef":0.8617796616,"sts_FarSICK_valid_output_ratio":0.294,"nlu_score":0.3749414991}
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8346099969,"sts_FarSICK_corrcoef":0.8346099969,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.5661558794}
{"Model Name":"deepseek-chat","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8641781993,"sts_FarSICK_corrcoef":0.8641781993,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6752949557}
{"Model Name":"Qwen3-4B","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8363152655,"sts_FarSICK_corrcoef":0.8430597434,"sts_FarSICK_valid_output_ratio":0.992,"nlu_score":0.5121418762}
{"Model Name":"gemma-3-1b-it","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.6678492429,"sts_FarSICK_corrcoef":0.6913553239,"sts_FarSICK_valid_output_ratio":0.966,"nlu_score":0.3619547874}
{"Model Name":"aya-expanse-32b","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.0,"sts_FarSICK_corrcoef":0.0,"sts_FarSICK_valid_output_ratio":0.0,"nlu_score":0.3928685253}
{"Model Name":"Llama-3.3-70B-Instruct","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8274969834,"sts_FarSICK_corrcoef":0.8274969834,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6800109206}
{"Model Name":"gpt-4.1-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.86471356,"sts_FarSICK_corrcoef":0.86471356,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6833497104}
{"Model Name":"gpt-4o-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8671704383,"sts_FarSICK_corrcoef":0.8671704383,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6459120734}
{"Model Name":"c4ai-command-a-03-2025","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":null,"sts_FarSICK_corrcoef":null,"sts_FarSICK_valid_output_ratio":null,"nlu_score":null}
{"Model Name":"gemini-2.0-flash","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8492628764,"sts_FarSICK_corrcoef":0.8492628764,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.7050532433}
{"Model Name":"c4ai-command-r-v01","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8375953381,"sts_FarSICK_corrcoef":0.8375953381,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.531045981}
{"Model Name":"gpt-4.1-nano","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8720703866,"sts_FarSICK_corrcoef":0.8720703866,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6262096694}
{"Model Name":"Qwen3-8B","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8497629768,"sts_FarSICK_corrcoef":0.8497629768,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.5968415875}
{"Model Name":"Mistral-7B-Instruct-v0.3","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.5531047251,"sts_FarSICK_corrcoef":0.8039312865,"sts_FarSICK_valid_output_ratio":0.688,"nlu_score":0.3916645306}
{"Model Name":"gpt-4o","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8612153956,"sts_FarSICK_corrcoef":0.8612153956,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.7146808531}
{"Model Name":"deepseek-reasoner","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8547994421,"sts_FarSICK_corrcoef":0.8547994421,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6361186163}
{"Model Name":"Qwen3-30B-A3B","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8643540763,"sts_FarSICK_corrcoef":0.8643540763,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6255818412}
{"Model Name":"Llama-3.2-3B-Instruct","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.0,"sts_FarSICK_corrcoef":0.0,"sts_FarSICK_valid_output_ratio":0.0,"nlu_score":0.1368924446}
{"Model Name":"Llama-3.2-1B-Instruct","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.0,"sts_FarSICK_corrcoef":0.0,"sts_FarSICK_valid_output_ratio":0.018,"nlu_score":0.046805056}
leaderboard/boards_data/sts_SynPerSTS.jsonl
ADDED
@@ -0,0 +1,29 @@
{"Model Name":"claude-3-7-sonnet-20250219","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.9639002346,"sts_SynPerSTS_corrcoef":0.9639002346,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.7143086066}
{"Model Name":"gemma-3-4b-it","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9198771683,"sts_SynPerSTS_corrcoef":0.9198771683,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6241793507}
{"Model Name":"c4ai-command-r-plus","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9445586775,"sts_SynPerSTS_corrcoef":0.9445586775,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6297634971}
{"Model Name":"gpt-4.1","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.9620104912,"sts_SynPerSTS_corrcoef":0.9620104912,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6758278127}
{"Model Name":"o4-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":null,"sts_SynPerSTS_corrcoef":null,"sts_SynPerSTS_valid_output_ratio":null,"nlu_score":null}
{"Model Name":"gemma-3-12b-it","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.950218242,"sts_SynPerSTS_corrcoef":0.950218242,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.699116864}
{"Model Name":"gemma-3-27b-it","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9583160478,"sts_SynPerSTS_corrcoef":0.9583160478,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6898261633}
{"Model Name":"Qwen3-14B","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9603899338,"sts_SynPerSTS_corrcoef":0.9603899338,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6460328733}
{"Model Name":"Qwen3-32B","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.961773092,"sts_SynPerSTS_corrcoef":0.961773092,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6714091535}
{"Model Name":"claude-3-5-haiku-20241022","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.3217761614,"sts_SynPerSTS_corrcoef":0.8791698399,"sts_SynPerSTS_valid_output_ratio":0.366,"nlu_score":0.3749414991}
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9424987971,"sts_SynPerSTS_corrcoef":0.9424987971,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.5661558794}
{"Model Name":"deepseek-chat","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9550693345,"sts_SynPerSTS_corrcoef":0.9550693345,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6752949557}
{"Model Name":"Qwen3-4B","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9175164452,"sts_SynPerSTS_corrcoef":0.9324354118,"sts_SynPerSTS_valid_output_ratio":0.984,"nlu_score":0.5121418762}
{"Model Name":"gemma-3-1b-it","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.7233454448,"sts_SynPerSTS_corrcoef":0.8037171609,"sts_SynPerSTS_valid_output_ratio":0.9,"nlu_score":0.3619547874}
{"Model Name":"aya-expanse-32b","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.0,"sts_SynPerSTS_corrcoef":0.0,"sts_SynPerSTS_valid_output_ratio":0.0,"nlu_score":0.3928685253}
{"Model Name":"Llama-3.3-70B-Instruct","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9409955022,"sts_SynPerSTS_corrcoef":0.9409955022,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6800109206}
{"Model Name":"gpt-4.1-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.9622305784,"sts_SynPerSTS_corrcoef":0.9622305784,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6833497104}
{"Model Name":"gpt-4o-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.9590342543,"sts_SynPerSTS_corrcoef":0.9590342543,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6459120734}
{"Model Name":"c4ai-command-a-03-2025","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":null,"sts_SynPerSTS_corrcoef":null,"sts_SynPerSTS_valid_output_ratio":null,"nlu_score":null}
{"Model Name":"gemini-2.0-flash","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.9665566215,"sts_SynPerSTS_corrcoef":0.9665566215,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.7050532433}
{"Model Name":"c4ai-command-r-v01","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9009001164,"sts_SynPerSTS_corrcoef":0.9009001164,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.531045981}
{"Model Name":"gpt-4.1-nano","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.930027057,"sts_SynPerSTS_corrcoef":0.930027057,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6262096694}
{"Model Name":"Qwen3-8B","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9555087155,"sts_SynPerSTS_corrcoef":0.9555087155,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.5968415875}
{"Model Name":"Mistral-7B-Instruct-v0.3","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.4432810096,"sts_SynPerSTS_corrcoef":0.7363471921,"sts_SynPerSTS_valid_output_ratio":0.602,"nlu_score":0.3916645306}
{"Model Name":"gpt-4o","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.9586779662,"sts_SynPerSTS_corrcoef":0.9586779662,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.7146808531}
{"Model Name":"deepseek-reasoner","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9581074422,"sts_SynPerSTS_corrcoef":0.9581074422,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6361186163}
{"Model Name":"Qwen3-30B-A3B","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9421493238,"sts_SynPerSTS_corrcoef":0.9421493238,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6255818412}
{"Model Name":"Llama-3.2-3B-Instruct","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.0,"sts_SynPerSTS_corrcoef":0.0,"sts_SynPerSTS_valid_output_ratio":0.0,"nlu_score":0.1368924446}
{"Model Name":"Llama-3.2-1B-Instruct","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.0,"sts_SynPerSTS_corrcoef":0.0,"sts_SynPerSTS_valid_output_ratio":0.02,"nlu_score":0.046805056}
leaderboard/boards_data/summarization_PnSummary.jsonl
ADDED
@@ -0,0 +1,29 @@
{"Model Name":"claude-3-7-sonnet-20250219","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.1188323392,"summarization_PnSummary_rougeL_recall":0.3948447809,"summarization_PnSummary_rougeL_f1_score":0.1786530476,"nlg_score":0.1779340777}
{"Model Name":"gemma-3-4b-it","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1095844839,"summarization_PnSummary_rougeL_recall":0.3735331299,"summarization_PnSummary_rougeL_f1_score":0.1645385252,"nlg_score":0.0949943578}
{"Model Name":"c4ai-command-r-plus","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1227039295,"summarization_PnSummary_rougeL_recall":0.4315497639,"summarization_PnSummary_rougeL_f1_score":0.1856517383,"nlg_score":0.1880477876}
{"Model Name":"gpt-4.1","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.1234743619,"summarization_PnSummary_rougeL_recall":0.376111826,"summarization_PnSummary_rougeL_f1_score":0.1808600563,"nlg_score":0.194675133}
{"Model Name":"o4-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":null,"summarization_PnSummary_rougeL_recall":null,"summarization_PnSummary_rougeL_f1_score":null,"nlg_score":null}
{"Model Name":"gemma-3-12b-it","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1156871575,"summarization_PnSummary_rougeL_recall":0.3630716995,"summarization_PnSummary_rougeL_f1_score":0.1697348346,"nlg_score":0.1196804312}
{"Model Name":"gemma-3-27b-it","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1124574222,"summarization_PnSummary_rougeL_recall":0.3717393409,"summarization_PnSummary_rougeL_f1_score":0.1673025553,"nlg_score":0.1067134448}
{"Model Name":"Qwen3-14B","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1123870374,"summarization_PnSummary_rougeL_recall":0.4032007327,"summarization_PnSummary_rougeL_f1_score":0.17115848,"nlg_score":0.16056333}
{"Model Name":"Qwen3-32B","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1089978489,"summarization_PnSummary_rougeL_recall":0.3936021933,"summarization_PnSummary_rougeL_f1_score":0.1662525669,"nlg_score":0.1679338638}
{"Model Name":"claude-3-5-haiku-20241022","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.132916538,"summarization_PnSummary_rougeL_recall":0.3579358655,"summarization_PnSummary_rougeL_f1_score":0.1887379797,"nlg_score":0.1089333827}
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1029257145,"summarization_PnSummary_rougeL_recall":0.4347811424,"summarization_PnSummary_rougeL_f1_score":0.1621438757,"nlg_score":0.1319091735}
{"Model Name":"deepseek-chat","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.0811186656,"summarization_PnSummary_rougeL_recall":0.3940089293,"summarization_PnSummary_rougeL_f1_score":0.1316106196,"nlg_score":0.0934094344}
{"Model Name":"Qwen3-4B","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1067208324,"summarization_PnSummary_rougeL_recall":0.4109136551,"summarization_PnSummary_rougeL_f1_score":0.1648475797,"nlg_score":0.1389297212}
{"Model Name":"gemma-3-1b-it","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.111370713,"summarization_PnSummary_rougeL_recall":0.3732014316,"summarization_PnSummary_rougeL_f1_score":0.1661125342,"nlg_score":0.0682994522}
{"Model Name":"aya-expanse-32b","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1094933227,"summarization_PnSummary_rougeL_recall":0.3979476995,"summarization_PnSummary_rougeL_f1_score":0.1674664883,"nlg_score":0.1196400535}
{"Model Name":"Llama-3.3-70B-Instruct","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.2237919051,"summarization_PnSummary_rougeL_recall":0.3532978852,"summarization_PnSummary_rougeL_f1_score":0.2484855426,"nlg_score":0.2010896964}
{"Model Name":"gpt-4.1-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.1156493376,"summarization_PnSummary_rougeL_recall":0.403347998,"summarization_PnSummary_rougeL_f1_score":0.1750055649,"nlg_score":0.1901206806}
{"Model Name":"gpt-4o-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.1160048958,"summarization_PnSummary_rougeL_recall":0.3980422927,"summarization_PnSummary_rougeL_f1_score":0.1751797476,"nlg_score":0.1810678527}
{"Model Name":"c4ai-command-a-03-2025","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":null,"summarization_PnSummary_rougeL_recall":null,"summarization_PnSummary_rougeL_f1_score":null,"nlg_score":null}
{"Model Name":"gemini-2.0-flash","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.1228424623,"summarization_PnSummary_rougeL_recall":0.3750771332,"summarization_PnSummary_rougeL_f1_score":0.1793201723,"nlg_score":0.178231145}
{"Model Name":"c4ai-command-r-v01","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1298447221,"summarization_PnSummary_rougeL_recall":0.3548911672,"summarization_PnSummary_rougeL_f1_score":0.1841564462,"nlg_score":0.1641995602}
{"Model Name":"gpt-4.1-nano","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.1120916238,"summarization_PnSummary_rougeL_recall":0.3610411286,"summarization_PnSummary_rougeL_f1_score":0.1660826543,"nlg_score":0.1665903777}
{"Model Name":"Qwen3-8B","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.109255822,"summarization_PnSummary_rougeL_recall":0.3979273385,"summarization_PnSummary_rougeL_f1_score":0.1669061111,"nlg_score":0.1557270864}
{"Model Name":"Mistral-7B-Instruct-v0.3","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.0921640152,"summarization_PnSummary_rougeL_recall":0.4401953868,"summarization_PnSummary_rougeL_f1_score":0.1480945013,"nlg_score":0.0944140383}
{"Model Name":"gpt-4o","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.1134979628,"summarization_PnSummary_rougeL_recall":0.3909794734,"summarization_PnSummary_rougeL_f1_score":0.1716841943,"nlg_score":0.18964968}
{"Model Name":"deepseek-reasoner","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.0849469928,"summarization_PnSummary_rougeL_recall":0.3820724231,"summarization_PnSummary_rougeL_f1_score":0.1359575611,"nlg_score":0.0880621978}
{"Model Name":"Qwen3-30B-A3B","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1212751301,"summarization_PnSummary_rougeL_recall":0.3923323141,"summarization_PnSummary_rougeL_f1_score":0.1804727387,"nlg_score":0.164118288}
{"Model Name":"Llama-3.2-3B-Instruct","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.151465786,"summarization_PnSummary_rougeL_recall":0.3775823327,"summarization_PnSummary_rougeL_f1_score":0.203395452,"nlg_score":0.1129755187}
{"Model Name":"Llama-3.2-1B-Instruct","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.2061378815,"summarization_PnSummary_rougeL_recall":0.2755376589,"summarization_PnSummary_rougeL_f1_score":0.2192316506,"nlg_score":0.0823387318}
leaderboard/boards_data/summarization_SamSUM-fa.jsonl
ADDED
@@ -0,0 +1,29 @@
{"Model Name":"claude-3-7-sonnet-20250219","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.1538512444,"summarization_SamSUM-fa_rougeL_recall":0.3849531288,"summarization_SamSUM-fa_rougeL_f1_score":0.2115502707,"nlg_score":0.1779340777}
{"Model Name":"gemma-3-4b-it","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1638274716,"summarization_SamSUM-fa_rougeL_recall":0.3535878882,"summarization_SamSUM-fa_rougeL_f1_score":0.2134854664,"nlg_score":0.0949943578}
{"Model Name":"c4ai-command-r-plus","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1591262985,"summarization_SamSUM-fa_rougeL_recall":0.4163090512,"summarization_SamSUM-fa_rougeL_f1_score":0.2208876443,"nlg_score":0.1880477876}
{"Model Name":"gpt-4.1","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.1681357159,"summarization_SamSUM-fa_rougeL_recall":0.3567938895,"summarization_SamSUM-fa_rougeL_f1_score":0.2189693454,"nlg_score":0.194675133}
{"Model Name":"o4-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":null,"summarization_SamSUM-fa_rougeL_recall":null,"summarization_SamSUM-fa_rougeL_f1_score":null,"nlg_score":null}
{"Model Name":"gemma-3-12b-it","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1658145118,"summarization_SamSUM-fa_rougeL_recall":0.3677760479,"summarization_SamSUM-fa_rougeL_f1_score":0.2189237562,"nlg_score":0.1196804312}
{"Model Name":"gemma-3-27b-it","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1588367988,"summarization_SamSUM-fa_rougeL_recall":0.3735722635,"summarization_SamSUM-fa_rougeL_f1_score":0.2131671502,"nlg_score":0.1067134448}
{"Model Name":"Qwen3-14B","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1469468837,"summarization_SamSUM-fa_rougeL_recall":0.3743807014,"summarization_SamSUM-fa_rougeL_f1_score":0.2022859929,"nlg_score":0.16056333}
{"Model Name":"Qwen3-32B","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1514618398,"summarization_SamSUM-fa_rougeL_recall":0.3683020708,"summarization_SamSUM-fa_rougeL_f1_score":0.2063212948,"nlg_score":0.1679338638}
{"Model Name":"claude-3-5-haiku-20241022","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.1772724525,"summarization_SamSUM-fa_rougeL_recall":0.341583677,"summarization_SamSUM-fa_rougeL_f1_score":0.2233271064,"nlg_score":0.1089333827}
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.147286408,"summarization_SamSUM-fa_rougeL_recall":0.4066657958,"summarization_SamSUM-fa_rougeL_f1_score":0.2072278176,"nlg_score":0.1319091735}
{"Model Name":"deepseek-chat","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.0893319419,"summarization_SamSUM-fa_rougeL_recall":0.3701712252,"summarization_SamSUM-fa_rougeL_f1_score":0.1392333016,"nlg_score":0.0934094344}
{"Model Name":"Qwen3-4B","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1429609514,"summarization_SamSUM-fa_rougeL_recall":0.397717388,"summarization_SamSUM-fa_rougeL_f1_score":0.2013136641,"nlg_score":0.1389297212}
{"Model Name":"gemma-3-1b-it","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1340334866,"summarization_SamSUM-fa_rougeL_recall":0.3184206946,"summarization_SamSUM-fa_rougeL_f1_score":0.179098961,"nlg_score":0.0682994522}
{"Model Name":"aya-expanse-32b","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1338082958,"summarization_SamSUM-fa_rougeL_recall":0.397938928,"summarization_SamSUM-fa_rougeL_f1_score":0.1933390916,"nlg_score":0.1196400535}
{"Model Name":"Llama-3.3-70B-Instruct","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1632163927,"summarization_SamSUM-fa_rougeL_recall":0.387510969,"summarization_SamSUM-fa_rougeL_f1_score":0.2157634129,"nlg_score":0.2010896964}
{"Model Name":"gpt-4.1-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.171454009,"summarization_SamSUM-fa_rougeL_recall":0.3692597258,"summarization_SamSUM-fa_rougeL_f1_score":0.2248722593,"nlg_score":0.1901206806}
{"Model Name":"gpt-4o-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.1578034675,"summarization_SamSUM-fa_rougeL_recall":0.3902121243,"summarization_SamSUM-fa_rougeL_f1_score":0.2156396673,"nlg_score":0.1810678527}
{"Model Name":"c4ai-command-a-03-2025","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":null,"summarization_SamSUM-fa_rougeL_recall":null,"summarization_SamSUM-fa_rougeL_f1_score":null,"nlg_score":null}
{"Model Name":"gemini-2.0-flash","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.1808561992,"summarization_SamSUM-fa_rougeL_recall":0.414509553,"summarization_SamSUM-fa_rougeL_f1_score":0.2406998552,"nlg_score":0.178231145}
{"Model Name":"c4ai-command-r-v01","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1944265929,"summarization_SamSUM-fa_rougeL_recall":0.3761499249,"summarization_SamSUM-fa_rougeL_f1_score":0.242617187,"nlg_score":0.1641995602}
{"Model Name":"gpt-4.1-nano","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.16175156,"summarization_SamSUM-fa_rougeL_recall":0.3477483743,"summarization_SamSUM-fa_rougeL_f1_score":0.209834706,"nlg_score":0.1665903777}
{"Model Name":"Qwen3-8B","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1463365551,"summarization_SamSUM-fa_rougeL_recall":0.3856017289,"summarization_SamSUM-fa_rougeL_f1_score":0.2024070197,"nlg_score":0.1557270864}
{"Model Name":"Mistral-7B-Instruct-v0.3","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1180795687,"summarization_SamSUM-fa_rougeL_recall":0.3922712004,"summarization_SamSUM-fa_rougeL_f1_score":0.170765794,"nlg_score":0.0944140383}
{"Model Name":"gpt-4o","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.165108522,"summarization_SamSUM-fa_rougeL_recall":0.3982318891,"summarization_SamSUM-fa_rougeL_f1_score":0.2240082992,"nlg_score":0.18964968}
{"Model Name":"deepseek-reasoner","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1081719649,"summarization_SamSUM-fa_rougeL_recall":0.3726803698,"summarization_SamSUM-fa_rougeL_f1_score":0.1606804283,"nlg_score":0.0880621978}
{"Model Name":"Qwen3-30B-A3B","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1523824175,"summarization_SamSUM-fa_rougeL_recall":0.3838683519,"summarization_SamSUM-fa_rougeL_f1_score":0.2083553767,"nlg_score":0.164118288}
{"Model Name":"Llama-3.2-3B-Instruct","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1385750553,"summarization_SamSUM-fa_rougeL_recall":0.3133561002,"summarization_SamSUM-fa_rougeL_f1_score":0.1819150852,"nlg_score":0.1129755187}
{"Model Name":"Llama-3.2-1B-Instruct","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1565749742,"summarization_SamSUM-fa_rougeL_recall":0.2642298658,"summarization_SamSUM-fa_rougeL_f1_score":0.1759907012,"nlg_score":0.0823387318}
leaderboard/boards_data/tone-classification_SynTone.jsonl
ADDED
@@ -0,0 +1,29 @@
{"Model Name":"claude-3-7-sonnet-20250219","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.8198757764,"tone-classification_SynTone_precision_modified":0.8180456965,"tone-classification_SynTone_recall_modified":0.5977640757,"tone-classification_SynTone_fscore_modified":0.6364434216,"tone-classification_SynTone_acc":0.8859060403,"tone-classification_SynTone_precision":0.8839285714,"tone-classification_SynTone_recall":0.6459061489,"tone-classification_SynTone_fscore":0.68770061,"tone-classification_SynTone_valid_output_ratio":0.9254658385,"nlu_score":0.7143086066}
{"Model Name":"gemma-3-4b-it","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.5031055901,"tone-classification_SynTone_precision_modified":0.5228364877,"tone-classification_SynTone_recall_modified":0.5168736971,"tone-classification_SynTone_fscore_modified":0.4644759375,"tone-classification_SynTone_acc":0.5094339623,"tone-classification_SynTone_precision":0.5294130473,"tone-classification_SynTone_recall":0.523375253,"tone-classification_SynTone_fscore":0.4703184021,"tone-classification_SynTone_valid_output_ratio":0.9875776398,"nlu_score":0.6241793507}
{"Model Name":"c4ai-command-r-plus","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.6770186335,"tone-classification_SynTone_precision_modified":0.6041666667,"tone-classification_SynTone_recall_modified":0.5499765318,"tone-classification_SynTone_fscore_modified":0.5393404488,"tone-classification_SynTone_acc":0.6770186335,"tone-classification_SynTone_precision":0.6041666667,"tone-classification_SynTone_recall":0.5499765318,"tone-classification_SynTone_fscore":0.5393404488,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6297634971}
{"Model Name":"gpt-4.1","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.9130434783,"tone-classification_SynTone_precision_modified":0.8396595026,"tone-classification_SynTone_recall_modified":0.7058371736,"tone-classification_SynTone_fscore_modified":0.748745873,"tone-classification_SynTone_acc":0.9130434783,"tone-classification_SynTone_precision":0.8396595026,"tone-classification_SynTone_recall":0.7058371736,"tone-classification_SynTone_fscore":0.748745873,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6758278127}
{"Model Name":"o4-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":null,"tone-classification_SynTone_precision_modified":null,"tone-classification_SynTone_recall_modified":null,"tone-classification_SynTone_fscore_modified":null,"tone-classification_SynTone_acc":null,"tone-classification_SynTone_precision":null,"tone-classification_SynTone_recall":null,"tone-classification_SynTone_fscore":null,"tone-classification_SynTone_valid_output_ratio":null,"nlu_score":null}
{"Model Name":"gemma-3-12b-it","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.8260869565,"tone-classification_SynTone_precision_modified":0.7005172378,"tone-classification_SynTone_recall_modified":0.6637288786,"tone-classification_SynTone_fscore_modified":0.6775611485,"tone-classification_SynTone_acc":0.8260869565,"tone-classification_SynTone_precision":0.7005172378,"tone-classification_SynTone_recall":0.6637288786,"tone-classification_SynTone_fscore":0.6775611485,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.699116864}
{"Model Name":"gemma-3-27b-it","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.7888198758,"tone-classification_SynTone_precision_modified":0.6529418051,"tone-classification_SynTone_recall_modified":0.7187467998,"tone-classification_SynTone_fscore_modified":0.6745690521,"tone-classification_SynTone_acc":0.7888198758,"tone-classification_SynTone_precision":0.6529418051,"tone-classification_SynTone_recall":0.7187467998,"tone-classification_SynTone_fscore":0.6745690521,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6898261633}
{"Model Name":"Qwen3-14B","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.4968944099,"tone-classification_SynTone_precision_modified":0.5363835928,"tone-classification_SynTone_recall_modified":0.5772956136,"tone-classification_SynTone_fscore_modified":0.4755414981,"tone-classification_SynTone_acc":0.4968944099,"tone-classification_SynTone_precision":0.5363835928,"tone-classification_SynTone_recall":0.5772956136,"tone-classification_SynTone_fscore":0.4755414981,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6460328733}
{"Model Name":"Qwen3-32B","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.8074534161,"tone-classification_SynTone_precision_modified":0.6799424424,"tone-classification_SynTone_recall_modified":0.6338304318,"tone-classification_SynTone_fscore_modified":0.6490392995,"tone-classification_SynTone_acc":0.8074534161,"tone-classification_SynTone_precision":0.6799424424,"tone-classification_SynTone_recall":0.6338304318,"tone-classification_SynTone_fscore":0.6490392995,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6714091535}
{"Model Name":"claude-3-5-haiku-20241022","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.0,"tone-classification_SynTone_precision_modified":0.0,"tone-classification_SynTone_recall_modified":0.0,"tone-classification_SynTone_fscore_modified":0.0,"tone-classification_SynTone_acc":0.0,"tone-classification_SynTone_precision":0.0,"tone-classification_SynTone_recall":0.0,"tone-classification_SynTone_fscore":0.0,"tone-classification_SynTone_valid_output_ratio":0.0,"nlu_score":0.3749414991}
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.7763975155,"tone-classification_SynTone_precision_modified":0.6774948824,"tone-classification_SynTone_recall_modified":0.67683866,"tone-classification_SynTone_fscore_modified":0.668356732,"tone-classification_SynTone_acc":0.7911392405,"tone-classification_SynTone_precision":0.6903587093,"tone-classification_SynTone_recall":0.689690027,"tone-classification_SynTone_fscore":0.6810470497,"tone-classification_SynTone_valid_output_ratio":0.9813664596,"nlu_score":0.5661558794}
{"Model Name":"deepseek-chat","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.8260869565,"tone-classification_SynTone_precision_modified":0.7339379455,"tone-classification_SynTone_recall_modified":0.6971283495,"tone-classification_SynTone_fscore_modified":0.7109712868,"tone-classification_SynTone_acc":0.8260869565,"tone-classification_SynTone_precision":0.7339379455,"tone-classification_SynTone_recall":0.6971283495,"tone-classification_SynTone_fscore":0.7109712868,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6752949557}
{"Model Name":"Qwen3-4B","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.6583850932,"tone-classification_SynTone_precision_modified":0.541342126,"tone-classification_SynTone_recall_modified":0.5629697742,"tone-classification_SynTone_fscore_modified":0.5384255059,"tone-classification_SynTone_acc":0.6666666667,"tone-classification_SynTone_precision":0.5481514609,"tone-classification_SynTone_recall":0.570051155,"tone-classification_SynTone_fscore":0.5451981537,"tone-classification_SynTone_valid_output_ratio":0.9875776398,"nlu_score":0.5121418762}
{"Model Name":"gemma-3-1b-it","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.0248447205,"tone-classification_SynTone_precision_modified":0.0258799172,"tone-classification_SynTone_recall_modified":0.0266193434,"tone-classification_SynTone_fscore_modified":0.0174833218,"tone-classification_SynTone_acc":0.4,"tone-classification_SynTone_precision":0.4166666667,"tone-classification_SynTone_recall":0.4285714286,"tone-classification_SynTone_fscore":0.2814814815,"tone-classification_SynTone_valid_output_ratio":0.0621118012,"nlu_score":0.3619547874}
{"Model Name":"aya-expanse-32b","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.0,"tone-classification_SynTone_precision_modified":0.0,"tone-classification_SynTone_recall_modified":0.0,"tone-classification_SynTone_fscore_modified":0.0,"tone-classification_SynTone_acc":0.0,"tone-classification_SynTone_precision":0.0,"tone-classification_SynTone_recall":0.0,"tone-classification_SynTone_fscore":0.0,"tone-classification_SynTone_valid_output_ratio":0.0,"nlu_score":0.3928685253}
{"Model Name":"Llama-3.3-70B-Instruct","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.6832298137,"tone-classification_SynTone_precision_modified":0.6414593698,"tone-classification_SynTone_recall_modified":0.7386456733,"tone-classification_SynTone_fscore_modified":0.6520765046,"tone-classification_SynTone_acc":0.6832298137,"tone-classification_SynTone_precision":0.6414593698,"tone-classification_SynTone_recall":0.7386456733,"tone-classification_SynTone_fscore":0.6520765046,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6800109206}
{"Model Name":"gpt-4.1-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.9130434783,"tone-classification_SynTone_precision_modified":0.7790862291,"tone-classification_SynTone_recall_modified":0.747838795,"tone-classification_SynTone_fscore_modified":0.7624467793,"tone-classification_SynTone_acc":0.9130434783,"tone-classification_SynTone_precision":0.7790862291,"tone-classification_SynTone_recall":0.747838795,"tone-classification_SynTone_fscore":0.7624467793,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6833497104}
{"Model Name":"gpt-4o-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.850931677,"tone-classification_SynTone_precision_modified":0.7478203083,"tone-classification_SynTone_recall_modified":0.6482356204,"tone-classification_SynTone_fscore_modified":0.6776329308,"tone-classification_SynTone_acc":0.850931677,"tone-classification_SynTone_precision":0.7478203083,"tone-classification_SynTone_recall":0.6482356204,"tone-classification_SynTone_fscore":0.6776329308,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6459120734}
{"Model Name":"c4ai-command-a-03-2025","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":null,"tone-classification_SynTone_precision_modified":null,"tone-classification_SynTone_recall_modified":null,"tone-classification_SynTone_fscore_modified":null,"tone-classification_SynTone_acc":null,"tone-classification_SynTone_precision":null,"tone-classification_SynTone_recall":null,"tone-classification_SynTone_fscore":null,"tone-classification_SynTone_valid_output_ratio":null,"nlu_score":null}
{"Model Name":"gemini-2.0-flash","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.7763975155,"tone-classification_SynTone_precision_modified":0.6667611945,"tone-classification_SynTone_recall_modified":0.6423579109,"tone-classification_SynTone_fscore_modified":0.6405012061,"tone-classification_SynTone_acc":0.7763975155,"tone-classification_SynTone_precision":0.6667611945,"tone-classification_SynTone_recall":0.6423579109,"tone-classification_SynTone_fscore":0.6405012061,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.7050532433}
{"Model Name":"c4ai-command-r-v01","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.7453416149,"tone-classification_SynTone_precision_modified":0.5770588432,"tone-classification_SynTone_recall_modified":0.5070698686,"tone-classification_SynTone_fscore_modified":0.5221093948,"tone-classification_SynTone_acc":0.7547169811,"tone-classification_SynTone_precision":0.584317445,"tone-classification_SynTone_recall":0.513448106,"tone-classification_SynTone_fscore":0.5286768085,"tone-classification_SynTone_valid_output_ratio":0.9875776398,"nlu_score":0.531045981}
{"Model Name":"gpt-4.1-nano","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.8322981366,"tone-classification_SynTone_precision_modified":0.7291450859,"tone-classification_SynTone_recall_modified":0.5849377027,"tone-classification_SynTone_fscore_modified":0.6077296942,"tone-classification_SynTone_acc":0.8322981366,"tone-classification_SynTone_precision":0.7291450859,"tone-classification_SynTone_recall":0.5849377027,"tone-classification_SynTone_fscore":0.6077296942,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6262096694}
{"Model Name":"Qwen3-8B","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.6397515528,"tone-classification_SynTone_precision_modified":0.5483185514,"tone-classification_SynTone_recall_modified":0.590333248,"tone-classification_SynTone_fscore_modified":0.530467546,"tone-classification_SynTone_acc":0.6397515528,"tone-classification_SynTone_precision":0.5483185514,"tone-classification_SynTone_recall":0.590333248,"tone-classification_SynTone_fscore":0.530467546,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.5968415875}
{"Model Name":"Mistral-7B-Instruct-v0.3","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.2919254658,"tone-classification_SynTone_precision_modified":0.193413297,"tone-classification_SynTone_recall_modified":0.2361166008,"tone-classification_SynTone_fscore_modified":0.1873840673,"tone-classification_SynTone_acc":0.5802469136,"tone-classification_SynTone_precision":0.3844387755,"tone-classification_SynTone_recall":0.4693181818,"tone-classification_SynTone_fscore":0.3724547511,"tone-classification_SynTone_valid_output_ratio":0.5031055901,"nlu_score":0.3916645306}
{"Model Name":"gpt-4o","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.9068322981,"tone-classification_SynTone_precision_modified":0.8215992694,"tone-classification_SynTone_recall_modified":0.7311721283,"tone-classification_SynTone_fscore_modified":0.7679761027,"tone-classification_SynTone_acc":0.9068322981,"tone-classification_SynTone_precision":0.8215992694,"tone-classification_SynTone_recall":0.7311721283,"tone-classification_SynTone_fscore":0.7679761027,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.7146808531}
{"Model Name":"deepseek-reasoner","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.4347826087,"tone-classification_SynTone_precision_modified":0.3863322077,"tone-classification_SynTone_recall_modified":0.383431452,"tone-classification_SynTone_fscore_modified":0.3837887153,"tone-classification_SynTone_acc":0.7865168539,"tone-classification_SynTone_precision":0.6988706228,"tone-classification_SynTone_recall":0.6936231884,"tone-classification_SynTone_fscore":0.6942694738,"tone-classification_SynTone_valid_output_ratio":0.5527950311,"nlu_score":0.6361186163}
{"Model Name":"Qwen3-30B-A3B","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.347826087,"tone-classification_SynTone_precision_modified":0.4069335674,"tone-classification_SynTone_recall_modified":0.3420272309,"tone-classification_SynTone_fscore_modified":0.3323819164,"tone-classification_SynTone_acc":0.5333333333,"tone-classification_SynTone_precision":0.6239648033,"tone-classification_SynTone_recall":0.5244417541,"tone-classification_SynTone_fscore":0.5096522718,"tone-classification_SynTone_valid_output_ratio":0.652173913,"nlu_score":0.6255818412}
{"Model Name":"Llama-3.2-3B-Instruct","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.0,"tone-classification_SynTone_precision_modified":0.0,"tone-classification_SynTone_recall_modified":0.0,"tone-classification_SynTone_fscore_modified":0.0,"tone-classification_SynTone_acc":0.0,"tone-classification_SynTone_precision":0.0,"tone-classification_SynTone_recall":0.0,"tone-classification_SynTone_fscore":0.0,"tone-classification_SynTone_valid_output_ratio":0.0,"nlu_score":0.1368924446}
{"Model Name":"Llama-3.2-1B-Instruct","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.0,"tone-classification_SynTone_precision_modified":0.0,"tone-classification_SynTone_recall_modified":0.0,"tone-classification_SynTone_fscore_modified":0.0,"tone-classification_SynTone_acc":0.0,"tone-classification_SynTone_precision":0.0,"tone-classification_SynTone_recall":0.0,"tone-classification_SynTone_fscore":0.0,"tone-classification_SynTone_valid_output_ratio":0.0062111801,"nlu_score":0.046805056}
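Each `boards_data/*.jsonl` file holds one JSON object per model: identity fields (`Model Name`, `model_url`, `parameters_count`, `source_type`), per-metric raw scores, their `*_modified` counterparts, a `*_valid_output_ratio`, and the board-level aggregate (`nlu_score` on this board). In the rows above, each `*_modified` value is consistent with the raw metric scaled by the valid-output ratio (e.g., for claude-3-7-sonnet: 0.8859 × 0.9255 ≈ 0.8199). A minimal reading sketch under that assumption — the loader below is illustrative, not the leaderboard's actual code:

```python
import json
import math

def load_board(path):
    """Read one boards_data JSONL file into a list of per-model dicts."""
    with open(path, encoding="utf-8") as f:
        return [json.loads(line) for line in f if line.strip()]

rows = load_board("leaderboard/boards_data/tone-classification_SynTone.jsonl")
prefix = "tone-classification_SynTone"
for row in rows:
    raw = row[f"{prefix}_acc"]
    ratio = row[f"{prefix}_valid_output_ratio"]
    modified = row[f"{prefix}_acc_modified"]
    if None in (raw, ratio, modified):
        continue  # unevaluated models carry nulls across the board
    # Observed relationship in this data: modified ≈ raw × valid_output_ratio.
    assert math.isclose(raw * ratio, modified, abs_tol=1e-6), row["Model Name"]
```

A side effect of that scaling: models that produced no parseable output at all (`valid_output_ratio` of 0.0, as in the claude-3-5-haiku and aya-expanse-32b rows above) score 0.0 on every modified metric.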
leaderboard/boards_data/topic-classification_sid.jsonl
ADDED
@@ -0,0 +1,29 @@
{"Model Name":"claude-3-7-sonnet-20250219","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.792,"topic-classification_sid_precision_modified":0.662532811,"topic-classification_sid_recall_modified":0.6635073397,"topic-classification_sid_fscore_modified":0.6583038933,"topic-classification_sid_acc":0.792,"topic-classification_sid_precision":0.662532811,"topic-classification_sid_recall":0.6635073397,"topic-classification_sid_fscore":0.6583038933,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.7143086066}
{"Model Name":"gemma-3-4b-it","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.562,"topic-classification_sid_precision_modified":0.4846723602,"topic-classification_sid_recall_modified":0.454656985,"topic-classification_sid_fscore_modified":0.424509489,"topic-classification_sid_acc":0.5928270042,"topic-classification_sid_precision":0.511257764,"topic-classification_sid_recall":0.4795959757,"topic-classification_sid_fscore":0.4477948196,"topic-classification_sid_valid_output_ratio":0.948,"nlu_score":0.6241793507}
{"Model Name":"c4ai-command-r-plus","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.682,"topic-classification_sid_precision_modified":0.5730049986,"topic-classification_sid_recall_modified":0.5717337985,"topic-classification_sid_fscore_modified":0.5606248332,"topic-classification_sid_acc":0.7239915074,"topic-classification_sid_precision":0.6082855612,"topic-classification_sid_recall":0.6069360918,"topic-classification_sid_fscore":0.595143135,"topic-classification_sid_valid_output_ratio":0.942,"nlu_score":0.6297634971}
{"Model Name":"gpt-4.1","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.748,"topic-classification_sid_precision_modified":0.6428566774,"topic-classification_sid_recall_modified":0.6633522535,"topic-classification_sid_fscore_modified":0.628605048,"topic-classification_sid_acc":0.748,"topic-classification_sid_precision":0.6428566774,"topic-classification_sid_recall":0.6633522535,"topic-classification_sid_fscore":0.628605048,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.6758278127}
{"Model Name":"o4-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":null,"topic-classification_sid_precision_modified":null,"topic-classification_sid_recall_modified":null,"topic-classification_sid_fscore_modified":null,"topic-classification_sid_acc":null,"topic-classification_sid_precision":null,"topic-classification_sid_recall":null,"topic-classification_sid_fscore":null,"topic-classification_sid_valid_output_ratio":null,"nlu_score":null}
{"Model Name":"gemma-3-12b-it","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.658,"topic-classification_sid_precision_modified":0.6006169042,"topic-classification_sid_recall_modified":0.5559595512,"topic-classification_sid_fscore_modified":0.5315039094,"topic-classification_sid_acc":0.6659919028,"topic-classification_sid_precision":0.6079118464,"topic-classification_sid_recall":0.5627120963,"topic-classification_sid_fscore":0.5379594225,"topic-classification_sid_valid_output_ratio":0.988,"nlu_score":0.699116864}
{"Model Name":"gemma-3-27b-it","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.702,"topic-classification_sid_precision_modified":0.6070538637,"topic-classification_sid_recall_modified":0.5902772191,"topic-classification_sid_fscore_modified":0.5805725473,"topic-classification_sid_acc":0.7076612903,"topic-classification_sid_precision":0.6119494594,"topic-classification_sid_recall":0.5950375192,"topic-classification_sid_fscore":0.585254584,"topic-classification_sid_valid_output_ratio":0.992,"nlu_score":0.6898261633}
{"Model Name":"Qwen3-14B","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.636,"topic-classification_sid_precision_modified":0.6248180645,"topic-classification_sid_recall_modified":0.5200071748,"topic-classification_sid_fscore_modified":0.5212205085,"topic-classification_sid_acc":0.636,"topic-classification_sid_precision":0.6248180645,"topic-classification_sid_recall":0.5200071748,"topic-classification_sid_fscore":0.5212205085,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.6460328733}
{"Model Name":"Qwen3-32B","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.742,"topic-classification_sid_precision_modified":0.6167254178,"topic-classification_sid_recall_modified":0.6125584783,"topic-classification_sid_fscore_modified":0.5990165281,"topic-classification_sid_acc":0.7449799197,"topic-classification_sid_precision":0.6192022267,"topic-classification_sid_recall":0.6150185525,"topic-classification_sid_fscore":0.6014222169,"topic-classification_sid_valid_output_ratio":0.996,"nlu_score":0.6714091535}
{"Model Name":"claude-3-5-haiku-20241022","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.75,"topic-classification_sid_precision_modified":0.6327306402,"topic-classification_sid_recall_modified":0.6347455045,"topic-classification_sid_fscore_modified":0.6231971632,"topic-classification_sid_acc":0.75,"topic-classification_sid_precision":0.6327306402,"topic-classification_sid_recall":0.6347455045,"topic-classification_sid_fscore":0.6231971632,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.3749414991}
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.656,"topic-classification_sid_precision_modified":0.5819241823,"topic-classification_sid_recall_modified":0.5649560499,"topic-classification_sid_fscore_modified":0.5472284688,"topic-classification_sid_acc":0.7038626609,"topic-classification_sid_precision":0.6243821699,"topic-classification_sid_recall":0.6061760192,"topic-classification_sid_fscore":0.5871550095,"topic-classification_sid_valid_output_ratio":0.932,"nlu_score":0.5661558794}
{"Model Name":"deepseek-chat","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.77,"topic-classification_sid_precision_modified":0.6561573641,"topic-classification_sid_recall_modified":0.6752129415,"topic-classification_sid_fscore_modified":0.6425647774,"topic-classification_sid_acc":0.77,"topic-classification_sid_precision":0.6561573641,"topic-classification_sid_recall":0.6752129415,"topic-classification_sid_fscore":0.6425647774,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.6752949557}
{"Model Name":"Qwen3-4B","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.368,"topic-classification_sid_precision_modified":0.6014683953,"topic-classification_sid_recall_modified":0.2970267773,"topic-classification_sid_fscore_modified":0.2767247094,"topic-classification_sid_acc":0.3739837398,"topic-classification_sid_precision":0.6112483692,"topic-classification_sid_recall":0.301856481,"topic-classification_sid_fscore":0.2812242981,"topic-classification_sid_valid_output_ratio":0.984,"nlu_score":0.5121418762}
{"Model Name":"gemma-3-1b-it","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.38,"topic-classification_sid_precision_modified":0.2019117794,"topic-classification_sid_recall_modified":0.1756256003,"topic-classification_sid_fscore_modified":0.1195613397,"topic-classification_sid_acc":0.3830645161,"topic-classification_sid_precision":0.2035401003,"topic-classification_sid_recall":0.1770419358,"topic-classification_sid_fscore":0.1205255441,"topic-classification_sid_valid_output_ratio":0.992,"nlu_score":0.3619547874}
{"Model Name":"aya-expanse-32b","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.628,"topic-classification_sid_precision_modified":0.5459969989,"topic-classification_sid_recall_modified":0.52365232,"topic-classification_sid_fscore_modified":0.5199604173,"topic-classification_sid_acc":0.6840958606,"topic-classification_sid_precision":0.5947679727,"topic-classification_sid_recall":0.5704273638,"topic-classification_sid_fscore":0.5664056834,"topic-classification_sid_valid_output_ratio":0.918,"nlu_score":0.3928685253}
{"Model Name":"Llama-3.3-70B-Instruct","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.594,"topic-classification_sid_precision_modified":0.5608272475,"topic-classification_sid_recall_modified":0.5329233323,"topic-classification_sid_fscore_modified":0.527792484,"topic-classification_sid_acc":0.6359743041,"topic-classification_sid_precision":0.6004574384,"topic-classification_sid_recall":0.5705817263,"topic-classification_sid_fscore":0.5650883126,"topic-classification_sid_valid_output_ratio":0.934,"nlu_score":0.6800109206}
{"Model Name":"gpt-4.1-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.734,"topic-classification_sid_precision_modified":0.6555308571,"topic-classification_sid_recall_modified":0.6460010178,"topic-classification_sid_fscore_modified":0.6267962167,"topic-classification_sid_acc":0.734,"topic-classification_sid_precision":0.6555308571,"topic-classification_sid_recall":0.6460010178,"topic-classification_sid_fscore":0.6267962167,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.6833497104}
{"Model Name":"gpt-4o-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.704,"topic-classification_sid_precision_modified":0.6679390306,"topic-classification_sid_recall_modified":0.6259469635,"topic-classification_sid_fscore_modified":0.6265189311,"topic-classification_sid_acc":0.7054108216,"topic-classification_sid_precision":0.6692775858,"topic-classification_sid_recall":0.6272013662,"topic-classification_sid_fscore":0.62777448,"topic-classification_sid_valid_output_ratio":0.998,"nlu_score":0.6459120734}
{"Model Name":"c4ai-command-a-03-2025","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","topic-classification_sid_acc_modified":null,"topic-classification_sid_precision_modified":null,"topic-classification_sid_recall_modified":null,"topic-classification_sid_fscore_modified":null,"topic-classification_sid_acc":null,"topic-classification_sid_precision":null,"topic-classification_sid_recall":null,"topic-classification_sid_fscore":null,"topic-classification_sid_valid_output_ratio":null,"nlu_score":null}
{"Model Name":"gemini-2.0-flash","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.784,"topic-classification_sid_precision_modified":0.6819447861,"topic-classification_sid_recall_modified":0.6518325808,"topic-classification_sid_fscore_modified":0.6519138204,"topic-classification_sid_acc":0.7903225806,"topic-classification_sid_precision":0.6874443408,"topic-classification_sid_recall":0.6570892952,"topic-classification_sid_fscore":0.6571711899,"topic-classification_sid_valid_output_ratio":0.992,"nlu_score":0.7050532433}
{"Model Name":"c4ai-command-r-v01","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.412,"topic-classification_sid_precision_modified":0.3819473808,"topic-classification_sid_recall_modified":0.2194110821,"topic-classification_sid_fscore_modified":0.166159266,"topic-classification_sid_acc":0.4735632184,"topic-classification_sid_precision":0.439019978,"topic-classification_sid_recall":0.2521966461,"topic-classification_sid_fscore":0.190987662,"topic-classification_sid_valid_output_ratio":0.87,"nlu_score":0.531045981}
{"Model Name":"gpt-4.1-nano","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.462,"topic-classification_sid_precision_modified":0.4918469172,"topic-classification_sid_recall_modified":0.3261812324,"topic-classification_sid_fscore_modified":0.304777991,"topic-classification_sid_acc":0.5191011236,"topic-classification_sid_precision":0.5526369856,"topic-classification_sid_recall":0.3664957667,"topic-classification_sid_fscore":0.3424471809,"topic-classification_sid_valid_output_ratio":0.89,"nlu_score":0.6262096694}
{"Model Name":"Qwen3-8B","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.56,"topic-classification_sid_precision_modified":0.5309838171,"topic-classification_sid_recall_modified":0.4706044677,"topic-classification_sid_fscore_modified":0.484170357,"topic-classification_sid_acc":0.5702647658,"topic-classification_sid_precision":0.5407167181,"topic-classification_sid_recall":0.4792306188,"topic-classification_sid_fscore":0.4930451701,"topic-classification_sid_valid_output_ratio":0.982,"nlu_score":0.5968415875}
{"Model Name":"Mistral-7B-Instruct-v0.3","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.078,"topic-classification_sid_precision_modified":0.1626278832,"topic-classification_sid_recall_modified":0.0869379377,"topic-classification_sid_fscore_modified":0.061595189,"topic-classification_sid_acc":0.1211180124,"topic-classification_sid_precision":0.2525277689,"topic-classification_sid_recall":0.1349967977,"topic-classification_sid_fscore":0.0956447035,"topic-classification_sid_valid_output_ratio":0.644,"nlu_score":0.3916645306}
{"Model Name":"gpt-4o","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.756,"topic-classification_sid_precision_modified":0.6530505866,"topic-classification_sid_recall_modified":0.6684817133,"topic-classification_sid_fscore_modified":0.6358572631,"topic-classification_sid_acc":0.756,"topic-classification_sid_precision":0.6530505866,"topic-classification_sid_recall":0.6684817133,"topic-classification_sid_fscore":0.6358572631,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.7146808531}
{"Model Name":"deepseek-reasoner","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.608,"topic-classification_sid_precision_modified":0.5971774069,"topic-classification_sid_recall_modified":0.5095088497,"topic-classification_sid_fscore_modified":0.5160494942,"topic-classification_sid_acc":0.6333333333,"topic-classification_sid_precision":0.6220597988,"topic-classification_sid_recall":0.5307383851,"topic-classification_sid_fscore":0.5375515565,"topic-classification_sid_valid_output_ratio":0.96,"nlu_score":0.6361186163}
{"Model Name":"Qwen3-30B-A3B","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.586,"topic-classification_sid_precision_modified":0.5883032084,"topic-classification_sid_recall_modified":0.4720717732,"topic-classification_sid_fscore_modified":0.4937437004,"topic-classification_sid_acc":0.586,"topic-classification_sid_precision":0.5883032084,"topic-classification_sid_recall":0.4720717732,"topic-classification_sid_fscore":0.4937437004,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.6255818412}
{"Model Name":"Llama-3.2-3B-Instruct","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.018,"topic-classification_sid_precision_modified":0.1357154412,"topic-classification_sid_recall_modified":0.1259808206,"topic-classification_sid_fscore_modified":0.0108903706,"topic-classification_sid_acc":0.0184804928,"topic-classification_sid_precision":0.1393382353,"topic-classification_sid_recall":0.1293437584,"topic-classification_sid_fscore":0.0111810786,"topic-classification_sid_valid_output_ratio":0.974,"nlu_score":0.1368924446}
{"Model Name":"Llama-3.2-1B-Instruct","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.0,"topic-classification_sid_precision_modified":0.0,"topic-classification_sid_recall_modified":0.0,"topic-classification_sid_fscore_modified":0.0,"topic-classification_sid_acc":0.0,"topic-classification_sid_precision":0.0,"topic-classification_sid_recall":0.0,"topic-classification_sid_fscore":0.0,"topic-classification_sid_valid_output_ratio":0.0,"nlu_score":0.046805056}
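Two models (`o4-mini` and `c4ai-command-a-03-2025`) appear in every board with the full schema but `null` in every metric field, so consumers of these files must tolerate missing values rather than assume floats. A hypothetical ranking pass that sinks unevaluated models to the bottom:

```python
import json

with open("leaderboard/boards_data/topic-classification_sid.jsonl", encoding="utf-8") as f:
    rows = [json.loads(line) for line in f if line.strip()]

acc_key = "topic-classification_sid_acc"
# None-aware sort key: evaluated models first (descending accuracy), nulls last.
ranked = sorted(rows, key=lambda r: (r[acc_key] is None, -(r[acc_key] or 0.0)))
for rank, row in enumerate(ranked, start=1):
    print(f"{rank:2d}  {row['Model Name']:40s}  {row[acc_key]}")
```

Keeping the null rows in place rather than dropping them lets a board display pending models without disturbing the sort order of the evaluated ones.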
leaderboard/boards_data/translation-ar2fa_ar2fa.jsonl
ADDED
@@ -0,0 +1,29 @@
{"Model Name":"claude-3-7-sonnet-20250219","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.1471879954,"translation-ar2fa_ar2fa_sahife_bleu":0.1294214814,"translation-ar2fa_ar2fa_nahj_bleu":0.0642841927,"translation-ar2fa_ar2fa_quran_bleu":0.2437131219,"nlg_score":0.1779340777}
{"Model Name":"gemma-3-4b-it","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0186923531,"translation-ar2fa_ar2fa_sahife_bleu":0.0174521967,"translation-ar2fa_ar2fa_nahj_bleu":0.0097734226,"translation-ar2fa_ar2fa_quran_bleu":0.0284054936,"nlg_score":0.0949943578}
{"Model Name":"c4ai-command-r-plus","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.1414109272,"translation-ar2fa_ar2fa_sahife_bleu":0.136408042,"translation-ar2fa_ar2fa_nahj_bleu":0.0653197648,"translation-ar2fa_ar2fa_quran_bleu":0.2187004167,"nlg_score":0.1880477876}
{"Model Name":"gpt-4.1","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.1534130086,"translation-ar2fa_ar2fa_sahife_bleu":0.1250461134,"translation-ar2fa_ar2fa_nahj_bleu":0.0624466634,"translation-ar2fa_ar2fa_quran_bleu":0.2681979318,"nlg_score":0.194675133}
{"Model Name":"o4-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":null,"translation-ar2fa_ar2fa_sahife_bleu":null,"translation-ar2fa_ar2fa_nahj_bleu":null,"translation-ar2fa_ar2fa_quran_bleu":null,"nlg_score":null}
{"Model Name":"gemma-3-12b-it","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.032619077,"translation-ar2fa_ar2fa_sahife_bleu":0.0333185867,"translation-ar2fa_ar2fa_nahj_bleu":0.0106299838,"translation-ar2fa_ar2fa_quran_bleu":0.0528092057,"nlg_score":0.1196804312}
{"Model Name":"gemma-3-27b-it","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0465792583,"translation-ar2fa_ar2fa_sahife_bleu":0.023795336,"translation-ar2fa_ar2fa_nahj_bleu":0.0121091058,"translation-ar2fa_ar2fa_quran_bleu":0.1021098256,"nlg_score":0.1067134448}
{"Model Name":"Qwen3-14B","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0910450298,"translation-ar2fa_ar2fa_sahife_bleu":0.0862679894,"translation-ar2fa_ar2fa_nahj_bleu":0.0558129824,"translation-ar2fa_ar2fa_quran_bleu":0.1292925153,"nlg_score":0.16056333}
{"Model Name":"Qwen3-32B","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0985860814,"translation-ar2fa_ar2fa_sahife_bleu":0.0857687109,"translation-ar2fa_ar2fa_nahj_bleu":0.0622600203,"translation-ar2fa_ar2fa_quran_bleu":0.1459132099,"nlg_score":0.1679338638}
{"Model Name":"claude-3-5-haiku-20241022","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.0581992714,"translation-ar2fa_ar2fa_sahife_bleu":0.0540221076,"translation-ar2fa_ar2fa_nahj_bleu":0.0233017704,"translation-ar2fa_ar2fa_quran_bleu":0.095529061,"nlg_score":0.1089333827}
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0594554675,"translation-ar2fa_ar2fa_sahife_bleu":0.0539986603,"translation-ar2fa_ar2fa_nahj_bleu":0.035240584,"translation-ar2fa_ar2fa_quran_bleu":0.0879164142,"nlg_score":0.1319091735}
{"Model Name":"deepseek-chat","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0302818033,"translation-ar2fa_ar2fa_sahife_bleu":0.0272381325,"translation-ar2fa_ar2fa_nahj_bleu":0.0129029913,"translation-ar2fa_ar2fa_quran_bleu":0.0498353456,"nlg_score":0.0934094344}
{"Model Name":"Qwen3-4B","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0636385541,"translation-ar2fa_ar2fa_sahife_bleu":0.0557180428,"translation-ar2fa_ar2fa_nahj_bleu":0.0539968488,"translation-ar2fa_ar2fa_quran_bleu":0.0807186853,"nlg_score":0.1389297212}
{"Model Name":"gemma-3-1b-it","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.01007549,"translation-ar2fa_ar2fa_sahife_bleu":0.0116017776,"translation-ar2fa_ar2fa_nahj_bleu":0.0067782437,"translation-ar2fa_ar2fa_quran_bleu":0.0116815864,"nlg_score":0.0682994522}
{"Model Name":"aya-expanse-32b","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0608470749,"translation-ar2fa_ar2fa_sahife_bleu":0.0636783644,"translation-ar2fa_ar2fa_nahj_bleu":0.0258604511,"translation-ar2fa_ar2fa_quran_bleu":0.091253078,"nlg_score":0.1196400535}
{"Model Name":"Llama-3.3-70B-Instruct","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.1302111402,"translation-ar2fa_ar2fa_sahife_bleu":0.1104606951,"translation-ar2fa_ar2fa_nahj_bleu":0.0742081609,"translation-ar2fa_ar2fa_quran_bleu":0.2031644157,"nlg_score":0.2010896964}
{"Model Name":"gpt-4.1-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.143500711,"translation-ar2fa_ar2fa_sahife_bleu":0.1221294429,"translation-ar2fa_ar2fa_nahj_bleu":0.069521493,"translation-ar2fa_ar2fa_quran_bleu":0.235152236,"nlg_score":0.1901206806}
{"Model Name":"gpt-4o-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.1397574972,"translation-ar2fa_ar2fa_sahife_bleu":0.1273211367,"translation-ar2fa_ar2fa_nahj_bleu":0.0658485892,"translation-ar2fa_ar2fa_quran_bleu":0.2224073202,"nlg_score":0.1810678527}
{"Model Name":"c4ai-command-a-03-2025","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":null,"translation-ar2fa_ar2fa_sahife_bleu":null,"translation-ar2fa_ar2fa_nahj_bleu":null,"translation-ar2fa_ar2fa_quran_bleu":null,"nlg_score":null}
{"Model Name":"gemini-2.0-flash","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.15661924,"translation-ar2fa_ar2fa_sahife_bleu":0.1122809429,"translation-ar2fa_ar2fa_nahj_bleu":0.0629397909,"translation-ar2fa_ar2fa_quran_bleu":0.2899530138,"nlg_score":0.178231145}
{"Model Name":"c4ai-command-r-v01","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.1144863268,"translation-ar2fa_ar2fa_sahife_bleu":0.1190971594,"translation-ar2fa_ar2fa_nahj_bleu":0.0648109303,"translation-ar2fa_ar2fa_quran_bleu":0.157067121,"nlg_score":0.1641995602}
{"Model Name":"gpt-4.1-nano","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.1315367808,"translation-ar2fa_ar2fa_sahife_bleu":0.1063921688,"translation-ar2fa_ar2fa_nahj_bleu":0.0642188893,"translation-ar2fa_ar2fa_quran_bleu":0.2206333896,"nlg_score":0.1665903777}
{"Model Name":"Qwen3-8B","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0821020713,"translation-ar2fa_ar2fa_sahife_bleu":0.0730469461,"translation-ar2fa_ar2fa_nahj_bleu":0.0579031327,"translation-ar2fa_ar2fa_quran_bleu":0.1141461882,"nlg_score":0.1557270864}
{"Model Name":"Mistral-7B-Instruct-v0.3","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0352516229,"translation-ar2fa_ar2fa_sahife_bleu":0.031818336,"translation-ar2fa_ar2fa_nahj_bleu":0.0219225394,"translation-ar2fa_ar2fa_quran_bleu":0.0513475391,"nlg_score":0.0944140383}
{"Model Name":"gpt-4o","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.1542520457,"translation-ar2fa_ar2fa_sahife_bleu":0.1283925803,"translation-ar2fa_ar2fa_nahj_bleu":0.0660434951,"translation-ar2fa_ar2fa_quran_bleu":0.2639096342,"nlg_score":0.18964968}
{"Model Name":"deepseek-reasoner","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0334933514,"translation-ar2fa_ar2fa_sahife_bleu":0.0313812328,"translation-ar2fa_ar2fa_nahj_bleu":0.013862611,"translation-ar2fa_ar2fa_quran_bleu":0.0542546733,"nlg_score":0.0880621978}
{"Model Name":"Qwen3-30B-A3B","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.090408759,"translation-ar2fa_ar2fa_sahife_bleu":0.0778953352,"translation-ar2fa_ar2fa_nahj_bleu":0.0610049198,"translation-ar2fa_ar2fa_quran_bleu":0.13085583,"nlg_score":0.164118288}
{"Model Name":"Llama-3.2-3B-Instruct","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0381647164,"translation-ar2fa_ar2fa_sahife_bleu":0.0517672982,"translation-ar2fa_ar2fa_nahj_bleu":0.0235396776,"translation-ar2fa_ar2fa_quran_bleu":0.0384559215,"nlg_score":0.1129755187}
{"Model Name":"Llama-3.2-1B-Instruct","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0056487667,"translation-ar2fa_ar2fa_sahife_bleu":0.0084650778,"translation-ar2fa_ar2fa_nahj_bleu":0.0073044047,"translation-ar2fa_ar2fa_quran_bleu":0.0012595996,"nlg_score":0.0823387318}
leaderboard/boards_data/translation-en2fa_en2fa.jsonl
ADDED
@@ -0,0 +1,29 @@
{"Model Name":"claude-3-7-sonnet-20250219","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.1799534349,"translation-en2fa_en2fa_epoque_bleu":0.4004213933,"translation-en2fa_en2fa_mizan_bleu":0.1703393716,"translation-en2fa_en2fa_quran_bleu":0.1225698669,"translation-en2fa_en2fa_sahife_bleu":0.0832764011,"translation-en2fa_en2fa_nahj_bleu":0.0439108113,"translation-en2fa_en2fa_tep_bleu":0.0595417592,"nlg_score":0.1779340777}
{"Model Name":"gemma-3-4b-it","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.0366912467,"translation-en2fa_en2fa_epoque_bleu":0.0623359898,"translation-en2fa_en2fa_mizan_bleu":0.0442763597,"translation-en2fa_en2fa_quran_bleu":0.0309309044,"translation-en2fa_en2fa_sahife_bleu":0.0330663757,"translation-en2fa_en2fa_nahj_bleu":0.0124767847,"translation-en2fa_en2fa_tep_bleu":0.0116612774,"nlg_score":0.0949943578}
{"Model Name":"c4ai-command-r-plus","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.2018158808,"translation-en2fa_en2fa_epoque_bleu":0.4332944681,"translation-en2fa_en2fa_mizan_bleu":0.1925182751,"translation-en2fa_en2fa_quran_bleu":0.1530925462,"translation-en2fa_en2fa_sahife_bleu":0.1026499453,"translation-en2fa_en2fa_nahj_bleu":0.051968827,"translation-en2fa_en2fa_tep_bleu":0.0708487287,"nlg_score":0.1880477876}
{"Model Name":"gpt-4.1","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.1974288311,"translation-en2fa_en2fa_epoque_bleu":0.4102902123,"translation-en2fa_en2fa_mizan_bleu":0.1898606624,"translation-en2fa_en2fa_quran_bleu":0.1638084791,"translation-en2fa_en2fa_sahife_bleu":0.1095493859,"translation-en2fa_en2fa_nahj_bleu":0.0487097316,"translation-en2fa_en2fa_tep_bleu":0.0737497745,"nlg_score":0.194675133}
{"Model Name":"o4-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":null,"translation-en2fa_en2fa_epoque_bleu":null,"translation-en2fa_en2fa_mizan_bleu":null,"translation-en2fa_en2fa_quran_bleu":null,"translation-en2fa_en2fa_sahife_bleu":null,"translation-en2fa_en2fa_nahj_bleu":null,"translation-en2fa_en2fa_tep_bleu":null,"nlg_score":null}
{"Model Name":"gemma-3-12b-it","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.08817485,"translation-en2fa_en2fa_epoque_bleu":0.1886801725,"translation-en2fa_en2fa_mizan_bleu":0.0879987558,"translation-en2fa_en2fa_quran_bleu":0.0657922023,"translation-en2fa_en2fa_sahife_bleu":0.0296141618,"translation-en2fa_en2fa_nahj_bleu":0.0192266597,"translation-en2fa_en2fa_tep_bleu":0.0366296874,"nlg_score":0.1196804312}
{"Model Name":"gemma-3-27b-it","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.0439502467,"translation-en2fa_en2fa_epoque_bleu":0.0932804064,"translation-en2fa_en2fa_mizan_bleu":0.0446467932,"translation-en2fa_en2fa_quran_bleu":0.0435800727,"translation-en2fa_en2fa_sahife_bleu":0.0197005921,"translation-en2fa_en2fa_nahj_bleu":0.0132822652,"translation-en2fa_en2fa_tep_bleu":0.0087342692,"nlg_score":0.1067134448}
{"Model Name":"Qwen3-14B","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.1535253787,"translation-en2fa_en2fa_epoque_bleu":0.3553678809,"translation-en2fa_en2fa_mizan_bleu":0.1285441922,"translation-en2fa_en2fa_quran_bleu":0.0857809616,"translation-en2fa_en2fa_sahife_bleu":0.0787025343,"translation-en2fa_en2fa_nahj_bleu":0.0404850935,"translation-en2fa_en2fa_tep_bleu":0.0586129062,"nlg_score":0.16056333}
{"Model Name":"Qwen3-32B","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.1623218856,"translation-en2fa_en2fa_epoque_bleu":0.3677879105,"translation-en2fa_en2fa_mizan_bleu":0.147599732,"translation-en2fa_en2fa_quran_bleu":0.0938457658,"translation-en2fa_en2fa_sahife_bleu":0.0698903005,"translation-en2fa_en2fa_nahj_bleu":0.0435129812,"translation-en2fa_en2fa_tep_bleu":0.0620337306,"nlg_score":0.1679338638}
{"Model Name":"claude-3-5-haiku-20241022","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.056370201,"translation-en2fa_en2fa_epoque_bleu":0.1154392548,"translation-en2fa_en2fa_mizan_bleu":0.0484324583,"translation-en2fa_en2fa_quran_bleu":0.0612465488,"translation-en2fa_en2fa_sahife_bleu":0.0466818991,"translation-en2fa_en2fa_nahj_bleu":0.0218444477,"translation-en2fa_en2fa_tep_bleu":0.0118186665,"nlg_score":0.1089333827}
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.1458447378,"translation-en2fa_en2fa_epoque_bleu":0.3541508677,"translation-en2fa_en2fa_mizan_bleu":0.1259468635,"translation-en2fa_en2fa_quran_bleu":0.0887225632,"translation-en2fa_en2fa_sahife_bleu":0.0672732746,"translation-en2fa_en2fa_nahj_bleu":0.0407327793,"translation-en2fa_en2fa_tep_bleu":0.0293172873,"nlg_score":0.1319091735}
{"Model Name":"deepseek-chat","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.0438887609,"translation-en2fa_en2fa_epoque_bleu":0.0714603918,"translation-en2fa_en2fa_mizan_bleu":0.0595250407,"translation-en2fa_en2fa_quran_bleu":0.0428487402,"translation-en2fa_en2fa_sahife_bleu":0.0258372032,"translation-en2fa_en2fa_nahj_bleu":0.0133722454,"translation-en2fa_en2fa_tep_bleu":0.0142899909,"nlg_score":0.0934094344}
{"Model Name":"Qwen3-4B","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.1217211215,"translation-en2fa_en2fa_epoque_bleu":0.2916268514,"translation-en2fa_en2fa_mizan_bleu":0.091925603,"translation-en2fa_en2fa_quran_bleu":0.065498518,"translation-en2fa_en2fa_sahife_bleu":0.0612237455,"translation-en2fa_en2fa_nahj_bleu":0.0385824628,"translation-en2fa_en2fa_tep_bleu":0.0453883692,"nlg_score":0.1389297212}
{"Model Name":"gemma-3-1b-it","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.0460704645,"translation-en2fa_en2fa_epoque_bleu":0.1309820272,"translation-en2fa_en2fa_mizan_bleu":0.0316650952,"translation-en2fa_en2fa_quran_bleu":0.0134401079,"translation-en2fa_en2fa_sahife_bleu":0.0141114981,"translation-en2fa_en2fa_nahj_bleu":0.0127654414,"translation-en2fa_en2fa_tep_bleu":0.0065463218,"nlg_score":0.0682994522}
{"Model Name":"aya-expanse-32b","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.0951102411,"translation-en2fa_en2fa_epoque_bleu":0.2204131973,"translation-en2fa_en2fa_mizan_bleu":0.0772021612,"translation-en2fa_en2fa_quran_bleu":0.0914129011,"translation-en2fa_en2fa_sahife_bleu":0.0555605793,"translation-en2fa_en2fa_nahj_bleu":0.0296371925,"translation-en2fa_en2fa_tep_bleu":0.0145962694,"nlg_score":0.1196400535}
{"Model Name":"Llama-3.3-70B-Instruct","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.2019260724,"translation-en2fa_en2fa_epoque_bleu":0.4752747269,"translation-en2fa_en2fa_mizan_bleu":0.165706346,"translation-en2fa_en2fa_quran_bleu":0.1194336982,"translation-en2fa_en2fa_sahife_bleu":0.0819129449,"translation-en2fa_en2fa_nahj_bleu":0.0545857968,"translation-en2fa_en2fa_tep_bleu":0.0782996247,"nlg_score":0.2010896964}
{"Model Name":"gpt-4.1-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.1979467916,"translation-en2fa_en2fa_epoque_bleu":0.4460981632,"translation-en2fa_en2fa_mizan_bleu":0.1745376389,"translation-en2fa_en2fa_quran_bleu":0.137406774,"translation-en2fa_en2fa_sahife_bleu":0.091586235,"translation-en2fa_en2fa_nahj_bleu":0.0490159552,"translation-en2fa_en2fa_tep_bleu":0.072776086,"nlg_score":0.1901206806}
{"Model Name":"gpt-4o-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.2014428857,"translation-en2fa_en2fa_epoque_bleu":0.4710672433,"translation-en2fa_en2fa_mizan_bleu":0.1830885263,"translation-en2fa_en2fa_quran_bleu":0.1141518863,"translation-en2fa_en2fa_sahife_bleu":0.0806159411,"translation-en2fa_en2fa_nahj_bleu":0.0504089542,"translation-en2fa_en2fa_tep_bleu":0.0648627292,"nlg_score":0.1810678527}
{"Model Name":"c4ai-command-a-03-2025","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":null,"translation-en2fa_en2fa_epoque_bleu":null,"translation-en2fa_en2fa_mizan_bleu":null,"translation-en2fa_en2fa_quran_bleu":null,"translation-en2fa_en2fa_sahife_bleu":null,"translation-en2fa_en2fa_nahj_bleu":null,"translation-en2fa_en2fa_tep_bleu":null,"nlg_score":null}
{"Model Name":"gemini-2.0-flash","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.1718324934,"translation-en2fa_en2fa_epoque_bleu":0.364783925,"translation-en2fa_en2fa_mizan_bleu":0.1532613543,"translation-en2fa_en2fa_quran_bleu":0.1620975016,"translation-en2fa_en2fa_sahife_bleu":0.0967871625,"translation-en2fa_en2fa_nahj_bleu":0.0457580774,"translation-en2fa_en2fa_tep_bleu":0.05756103,"nlg_score":0.178231145}
{"Model Name":"c4ai-command-r-v01","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.1506934102,"translation-en2fa_en2fa_epoque_bleu":0.2951032905,"translation-en2fa_en2fa_mizan_bleu":0.1500681264,"translation-en2fa_en2fa_quran_bleu":0.1104277702,"translation-en2fa_en2fa_sahife_bleu":0.092222972,"translation-en2fa_en2fa_nahj_bleu":0.0497623005,"translation-en2fa_en2fa_tep_bleu":0.0692905167,"nlg_score":0.1641995602}
{"Model Name":"gpt-4.1-nano","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.1831593088,"translation-en2fa_en2fa_epoque_bleu":0.4052150706,"translation-en2fa_en2fa_mizan_bleu":0.1692823494,"translation-en2fa_en2fa_quran_bleu":0.1400476579,"translation-en2fa_en2fa_sahife_bleu":0.0812805634,"translation-en2fa_en2fa_nahj_bleu":0.048146149,"translation-en2fa_en2fa_tep_bleu":0.0610881446,"nlg_score":0.1665903777}
{"Model Name":"Qwen3-8B","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.1550276898,"translation-en2fa_en2fa_epoque_bleu":0.3721582216,"translation-en2fa_en2fa_mizan_bleu":0.1231599039,"translation-en2fa_en2fa_quran_bleu":0.0882213453,"translation-en2fa_en2fa_sahife_bleu":0.0725213197,"translation-en2fa_en2fa_nahj_bleu":0.0424186358,"translation-en2fa_en2fa_tep_bleu":0.0528718634,"nlg_score":0.1557270864}
{"Model Name":"Mistral-7B-Instruct-v0.3","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.0472831089,"translation-en2fa_en2fa_epoque_bleu":0.0950858392,"translation-en2fa_en2fa_mizan_bleu":0.0348348322,"translation-en2fa_en2fa_quran_bleu":0.0417444578,"translation-en2fa_en2fa_sahife_bleu":0.044168541,"translation-en2fa_en2fa_nahj_bleu":0.0239185439,"translation-en2fa_en2fa_tep_bleu":0.0188699837,"nlg_score":0.0944140383}
{"Model Name":"gpt-4o","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.2099911906,"translation-en2fa_en2fa_epoque_bleu":0.4805793807,"translation-en2fa_en2fa_mizan_bleu":0.1904867707,"translation-en2fa_en2fa_quran_bleu":0.1412389522,"translation-en2fa_en2fa_sahife_bleu":0.0861059288,"translation-en2fa_en2fa_nahj_bleu":0.0528683421,"translation-en2fa_en2fa_tep_bleu":0.0688528109,"nlg_score":0.18964968}
{"Model Name":"deepseek-reasoner","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.0409401575,"translation-en2fa_en2fa_epoque_bleu":0.0902479461,"translation-en2fa_en2fa_mizan_bleu":0.0327725294,"translation-en2fa_en2fa_quran_bleu":0.0443958388,"translation-en2fa_en2fa_sahife_bleu":0.0278897851,"translation-en2fa_en2fa_nahj_bleu":0.0148027555,"translation-en2fa_en2fa_tep_bleu":0.0071499459,"nlg_score":0.0880621978}
{"Model Name":"Qwen3-30B-A3B","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.1617787549,"translation-en2fa_en2fa_epoque_bleu":0.3821529147,"translation-en2fa_en2fa_mizan_bleu":0.1337537913,"translation-en2fa_en2fa_quran_bleu":0.0860909143,"translation-en2fa_en2fa_sahife_bleu":0.0770506908,"translation-en2fa_en2fa_nahj_bleu":0.0441728515,"translation-en2fa_en2fa_tep_bleu":0.0587014819,"nlg_score":0.164118288}
{"Model Name":"Llama-3.2-3B-Instruct","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.1074044673,"translation-en2fa_en2fa_epoque_bleu":0.2689676347,"translation-en2fa_en2fa_mizan_bleu":0.0784179406,"translation-en2fa_en2fa_quran_bleu":0.0573255404,"translation-en2fa_en2fa_sahife_bleu":0.0534655564,"translation-en2fa_en2fa_nahj_bleu":0.0373749355,"translation-en2fa_en2fa_tep_bleu":0.0279497965,"nlg_score":0.1129755187}
{"Model Name":"Llama-3.2-1B-Instruct","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.0373710986,"translation-en2fa_en2fa_epoque_bleu":0.0773774592,"translation-en2fa_en2fa_mizan_bleu":0.034234366,"translation-en2fa_en2fa_quran_bleu":0.0258474786,"translation-en2fa_en2fa_sahife_bleu":0.0240302635,"translation-en2fa_en2fa_nahj_bleu":0.0149718554,"translation-en2fa_en2fa_tep_bleu":0.0146400693,"nlg_score":0.0823387318}
leaderboard/boards_data/translation-fa2ar_fa2ar.jsonl
ADDED
@@ -0,0 +1,29 @@
{"Model Name":"claude-3-7-sonnet-20250219","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.0761269879,"translation-fa2ar_fa2ar_nahj_bleu":0.0321440801,"translation-fa2ar_fa2ar_sahife_bleu":0.0613632957,"translation-fa2ar_fa2ar_quran_bleu":0.134873588,"nlg_score":0.1779340777}
{"Model Name":"gemma-3-4b-it","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0067928767,"translation-fa2ar_fa2ar_nahj_bleu":0.0056689454,"translation-fa2ar_fa2ar_sahife_bleu":0.009024465,"translation-fa2ar_fa2ar_quran_bleu":0.0056852198,"nlg_score":0.0949943578}
{"Model Name":"c4ai-command-r-plus","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0922998074,"translation-fa2ar_fa2ar_nahj_bleu":0.0511154919,"translation-fa2ar_fa2ar_sahife_bleu":0.0589808221,"translation-fa2ar_fa2ar_quran_bleu":0.1668031083,"nlg_score":0.1880477876}
{"Model Name":"gpt-4.1","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.0827618418,"translation-fa2ar_fa2ar_nahj_bleu":0.038434531,"translation-fa2ar_fa2ar_sahife_bleu":0.0781455938,"translation-fa2ar_fa2ar_quran_bleu":0.1317054007,"nlg_score":0.194675133}
{"Model Name":"o4-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":null,"translation-fa2ar_fa2ar_nahj_bleu":null,"translation-fa2ar_fa2ar_sahife_bleu":null,"translation-fa2ar_fa2ar_quran_bleu":null,"nlg_score":null}
{"Model Name":"gemma-3-12b-it","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0098333287,"translation-fa2ar_fa2ar_nahj_bleu":0.0072190824,"translation-fa2ar_fa2ar_sahife_bleu":0.0110570977,"translation-fa2ar_fa2ar_quran_bleu":0.0112238061,"nlg_score":0.1196804312}
{"Model Name":"gemma-3-27b-it","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0100630648,"translation-fa2ar_fa2ar_nahj_bleu":0.0071647909,"translation-fa2ar_fa2ar_sahife_bleu":0.0101185743,"translation-fa2ar_fa2ar_quran_bleu":0.0129058292,"nlg_score":0.1067134448}
{"Model Name":"Qwen3-14B","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0494411806,"translation-fa2ar_fa2ar_nahj_bleu":0.0369805868,"translation-fa2ar_fa2ar_sahife_bleu":0.0567654991,"translation-fa2ar_fa2ar_quran_bleu":0.0545774559,"nlg_score":0.16056333}
{"Model Name":"Qwen3-32B","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0507003531,"translation-fa2ar_fa2ar_nahj_bleu":0.0316047659,"translation-fa2ar_fa2ar_sahife_bleu":0.0534488007,"translation-fa2ar_fa2ar_quran_bleu":0.0670474926,"nlg_score":0.1679338638}
{"Model Name":"claude-3-5-haiku-20241022","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.0331262129,"translation-fa2ar_fa2ar_nahj_bleu":0.0202107323,"translation-fa2ar_fa2ar_sahife_bleu":0.0280883311,"translation-fa2ar_fa2ar_quran_bleu":0.0510795752,"nlg_score":0.1089333827}
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0192357288,"translation-fa2ar_fa2ar_nahj_bleu":0.0151369319,"translation-fa2ar_fa2ar_sahife_bleu":0.0245784397,"translation-fa2ar_fa2ar_quran_bleu":0.0179918148,"nlg_score":0.1319091735}
{"Model Name":"deepseek-chat","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0198691873,"translation-fa2ar_fa2ar_nahj_bleu":0.0113771734,"translation-fa2ar_fa2ar_sahife_bleu":0.0154846482,"translation-fa2ar_fa2ar_quran_bleu":0.0327457404,"nlg_score":0.0934094344}
{"Model Name":"Qwen3-4B","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0351351131,"translation-fa2ar_fa2ar_nahj_bleu":0.0313503027,"translation-fa2ar_fa2ar_sahife_bleu":0.042075565,"translation-fa2ar_fa2ar_quran_bleu":0.0319794715,"nlg_score":0.1389297212}
{"Model Name":"gemma-3-1b-it","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0045158138,"translation-fa2ar_fa2ar_nahj_bleu":0.004600061,"translation-fa2ar_fa2ar_sahife_bleu":0.0052362431,"translation-fa2ar_fa2ar_quran_bleu":0.0037111373,"nlg_score":0.0682994522}
{"Model Name":"aya-expanse-32b","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0318976507,"translation-fa2ar_fa2ar_nahj_bleu":0.0222927973,"translation-fa2ar_fa2ar_sahife_bleu":0.0296757253,"translation-fa2ar_fa2ar_quran_bleu":0.0437244293,"nlg_score":0.1196400535}
{"Model Name":"Llama-3.3-70B-Instruct","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0860361971,"translation-fa2ar_fa2ar_nahj_bleu":0.0440530096,"translation-fa2ar_fa2ar_sahife_bleu":0.0833828112,"translation-fa2ar_fa2ar_quran_bleu":0.1306727704,"nlg_score":0.2010896964}
{"Model Name":"gpt-4.1-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.0689994984,"translation-fa2ar_fa2ar_nahj_bleu":0.0397020785,"translation-fa2ar_fa2ar_sahife_bleu":0.0751264317,"translation-fa2ar_fa2ar_quran_bleu":0.092169985,"nlg_score":0.1901206806}
{"Model Name":"gpt-4o-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.0652599666,"translation-fa2ar_fa2ar_nahj_bleu":0.0373134355,"translation-fa2ar_fa2ar_sahife_bleu":0.0688517527,"translation-fa2ar_fa2ar_quran_bleu":0.0896147118,"nlg_score":0.1810678527}
{"Model Name":"c4ai-command-a-03-2025","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":null,"translation-fa2ar_fa2ar_nahj_bleu":null,"translation-fa2ar_fa2ar_sahife_bleu":null,"translation-fa2ar_fa2ar_quran_bleu":null,"nlg_score":null}
{"Model Name":"gemini-2.0-flash","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.079257203,"translation-fa2ar_fa2ar_nahj_bleu":0.0338415847,"translation-fa2ar_fa2ar_sahife_bleu":0.0570744002,"translation-fa2ar_fa2ar_quran_bleu":0.146855624,"nlg_score":0.178231145}
{"Model Name":"c4ai-command-r-v01","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0852951832,"translation-fa2ar_fa2ar_nahj_bleu":0.0464072569,"translation-fa2ar_fa2ar_sahife_bleu":0.0713426227,"translation-fa2ar_fa2ar_quran_bleu":0.1381356701,"nlg_score":0.1641995602}
{"Model Name":"gpt-4.1-nano","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.0568324844,"translation-fa2ar_fa2ar_nahj_bleu":0.03267488,"translation-fa2ar_fa2ar_sahife_bleu":0.0579381183,"translation-fa2ar_fa2ar_quran_bleu":0.0798844549,"nlg_score":0.1665903777}
{"Model Name":"Qwen3-8B","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0423318046,"translation-fa2ar_fa2ar_nahj_bleu":0.0329089717,"translation-fa2ar_fa2ar_sahife_bleu":0.0445101244,"translation-fa2ar_fa2ar_quran_bleu":0.0495763178,"nlg_score":0.1557270864}
{"Model Name":"Mistral-7B-Instruct-v0.3","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0167121698,"translation-fa2ar_fa2ar_nahj_bleu":0.0182214992,"translation-fa2ar_fa2ar_sahife_bleu":0.0203567578,"translation-fa2ar_fa2ar_quran_bleu":0.0115582526,"nlg_score":0.0944140383}
{"Model Name":"gpt-4o","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.0656699851,"translation-fa2ar_fa2ar_nahj_bleu":0.0347167128,"translation-fa2ar_fa2ar_sahife_bleu":0.0732417084,"translation-fa2ar_fa2ar_quran_bleu":0.0890515341,"nlg_score":0.18964968}
{"Model Name":"deepseek-reasoner","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0198485582,"translation-fa2ar_fa2ar_nahj_bleu":0.0111873845,"translation-fa2ar_fa2ar_sahife_bleu":0.015856468,"translation-fa2ar_fa2ar_quran_bleu":0.032501822,"nlg_score":0.0880621978}
{"Model Name":"Qwen3-30B-A3B","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0483297895,"translation-fa2ar_fa2ar_nahj_bleu":0.0310247441,"translation-fa2ar_fa2ar_sahife_bleu":0.0512375201,"translation-fa2ar_fa2ar_quran_bleu":0.0627271043,"nlg_score":0.164118288}
{"Model Name":"Llama-3.2-3B-Instruct","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0136530376,"translation-fa2ar_fa2ar_nahj_bleu":0.0110489285,"translation-fa2ar_fa2ar_sahife_bleu":0.0135009036,"translation-fa2ar_fa2ar_quran_bleu":0.0164092807,"nlg_score":0.1129755187}
{"Model Name":"Llama-3.2-1B-Instruct","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0164489799,"translation-fa2ar_fa2ar_nahj_bleu":0.0152537955,"translation-fa2ar_fa2ar_sahife_bleu":0.0220286512,"translation-fa2ar_fa2ar_quran_bleu":0.012064493,"nlg_score":0.0823387318}
leaderboard/boards_data/translation-fa2en_fa2en.jsonl
ADDED
@@ -0,0 +1,29 @@
{"Model Name":"claude-3-7-sonnet-20250219","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.2247897554,"translation-fa2en_fa2en_tep_bleu":0.1341840946,"translation-fa2en_fa2en_mizan_bleu":0.1909021288,"translation-fa2en_fa2en_quran_bleu":0.1740971535,"translation-fa2en_fa2en_epoque_bleu":0.4544315204,"translation-fa2en_fa2en_nahj_bleu":0.0877235615,"translation-fa2en_fa2en_sahife_bleu":0.0975791022,"nlg_score":0.1779340777}
{"Model Name":"gemma-3-4b-it","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.016856047,"translation-fa2en_fa2en_tep_bleu":0.0085125001,"translation-fa2en_fa2en_mizan_bleu":0.013661635,"translation-fa2en_fa2en_quran_bleu":0.0181666202,"translation-fa2en_fa2en_epoque_bleu":0.0301282339,"translation-fa2en_fa2en_nahj_bleu":0.0122360126,"translation-fa2en_fa2en_sahife_bleu":0.0110323989,"nlg_score":0.0949943578}
{"Model Name":"c4ai-command-r-plus","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.2337569687,"translation-fa2en_fa2en_tep_bleu":0.1386371644,"translation-fa2en_fa2en_mizan_bleu":0.2129637469,"translation-fa2en_fa2en_quran_bleu":0.1702102457,"translation-fa2en_fa2en_epoque_bleu":0.478211182,"translation-fa2en_fa2en_nahj_bleu":0.083013513,"translation-fa2en_fa2en_sahife_bleu":0.072000292,"nlg_score":0.1880477876}
{"Model Name":"gpt-4.1","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.2307102128,"translation-fa2en_fa2en_tep_bleu":0.1527807458,"translation-fa2en_fa2en_mizan_bleu":0.1927067243,"translation-fa2en_fa2en_quran_bleu":0.1628198329,"translation-fa2en_fa2en_epoque_bleu":0.4676472481,"translation-fa2en_fa2en_nahj_bleu":0.0810494281,"translation-fa2en_fa2en_sahife_bleu":0.1009417344,"nlg_score":0.194675133}
{"Model Name":"o4-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":null,"translation-fa2en_fa2en_tep_bleu":null,"translation-fa2en_fa2en_mizan_bleu":null,"translation-fa2en_fa2en_quran_bleu":null,"translation-fa2en_fa2en_epoque_bleu":null,"translation-fa2en_fa2en_nahj_bleu":null,"translation-fa2en_fa2en_sahife_bleu":null,"nlg_score":null}
{"Model Name":"gemma-3-12b-it","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.0257184881,"translation-fa2en_fa2en_tep_bleu":0.011593122,"translation-fa2en_fa2en_mizan_bleu":0.0215328963,"translation-fa2en_fa2en_quran_bleu":0.0262056878,"translation-fa2en_fa2en_epoque_bleu":0.047221295,"translation-fa2en_fa2en_nahj_bleu":0.0178557856,"translation-fa2en_fa2en_sahife_bleu":0.0169922826,"nlg_score":0.1196804312}
{"Model Name":"gemma-3-27b-it","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.0199585579,"translation-fa2en_fa2en_tep_bleu":0.0097804397,"translation-fa2en_fa2en_mizan_bleu":0.0144809896,"translation-fa2en_fa2en_quran_bleu":0.0259691427,"translation-fa2en_fa2en_epoque_bleu":0.0345304173,"translation-fa2en_fa2en_nahj_bleu":0.0150589625,"translation-fa2en_fa2en_sahife_bleu":0.0157047184,"nlg_score":0.1067134448}
{"Model Name":"Qwen3-14B","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.2145488085,"translation-fa2en_fa2en_tep_bleu":0.1307272464,"translation-fa2en_fa2en_mizan_bleu":0.1697754862,"translation-fa2en_fa2en_quran_bleu":0.1552415558,"translation-fa2en_fa2en_epoque_bleu":0.4513682579,"translation-fa2en_fa2en_nahj_bleu":0.0842673472,"translation-fa2en_fa2en_sahife_bleu":0.0853787118,"nlg_score":0.16056333}
{"Model Name":"Qwen3-32B","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.217991447,"translation-fa2en_fa2en_tep_bleu":0.1271542443,"translation-fa2en_fa2en_mizan_bleu":0.1728081337,"translation-fa2en_fa2en_quran_bleu":0.158860515,"translation-fa2en_fa2en_epoque_bleu":0.4572670962,"translation-fa2en_fa2en_nahj_bleu":0.0902445729,"translation-fa2en_fa2en_sahife_bleu":0.0945000287,"nlg_score":0.1679338638}
{"Model Name":"claude-3-5-haiku-20241022","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.0691353117,"translation-fa2en_fa2en_tep_bleu":0.0320908261,"translation-fa2en_fa2en_mizan_bleu":0.0535229905,"translation-fa2en_fa2en_quran_bleu":0.0800143919,"translation-fa2en_fa2en_epoque_bleu":0.133977443,"translation-fa2en_fa2en_nahj_bleu":0.0362958954,"translation-fa2en_fa2en_sahife_bleu":0.0393317574,"nlg_score":0.1089333827}
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.1451163884,"translation-fa2en_fa2en_tep_bleu":0.0393307601,"translation-fa2en_fa2en_mizan_bleu":0.1009347025,"translation-fa2en_fa2en_quran_bleu":0.0929688918,"translation-fa2en_fa2en_epoque_bleu":0.3660914464,"translation-fa2en_fa2en_nahj_bleu":0.0536507876,"translation-fa2en_fa2en_sahife_bleu":0.05038339,"nlg_score":0.1319091735}
{"Model Name":"deepseek-chat","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.0450244679,"translation-fa2en_fa2en_tep_bleu":0.0166138322,"translation-fa2en_fa2en_mizan_bleu":0.0478141187,"translation-fa2en_fa2en_quran_bleu":0.0426202225,"translation-fa2en_fa2en_epoque_bleu":0.0802277942,"translation-fa2en_fa2en_nahj_bleu":0.0252662094,"translation-fa2en_fa2en_sahife_bleu":0.0268950031,"nlg_score":0.0934094344}
{"Model Name":"Qwen3-4B","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.1840809218,"translation-fa2en_fa2en_tep_bleu":0.1011436783,"translation-fa2en_fa2en_mizan_bleu":0.149157222,"translation-fa2en_fa2en_quran_bleu":0.1377761662,"translation-fa2en_fa2en_epoque_bleu":0.3802946233,"translation-fa2en_fa2en_nahj_bleu":0.0851756367,"translation-fa2en_fa2en_sahife_bleu":0.0857201524,"nlg_score":0.1389297212}
{"Model Name":"gemma-3-1b-it","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.0146059874,"translation-fa2en_fa2en_tep_bleu":0.0065306354,"translation-fa2en_fa2en_mizan_bleu":0.0119363121,"translation-fa2en_fa2en_quran_bleu":0.0152281808,"translation-fa2en_fa2en_epoque_bleu":0.0274143056,"translation-fa2en_fa2en_nahj_bleu":0.0094070307,"translation-fa2en_fa2en_sahife_bleu":0.0093811964,"nlg_score":0.0682994522}
{"Model Name":"aya-expanse-32b","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.14443262,"translation-fa2en_fa2en_tep_bleu":0.0636878051,"translation-fa2en_fa2en_mizan_bleu":0.1045784226,"translation-fa2en_fa2en_quran_bleu":0.1065169191,"translation-fa2en_fa2en_epoque_bleu":0.3331896819,"translation-fa2en_fa2en_nahj_bleu":0.0573420672,"translation-fa2en_fa2en_sahife_bleu":0.0526154809,"nlg_score":0.1196400535}
{"Model Name":"Llama-3.3-70B-Instruct","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.2559078555,"translation-fa2en_fa2en_tep_bleu":0.1687480056,"translation-fa2en_fa2en_mizan_bleu":0.2113676707,"translation-fa2en_fa2en_quran_bleu":0.2008290856,"translation-fa2en_fa2en_epoque_bleu":0.5099219192,"translation-fa2en_fa2en_nahj_bleu":0.0984185664,"translation-fa2en_fa2en_sahife_bleu":0.1125739279,"nlg_score":0.2010896964}
{"Model Name":"gpt-4.1-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.2389011537,"translation-fa2en_fa2en_tep_bleu":0.1431825698,"translation-fa2en_fa2en_mizan_bleu":0.2056729072,"translation-fa2en_fa2en_quran_bleu":0.1776018574,"translation-fa2en_fa2en_epoque_bleu":0.4842161688,"translation-fa2en_fa2en_nahj_bleu":0.0886384727,"translation-fa2en_fa2en_sahife_bleu":0.1045044839,"nlg_score":0.1901206806}
{"Model Name":"gpt-4o-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.2332592983,"translation-fa2en_fa2en_tep_bleu":0.1497847918,"translation-fa2en_fa2en_mizan_bleu":0.1972270386,"translation-fa2en_fa2en_quran_bleu":0.1725699648,"translation-fa2en_fa2en_epoque_bleu":0.4678973942,"translation-fa2en_fa2en_nahj_bleu":0.090543674,"translation-fa2en_fa2en_sahife_bleu":0.1008380909,"nlg_score":0.1810678527}
{"Model Name":"c4ai-command-a-03-2025","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":null,"translation-fa2en_fa2en_tep_bleu":null,"translation-fa2en_fa2en_mizan_bleu":null,"translation-fa2en_fa2en_quran_bleu":null,"translation-fa2en_fa2en_epoque_bleu":null,"translation-fa2en_fa2en_nahj_bleu":null,"translation-fa2en_fa2en_sahife_bleu":null,"nlg_score":null}
{"Model Name":"gemini-2.0-flash","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.0757086487,"translation-fa2en_fa2en_tep_bleu":0.0316922994,"translation-fa2en_fa2en_mizan_bleu":0.0530331645,"translation-fa2en_fa2en_quran_bleu":0.1028139165,"translation-fa2en_fa2en_epoque_bleu":0.157367237,"translation-fa2en_fa2en_nahj_bleu":0.0336372263,"translation-fa2en_fa2en_sahife_bleu":0.0279485156,"nlg_score":0.178231145}
{"Model Name":"c4ai-command-r-v01","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.1892370035,"translation-fa2en_fa2en_tep_bleu":0.1290684643,"translation-fa2en_fa2en_mizan_bleu":0.1721408901,"translation-fa2en_fa2en_quran_bleu":0.1736791408,"translation-fa2en_fa2en_epoque_bleu":0.346100597,"translation-fa2en_fa2en_nahj_bleu":0.0776400174,"translation-fa2en_fa2en_sahife_bleu":0.08279759,"nlg_score":0.1641995602}
{"Model Name":"gpt-4.1-nano","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.2165819036,"translation-fa2en_fa2en_tep_bleu":0.13491043,"translation-fa2en_fa2en_mizan_bleu":0.1810957829,"translation-fa2en_fa2en_quran_bleu":0.164168601,"translation-fa2en_fa2en_epoque_bleu":0.4383628208,"translation-fa2en_fa2en_nahj_bleu":0.0942939662,"translation-fa2en_fa2en_sahife_bleu":0.0827637394,"nlg_score":0.1665903777}
{"Model Name":"Qwen3-8B","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.2024225184,"translation-fa2en_fa2en_tep_bleu":0.1163127945,"translation-fa2en_fa2en_mizan_bleu":0.1649009947,"translation-fa2en_fa2en_quran_bleu":0.1513328968,"translation-fa2en_fa2en_epoque_bleu":0.4171232399,"translation-fa2en_fa2en_nahj_bleu":0.0857999462,"translation-fa2en_fa2en_sahife_bleu":0.0929479364,"nlg_score":0.1557270864}
{"Model Name":"Mistral-7B-Instruct-v0.3","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.0901939948,"translation-fa2en_fa2en_tep_bleu":0.0521908916,"translation-fa2en_fa2en_mizan_bleu":0.0828690879,"translation-fa2en_fa2en_quran_bleu":0.0756298248,"translation-fa2en_fa2en_epoque_bleu":0.1645619674,"translation-fa2en_fa2en_nahj_bleu":0.048616237,"translation-fa2en_fa2en_sahife_bleu":0.0518842318,"nlg_score":0.0944140383}
{"Model Name":"gpt-4o","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.234039473,"translation-fa2en_fa2en_tep_bleu":0.1597644653,"translation-fa2en_fa2en_mizan_bleu":0.1946759365,"translation-fa2en_fa2en_quran_bleu":0.1638938233,"translation-fa2en_fa2en_epoque_bleu":0.474760879,"translation-fa2en_fa2en_nahj_bleu":0.0825458621,"translation-fa2en_fa2en_sahife_bleu":0.0952634494,"nlg_score":0.18964968}
{"Model Name":"deepseek-reasoner","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.0414094379,"translation-fa2en_fa2en_tep_bleu":0.019539618,"translation-fa2en_fa2en_mizan_bleu":0.0346087447,"translation-fa2en_fa2en_quran_bleu":0.0396858881,"translation-fa2en_fa2en_epoque_bleu":0.0798341141,"translation-fa2en_fa2en_nahj_bleu":0.0244191809,"translation-fa2en_fa2en_sahife_bleu":0.0231626908,"nlg_score":0.0880621978}
{"Model Name":"Qwen3-30B-A3B","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.2177785793,"translation-fa2en_fa2en_tep_bleu":0.1189948472,"translation-fa2en_fa2en_mizan_bleu":0.1793626928,"translation-fa2en_fa2en_quran_bleu":0.1718006478,"translation-fa2en_fa2en_epoque_bleu":0.4500382308,"translation-fa2en_fa2en_nahj_bleu":0.0836776138,"translation-fa2en_fa2en_sahife_bleu":0.1034067477,"nlg_score":0.164118288}
{"Model Name":"Llama-3.2-3B-Instruct","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.0862123314,"translation-fa2en_fa2en_tep_bleu":0.0334491641,"translation-fa2en_fa2en_mizan_bleu":0.0758837027,"translation-fa2en_fa2en_quran_bleu":0.0892296624,"translation-fa2en_fa2en_epoque_bleu":0.1688644918,"translation-fa2en_fa2en_nahj_bleu":0.042819328,"translation-fa2en_fa2en_sahife_bleu":0.0473482715,"nlg_score":0.1129755187}
{"Model Name":"Llama-3.2-1B-Instruct","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.0423299736,"translation-fa2en_fa2en_tep_bleu":0.0124774953,"translation-fa2en_fa2en_mizan_bleu":0.0314077643,"translation-fa2en_fa2en_quran_bleu":0.0294898862,"translation-fa2en_fa2en_epoque_bleu":0.1006673489,"translation-fa2en_fa2en_nahj_bleu":0.0117672852,"translation-fa2en_fa2en_sahife_bleu":0.0246608556,"nlg_score":0.0823387318}
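A note on the schema shared by the boards_data files above: every row is a flat JSON object keyed by "Model Name", "model_url", "parameters_count", "source_type", the per-dataset BLEU columns, and the aggregate "nlg_score". A minimal sketch (assuming pandas; the file path is taken from this commit) of loading one board and listing models by aggregate score:

    import pandas as pd

    # Read the newline-delimited JSON exactly as leaderboard.py below does.
    df = pd.read_json("leaderboard/boards_data/translation-fa2en_fa2en.jsonl", lines=True)
    # Rows with null scores (models not yet evaluated) sort to the bottom.
    print(df.sort_values("nlg_score", ascending=False)[["Model Name", "nlg_score"]])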
leaderboard/leaderboard.py
ADDED
@@ -0,0 +1,605 @@
# leaderboard/leaderboard.py
import gradio as gr
import pandas as pd
import logging
from pathlib import Path
import yaml
from typing import Dict, List, Union, Optional, Any
import numpy as np

# --- Logging Setup ---
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(module)s - %(message)s"
)
logger = logging.getLogger(__name__)

# --- Path Definitions ---
LEADERBOARD_DIR = Path(__file__).resolve().parent
CONFIG_FILE_PATH = LEADERBOARD_DIR / "leaderboard_config.yaml"
DATA_DIR = LEADERBOARD_DIR / "boards_data"

class ColumnConfig:

    def __init__(self, config_path: Path):
        self.config_path = config_path
        self.column_display_names_map: Dict[str, str] = {}
        self.task_tab_names_map: Dict[str, str] = {}

        default_task_tab_names = {
            "all": "Overall", "mt_bench": "MT-Bench", "ifeval": "IFEval",
            "MMLU": "MMLU", "persian_csr": "PerCoR",
            "persian_nlg": "Persian NLG", "persian_nlu": "Persian NLU"
        }
        default_column_names = {
            "Model Name": "Model", "model_url": "URL",
            "parameters_count": "⚙️ Params", "source_type": "Source",
            "Average": "Average", "Rank": "🏆 Rank", "score_mean": "score_mean (main)",
            "strict_instruction_accuracy": "strict_instruction_accuracy (main)", "acc": "accuracy (main)",
            "nlg_score": "nlg_score (main)", "nlu_score": "nlu_score (main)",
        }

        if self.config_path and self.config_path.exists():
            try:
                with open(self.config_path, 'r', encoding='utf-8') as f:
                    config = yaml.safe_load(f)
                loaded_column_names = config.get('column_names', {})
                self.column_display_names_map = {**default_column_names, **loaded_column_names}
                loaded_task_names = config.get('task_display_names', {})
                self.task_tab_names_map = {**default_task_tab_names, **loaded_task_names}
            except Exception as e:
                logger.error(f"Error loading UI name configurations from {self.config_path}: {e}. Using defaults.")
                self.column_display_names_map = default_column_names
                self.task_tab_names_map = default_task_tab_names
        else:
            logger.warning(f"UI Name configuration file '{self.config_path.name}' not found. Using defaults.")
            self.column_display_names_map = default_column_names
            self.task_tab_names_map = default_task_tab_names

    def get_column_display_name(self, original_col_name: str) -> str:
        return self.column_display_names_map.get(original_col_name, original_col_name.replace("_", " "))

    def get_task_tab_name(self, task_key: str) -> str:
        return self.task_tab_names_map.get(task_key, task_key.replace("_", " "))

    def rename_dataframe_columns(self, df: pd.DataFrame) -> pd.DataFrame:
        if df.empty: return df
        rename_dict = {col: self.get_column_display_name(col) for col in df.columns}
        return df.rename(columns=rename_dict)

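# A short usage sketch of ColumnConfig (hypothetical calls; results derived
# from the defaults above):
#   cfg = ColumnConfig(CONFIG_FILE_PATH)
#   cfg.get_column_display_name("parameters_count")  # -> "⚙️ Params"
#   cfg.get_column_display_name("unmapped_metric")   # -> "unmapped metric" (underscore fallback)
#   cfg.get_task_tab_name("persian_csr")             # -> "PerCoR"
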
class LeaderboardApp:
    def __init__(self, config_path: Path):
        self.config_path = config_path
        self.column_config = ColumnConfig(config_path)
        self.raw_dataframes: Dict[str, pd.DataFrame] = {}
        self.model_display_configs: Dict[str, Dict[str, str]] = {}

        self.model_identifier_column: str = "Model Name"
        self.main_scores_map: Dict[str, str] = {}
        self.allowed_null_columns_in_average: List[str] = ["Model Name", "model_url", "parameters_count", "source_type"]
        self.tab_processing_order: List[str] = []
        self.numeric_score_columns_for_bolding: List[str] = []
        self.columns_to_hide: List[str] = ["model_url", "source_type"]
        self.parent_child_task_map: Dict[str, List[str]] = {}

        self._load_global_settings()
        self._load_model_display_configs()

    def _load_global_settings(self) -> None:
        # ... (unchanged from the previous version) ...
        if self.config_path and self.config_path.exists():
            try:
                with open(self.config_path, 'r', encoding='utf-8') as f:
                    config = yaml.safe_load(f)
                settings = config.get('global_settings', {})
                self.model_identifier_column = settings.get('model_identifier_column', self.model_identifier_column)
                self.main_scores_map = settings.get('main_scores_map', self.main_scores_map)
                self.allowed_null_columns_in_average = settings.get('allowed_null_columns_in_average', self.allowed_null_columns_in_average)
                self.tab_processing_order = settings.get('tab_processing_order', [])
                self.columns_to_hide = settings.get('columns_to_hide', self.columns_to_hide)
                self.parent_child_task_map = settings.get('parent_child_task_map', {})

                default_numeric_bold_cols = list(self.main_scores_map.values()) if self.main_scores_map else []
                self.numeric_score_columns_for_bolding = settings.get('numeric_score_columns_for_bolding', default_numeric_bold_cols)
                if not self.numeric_score_columns_for_bolding and default_numeric_bold_cols:
                    self.numeric_score_columns_for_bolding = default_numeric_bold_cols
                if 'all' in self.main_scores_map and self.main_scores_map.get('all') and \
                   self.main_scores_map['all'] not in self.numeric_score_columns_for_bolding:
                    self.numeric_score_columns_for_bolding.append(self.main_scores_map['all'])
                self.numeric_score_columns_for_bolding = list(set(self.numeric_score_columns_for_bolding))
            except Exception as e:
                logger.error(f"Error loading global settings from {self.config_path}: {e}. Using defaults.")
        else:
            logger.error(f"Main configuration file '{getattr(self.config_path, 'name', 'config_path')}' not found. Critical settings will use defaults.")

    def _load_model_display_configs(self) -> None:

        if self.config_path and self.config_path.exists():
            try:
                with open(self.config_path, 'r', encoding='utf-8') as f:
                    config = yaml.safe_load(f)
                self.model_display_configs = config.get('model_display_configs', {})
            except Exception as e:
                logger.error(f"Error loading model display configs: {e}")
        else:
            logger.warning(f"Model display config section not found in {self.config_path}.")

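    # For reference, _load_global_settings above reads a `global_settings` block
    # from leaderboard_config.yaml shaped roughly like this (a hypothetical
    # sketch; only keys the code actually calls settings.get() on):
    #   global_settings:
    #     model_identifier_column: "Model Name"
    #     main_scores_map: {all: "score_mean", persian_nlg: "nlg_score"}
    #     tab_processing_order: [all, mt_bench, ifeval, persian_nlg, persian_nlu]
    #     columns_to_hide: [model_url, source_type]
    #     parent_child_task_map: {persian_nlg: [translation-fa2en_fa2en]}
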
    def load_data(self) -> None:

        logger.info(f"Loading all data from: {DATA_DIR}")
        if not DATA_DIR.exists() or not DATA_DIR.is_dir():
            logger.error(f"Data directory {DATA_DIR} not found. Cannot load data.")
            return

        all_jsonl_files = list(DATA_DIR.glob("*.jsonl"))
        if not all_jsonl_files:
            logger.warning(f"No .jsonl files found in {DATA_DIR}. No data will be loaded.")
            return

        for file_path in all_jsonl_files:
            task_key = file_path.stem
            try:
                self.raw_dataframes[task_key] = pd.read_json(file_path, lines=True)
                logger.info(f"Successfully loaded '{file_path.name}' for task key '{task_key}'.")
            except Exception as e:
                self.raw_dataframes[task_key] = pd.DataFrame()
                logger.error(f"Error loading '{file_path.name}' for task '{task_key}': {e}")

        configured_task_keys = set(self.tab_processing_order)
        for parent, children in self.parent_child_task_map.items():
            configured_task_keys.add(parent)
            if children:
                configured_task_keys.update(children)

        for key in configured_task_keys:
            if key not in self.raw_dataframes:
                self.raw_dataframes[key] = pd.DataFrame()
                logger.warning(f"No data file found for configured task key '{key}'. Initialized as empty.")

    def _get_benchmark_columns(self, df: pd.DataFrame) -> List[str]:

        if df.empty: return []
        excluded_cols = self.allowed_null_columns_in_average + ["Rank", "model_url", "Average"]
        return [col for col in df.columns if col not in excluded_cols and pd.api.types.is_numeric_dtype(df[col])]

    def handle_nulls_in_averages(self) -> None:

        logger.info("Skipping handle_nulls_in_averages as refresh.py is expected to handle it.")
        pass

    def _calculate_non_null_benchmark_score_count(self, df_row: pd.Series, benchmark_cols: List[str]) -> int:

        return df_row[benchmark_cols].notna().sum()

    def generate_model_rankings(self) -> None:
        # ... (unchanged - the current logic already pushes models without a main score to the bottom) ...
        logger.info("Generating model rankings for each tab.")
        if not self.model_identifier_column:
            logger.error("`model_identifier_column` is not set. Cannot perform ranking.")
            return
        for task_key, df in self.raw_dataframes.items():
            if df.empty: continue
            ranked_df = df.copy()
            main_score_col_for_tab = self.main_scores_map.get(task_key)
            if not main_score_col_for_tab or main_score_col_for_tab not in ranked_df.columns:
                logger.warning(f"No main score column for task '{task_key}'. Ranking skipped.")
                ranked_df["Rank"] = pd.NA  # Use pd.NA for missing ranks
                self.raw_dataframes[task_key] = ranked_df
                continue
            ranked_df[main_score_col_for_tab] = pd.to_numeric(ranked_df[main_score_col_for_tab], errors='coerce')
            ranked_df['_has_main_score'] = ranked_df[main_score_col_for_tab].notna()
            ranked_df['_sortable_main_score'] = ranked_df[main_score_col_for_tab].fillna(-np.inf)
            sort_by_cols = ['_has_main_score', '_sortable_main_score', self.model_identifier_column]
            ascending_order = [False, False, True]
            ranked_df = ranked_df.sort_values(by=sort_by_cols, ascending=ascending_order, na_position='last')
            # Assign ranks only to rows that have a main score; others get NA
            ranked_df["Rank"] = pd.NA
            ranked_df.loc[ranked_df['_has_main_score'], "Rank"] = range(1, ranked_df['_has_main_score'].sum() + 1)

            ranked_df.drop(columns=['_has_main_score', '_sortable_main_score'], inplace=True)
            self.raw_dataframes[task_key] = ranked_df
            logger.info(f"Generated rankings for {task_key}.")

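    # Worked illustration of the ranking rule above (hypothetical scores): for
    # main scores [0.82, NaN, 0.91] the sort yields [0.91, 0.82, NaN], so Rank
    # becomes [1, 2, <NA>]; rows without a main score sink to the bottom unranked.
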
    @staticmethod
    def _format_value_as_percentage(value: Any, score_cutoff_for_percentage: float = 0.0) -> Any:
        # ... (unchanged - this method may still be used elsewhere) ...
        if pd.isna(value) or not isinstance(value, (int, float)): return value
        if value >= score_cutoff_for_percentage and 0 <= value <= 1.0: return f"{value * 100:.2f}%"
        return f"{value:.2f}" if isinstance(value, float) else value

    @staticmethod
    def _format_parameters_count(value: Any) -> str:

        if pd.isna(value) or str(value).lower() in ["n/a", "unknown", "", "none"]: return "Unknown"
        try:
            num_value = float(value)
            if num_value == 0: return "N/A"
            if num_value >= 1_000_000_000: return f"{num_value / 1_000_000_000:.1f}B"
            if num_value >= 1_000_000: return f"{num_value / 1_000_000:.1f}M"
            if num_value >= 1_000: return f"{num_value / 1_000:.1f}K"
            return str(int(num_value))
        except ValueError: return str(value)

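    # Examples implied by the thresholds above: "8190000000" -> "8.2B",
    # "4020000000" -> "4.0B", 1_000_000 -> "1.0M", 35_000 -> "35.0K",
    # 0 -> "N/A", and None / "unknown" -> "Unknown".
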
    def _apply_general_formatting_to_cells(self, df_to_format: pd.DataFrame, task_key: str) -> pd.DataFrame:
        if df_to_format.empty:
            return df_to_format

        formatted_df = df_to_format.copy()
        is_mt_bench_tab = (str(task_key).lower() == "mt_bench")

        for col_name in formatted_df.columns:
            if col_name == "parameters_count":
                formatted_df[col_name] = formatted_df[col_name].apply(self._format_parameters_count)
                continue

            if col_name == "Rank":  # Rank should typically be integer or NA, no special formatting here
                # Convert Rank to integer if possible, otherwise keep as is (e.g. for NA)
                try:
                    # Attempt to convert to Int64 to handle pd.NA
                    formatted_df[col_name] = formatted_df[col_name].astype(pd.Int64Dtype())
                except Exception:
                    pass  # If conversion fails, leave as is
                continue

            new_col_values = []
            for x_cell_value in formatted_df[col_name]:
                original_value_for_cell = x_cell_value

                numeric_x = x_cell_value
                is_cell_numeric_type = isinstance(x_cell_value, (int, float, np.number))

                if not is_cell_numeric_type:
                    try:
                        numeric_x = pd.to_numeric(x_cell_value)
                        is_cell_numeric_type = True
                    except (ValueError, TypeError):  # TypeError covers non-scalar cells (e.g. dicts)
                        is_cell_numeric_type = False

                if pd.isna(numeric_x):
                    new_col_values.append("")  # Display NaNs as empty strings
                    continue

                formatted_cell_value = original_value_for_cell

                if is_cell_numeric_type:
                    if is_mt_bench_tab:  # Special handling for mt_bench tab
                        if isinstance(numeric_x, float):
                            formatted_cell_value = f"{numeric_x:.2f}"
                        else:
                            formatted_cell_value = numeric_x
                    else:  # For all other tabs
                        if isinstance(numeric_x, (int, float)) and 0 <= numeric_x <= 1.0:
                            val_multiplied = numeric_x * 100
                            # If original was 0 or 1 (resulting in 0 or 100), format as integer
                            if numeric_x == 1.0 or numeric_x == 0.0:
                                formatted_cell_value = f"{val_multiplied:.0f}"  # "100" or "0"
                            else:
                                # Otherwise, format to 2 decimal places (e.g., 88.00, 75.50)
                                formatted_cell_value = f"{val_multiplied:.2f}"
                        elif isinstance(numeric_x, float):
                            formatted_cell_value = f"{numeric_x:.2f}"
                        else:  # Integers outside 0-1 range, etc.
                            formatted_cell_value = numeric_x

                new_col_values.append(formatted_cell_value)
            formatted_df[col_name] = new_col_values
        return formatted_df

    def _apply_markdown_and_bolding(self, df_with_general_formats: pd.DataFrame) -> pd.DataFrame:
        # ... (unchanged from the previous version) ...
        if df_with_general_formats.empty: return df_with_general_formats
        formatted_df = df_with_general_formats.copy()

        model_id_col_original = self.model_identifier_column

        if model_id_col_original in formatted_df.columns and 'model_url' in formatted_df.columns:
            def create_markdown_link(row):
                model_id_val = row[model_id_col_original]
                url = row['model_url']

                display_conf = self.model_display_configs.get(str(model_id_val), {})
                display_name = display_conf.get('display_name', str(model_id_val))
                url_for_link = display_conf.get('url', url if pd.notna(url) else 'https://google.com')
                if not url_for_link or pd.isna(url_for_link): url_for_link = 'https://google.com'
                return f"[{display_name}]({url_for_link})"
            formatted_df[model_id_col_original] = formatted_df.apply(create_markdown_link, axis=1)

        for col_name_original in self.numeric_score_columns_for_bolding:
            if col_name_original in formatted_df.columns:
                def to_numeric_for_max(val):
                    if isinstance(val, str):
                        # Percentage sign is no longer added, so no need to check for it here
                        # if val.endswith('%'):
                        #     try: return float(val[:-1])
                        #     except ValueError: return -np.inf
                        try: return float(val)  # Handles "88.00", "75.50", "100", "0"
                        except ValueError: return -np.inf
                    return val if pd.notna(val) else -np.inf

                numeric_series_for_max = formatted_df[col_name_original].apply(to_numeric_for_max)

                if not numeric_series_for_max.empty and numeric_series_for_max.notna().any() and \
                   pd.api.types.is_numeric_dtype(numeric_series_for_max) and not numeric_series_for_max.eq(-np.inf).all():
                    max_val_numeric = numeric_series_for_max.max(skipna=True)  # Ensure skipna=True for max
                    if pd.notna(max_val_numeric) and max_val_numeric != -np.inf:
                        # Iterate using index to ensure correct .loc access
                        for i in numeric_series_for_max.index:
                            current_numeric_val = numeric_series_for_max.loc[i]
                            if pd.notna(current_numeric_val) and current_numeric_val == max_val_numeric:
                                display_val_to_bold = formatted_df.loc[i, col_name_original]
                                if not (isinstance(display_val_to_bold, str) and display_val_to_bold.startswith("**") and display_val_to_bold.endswith("**")):
                                    formatted_df.loc[i, col_name_original] = f"**{display_val_to_bold}**"
                            elif pd.isna(current_numeric_val) or current_numeric_val == -np.inf:
                                cell_content = formatted_df.loc[i, col_name_original]
                                if cell_content is None or \
                                   (isinstance(cell_content, str) and \
                                    cell_content.strip().lower() in ["n/a", "", "unknown", "nan"]):  # Standardize NA display
                                    formatted_df.loc[i, col_name_original] = ""
        return formatted_df
    # ... (the remaining LeaderboardApp methods are unchanged, including _get_gr_datatypes, get_prepared_dataframe, make_update_fn_for_task_closure, _create_and_bind_dataframe_component, create_gradio_interface, and run_standalone) ...

    @staticmethod
    def _get_gr_datatypes(df_with_original_cols: pd.DataFrame, model_id_col_original_name: str, score_cols_original_names: List[str]) -> List[str]:
        datatypes = []
        if df_with_original_cols.empty: return []

        markdown_cols_original_names = {model_id_col_original_name}
        markdown_cols_original_names.add("parameters_count")
        markdown_cols_original_names.update(score_cols_original_names)

        for col_name_original in df_with_original_cols.columns:
            if col_name_original == "Rank":
                datatypes.append("number")  # Rank can be number or string if NA
            elif col_name_original in markdown_cols_original_names:
                datatypes.append("markdown")
            else:
                # Most other formatted cells become strings
                # Checking the dtype of the formatted column can be more robust
                # For now, default to str for non-markdown, non-rank
                datatypes.append("str")
        return datatypes

    def get_prepared_dataframe(self, task_key: str, source_filter: str = "All", name_filter_query: str = "") -> pd.DataFrame:
        original_df_for_task = self.raw_dataframes.get(task_key)
        if original_df_for_task is None or original_df_for_task.empty:
            return pd.DataFrame()

        processed_df = original_df_for_task.copy()

        parent_nlu_nlg_task_keys = ["persian_nlg", "persian_nlu"]
        if task_key in parent_nlu_nlg_task_keys:
            cols_to_drop_due_to_object = []
            for col_name in processed_df.columns:
                if processed_df[col_name].apply(lambda x: isinstance(x, dict)).any():
                    cols_to_drop_due_to_object.append(col_name)
            if cols_to_drop_due_to_object:
                logger.info(f"For overview task '{task_key}', dropping object columns: {cols_to_drop_due_to_object}")
                processed_df = processed_df.drop(columns=cols_to_drop_due_to_object, errors='ignore')

        if 'source_type' in processed_df.columns and source_filter != "All":
            processed_df = processed_df[processed_df['source_type'] == source_filter]
            if processed_df.empty: return pd.DataFrame()

        if name_filter_query and self.model_identifier_column in processed_df.columns:
            try:
                processed_df = processed_df[processed_df[self.model_identifier_column].astype(str).str.contains(name_filter_query, case=False, na=False)]
            except Exception as e: logger.error(f"Name filter error: {e}")
            if processed_df.empty: return pd.DataFrame()

        if processed_df.empty: return pd.DataFrame()

        # Apply cell formatting (this now includes the new number formatting rules)
        processed_df = self._apply_general_formatting_to_cells(processed_df, task_key)
        # Apply markdown and bolding
        processed_df = self._apply_markdown_and_bolding(processed_df)

        if self.columns_to_hide:
            columns_to_drop_existing = [col for col in self.columns_to_hide if col in processed_df.columns]
            if columns_to_drop_existing:
                processed_df = processed_df.drop(columns=columns_to_drop_existing, errors='ignore')

        if "Rank" in processed_df.columns:
            # Ensure Rank is first, if it exists
            cols_order = ["Rank"] + [col for col in processed_df.columns if col != "Rank"]
            processed_df = processed_df[cols_order]

        # Convert Rank to string for display after all operations, to handle NA consistently with other strings
        if "Rank" in processed_df.columns:
            processed_df["Rank"] = processed_df["Rank"].apply(lambda x: str(int(x)) if pd.notna(x) and isinstance(x, (float, int)) and x == int(x) else (str(x) if pd.notna(x) else ""))

        processed_df = processed_df.fillna("")  # Final fillna for display
        return processed_df

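    # Summary of the preparation pipeline above: drop dict-valued columns on the
    # NLU/NLG overview tabs -> filter by source type and name query -> format
    # cells -> add markdown links and bolding -> hide configured columns -> move
    # Rank to the front -> stringify Rank and blank out remaining NaNs.
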
    def make_update_fn_for_task_closure(self, task_key_for_df_data: str):

        def update_table_data(name_query_str, source_filter_str):
            logger.debug(f"Updating table for task_key '{task_key_for_df_data}' with name: '{name_query_str}', source: '{source_filter_str}'")

            df_original_cols_formatted_values = self.get_prepared_dataframe(
                task_key_for_df_data, source_filter_str, name_query_str
            )

            if df_original_cols_formatted_values.empty:
                base_raw_df = self.raw_dataframes.get(task_key_for_df_data, pd.DataFrame())
                base_raw_df_cols = list(base_raw_df.columns) if not base_raw_df.empty else []

                if base_raw_df_cols:
                    temp_empty_df_orig_cols = pd.DataFrame(columns=base_raw_df_cols)
                    if self.columns_to_hide:
                        cols_to_drop_now = [col for col in self.columns_to_hide if col in temp_empty_df_orig_cols.columns]
                        if cols_to_drop_now:
                            temp_empty_df_orig_cols = temp_empty_df_orig_cols.drop(columns=cols_to_drop_now)

                    if self.main_scores_map.get(task_key_for_df_data) and "Rank" not in temp_empty_df_orig_cols.columns:
                        temp_empty_df_orig_cols.insert(0, "Rank", [])

                    renamed_empty_df = self.column_config.rename_dataframe_columns(temp_empty_df_orig_cols)
                    display_headers = list(renamed_empty_df.columns)
                    gr_datatypes = ["str"] * len(display_headers) if display_headers else ["str"]
                    return gr.DataFrame(value=pd.DataFrame(columns=display_headers), headers=display_headers if display_headers else ["Info"], datatype=gr_datatypes)
                else:
                    info_message = f"No data available for {self.column_config.get_task_tab_name(task_key_for_df_data)} with current filters."
                    return gr.DataFrame(value=pd.DataFrame([{"Info": info_message}]), headers=["Info"], datatype=["str"])

            gr_datatypes = self._get_gr_datatypes(
                df_original_cols_formatted_values,
                self.model_identifier_column,
                self.numeric_score_columns_for_bolding
            )

            df_display_cols_formatted_values = self.column_config.rename_dataframe_columns(df_original_cols_formatted_values)
            display_headers = list(df_display_cols_formatted_values.columns)

            return gr.DataFrame(value=df_display_cols_formatted_values, headers=display_headers, datatype=gr_datatypes)
        return update_table_data

    def _create_and_bind_dataframe_component(self, current_task_key: str, name_search_textbox: gr.Textbox, source_filter_radio: gr.Radio):

        initial_df_original_cols = self.get_prepared_dataframe(current_task_key, "All", "")

        current_display_headers = []
        current_datatypes = None
        df_value_for_gr_display_cols = pd.DataFrame()

        if initial_df_original_cols.empty:
            base_df = self.raw_dataframes.get(current_task_key, pd.DataFrame())
            base_df_cols_original = list(base_df.columns) if not base_df.empty else []

            if base_df_cols_original:
                temp_empty_df_orig_cols = pd.DataFrame(columns=base_df_cols_original)
                if self.columns_to_hide:
                    cols_to_drop_now = [col for col in self.columns_to_hide if col in temp_empty_df_orig_cols.columns]
                    if cols_to_drop_now:
                        temp_empty_df_orig_cols = temp_empty_df_orig_cols.drop(columns=cols_to_drop_now)

                if self.main_scores_map.get(current_task_key) and "Rank" not in temp_empty_df_orig_cols.columns:
                    temp_empty_df_orig_cols.insert(0, "Rank", [])

                initial_df_display_cols = self.column_config.rename_dataframe_columns(temp_empty_df_orig_cols)
                current_display_headers = list(initial_df_display_cols.columns)
                current_datatypes = ["str"] * len(current_display_headers) if current_display_headers else ["str"]
                df_value_for_gr_display_cols = pd.DataFrame(columns=current_display_headers)
            else:
                current_display_headers = ["Info"]
                current_datatypes = ["str"]
                df_value_for_gr_display_cols = pd.DataFrame([{"Info": f"No data or columns configured for {self.column_config.get_task_tab_name(current_task_key)}."}])
        else:
            current_datatypes = self._get_gr_datatypes(
                initial_df_original_cols,
                self.model_identifier_column,
                self.numeric_score_columns_for_bolding
            )
            initial_df_display_cols = self.column_config.rename_dataframe_columns(initial_df_original_cols)
            current_display_headers = list(initial_df_display_cols.columns)
            df_value_for_gr_display_cols = initial_df_display_cols

        df_component = gr.DataFrame(
            value=df_value_for_gr_display_cols,
            headers=current_display_headers,
            datatype=current_datatypes,
            interactive=False,
            wrap=True,
            # height=700,
            # elem_id=f"dataframe_{current_task_key}"
        )

        update_fn = self.make_update_fn_for_task_closure(current_task_key)
        filter_inputs = [name_search_textbox, source_filter_radio]

        name_search_textbox.submit(fn=update_fn, inputs=filter_inputs, outputs=[df_component])
        source_filter_radio.change(fn=update_fn, inputs=filter_inputs, outputs=[df_component])

        return df_component

    def create_gradio_interface(self) -> gr.Blocks:

        logger.info("Creating Gradio interface with potentially nested tabs.")
        with gr.Blocks(theme=gr.themes.Soft(), elem_id="leaderboard_main_container") as leaderboard_ui_blocks:
            if not self.tab_processing_order and not self.parent_child_task_map:
                gr.Markdown("### Leaderboard Not Configured\n- `tab_processing_order` and `parent_child_task_map` are not defined or empty in `leaderboard_config.yaml`.")
                return leaderboard_ui_blocks
            if not self.raw_dataframes or all(df.empty for df in self.raw_dataframes.values()):
                gr.Markdown("### No Data Loaded\n- No data loaded from `boards_data/`. Ensure `refresh.py` ran and JSONL files exist.")
                return leaderboard_ui_blocks

            with gr.Row():
                name_search_textbox = gr.Textbox(label="Search by Model Name", placeholder="Type model name and press Enter...", interactive=True, scale=3)
                source_filter_radio = gr.Radio(choices=["All", "Open-Source", "Closed-Source"], value="All", label="Filter by Model Source", interactive=True, scale=1)

            with gr.Tabs(elem_id="main_benchmark_tabs") as main_tabs:
                processed_top_level_keys = set()

                for main_task_key in self.tab_processing_order:
                    if main_task_key in processed_top_level_keys: continue
                    processed_top_level_keys.add(main_task_key)

                    main_tab_display_label = self.column_config.get_task_tab_name(main_task_key)

                    with gr.TabItem(label=main_tab_display_label, id=f"main_tab_{main_task_key}"):
                        gr.Markdown(f"## {main_tab_display_label}")

                        child_task_keys_for_parent = self.parent_child_task_map.get(main_task_key, [])

                        if child_task_keys_for_parent:
                            with gr.Tabs(elem_id=f"sub_tabs_for_{main_task_key}") as sub_tabs_component:
                                for child_key in child_task_keys_for_parent:
                                    if child_key not in self.raw_dataframes or self.raw_dataframes[child_key].empty:  # Check if df is empty
                                        logger.warning(f"Data for sub-task '{child_key}' under parent '{main_task_key}' not loaded or is empty. Skipping sub-tab.")
                                        child_tab_display_label_empty = self.column_config.get_task_tab_name(child_key)
                                        with gr.TabItem(label=child_tab_display_label_empty, id=f"sub_tab_{child_key}_empty"):
                                            gr.Markdown(f"Data for {child_tab_display_label_empty} is not available.")
                                        continue
                                    processed_top_level_keys.add(child_key)
                                    child_tab_display_label = self.column_config.get_task_tab_name(child_key)
                                    with gr.TabItem(label=child_tab_display_label, id=f"sub_tab_{child_key}"):
                                        self._create_and_bind_dataframe_component(child_key, name_search_textbox, source_filter_radio)
                        else:  # This main_task_key is a STANDALONE tab
                            if main_task_key not in self.raw_dataframes or self.raw_dataframes[main_task_key].empty:  # Check if df is empty
                                logger.warning(f"Data for standalone task '{main_task_key}' not loaded or is empty. Skipping tab content.")
                                gr.Markdown(f"Data for {main_tab_display_label} is not available.")
                                continue
                            self._create_and_bind_dataframe_component(main_task_key, name_search_textbox, source_filter_radio)
        return leaderboard_ui_blocks

571 |
+
def run_standalone(self) -> None:
|
572 |
+
|
573 |
+
logger.info("Running LeaderboardApp in standalone mode.")
|
574 |
+
try:
|
575 |
+
self.load_data()
|
576 |
+
if not self.raw_dataframes or all(df.empty for df in self.raw_dataframes.values()):
|
577 |
+
logger.warning("No data loaded. Leaderboard might be empty or show 'No data' messages.")
|
578 |
+
self.generate_model_rankings()
|
579 |
+
demo_interface = self.create_gradio_interface()
|
580 |
+
demo_interface.launch(server_name="0.0.0.0", server_port=7860, debug=True)
|
581 |
+
except Exception as e:
|
582 |
+
logger.error(f"Error during standalone run: {e}", exc_info=True)
|
583 |
+
try:
|
584 |
+
with gr.Blocks() as error_demo: gr.Error(f"Failed to launch LeaderboardApp: {e}")
|
585 |
+
error_demo.launch(server_name="0.0.0.0", server_port=7860)
|
586 |
+
except Exception as launch_err:
|
587 |
+
logger.error(f"CRITICAL: Failed even to launch the error Gradio page: {launch_err}")
|
588 |
+
|
589 |
+
|
590 |
+
def main():
|
591 |
+
|
592 |
+
logger.info(f"Initializing LeaderboardApp with config: {CONFIG_FILE_PATH}")
|
593 |
+
if not CONFIG_FILE_PATH.exists():
|
594 |
+
logger.critical(f"CRITICAL: Config file '{CONFIG_FILE_PATH.name}' not found at {CONFIG_FILE_PATH}. App cannot start.")
|
595 |
+
try:
|
596 |
+
with gr.Blocks() as error_demo: gr.Error(f"Config File Not Found: {CONFIG_FILE_PATH}")
|
597 |
+
error_demo.launch(server_name="0.0.0.0", server_port=7860)
|
598 |
+
except Exception as launch_err:
|
599 |
+
logger.error(f"CRITICAL: Failed to launch the error Gradio page for missing config: {launch_err}")
|
600 |
+
return
|
601 |
+
app = LeaderboardApp(config_path=CONFIG_FILE_PATH)
|
602 |
+
app.run_standalone()
|
603 |
+
|
604 |
+
if __name__ == '__main__':
|
605 |
+
main()
|
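The tab-building code above leans on `make_update_fn_for_task_closure` to capture one task key per `gr.DataFrame`, so a single pair of filter controls can drive every tab. A minimal sketch of that closure pattern (the names and toy data below are illustrative, not the Space's actual code):

```python
import gradio as gr
import pandas as pd

# Toy stand-in for the per-task dataframes loaded from boards_data/.
TASK_DATA = {"demo_task": pd.DataFrame({"Model": ["model-a", "model-b"], "Score": [0.9, 0.7]})}

def make_update_fn(task_key: str):
    # The closure captures task_key, so each tab filters its own dataframe.
    def update(name_query: str, source_filter: str) -> pd.DataFrame:
        df = TASK_DATA[task_key]
        if name_query:
            df = df[df["Model"].str.contains(name_query, case=False)]
        # The real app additionally filters on the hidden source_type column here.
        return df
    return update

with gr.Blocks() as demo:
    search = gr.Textbox(label="Search by Model Name")
    source = gr.Radio(["All", "Open-Source", "Closed-Source"], value="All", label="Filter by Model Source")
    table = gr.DataFrame(value=TASK_DATA["demo_task"], interactive=False)
    fn = make_update_fn("demo_task")
    search.submit(fn=fn, inputs=[search, source], outputs=[table])
    source.change(fn=fn, inputs=[search, source], outputs=[table])
```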
leaderboard/leaderboard_config.yaml
ADDED
@@ -0,0 +1,310 @@
# leaderboard/leaderboard_config.yaml

# --- Column Display Names ---
# Used to rename columns from your .jsonl files for display in the UI tables.
# Format: "Original_Column_Name_In_JSONL": "Desired Display Name in UI"
column_names:
  # Columns added by the updated refresh.py
  "Model Name": "Model" # This 'Model Name' is the canonical ID from refresh.py. Its display will be further customized by 'model_display_configs'.
  "model_url": "Link"
  "parameters_count": "Parameters"
  "source_type": "Source Type"
  "Average": "Average"
  "score_mean": "score_mean (main)"
  "strict_instruction_accuracy": "strict_instruction_accuracy (main)"
  "acc": "accuracy (main)"
  "nlg_score": "nlg_score (main)"
  "nlu_score": "nlu_score (main)"

  # Common score columns (these are examples; use your actual metric names from .jsonl)
  # "Average": "Overall AVG" # For the 'all' table summary
  # "strict_instruction_accuracy": "IFEval Acc."
  # "score_mean": "MT-Bench Score"
  # "acc": "Accuracy" # Generic accuracy (e.g., for MMLU, persian_csr)
  # "nlg_score": "NLG Score"
  # "nlu_score": "NLU Score"
  # Add other specific metric columns from your .jsonl files that you want to display or rename,
  # e.g., "exact_match": "Exact Match", "f1": "F1 Score"

# --- Task (Tab) Display Names & Identifiers ---
# Defines the display names for UI tabs and identifies tasks for processing.
# Keys MUST match the base filenames of .jsonl files produced by refresh.py (e.g., "MMLU" for "MMLU.jsonl").
# These keys are also used in 'main_scores_map' and 'tab_processing_order'.
task_display_names:
  all: "🏆 Overall Benchmark"
  mt_bench: "Persian MT-Bench"
  ifeval: "Persian IFEval"
  MMLU: "PerMMLU"
  persian_csr: "PerCoR"
  persian_nlg: "Persian NLG" # Overview tab
  persian_nlu: "Persian NLU"
  question-generation_PersianQA: "PersianQA (QG)"
  translation-en2fa_en2fa: "Translation (en2fa)"
  translation-fa2en_fa2en: "Translation (fa2en)"
  translation-ar2fa_ar2fa: "Translation (ar2fa)"
  translation-fa2ar_fa2ar: "Translation (fa2ar)"
  summarization_SamSUM-fa: "SamSum-Fa (Summarization)"
  summarization_PnSummary: "PnSummary (Summarization)"
  sentiment-analysis_deepsentipers: "DeepSentiPers (SA)"
  sts_SynPerSTS: "SynPerSTS (STS)"
  ner_arman: "Arman (NER)"
  keyword-extraction_SynKeywords: "SynKeywords (Keyword Extraction)"
  tone-classification_SynTone: "SynTone (Tone Classification)"
  sts_FarSICK: "FarSICK (STS)"
  paraphrase-detection_FarsiParaphraseDetection: "FarsiParaphraseDetection (Paraphrase Detection)"
  nli_farstail: "Farstail (NLI)"
  paraphrase-detection_parsinlu: "ParsiNLU (Paraphrase Detection)" # Assuming this was a typo or duplicate key and intended for update
  extractive-qa_PQuAD: "PQuAD (Extractive QA)"
  topic-classification_sid: "SID (Topic Classification)"

# --- Model Display Configurations ---
# Customize how model names are displayed and potentially override their URLs.
# Keys here MUST be the canonical model identifiers found in the "Model Name" column
# (or whatever column is specified by 'model_identifier_column' in global_settings)
# of the .jsonl files generated by refresh.py.
model_display_configs:
  "claude-3-7-sonnet-20250219":
    display_name: "Claude 3.7 Sonnet"
    url: "https://www.anthropic.com/news/claude-3-7-sonnet"

  "gpt-4.1":
    display_name: "GPT-4.1"
    url: "https://openai.com/index/gpt-4-1/"

  "gpt-4o":
    display_name: "GPT-4o"
    url: "https://openai.com/index/hello-gpt-4o/"

  "gpt-4.1-mini":
    display_name: "GPT-4.1 Mini"
    url: "https://openai.com/index/gpt-4-1/"

  "deepseek-chat":
    display_name: "DeepSeek-V3"
    url: "https://api-docs.deepseek.com/"

  "gemma-3-27b-it":
    display_name: "Gemma 3 27B IT"
    url: "https://huggingface.co/google/gemma-3-27b-it"

  "gpt-4o-mini":
    display_name: "GPT-4o Mini"
    url: "https://openai.com/index/hello-gpt-4o/"

  "Qwen3-32B":
    display_name: "Qwen3-32B"
    url: "https://huggingface.co/Qwen/Qwen3-32B"

  "Llama-3.3-70B-Instruct":
    display_name: "Llama 3.3 70B Instruct"
    url: "https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct"

  "gemma-3-12b-it":
    display_name: "Gemma 3 12B IT"
    url: "https://huggingface.co/google/gemma-3-12b-it"

  "Qwen3-14B":
    display_name: "Qwen3-14B"
    url: "https://huggingface.co/Qwen/Qwen3-14B"

  "Mistral-Small-3.1-24B-Instruct-2503":
    display_name: "Mistral Small 3.1 24B Instruct"
    url: "https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Instruct-2503"

  "claude-3-5-haiku-20241022":
    display_name: "Claude 3.5 Haiku"
    url: "https://www.anthropic.com/claude/haiku"

  "gpt-4.1-nano":
    display_name: "GPT-4.1 Nano"
    url: "https://openai.com/index/gpt-4-1/"

  "Qwen3-8B":
    display_name: "Qwen3-8B"
    url: "https://huggingface.co/Qwen/Qwen3-8B"

  "gemma-3-4b-it":
    display_name: "Gemma 3 4B IT"
    url: "https://huggingface.co/google/gemma-3-4b-it"

  "aya-expanse-32b":
    display_name: "Aya Expanse 32B"
    url: "https://huggingface.co/CohereLabs/aya-expanse-32b"

  "Qwen3-4B":
    display_name: "Qwen3-4B"
    url: "https://huggingface.co/Qwen/Qwen3-4B"

  "gemma-3-1b-it":
    display_name: "Gemma 3 1B IT"
    url: "https://huggingface.co/google/gemma-3-1b-it"

  "Mistral-7B-Instruct-v0.3":
    display_name: "Mistral 7B Instruct v0.3"
    url: "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3"

  "Llama-3.2-3B-Instruct":
    display_name: "Llama 3.2 3B Instruct"
    url: "https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct"

  "Llama-3.2-1B-Instruct":
    display_name: "Llama 3.2 1B Instruct"
    url: "https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct"

  "o4-mini":
    display_name: "GPT-o4 Mini"
    url: "https://openai.com/index/introducing-o3-and-o4-mini/"

  "deepseek-reasoner":
    display_name: "DeepSeek-R1"
    url: "https://api-docs.deepseek.com/guides/reasoning_model"

  "gemini-2.0-flash":
    display_name: "Gemini 2.0 Flash"
    url: "https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-0-flash"

  "gemini-2.5-flash-preview-05-20":
    display_name: "Gemini 2.5 Flash Preview"
    url: "https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-5-flash"

  "Qwen3-30B-A3B":
    display_name: "Qwen3-30B-A3B"
    url: "https://huggingface.co/Qwen/Qwen3-30B-A3B"

  "c4ai-command-r-plus":
    display_name: "Command R Plus"
    url: "https://huggingface.co/CohereLabs/c4ai-command-r-plus"

  "c4ai-command-r-v01":
    display_name: "Command R v01"
    url: "https://huggingface.co/CohereLabs/c4ai-command-r-v01"

  "c4ai-command-a-03-2025":
    display_name: "Command A"
    url: "https://huggingface.co/CohereLabs/c4ai-command-a-03-2025"

# Add one entry for each model whose display name or URL you want to customize.
# If a model ID from your data is not listed here, its raw ID will be used as its name.

# --- Global Settings ---
# Various settings controlling leaderboard behavior and data interpretation.
global_settings:
  # The actual column name in your .jsonl DataFrames that holds the canonical model identifier.
  # This identifier is used as the key to look up entries in 'model_display_configs'.
  model_identifier_column: "Model Name"

  # Defines the primary score column used for ranking within each task's table.
  # Keys MUST match the keys in 'task_display_names' and 'tab_processing_order'.
  # Values MUST be actual column names present in the corresponding .jsonl data files.
  main_scores_map:
    all: "Average"
    mt_bench: "score_mean"
    ifeval: "strict_instruction_accuracy"
    MMLU: "acc"
    persian_csr: "acc"
    persian_nlg: "nlg_score"
    persian_nlu: "nlu_score"
    question-generation_PersianQA: "nlg_score"
    translation-en2fa_en2fa: "nlg_score"
    translation-fa2en_fa2en: "nlg_score"
    translation-ar2fa_ar2fa: "nlg_score"
    translation-fa2ar_fa2ar: "nlg_score"
    summarization_SamSUM-fa: "nlg_score"
    summarization_PnSummary: "nlg_score"

    sentiment-analysis_deepsentipers: "nlu_score"
    sts_SynPerSTS: "nlu_score"
    ner_arman: "nlu_score"
    keyword-extraction_SynKeywords: "nlu_score"
    tone-classification_SynTone: "nlu_score"
    sts_FarSICK: "nlu_score"
    paraphrase-detection_FarsiParaphraseDetection: "nlu_score"
    nli_farstail: "nlu_score"
    paraphrase-detection_parsinlu: "nlu_score"
    extractive-qa_PQuAD: "nlu_score"
    topic-classification_sid: "nlu_score"

  # Original column names in DataFrames that are allowed to have null values if an 'Average'
  # score is calculated by leaderboard.py (though refresh.py now handles the 'all' table Average).
  # Primarily non-score informational columns.
  allowed_null_columns_in_average:
    - "Model Name"
    - "model_url"
    - "parameters_count"
    - "source_type"

  # A score threshold; its specific use in formatting needs to be defined in leaderboard.py if used.
  # For example, conditional formatting for scores above this value. (Currently not actively used in the provided Python code for formatting.)
  score_cutoff_for_formatting: 0.0

  # Defines the order of tabs in the UI and the order for loading data files by leaderboard.py.
  # Keys MUST match:
  # 1. Keys in 'task_display_names'.
  # 2. Keys in 'main_scores_map'.
  # 3. The base names of .jsonl files generated by refresh.py (e.g., "MMLU" for "MMLU.jsonl").
  tab_processing_order:
    - "all"
    - "mt_bench"
    - "ifeval"
    - "MMLU"
    - "persian_csr"
    - "persian_nlg"
    - "persian_nlu"

  numeric_score_columns_for_bolding: # List of ORIGINAL column names
    # For the "Overall Benchmark" tab (all.jsonl)
    - "Average"
    - "Persian IFEval"
    - "Persian MT-Bench"
    - "PerMMLU"
    - "PerCoR"
    - "Persian NLU"
    - "Persian NLG"

    # For individual task tabs (if you want to keep their main scores bolded there).
    # These are typically the values from your 'main_scores_map'.
    - "score_mean" # For mt_bench tab
    - "strict_instruction_accuracy" # For ifeval tab
    - "acc" # For MMLU, persian_csr tabs
    # "nlg_score" and "nlu_score" are already covered if "Persian NLG"
    # and "Persian NLU" are the actual column names in those specific tabs too.
    # If persian_nlg.jsonl uses "nlg_score" as its main column, and
    # persian_nlu.jsonl uses "nlu_score", then you can add them for those specific tabs:
    - "nlg_score" # For persian_nlg tab (if it's different from "Persian NLG")
    - "nlu_score" # For persian_nlu tab (if it's different from "Persian NLU")
    # Add any other specific metric columns from other .jsonl files
    # whose max value you want bolded in their respective tabs.

  columns_to_hide: # List of ORIGINAL column names you don't want to display
    - "model_url"
    - "source_type"
    # - "another_column_to_hide"

  parent_child_task_map:
    persian_nlg: # Parent task key
      - "question-generation_PersianQA"
      - "translation-en2fa_en2fa"
      - "translation-fa2en_fa2en"
      - "translation-ar2fa_ar2fa"
      - "translation-fa2ar_fa2ar"
      - "summarization_SamSUM-fa"
      - "summarization_PnSummary"
    persian_nlu: # Parent task key
      - "sentiment-analysis_deepsentipers"
      - "sts_SynPerSTS"
      - "ner_arman"
      - "keyword-extraction_SynKeywords"
      - "tone-classification_SynTone"
      - "sts_FarSICK"
      - "paraphrase-detection_FarsiParaphraseDetection"
      - "nli_farstail"
      - "paraphrase-detection_parsinlu" # Assuming this was a typo or duplicate key
      - "extractive-qa_PQuAD"
      - "topic-classification_sid"
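Because `task_display_names`, `main_scores_map`, `tab_processing_order`, and the `boards_data/*.jsonl` filenames all share the same keys, any drift between them silently produces empty tabs. A small consistency-check sketch (not part of the repo; paths assume it is run from the Space root):

```python
from pathlib import Path
import yaml

cfg = yaml.safe_load(Path("leaderboard/leaderboard_config.yaml").read_text(encoding="utf-8"))
gs = cfg["global_settings"]
for key in gs["tab_processing_order"]:
    # Every tab key must have a display name, a main score, and a data file.
    assert key in cfg["task_display_names"], f"missing display name: {key}"
    assert key in gs["main_scores_map"], f"missing main score: {key}"
    jsonl = Path("leaderboard/boards_data") / f"{key}.jsonl"
    if not jsonl.exists():
        print(f"warning: {jsonl} not found; tab '{key}' will be empty")
```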
leaderboard/refresh.py
ADDED
@@ -0,0 +1,441 @@
# leaderboard/refresh.py

import json
import logging
from pathlib import Path
from typing import Any, Dict, List, Optional

import pandas as pd
import yaml

# --- Logging Setup ---
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(module)s - %(message)s"
)
logger = logging.getLogger(__name__)

# --- Path Definitions ---
SCRIPT_DIR = Path(__file__).resolve().parent
PROJECT_ROOT = SCRIPT_DIR.parent

# --- Default Input/Output Paths ---
DEFAULT_MODELS_FOLDER = PROJECT_ROOT.parent / "llm-leaderboard/models_info"
DEFAULT_RESULTS_FOLDER = PROJECT_ROOT.parent / "llm-leaderboard/results"
OUTPUT_FOLDER = SCRIPT_DIR / "boards_data"
CONFIG_FILE_PATH = SCRIPT_DIR / "leaderboard_config.yaml"
TEMPLATE_FOLDER = SCRIPT_DIR / "template_jsons"

# --- Constants for Subtask Processing ---
NLU_NLG_TASK_KEYS = ["persian_nlu", "persian_nlg"]

ALL_LEADERBOARD_COLUMNS = [
    'Model Name', 'model_url', 'parameters_count', 'source_type', 'Average',
    'Persian IFEval', 'Persian MT-Bench', "PerMMLU",
    "PerCoR", "Persian NLU", "Persian NLG"
]


def load_tasks_from_config(config_path: Path) -> Dict[str, str]:
    if not config_path.exists():
        logger.error(f"Configuration file not found: {config_path}. Cannot load tasks.")
        return {}
    try:
        with open(config_path, 'r', encoding='utf-8') as f:
            config_data = yaml.safe_load(f)
        tasks_from_config = config_data.get('task_display_names', {})
        if not isinstance(tasks_from_config, dict):
            logger.error(f"'task_display_names' in {config_path} is not a dictionary.")
            return {}
        processed_tasks = {k: v for k, v in tasks_from_config.items() if str(k).lower() != 'all'}
        if not processed_tasks:
            logger.warning(f"No tasks in {config_path} under 'task_display_names' (excluding 'all').")
        return processed_tasks
    except Exception as e:
        logger.error(f"Error loading config {config_path}: {e}")
        return {}


class ModelEvaluationProcessor:
    def __init__(
        self,
        models_info_path: Path,
        results_base_path: Path,
        output_path: Path,
        template_jsons_path: Path,
    ) -> None:
        self.models_info_path = models_info_path
        self.results_base_path = results_base_path
        self.output_path = output_path
        self.template_folder = template_jsons_path
        self.output_path.mkdir(parents=True, exist_ok=True)

        self.tasks_config = load_tasks_from_config(CONFIG_FILE_PATH)
        if not self.tasks_config:
            logger.error("Tasks config is empty. Processing might be affected.")

        self.main_scores_map = {
            "ifeval": "strict_instruction_accuracy",
            "mt_bench": "score_mean",
            "MMLU": "acc",
            "persian_csr": "acc",
            "persian_nlg": "nlg_score",
            "persian_nlu": "nlu_score",
        }

    def _load_template(self, task_key: str) -> Dict[str, Any]:
        path = self.template_folder / f"{task_key}.json"
        try:
            return json.loads(path.read_text(encoding="utf-8"))
        except FileNotFoundError:
            logger.warning(f"Template file not found for task_key {task_key} at {path}. Using empty template.")
            return {}
        except Exception as e:
            logger.error(f"Cannot load template for task_key {task_key} from {path}: {e}")
            return {}

    def _deep_override(self, base: Any, override: Any) -> Any:
        if isinstance(base, dict) and isinstance(override, dict):
            merged = {}
            for k, v_base in base.items():
                if k in override and override[k] is not None and override[k] != -1:
                    merged[k] = self._deep_override(v_base, override[k])
                else:
                    merged[k] = v_base
            # for k, v_override in override.items():
            #     if k not in merged:
            #         merged[k] = v_override
            return merged
        elif override is not None and override != -1:
            return override
        else:
            return base

    def _load_model_raw_results(self, model_folder_name: str, task_key: str) -> Dict[str, Any]:
        results_filename = f"{model_folder_name}___{task_key}.json"
        results_file_path = self.results_base_path / results_filename

        if results_file_path.exists():
            try:
                with open(results_file_path, 'r', encoding='utf-8') as f:
                    data = json.load(f)
                return data if isinstance(data, dict) else {}
            except json.JSONDecodeError as e:
                logger.error(f"JSONDecodeError for model '{model_folder_name}', task_key '{task_key}' from {results_file_path}: {e}")
            except Exception as e:
                logger.error(f"Error loading results for model '{model_folder_name}', task_key '{task_key}' from {results_file_path}: {e}")
        else:
            logger.warning(f"Results file not found for model '{model_folder_name}', task_key '{task_key}' at {results_file_path}")
        return {}

    def load_and_fill_task_results(self, model_folder_name: str, task_key: str) -> Dict[str, Any]:
        template = self._load_template(task_key)
        raw_results = self._load_model_raw_results(model_folder_name, task_key)
        return self._deep_override(template, raw_results)

    def clean_previous_subtask_files(self) -> None:
        logger.info("Cleaning previous NLU/NLG subtask JSONL files...")
        for task_key_prefix in NLU_NLG_TASK_KEYS:
            for result_file in self.results_base_path.glob(f"*___{task_key_prefix}.json"):
                try:
                    task_data_content = result_file.read_text(encoding="utf-8")
                    if not task_data_content.strip():
                        logger.debug(f"Skipping empty result file for subtask cleaning: {result_file}")
                        continue
                    task_data = json.loads(task_data_content)

                    main_score_for_this_task_prefix = self.main_scores_map.get(task_key_prefix)

                    for subtask_name in task_data:
                        if subtask_name == main_score_for_this_task_prefix:
                            continue
                        if isinstance(task_data.get(subtask_name), dict):
                            subtask_output_path = self.output_path / f"{subtask_name}.jsonl"
                            if subtask_output_path.exists():
                                subtask_output_path.unlink()
                                logger.info(f"Deleted previous subtask file: {subtask_output_path}")
                except json.JSONDecodeError as e:
                    logger.warning(f"Failed to decode JSON for subtask cleaning from {result_file}: {e}")
                except Exception as e:
                    logger.warning(f"Failed to inspect/delete subtask files based on {result_file}: {e}")

    def _process_subtask_data(self, task_results: Dict[str, Any], base_model_info: Dict[str, Any], parent_task_main_score_key: Optional[str], parent_task_key_for_log: str) -> None:
        parent_task_main_score_value = task_results.get(parent_task_main_score_key) if parent_task_main_score_key else None

        for subtask_name, subtask_scores_dict in task_results.items():
            if subtask_name == parent_task_main_score_key:
                continue
            if not isinstance(subtask_scores_dict, dict):
                logger.debug(f"Skipping entry '{subtask_name}' in '{parent_task_key_for_log}': not a dictionary of subtask scores.")
                continue

            row_data = base_model_info.copy()
            row_data.update(subtask_scores_dict)

            if parent_task_main_score_key:
                row_data[parent_task_main_score_key] = parent_task_main_score_value

            subtask_output_file = f"{subtask_name}.jsonl"
            subtask_output_path = self.output_path / subtask_output_file

            try:
                current_entries = []
                if subtask_output_path.exists():
                    existing_df = pd.read_json(subtask_output_path, lines=True)
                    if not existing_df.empty and 'Model Name' in existing_df.columns:
                        current_entries = existing_df[existing_df['Model Name'] != row_data['Model Name']].to_dict(orient='records')

                current_entries.append(row_data)
                updated_df = pd.DataFrame(current_entries)
                updated_df.to_json(subtask_output_path, orient="records", lines=True, force_ascii=False)
                logger.debug(f"Updated subtask file: {subtask_output_path} for model {base_model_info.get('Model Name')}, parent task {parent_task_key_for_log}")
            except Exception as e:
                logger.error(f"Error updating subtask file {subtask_output_path} for parent {parent_task_key_for_log}: {e}")

    def process_nlu_nlg_subtasks(self, model_details: Dict[str, Any], model_folder_name: str, canonical_model_name: str) -> None:
        common_subtask_model_info = {
            "Model Name": canonical_model_name,
            "model_url": model_details.get('model_url', model_details.get('link', model_details.get('homepage', 'https://google.com'))),
            "parameters_count": str(model_details.get('n_parameters', "N/A")),
            "source_type": "Closed-Source"  # Default, will be refined
        }
        parameters_count_raw = model_details.get('n_parameters', None)
        if parameters_count_raw is not None:
            is_open_source_candidate = False
            if isinstance(parameters_count_raw, (int, float)) and parameters_count_raw > 0:
                is_open_source_candidate = True
            elif isinstance(parameters_count_raw, str) and \
                    str(parameters_count_raw).strip().lower() not in ["", "n/a", "unknown", "private", "confidential", "tbd", "null", "closed"]:
                is_open_source_candidate = True
            common_subtask_model_info["source_type"] = "Open-Source" if is_open_source_candidate else "Closed-Source"

        for task_key_for_subtasks in NLU_NLG_TASK_KEYS:
            if task_key_for_subtasks not in self.tasks_config:
                logger.debug(f"Subtask processing for '{task_key_for_subtasks}' skipped: not in tasks_config.")
                continue

            logger.info(f"Processing subtasks for '{task_key_for_subtasks}' for model '{canonical_model_name}'...")
            parent_task_full_results = self.load_and_fill_task_results(model_folder_name, task_key_for_subtasks)
            main_score_key_for_parent_task = self.main_scores_map.get(task_key_for_subtasks)
            if not main_score_key_for_parent_task:
                logger.warning(f"No main score key in main_scores_map for parent task '{task_key_for_subtasks}'.")

            self._process_subtask_data(
                parent_task_full_results,
                common_subtask_model_info,
                main_score_key_for_parent_task,
                task_key_for_subtasks
            )

    def process_models(self) -> Dict[str, pd.DataFrame]:
        processed_task_data: Dict[str, List[Dict[str, Any]]] = {task_key: [] for task_key in self.tasks_config.keys()}
        all_models_summary_data: List[Dict[str, Any]] = []

        if not self.models_info_path.exists() or not self.models_info_path.is_dir():
            logger.critical(f"Configured MODELS_FOLDER path does not exist or is not a directory: {self.models_info_path}")
            empty_dfs = {key: pd.DataFrame() for key in self.tasks_config.keys()}
            empty_dfs["all"] = pd.DataFrame()
            return empty_dfs

        model_info_files = list(self.models_info_path.glob("*.json"))
        if not model_info_files:
            logger.warning(f"No model info files (*.json) found in {self.models_info_path}. No models will be processed.")
            empty_dfs = {key: pd.DataFrame() for key in self.tasks_config.keys()}
            empty_dfs["all"] = pd.DataFrame()
            return empty_dfs

        for model_info_file in model_info_files:
            model_folder_name = model_info_file.stem
            try:
                with open(model_info_file, 'r', encoding='utf-8') as f:
                    model_details = json.load(f)

                canonical_model_name = model_details.get('name_for_leaderboard',
                                                         model_details.get('model_hf_id',
                                                                           model_details.get('name', model_folder_name)))
                model_url = model_details.get('model_url', model_details.get('link', model_details.get('homepage', 'https://google.com')))
                if not model_url:
                    model_url = 'https://google.com'

                parameters_count_raw = model_details.get('n_parameters', None)
                parameters_count_display = str(parameters_count_raw) if parameters_count_raw is not None else "N/A"

                source_type = "Closed-Source"
                if parameters_count_raw is not None:
                    is_open_source_candidate = False
                    if isinstance(parameters_count_raw, (int, float)) and parameters_count_raw > 0:
                        is_open_source_candidate = True
                    elif isinstance(parameters_count_raw, str) and \
                            str(parameters_count_raw).strip().lower() not in ["", "n/a", "unknown", "private", "confidential", "tbd", "null", "closed"]:
                        is_open_source_candidate = True
                    source_type = "Open-Source" if is_open_source_candidate else "Closed-Source"

            except Exception as e:
                logger.error(f"Error loading/parsing model info from {model_info_file}: {e}. Skipping '{model_folder_name}'.")
                continue

            logger.info(f"Processing model: {canonical_model_name} (source ID: {model_folder_name})")

            current_model_scores_for_summary: Dict[str, Any] = {
                "Model Name": canonical_model_name,
                "model_url": model_url,
                "parameters_count": parameters_count_display,
                "source_type": source_type
            }

            for task_key, task_display_name in self.tasks_config.items():
                task_specific_results = self.load_and_fill_task_results(model_folder_name, task_key)
                main_score_metric_name = self.main_scores_map.get(task_key)
                task_data_entry_for_specific_jsonl: Dict[str, Any] = {
                    "Model Name": canonical_model_name,
                    "model_url": model_url,
                    "parameters_count": parameters_count_display,
                    "source_type": source_type
                }

                if isinstance(task_specific_results, dict) and task_specific_results:
                    for metric, value in task_specific_results.items():
                        task_data_entry_for_specific_jsonl[metric] = value

                    if main_score_metric_name and main_score_metric_name in task_specific_results:
                        score_value = task_specific_results[main_score_metric_name]
                        if task_key == "mt_bench" and score_value is not None:
                            try:
                                score_value = float(score_value) / 10.0
                            except (ValueError, TypeError):
                                logger.warning(f"Could not convert mt_bench score '{score_value}' to float for division for model {canonical_model_name}")
                                score_value = pd.NA
                        current_model_scores_for_summary[task_display_name] = score_value
                    elif main_score_metric_name:
                        logger.warning(f"Main score metric '{main_score_metric_name}' for task '{task_key}' (Display: {task_display_name}) not found for model '{canonical_model_name}'. Will be NA.")
                        current_model_scores_for_summary[task_display_name] = pd.NA
                        task_data_entry_for_specific_jsonl[main_score_metric_name] = pd.NA
                else:
                    logger.warning(f"No valid results data for model '{canonical_model_name}', task_key '{task_key}'. Scores will be NA.")
                    if main_score_metric_name:
                        task_data_entry_for_specific_jsonl[main_score_metric_name] = pd.NA
                    current_model_scores_for_summary[task_display_name] = pd.NA

                processed_task_data[task_key].append(task_data_entry_for_specific_jsonl)

            all_models_summary_data.append(current_model_scores_for_summary)
            self.process_nlu_nlg_subtasks(model_details, model_folder_name, canonical_model_name)

        final_dataframes: Dict[str, pd.DataFrame] = {}
        for task_key, data_list in processed_task_data.items():
            df = pd.DataFrame(data_list) if data_list else pd.DataFrame()
            main_score_col = self.main_scores_map.get(task_key)
            if not df.empty and main_score_col and main_score_col in df.columns:
                try:
                    df[main_score_col] = pd.to_numeric(df[main_score_col], errors='coerce')
                    # Sort by main score (NaNs will go last or first depending on na_position, default is last)
                    df = df.sort_values(by=main_score_col, ascending=False, na_position='last')
                except Exception as e:
                    logger.warning(f"Could not sort dataframe for task {task_key} by score {main_score_col}: {e}")
            final_dataframes[task_key] = df
            if df.empty:
                logger.warning(f"No data processed for task '{task_key}'. Resulting DataFrame is empty.")

        if all_models_summary_data:
            all_df = pd.DataFrame(all_models_summary_data)
            score_cols_for_average = []
            for _, task_display_name_for_avg in self.tasks_config.items():
                if task_display_name_for_avg in all_df.columns:
                    numeric_col = pd.to_numeric(all_df[task_display_name_for_avg], errors='coerce')
                    if numeric_col.notna().any():  # Check if there is at least one non-NA numeric value
                        all_df[task_display_name_for_avg] = numeric_col
                        score_cols_for_average.append(task_display_name_for_avg)
                    else:  # All values are NA or non-numeric
                        all_df[task_display_name_for_avg] = pd.NA  # Ensure column is NA if not usable
                        logger.warning(f"Column '{task_display_name_for_avg}' for averaging in 'all' table is not numeric or all NaN. Excluding from average calculation and setting to NA.")
            if score_cols_for_average:
                try:
                    # Calculate mean; it will be NaN if any constituent score for a row is NaN.
                    all_df["Average"] = all_df[score_cols_for_average].mean(axis=1, skipna=False)
                    # Round only non-NaN averages
                    all_df.loc[all_df["Average"].notna(), "Average"] = all_df.loc[all_df["Average"].notna(), "Average"].round(4)
                except Exception as e:
                    logger.error(f"Error calculating 'Average' for 'all' table: {e}. Average column might be NA or incorrect.")
                    all_df["Average"] = pd.NA  # Fallback to NA
            else:
                logger.warning("No valid numeric score columns found to calculate 'Average' for 'all' table.")
                all_df["Average"] = pd.NA  # Assign pd.NA if no columns to average

            # Sort 'all' table by Average (NaNs will be placed last by default with ascending=False)
            if "Average" in all_df.columns:  # Check if 'Average' column exists
                # NaNs are typically sorted to the end by default when ascending=False or na_position='last'
                all_df = all_df.sort_values(by="Average", ascending=False, na_position='last')

            existing_cols_in_order = [col for col in ALL_LEADERBOARD_COLUMNS if col in all_df.columns]
            other_cols = [col for col in all_df.columns if col not in existing_cols_in_order]
            all_df = all_df[existing_cols_in_order + other_cols]

            final_dataframes["all"] = all_df
        else:
            final_dataframes["all"] = pd.DataFrame()
            logger.warning("No summary data collected for the 'all' table.")

        return final_dataframes

    def save_dataframe_as_jsonl(self, df: pd.DataFrame, filename_base: str) -> None:
        if df is None or df.empty:
            logger.warning(f"DataFrame for '{filename_base}.jsonl' is empty or None. Skipping save.")
            return
        output_file_path = self.output_path / f"{filename_base}.jsonl"
        try:
            df.to_json(output_file_path, orient="records", lines=True, force_ascii=False, index=False)
            logger.info(f"Saved data to {output_file_path}")
        except Exception as e:
            logger.error(f"Failed to save DataFrame to {output_file_path}: {e}")

    def run(self) -> None:
        logger.info("Starting data processing pipeline in ModelEvaluationProcessor...")
        self.clean_previous_subtask_files()
        processed_dataframes = self.process_models()
        for task_key_or_name, df in processed_dataframes.items():
            self.save_dataframe_as_jsonl(df, task_key_or_name)
        logger.info("Data processing pipeline completed successfully!")


def main() -> None:
    models_folder_to_use = DEFAULT_MODELS_FOLDER
    results_folder_to_use = DEFAULT_RESULTS_FOLDER
    template_folder_to_use = TEMPLATE_FOLDER

    logger.info(f"Refresh script running from: {SCRIPT_DIR}")
    logger.info(f"CONFIGURED Input 'models_info' Path: {models_folder_to_use}")
    logger.info(f"CONFIGURED Input 'results' Path: {results_folder_to_use}")
    logger.info(f"CONFIGURED Input 'template_jsons' Path: {template_folder_to_use}")
    logger.info(f"Outputting processed data to (inside 'leaderboard' dir): {OUTPUT_FOLDER}")
    logger.info(f"Using configuration file (inside 'leaderboard' dir): {CONFIG_FILE_PATH}")

    if not CONFIG_FILE_PATH.exists():
        logger.critical(f"CRITICAL: Config file not found at {CONFIG_FILE_PATH}. Ensure '{CONFIG_FILE_PATH.name}' exists in '{SCRIPT_DIR}'.")
        return
    if not models_folder_to_use.exists() or not models_folder_to_use.is_dir():
        logger.critical(f"CRITICAL: Input 'models_info' directory not found at {models_folder_to_use} or is not a directory.")
        return
    if not results_folder_to_use.exists() or not results_folder_to_use.is_dir():
        logger.critical(f"CRITICAL: Input 'results' directory not found at {results_folder_to_use} or is not a directory.")
        return
    if not template_folder_to_use.exists() or not template_folder_to_use.is_dir():
        logger.warning(f"WARNING: 'template_jsons' directory not found at {template_folder_to_use}. Template filling might not work as expected.")

    try:
        processor = ModelEvaluationProcessor(
            models_info_path=models_folder_to_use,
            results_base_path=results_folder_to_use,
            output_path=OUTPUT_FOLDER,
            template_jsons_path=template_folder_to_use,
        )
        processor.run()
    except Exception as e:
        logger.error(f"Unhandled exception in main: {e}", exc_info=True)


if __name__ == "__main__":
    main()
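The template-filling step above hinges on `_deep_override`: template keys define the schema, a result value only overwrites the template entry when it is neither `None` nor `-1`, and keys that appear in the results but not in the template are dropped. A standalone mirror of that logic, to make the merge semantics concrete:

```python
from typing import Any

def deep_override(base: Any, override: Any) -> Any:
    # Standalone sketch mirroring ModelEvaluationProcessor._deep_override above.
    if isinstance(base, dict) and isinstance(override, dict):
        merged = {}
        for k, v in base.items():
            if k in override and override[k] is not None and override[k] != -1:
                merged[k] = deep_override(v, override[k])
            else:
                merged[k] = v
        return merged
    return override if override is not None and override != -1 else base

template = {"acc": None, "cinema_acc": None, "sports_acc": None}
raw = {"acc": 0.62, "cinema_acc": -1, "extra_metric": 0.5}
print(deep_override(template, raw))
# -> {'acc': 0.62, 'cinema_acc': None, 'sports_acc': None}
# 'extra_metric' is dropped because it is not in the template, and the
# -1 sentinel leaves the template's null in place.
```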
leaderboard/template_jsons/MMLU.json
ADDED
@@ -0,0 +1,21 @@
{
    "acc": null,
    "cinema_acc": null,
    "emergency_number_acc": null,
    "foods_acc": null,
    "games_acc": null,
    "herbal_drugs_acc": null,
    "places_acc": null,
    "poetry_acc": null,
    "politicians_acc": null,
    "popular_people_acc": null,
    "Government_law_acc": null,
    "proverbs_acc": null,
    "religous_acc": null,
    "social_manners_acc": null,
    "souvenirs_acc": null,
    "sports_acc": null,
    "GPK_acc": null,
    "SPK_acc": null,
    "UPK_acc": null
}
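This template enumerates every metric column the PerMMLU board can show. When a model has no MMLU results file, `load_and_fill_task_results` in refresh.py returns the template unchanged, so the model's row stays all-null. A sketch of the resulting row ("some-model" and its URL are hypothetical; the path assumes the Space root):

```python
import json
from pathlib import Path

# Load the template and prepend the metadata columns refresh.py adds.
template = json.loads(Path("leaderboard/template_jsons/MMLU.json").read_text(encoding="utf-8"))
row = {"Model Name": "some-model", "model_url": "https://example.com",
       "parameters_count": "N/A", "source_type": "Closed-Source", **template}
print(json.dumps(row, ensure_ascii=False))  # every *_acc field stays null
```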
leaderboard/template_jsons/MMLU_full.json
ADDED
@@ -0,0 +1,171 @@
{
    "acc": null,
    "cinema_acc": null,
    "emergency_number_acc": null,
    "foods_acc": null,
    "games_acc": null,
    "herbal_drugs_acc": null,
    "places_acc": null,
    "poetry_acc": null,
    "politicians_acc": null,
    "popular_people_acc": null,
    "Government_law_acc": null,
    "proverbs_acc": null,
    "religous_acc": null,
    "social_manners_acc": null,
    "souvenirs_acc": null,
    "sports_acc": null,
    "/class4/math4/_acc": null,
    "/class4/farsi4/_acc": null,
    "/class4/science4/_acc": null,
    "/class4/religion4/_acc": null,
    "/class4/social4/_acc": null,
    "/class5/math5/_acc": null,
    "/class5/farsi5/_acc": null,
    "/class5/science5/_acc": null,
    "/class5/religion5/_acc": null,
    "/class5/social5/_acc": null,
    "/class6/math6/_acc": null,
    "/class6/farsi6/_acc": null,
    "/class6/quran6/_acc": null,
    "/class6/reigion6/_acc": null,
    "/class6/social6/_acc": null,
    "/class6/thinking6/_acc": null,
    "/class6/science6/_acc": null,
    "/class7/mathematic7/_acc": null,
    "/class7/olum7/_acc": null,
    "/class7/social7/_acc": null,
    "/class7/farsi7/_acc": null,
    "/class7/payam_asemani7/_acc": null,
    "/class7/English7/_acc": null,
    "/class7/quran7/_acc": null,
    "/class8/mathematic8/_acc": null,
    "/class8/farsi8/_acc": null,
    "/class8/olum8/_acc": null,
    "/class8/holy_message8/_acc": null,
    "/class8/social8/_acc": null,
    "/class8/englishstudent8/_acc": null,
    "/class9/allum9/_acc": null,
    "/class9/math_1/_acc": null,
    "/class9/farsi9/_acc": null,
    "/class9/social9/_acc": null,
    "/class9/hollymessage9/_acc": null,
    "/class9/englishstu9/_acc": null,
    "/class10/geometric10/_acc": null,
    "/class10/math10/_acc": null,
    "/class10/physics10/_acc": null,
    "/class10/chemistry10/_acc": null,
    "/class10/biology10/_acc": null,
    "/class10/farsi10/_acc": null,
    "/class10/arabic10/_acc": null,
    "/class10/religion_Life10/_acc": null,
    "/class10/economy10/_acc": null,
    "/class10/socialist10/_acc": null,
    "/class10/historic10/_acc": null,
    "/class10/arabic_ensani10/_acc": null,
    "/class10/logic10/_acc": null,
    "/class10/englishStu/_acc": null,
    "/class10/joghraphy10/_acc": null,
    "/class10/riazi10fani/_acc": null,
    "/class10/physicfani/_acc": null,
    "/class-11/relegion11/_acc": null,
    "/class-11/arabic11/_acc": null,
    "/class-11/frasi11/_acc": null,
    "/class-11/english11/_acc": null,
    "/class-11/tarikh11/_acc": null,
    "/class-11/hesaban11/_acc": null,
    "/class-11/hendese11/_acc": null,
    "/class-11/static11/_acc": null,
    "/class-11/physic11/_acc": null,
    "/class-11/chemistry11/_acc": null,
    "/class-11/mathematic11/_acc": null,
    "/class-11/biology 11/_acc": null,
    "/class-11/history11/_acc": null,
    "/class-11/jeographic/_acc": null,
    "/class-11/pholosophy/_acc": null,
    "/class-11/englishworkbook11/_acc": null,
    "/class-11/earabic11/_acc": null,
    "/class-12/religion12/_acc": null,
    "/class-12/mathematic/_acc": null,
    "/class-12/englishstudentbook/_acc": null,
    "/class-12/biology/_acc": null,
    "/class-12/chemistry/_acc": null,
    "/class-12/arabic/_acc": null,
    "/class-12/farsi/_acc": null,
    "/class-12/calculus12/_acc": null,
    "/class-12/physic12/_acc": null,
    "/class-12/geometric/_acc": null,
    "/class-12/history12/_acc": null,
    "/class-12/geographic12/_acc": null,
    "/class-12/arabic12e/_acc": null,
    "/class-12/philosophy12/_acc": null,
    "/class-12/sociology12/_acc": null,
    "/class-12/riazi12fani/_acc": null,
    "dandoon 1402_acc": null,
    "pezeshki 1400_acc": null,
    "dandoon 1401_acc": null,
    "darusazi 1401_acc": null,
    "dandoon 1403_acc": null,
    "dampezeshki 1395_acc": null,
    "pezeshki 1402_acc": null,
    "dampezeshki 1396_acc": null,
    "ergonomi 1403_acc": null,
    "darusazi 1403_acc": null,
    "dampezeshki 1401_acc": null,
    "ergonomi 1402_acc": null,
    "pezeshki 1403_acc": null,
    "darusazi 1400_acc": null,
    "dandoon 1400_acc": null,
    "darusazi 1402_acc": null,
    "pezeshki 1401_acc": null,
    "dampezeshki 1402_acc": null,
    "bastan_404_acc": null,
    "cinema_404_acc": null,
    "cinema_402_acc": null,
    "eghtesad_404_acc": null,
    "ejtemai_404_acc": null,
    "elahiat_404_acc": null,
    "falsafe_404_acc": null,
    "falsafe_402_acc": null,
    "farsi_404_acc": null,
    "farsi_402_acc": null,
    "hoghoogh_404_acc": null,
    "hoghoogh_402_acc": null,
    "modiriat_404_acc": null,
    "ravan_404_acc": null,
    "sanaye_404_acc": null,
    "siasi_404_acc": null,
    "shimi 1402_acc": null,
    "cs 1403_acc": null,
    "bargh 1402_acc": null,
    "industrial 1402_acc": null,
    "metalogy 1404_acc": null,
    "industrial 1403_acc": null,
    "naft 1403_acc": null,
    "omran 1402_acc": null,
    "mechanic 1402_acc": null,
    "shimi 1403_acc": null,
    "cs 1402_acc": null,
    "shimi 1404_acc": null,
    "metalogy 1402_acc": null,
    "naft 1402_acc": null,
    "mechanic 1404_acc": null,
    "riazi 1404_acc": null,
    "cs 1404_acc": null,
    "mechanic 1403_acc": null,
    "ce 1404_acc": null,
    "naft 1404_acc": null,
    "riazi 1402_acc": null,
    "bargh 1403_acc": null,
    "industrial 1404_acc": null,
    "omran 1403_acc": null,
    "metalogy 1403_acc": null,
    "bargh 1404_acc": null,
    "riazi 1403_acc": null,
    "ce 1403_acc": null,
    "ce 1402_acc": null,
    "omran 1404_acc": null,
    "GPK_acc": null,
    "SPK_acc": null,
    "UPK_acc": null
}
leaderboard/template_jsons/boolq.json
ADDED
@@ -0,0 +1 @@
{"boolq": null}
leaderboard/template_jsons/hamrah_mt_bench.json
ADDED
@@ -0,0 +1,8 @@
{
    "s1": null,
    "s2": null,
    "s3": null,
    "s4": null,
    "score_w_mean": null,
    "score_mean": null
}
leaderboard/template_jsons/ifeval.json
ADDED
@@ -0,0 +1,22 @@
{
    "strict_prompt_accuracy": null,
    "strict_instruction_accuracy": null,
    "loose_prompt_accuracy": null,
    "loose_instruction_accuracy": null,
    "strict_combination_category": null,
    "strict_detectable_content_category": null,
    "strict_detectable_format_category": null,
    "strict_keywords_category": null,
    "strict_language_category": null,
    "strict_length_constraints_category": null,
    "strict_punctuation_category": null,
    "strict_startend_category": null,
    "loose_combination_category": null,
    "loose_detectable_content_category": null,
    "loose_detectable_format_category": null,
    "loose_keywords_category": null,
    "loose_language_category": null,
    "loose_length_constraints_category": null,
    "loose_punctuation_category": null,
    "loose_startend_category": null
}
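The four headline metrics in this template correspond to IFEval's standard prompt-level and instruction-level accuracies under strict and loose verification. A sketch of how those aggregates relate, assuming the standard IFEval definitions (this repo's evaluation code is not part of this commit, and the per-prompt booleans below are toy data):

```python
# Each prompt carries a pass/fail flag per instruction, under strict and
# loose checking (toy data, not real evaluation output).
prompts = [
    {"strict": [True, True], "loose": [True, True]},
    {"strict": [True, False], "loose": [True, True]},
]

def prompt_accuracy(results, mode):
    # A prompt counts only if every one of its instructions passes.
    return sum(all(p[mode]) for p in results) / len(results)

def instruction_accuracy(results, mode):
    # Each instruction counts individually.
    flags = [f for p in results for f in p[mode]]
    return sum(flags) / len(flags)

print(prompt_accuracy(prompts, "strict"))       # 0.5
print(instruction_accuracy(prompts, "strict"))  # 0.75
```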
leaderboard/template_jsons/ifeval_full.json
ADDED
@@ -0,0 +1,66 @@
1 |
+
{
|
2 |
+
"strict_prompt_accuracy": null,
|
3 |
+
"strict_instruction_accuracy": null,
|
4 |
+
"loose_prompt_accuracy": null,
|
5 |
+
"loose_instruction_accuracy": null,
|
6 |
+
"strict_combination_category": null,
|
7 |
+
"strict_detectable_content_category": null,
|
8 |
+
"strict_detectable_format_category": null,
|
9 |
+
"strict_keywords_category": null,
|
10 |
+
"strict_language_category": null,
|
11 |
+
"strict_length_constraints_category": null,
|
12 |
+
"strict_punctuation_category": null,
|
13 |
+
"strict_startend_category": null,
|
14 |
+
"strict_combination:repeat_prompt_instruction": null,
|
15 |
+
"strict_combination:two_responses_instruction": null,
|
16 |
+
"strict_detectable_content:number_placeholders_instruction": null,
|
17 |
+
"strict_detectable_content:postscript_instruction": null,
|
18 |
+
"strict_detectable_format:constrained_response_instruction": null,
|
19 |
+
"strict_detectable_format:json_format_instruction": null,
|
20 |
+
"strict_detectable_format:multiple_sections_instruction": null,
|
21 |
+
"strict_detectable_format:number_bullet_lists_instruction": null,
|
22 |
+
"strict_detectable_format:number_highlighted_sections_instruction": null,
|
23 |
+
"strict_detectable_format:title_instruction": null,
|
24 |
+
"strict_keywords:existence_instruction": null,
|
25 |
+
"strict_keywords:forbidden_words_instruction": null,
|
26 |
+
"strict_keywords:frequency_instruction": null,
|
27 |
+
"strict_keywords:letter_frequency_instruction": null,
|
28 |
+
"strict_language:response_language_instruction": null,
|
29 |
+
"strict_length_constraints:nth_paragraph_first_word_instruction": null,
|
30 |
+
"strict_length_constraints:number_paragraphs_instruction": null,
|
31 |
+
"strict_length_constraints:number_sentences_instruction": null,
|
32 |
+
"strict_length_constraints:number_words_instruction": null,
|
33 |
+
"strict_punctuation:no_comma_instruction": null,
|
34 |
+
"strict_startend:end_checker_instruction": null,
|
35 |
+
"strict_startend:quotation_instruction": null,
|
36 |
+
"loose_combination_category": null,
|
37 |
+
"loose_detectable_content_category": null,
|
38 |
+
"loose_detectable_format_category": null,
|
39 |
+
"loose_keywords_category": null,
|
40 |
+
"loose_language_category": null,
|
41 |
+
"loose_length_constraints_category": null,
|
42 |
+
"loose_punctuation_category": null,
|
43 |
+
"loose_startend_category": null,
|
44 |
+
"loose_combination:repeat_prompt_instruction": null,
|
45 |
+
"loose_combination:two_responses_instruction": null,
|
46 |
+
"loose_detectable_content:number_placeholders_instruction": null,
|
47 |
+
"loose_detectable_content:postscript_instruction": null,
|
48 |
+
"loose_detectable_format:constrained_response_instruction": null,
|
49 |
+
"loose_detectable_format:json_format_instruction": null,
|
50 |
+
"loose_detectable_format:multiple_sections_instruction": null,
|
51 |
+
"loose_detectable_format:number_bullet_lists_instruction": null,
|
52 |
+
"loose_detectable_format:number_highlighted_sections_instruction": null,
|
53 |
+
"loose_detectable_format:title_instruction": null,
|
54 |
+
"loose_keywords:existence_instruction": null,
|
55 |
+
"loose_keywords:forbidden_words_instruction": null,
|
56 |
+
"loose_keywords:frequency_instruction": null,
|
57 |
+
"loose_keywords:letter_frequency_instruction": null,
|
58 |
+
"loose_language:response_language_instruction": null,
|
59 |
+
"loose_length_constraints:nth_paragraph_first_word_instruction": null,
|
60 |
+
"loose_length_constraints:number_paragraphs_instruction": null,
|
61 |
+
"loose_length_constraints:number_sentences_instruction": null,
|
62 |
+
"loose_length_constraints:number_words_instruction": null,
|
63 |
+
"loose_punctuation:no_comma_instruction": null,
|
64 |
+
"loose_startend:end_checker_instruction": null,
|
65 |
+
"loose_startend:quotation_instruction": null
|
66 |
+
}
|
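These `template_jsons` files double as result schemas: a model's score file for a task is expected to carry exactly the template's keys, with the `null`s replaced by numbers. A minimal validation sketch, assuming plain `json.load` is enough (it is for this template, which is strict JSON); the helper name and the partial result dict are illustrative, not part of the Space's actual refresh logic:

```python
import json

def missing_template_keys(template_path: str, results: dict) -> list[str]:
    """Return the template keys that a results dict has not filled in."""
    with open(template_path, encoding="utf-8") as f:
        template = json.load(f)  # fine here: ifeval_full.json is valid JSON
    return [key for key in template if key not in results]

# Illustrative: a result file that only reported the top-level accuracies
partial = {"strict_prompt_accuracy": 0.71, "loose_prompt_accuracy": 0.78}
print(len(missing_template_keys(
    "leaderboard/template_jsons/ifeval_full.json", partial)))  # -> 64
```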
leaderboard/template_jsons/mt_bench.json
ADDED
@@ -0,0 +1,24 @@
+{
+    "score_w_mean": null,
+    "score_mean": null,
+    "writing_score_w_mean": null,
+    "writing_score_mean": null,
+    "roleplay_score_w_mean": null,
+    "roleplay_score_mean": null,
+    "reasoning_score_w_mean": null,
+    "reasoning_score_mean": null,
+    "math_score_w_mean": null,
+    "math_score_mean": null,
+    "coding_score_w_mean": null,
+    "coding_score_mean": null,
+    "extraction_score_w_mean": null,
+    "extraction_score_mean": null,
+    "stem_score_w_mean": null,
+    "stem_score_mean": null,
+    "humanities_score_w_mean": null,
+    "humanities_score_mean": null,
+    "persian_general_knowledge_score_w_mean": null,
+    "persian_general_knowledge_score_mean": null,
+    "chatbot_rag_score_w_mean": null,
+    "chatbot_rag_score_mean": null
+}
leaderboard/template_jsons/mt_bench_full.json
ADDED
@@ -0,0 +1,60 @@
+{
+    "s1": null,
+    "s2": null,
+    "s3": null,
+    "s4": null,
+    "score_w_mean": null,
+    "score_mean": null,
+    // "writing_s1": null,
+    // "writing_s2": null,
+    // "writing_s3": null,
+    // "writing_s4": null,
+    "writing_score_w_mean": null,
+    "writing_score_mean": null,
+    // "roleplay_s1": null,
+    // "roleplay_s2": null,
+    // "roleplay_s3": null,
+    // "roleplay_s4": null,
+    "roleplay_score_w_mean": null,
+    "roleplay_score_mean": null,
+    // "reasoning_s1": null,
+    // "reasoning_s2": null,
+    // "reasoning_s3": null,
+    // "reasoning_s4": null,
+    "reasoning_score_w_mean": null,
+    "reasoning_score_mean": null,
+    // "math_s1": null,
+    // "math_s2": null,
+    // "math_s3": null,
+    // "math_s4": null,
+    "math_score_w_mean": null,
+    "math_score_mean": null,
+    // "coding_s1": null,
+    // "coding_s2": null,
+    "coding_score_w_mean": null,
+    "coding_score_mean": null,
+    // "extraction_s1": null,
+    // "extraction_s2": null,
+    "extraction_score_w_mean": null,
+    "extraction_score_mean": null,
+    // "stem_s1": null,
+    // "stem_s2": null,
+    "stem_score_w_mean": null,
+    "stem_score_mean": null,
+    // "humanities_s1": null,
+    // "humanities_s2": null,
+    "humanities_score_w_mean": null,
+    "humanities_score_mean": null,
+    // "persian_general_knowledge_s1": null,
+    // "persian_general_knowledge_s2": null,
+    // "persian_general_knowledge_s3": null,
+    // "persian_general_knowledge_s4": null,
+    "persian_general_knowledge_score_w_mean": null,
+    "persian_general_knowledge_score_mean": null,
+    // "chatbot_rag_s1": null,
+    // "chatbot_rag_s2": null,
+    // "chatbot_rag_s3": null,
+    // "chatbot_rag_s4": null,
+    "chatbot_rag_score_w_mean": null,
+    "chatbot_rag_score_mean": null
+}
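Note that `mt_bench_full.json` keeps its per-turn keys (`writing_s1` through `chatbot_rag_s4`) as `//` line comments, and `persian_csr_full.json` below does the same with its discourse-connective keys. That makes both files invalid for strict parsers: `json.load` raises `JSONDecodeError` on them. A tolerant-loader sketch, assuming (as holds for these two files) that comments only ever occupy whole lines:

```python
import json
import re

def load_template(path: str) -> dict:
    """Parse template JSON whose only spec deviations are whole-line
    // comments and the trailing commas left once those lines are gone."""
    with open(path, encoding="utf-8") as f:
        text = "".join(line for line in f if not line.lstrip().startswith("//"))
    # persian_csr_full.json ends with commented-out keys, so stripping them
    # leaves a dangling comma before the closing brace; drop it. Safe for
    # these templates (no string value contains ",}" or ",]"), but this is
    # not a general JSON5 parser.
    text = re.sub(r",(\s*[}\]])", r"\1", text)
    return json.loads(text)

print(sorted(load_template("leaderboard/template_jsons/mt_bench_full.json")))
```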
leaderboard/template_jsons/persian_csr.json
ADDED
@@ -0,0 +1,47 @@
+{
+    "acc": null,
+    "acc_strict": null,
+    "donyaeeqtesad_acc": null,
+    "isna_acc": null,
+    "ninisite_article_acc": null,
+    "virgool_4_acc": null,
+    "khabaronline_acc": null,
+    "digiato_acc": null,
+    "doctoreto_acc": null,
+    "sarzamindownload_acc": null,
+    "hamgardi_acc": null,
+    "bigbangpage_acc": null,
+    "wiki_ahlolbait_acc": null,
+    "virgool_3_acc": null,
+    "virgool_2_acc": null,
+    "virgool_1_acc": null,
+    "hamshahrionline_acc": null,
+    "tabnak_acc": null,
+    "alibaba_acc": null,
+    "digikala_mag_acc": null,
+    "yjc_acc": null,
+    "beytoote_acc": null,
+    "asriran_acc": null,
+    "ecoiran_acc": null,
+    "hawzah_acc": null,
+    "zoomit_acc": null,
+    "wikipedia_acc": null,
+    "namnak_acc": null,
+    "khodro45_acc": null,
+    "fidibo_acc": null,
+    "newmiind_acc": null,
+    "taaghche_acc": null,
+    "motamem_acc": null,
+    "varzesh3_acc": null,
+    "mehrnews_acc": null,
+    "tasnim_acc": null,
+    "magerta_acc": null,
+    "radiokodak_book_acc": null,
+    "vipofilm_acc": null,
+    "wikishia_acc": null,
+    "voolak_acc": null,
+    "farsroid_acc": null,
+    "parsiday_acc": null,
+    "soft98_acc": null,
+    "ninisite_discussion_acc": null
+}
leaderboard/template_jsons/persian_csr_full.json
ADDED
@@ -0,0 +1,117 @@
+{
+    "acc": null,
+    "acc_strict": null,
+    // "اما_acc": null,
+    // "از طرفی_acc": null,
+    // "چون_acc": null,
+    // "چراکه_acc": null,
+    // "به عنوان مثال_acc": null,
+    // "همچنین_acc": null,
+    // "سپس_acc": null,
+    // "تا_acc": null,
+    // "زیرا_acc": null,
+    // "بنابراین_acc": null,
+    // "چرا که_acc": null,
+    // "نیز_acc": null,
+    // "اگرچه_acc": null,
+    // "مثلا_acc": null,
+    // "همچنین_acc": null,
+    // "همانطور که_acc": null,
+    // "برای مثال_acc": null,
+    // "با این وجود_acc": null,
+    // "یعنی_acc": null,
+    // "البته_acc": null,
+    // "کاش_acc": null,
+    // "ولی_acc": null,
+    // "لذا_acc": null,
+    // "علاوه بر آن_acc": null,
+    // "علاوه بر این_acc": null,
+    // "به همین دلیل_acc": null,
+    // "در حالی که_acc": null,
+    // "با این حال_acc": null,
+    // "حتی_acc": null,
+    // "از سوی دیگر_acc": null,
+    // "از طرف دیگر_acc": null,
+    // "که_acc": null,
+    // "به طور مثال_acc": null,
+    // "بلکه_acc": null,
+    // "برای نمونه_acc": null,
+    // "به همین علت_acc": null,
+    // "درحالیکه_acc": null,
+    // "ناگهان_acc": null,
+    // "به عنوان نمونه_acc": null,
+    // "در حالیکه_acc": null,
+    // "با این همه_acc": null,
+    "donyaeeqtesad_acc": null,
+    "isna_acc": null,
+    "ninisite_article_acc": null,
+    "virgool_4_acc": null,
+    "khabaronline_acc": null,
+    "digiato_acc": null,
+    "doctoreto_acc": null,
+    "sarzamindownload_acc": null,
+    "hamgardi_acc": null,
+    "bigbangpage_acc": null,
+    "wiki_ahlolbait_acc": null,
+    "virgool_3_acc": null,
+    "virgool_2_acc": null,
+    "virgool_1_acc": null,
+    "hamshahrionline_acc": null,
+    "tabnak_acc": null,
+    "alibaba_acc": null,
+    "digikala_mag_acc": null,
+    "yjc_acc": null,
+    "beytoote_acc": null,
+    "asriran_acc": null,
+    "ecoiran_acc": null,
+    "hawzah_acc": null,
+    "zoomit_acc": null,
+    "wikipedia_acc": null,
+    "namnak_acc": null,
+    "khodro45_acc": null,
+    "fidibo_acc": null,
+    "newmiind_acc": null,
+    "taaghche_acc": null,
+    "motamem_acc": null,
+    "varzesh3_acc": null,
+    "mehrnews_acc": null,
+    "tasnim_acc": null,
+    "magerta_acc": null,
+    "radiokodak_book_acc": null,
+    "vipofilm_acc": null,
+    "wikishia_acc": null,
+    "voolak_acc": null,
+    "farsroid_acc": null,
+    "parsiday_acc": null,
+    "soft98_acc": null,
+    "ninisite_discussion_acc": null,
+    // "اما_acc_strict": null,
+    // "از طرفی_acc_strict": null,
+    // "چون_acc_strict": null,
+    // "چراکه_acc_strict": null,
+    // "به عنوان مثال_acc_strict": null,
+    // "همچنین_acc_strict": null,
+    // "سپس_acc_strict": null,
+    // "تا_acc_strict": null,
+    // "زیرا_acc_strict": null,
+    // "بنابراین_acc_strict": null,
+    // "چرا که_acc_strict": null,
+    // "نیز_acc_strict": null,
+    // "اگرچه_acc_strict": null,
+    // "مثلا_acc_strict": null,
+    // "همچنین_acc_strict": null,
+    // "همانطور که_acc_strict": null,
+    // "برای مثال_acc_strict": null,
+    // "با این وجود_acc_strict": null,
+    // "حتی_acc_strict": null,
+    // "از سوی دیگر_acc_strict": null,
+    // "از طرف دیگر_acc_strict": null,
+    // "که_acc_strict": null,
+    // "به طور مثال_acc_strict": null,
+    // "بلکه_acc_strict": null,
+    // "برای نمونه_acc_strict": null,
+    // "به همین علت_acc_strict": null,
+    // "درحالیکه_acc_strict": null,
+    // "ناگهان_acc_strict": null,
+    // "با این همه_acc_strict": null
+}
leaderboard/template_jsons/persian_nlg.json
ADDED
@@ -0,0 +1,48 @@
+{
+    "question-generation_PersianQA": {
+        "question-generation_PersianQA_rougeL_precision": null,
+        "question-generation_PersianQA_rougeL_recall": null,
+        "question-generation_PersianQA_rougeL_f1_score": null
+    },
+    "translation-en2fa_en2fa": {
+        "translation-en2fa_en2fa_bleu": null,
+        "translation-en2fa_en2fa_epoque_bleu": null,
+        "translation-en2fa_en2fa_mizan_bleu": null,
+        "translation-en2fa_en2fa_quran_bleu": null,
+        "translation-en2fa_en2fa_sahife_bleu": null,
+        "translation-en2fa_en2fa_nahj_bleu": null,
+        "translation-en2fa_en2fa_tep_bleu": null
+    },
+    "summarization_SamSUM-fa": {
+        "summarization_SamSUM-fa_rougeL_precision": null,
+        "summarization_SamSUM-fa_rougeL_recall": null,
+        "summarization_SamSUM-fa_rougeL_f1_score": null
+    },
+    "translation-fa2en_fa2en": {
+        "translation-fa2en_fa2en_bleu": null,
+        "translation-fa2en_fa2en_tep_bleu": null,
+        "translation-fa2en_fa2en_mizan_bleu": null,
+        "translation-fa2en_fa2en_quran_bleu": null,
+        "translation-fa2en_fa2en_epoque_bleu": null,
+        "translation-fa2en_fa2en_nahj_bleu": null,
+        "translation-fa2en_fa2en_sahife_bleu": null
+    },
+    "translation-ar2fa_ar2fa": {
+        "translation-ar2fa_ar2fa_bleu": null,
+        "translation-ar2fa_ar2fa_sahife_bleu": null,
+        "translation-ar2fa_ar2fa_nahj_bleu": null,
+        "translation-ar2fa_ar2fa_quran_bleu": null
+    },
+    "summarization_PnSummary": {
+        "summarization_PnSummary_rougeL_precision": null,
+        "summarization_PnSummary_rougeL_recall": null,
+        "summarization_PnSummary_rougeL_f1_score": null
+    },
+    "translation-fa2ar_fa2ar": {
+        "translation-fa2ar_fa2ar_bleu": null,
+        "translation-fa2ar_fa2ar_nahj_bleu": null,
+        "translation-fa2ar_fa2ar_sahife_bleu": null,
+        "translation-fa2ar_fa2ar_quran_bleu": null
+    },
+    "nlg_score": null
+}
leaderboard/template_jsons/persian_nlu.json
ADDED
@@ -0,0 +1,93 @@
+{
+    "sentiment-analysis_deepsentipers": {
+        "sentiment-analysis_deepsentipers_acc_modified": null,
+        "sentiment-analysis_deepsentipers_precision_modified": null,
+        "sentiment-analysis_deepsentipers_recall_modified": null,
+        "sentiment-analysis_deepsentipers_fscore_modified": null,
+        "sentiment-analysis_deepsentipers_acc": null,
+        "sentiment-analysis_deepsentipers_precision": null,
+        "sentiment-analysis_deepsentipers_recall": null,
+        "sentiment-analysis_deepsentipers_fscore": null,
+        "sentiment-analysis_deepsentipers_valid_output_ratio": null
+    },
+    "sts_SynPerSTS": {
+        "sts_SynPerSTS_corrcoef_modified": null,
+        "sts_SynPerSTS_corrcoef": null,
+        "sts_SynPerSTS_valid_output_ratio": null
+    },
+    "ner_arman": {
+        "ner_arman_f1_mean": null,
+        "ner_arman_precision_mean": null,
+        "ner_arman_recall_mean": null
+    },
+    "keyword-extraction_SynKeywords": {
+        "keyword-extraction_SynKeywords_f1_mean": null,
+        "keyword-extraction_SynKeywords_precision_mean": null,
+        "keyword-extraction_SynKeywords_recall_mean": null
+    },
+    "tone-classification_SynTone": {
+        "tone-classification_SynTone_acc_modified": null,
+        "tone-classification_SynTone_precision_modified": null,
+        "tone-classification_SynTone_recall_modified": null,
+        "tone-classification_SynTone_fscore_modified": null,
+        "tone-classification_SynTone_acc": null,
+        "tone-classification_SynTone_precision": null,
+        "tone-classification_SynTone_recall": null,
+        "tone-classification_SynTone_fscore": null,
+        "tone-classification_SynTone_valid_output_ratio": null
+    },
+    "sts_FarSICK": {
+        "sts_FarSICK_corrcoef_modified": null,
+        "sts_FarSICK_corrcoef": null,
+        "sts_FarSICK_valid_output_ratio": null
+    },
+    "paraphrase-detection_FarsiParaphraseDetection": {
+        "paraphrase-detection_FarsiParaphraseDetection_acc_modified": null,
+        "paraphrase-detection_FarsiParaphraseDetection_precision_modified": null,
+        "paraphrase-detection_FarsiParaphraseDetection_recall_modified": null,
+        "paraphrase-detection_FarsiParaphraseDetection_fscore_modified": null,
+        "paraphrase-detection_FarsiParaphraseDetection_acc": null,
+        "paraphrase-detection_FarsiParaphraseDetection_precision": null,
+        "paraphrase-detection_FarsiParaphraseDetection_recall": null,
+        "paraphrase-detection_FarsiParaphraseDetection_fscore": null,
+        "paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio": null
+    },
+    "nli_farstail": {
+        "nli_farstail_acc_modified": null,
+        "nli_farstail_precision_modified": null,
+        "nli_farstail_recall_modified": null,
+        "nli_farstail_fscore_modified": null,
+        "nli_farstail_acc": null,
+        "nli_farstail_precision": null,
+        "nli_farstail_recall": null,
+        "nli_farstail_fscore": null,
+        "nli_farstail_valid_output_ratio": null
+    },
+    "paraphrase-detection_parsinlu": {
+        "paraphrase-detection_parsinlu_acc_modified": null,
+        "paraphrase-detection_parsinlu_precision_modified": null,
+        "paraphrase-detection_parsinlu_recall_modified": null,
+        "paraphrase-detection_parsinlu_fscore_modified": null,
+        "paraphrase-detection_parsinlu_acc": null,
+        "paraphrase-detection_parsinlu_precision": null,
+        "paraphrase-detection_parsinlu_recall": null,
+        "paraphrase-detection_parsinlu_fscore": null,
+        "paraphrase-detection_parsinlu_valid_output_ratio": null
+    },
+    "extractive-qa_PQuAD": {
+        "extractive-qa_PQuAD_exact_match": null,
+        "extractive-qa_PQuAD_f1": null
+    },
+    "topic-classification_sid": {
+        "topic-classification_sid_acc_modified": null,
+        "topic-classification_sid_precision_modified": null,
+        "topic-classification_sid_recall_modified": null,
+        "topic-classification_sid_fscore_modified": null,
+        "topic-classification_sid_acc": null,
+        "topic-classification_sid_precision": null,
+        "topic-classification_sid_recall": null,
+        "topic-classification_sid_fscore": null,
+        "topic-classification_sid_valid_output_ratio": null
+    },
+    "nlu_score": null
+}
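Unlike the flat templates above, the NLU and NLG templates nest per-metric keys under task names and add a single top-level aggregate (`nlu_score` / `nlg_score`). A sketch of flattening one of them into leaderboard-style column names; purely illustrative, since the Space's real column mapping lives in its leaderboard code:

```python
import json

def flatten_template(template: dict) -> list[str]:
    """Flatten one level of nesting: task blocks contribute their metric
    keys (which already embed the task name), scalar entries stay as-is."""
    columns: list[str] = []
    for key, value in template.items():
        if isinstance(value, dict):
            columns.extend(value)   # e.g. "nli_farstail_acc", ...
        else:
            columns.append(key)     # e.g. "nlu_score"
    return columns

with open("leaderboard/template_jsons/persian_nlu.json", encoding="utf-8") as f:
    cols = flatten_template(json.load(f))
print(len(cols), cols[-1])  # 69 columns, ending with 'nlu_score'
```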
leaderboard/template_jsons/piqa.json
ADDED
@@ -0,0 +1 @@
+{"piqa": null}
submission.py
ADDED
@@ -0,0 +1,209 @@
+import gradio as gr
+import csv
+import os
+from datetime import datetime
+from pathlib import Path
+import pandas as pd
+import io  # To handle string as a file-like object for pandas
+import logging
+
+from huggingface_hub import HfApi, HfFolder, hf_hub_download
+from huggingface_hub.utils import HfHubHTTPError, EntryNotFoundError  # For specific error handling
+
+# --- Logging Setup ---
+# (Add this if not already present, or integrate with a central logging config)
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s - %(levelname)s - %(module)s - %(message)s"
+)
+logger = logging.getLogger(__name__)
+
+# --- Hugging Face Hub Configuration ---
+# IMPORTANT: Replace with your actual repository details
+TARGET_REPO_ID = "MCINext/submitted-models"  # e.g., "MehranS/MIZAN_submissions"
+TARGET_REPO_TYPE = "dataset"  # Recommended type for storing data
+FILENAME_IN_REPO = "model_submissions.csv"  # The name of the CSV file within the Hub repository
+
+# Define the header for your CSV file. This must be consistent.
+CSV_HEADER = [
+    'timestamp', 'model_name', 'base_model', 'revision',
+    'precision', 'weight_type', 'model_type', 'status', 'submission_type'
+]
+
+def get_hf_token() -> str | None:
+    """Retrieves the Hugging Face token from environment variables or HfFolder."""
+    token = os.environ.get("HF_TOKEN")  # Standard for Spaces secrets
+    if not token:
+        try:
+            token = HfFolder.get_token()  # Fallback for local development after CLI login
+        except Exception:
+            logger.warning("Hugging Face token not found in HfFolder and HF_TOKEN env var is not set.")
+            token = None
+    return token
+
+def add_new_eval_hf_to_hub(model_name_hf_id: str, revision_hf: str) -> gr.Markdown:
+    """
+    Handles new Hugging Face model evaluation requests by saving them to a CSV file
+    in a specified Hugging Face Hub repository.
+    """
+    if not model_name_hf_id:
+        return gr.Markdown("⚠️ **Model Name (Hugging Face ID) is required.** Please enter a valid Hugging Face model ID.")
+
+    token = get_hf_token()
+    if not token:
+        error_html = "<div style='color:red; padding:10px; border:1px solid red; border-radius:5px;'>⚠️ **Configuration Error:** Hugging Face Token not found. Cannot save submission to the Hub. Please ensure the `HF_TOKEN` Space secret is set with write permissions to the target repository.</div>"
+        return gr.Markdown(error_html)
+
+    api = HfApi(token=token)
+    timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+
+    submission_data = {
+        'timestamp': timestamp,
+        'model_name': model_name_hf_id.strip(),
+        'base_model': 'N/A',  # As per the simple form's design
+        'revision': revision_hf.strip() if revision_hf else 'main',
+        'precision': 'To be fetched/determined',
+        'weight_type': 'To be fetched/determined',
+        'model_type': 'To be fetched/determined',
+        'status': 'pending_hub_submission',  # New status indicating it's for Hub processing
+        'submission_type': 'huggingface_simple_form_to_hub'  # New type
+    }
+
+    try:
+        # 1. Attempt to download the existing CSV from the Hub
+        try:
+            local_download_path = hf_hub_download(
+                repo_id=TARGET_REPO_ID,
+                filename=FILENAME_IN_REPO,
+                repo_type=TARGET_REPO_TYPE,
+                token=token,
+                # force_download=True,  # Consider this if caching becomes an issue
+            )
+            # Read the downloaded CSV into a pandas DataFrame
+            df = pd.read_csv(local_download_path)
+            # Ensure columns match CSV_HEADER, add missing ones with NA if necessary
+            for col in CSV_HEADER:
+                if col not in df.columns:
+                    df[col] = pd.NA
+            df = df[CSV_HEADER]  # Reorder/select columns to match header
+            file_exists_on_hub = True
+            logger.info(f"Successfully downloaded existing '{FILENAME_IN_REPO}' from '{TARGET_REPO_ID}'.")
+        except EntryNotFoundError:
+            logger.info(f"'{FILENAME_IN_REPO}' not found in '{TARGET_REPO_ID}'. A new file will be created.")
+            df = pd.DataFrame(columns=CSV_HEADER)  # Create an empty DataFrame with the correct headers
+            file_exists_on_hub = False
+        except HfHubHTTPError as e:
+            # Note: HfHubHTTPError exposes no status_code attribute of its own;
+            # log the exception itself rather than non-existent fields.
+            logger.error(f"HTTP error downloading '{FILENAME_IN_REPO}' from '{TARGET_REPO_ID}': {e}")
+            error_html = f"<div style='color:red; padding:10px; border:1px solid red; border-radius:5px;'>⚠️ **Hub Error:** Could not access the repository '{TARGET_REPO_ID}' ({e}). Please check token permissions and repository ID.</div>"
+            return gr.Markdown(error_html)
+
+        # 2. Append the new submission data
+        new_row_df = pd.DataFrame([submission_data])
+        df = pd.concat([df, new_row_df], ignore_index=True)
+
+        # 3. Convert the DataFrame back to CSV in-memory
+        csv_buffer = io.StringIO()
+        df.to_csv(csv_buffer, index=False, header=True)  # Always include header
+        csv_content_bytes = csv_buffer.getvalue().encode('utf-8')
+        csv_buffer.close()
+
+        # 4. Upload the updated CSV content to the Hub
+        commit_message = f"Add submission: {submission_data['model_name']} (rev: {submission_data['revision']})"
+        if not file_exists_on_hub:
+            commit_message = f"Create '{FILENAME_IN_REPO}' and add first submission: {submission_data['model_name']}"
+
+        api.upload_file(
+            path_or_fileobj=csv_content_bytes,  # Pass the bytes directly
+            path_in_repo=FILENAME_IN_REPO,
+            repo_id=TARGET_REPO_ID,
+            repo_type=TARGET_REPO_TYPE,
+            commit_message=commit_message
+        )
+
+        logger.info(f"Submission for '{submission_data['model_name']}' pushed to '{TARGET_REPO_ID}/{FILENAME_IN_REPO}'.")
+        success_message_html = f"""
+        <div style='color:green; padding:10px; border:1px solid green; border-radius:5px;'>
+        ✅ Request for Hugging Face model '<strong>{submission_data['model_name']}</strong>' (Revision: {submission_data['revision']}) has been successfully submitted to the central repository on Hugging Face Hub!
+        </div>
+        """
+        return gr.Markdown(success_message_html)
+
+    except Exception as e:
+        logger.error(f"An unexpected error occurred while processing submission to Hugging Face Hub: {e}", exc_info=True)
+        error_html = f"<div style='color:red; padding:10px; border:1px solid red; border-radius:5px;'>⚠️ **System Error:** An unexpected error occurred: {e}. Please try again or contact support.</div>"
+        return gr.Markdown(error_html)
+
+
+def render_submit():
+    # Text for Introduction and Option 1 (Hugging Face Form)
+    intro_and_option1_guidance = """
+# Request Model Evaluation for MIZAN
+
+We're excited to evaluate new models for **MIZAN: A Persian LLM Leaderboard**!
+Please choose the submission path that best fits how your model can be accessed for evaluation.
+
+---
+
+### **Option 1: Your model is publicly available on Hugging Face Hub**
+
+If your model and its tokenizer can be loaded directly using their Hugging Face identifier (e.g., `username/model_name`), you can use the simplified form below to submit its key identifiers. Your submission will be added to our central tracking repository on the Hugging Face Hub. Our team will attempt to gather other necessary details from the Hub.
+"""
+
+    # Text for Option 2 (Email Submission)
+    option2_email_guidance = """
+---
+
+### **Option 2: Your model is NOT on Hugging Face, is private, or requires custom setup**
+
+If your model is hosted elsewhere, is private, requires specific access permissions, needs custom inference code, or involves a more complex setup for evaluation, please initiate your submission request via email.
+
+**To submit via email, please send comprehensive details to:**
+📧 **[email protected]**
+
+Our team will review your email and work with you to facilitate the evaluation process.
+"""
+
+    with gr.Blocks() as submit_tab_interface:
+        gr.Markdown(intro_and_option1_guidance)
+
+        with gr.Group():
+            gr.Markdown("### ✨ Form for Option 1: Submit a Hugging Face Model to the Hub")
+
+            model_name_textbox_hf = gr.Textbox(
+                label="Model Name (Hugging Face ID: e.g., username/model_name)",
+                placeholder="bigscience/bloom-560m"
+            )
+            revision_name_textbox_hf = gr.Textbox(
+                label="Revision/Commit (Optional, defaults to 'main' if left empty)",
+                placeholder="e.g., main, or a specific commit hash"
+            )
+
+            request_hf_button = gr.Button("🚀 Request Evaluation & Submit to Hub", variant="primary")
+
+            submission_result_hf_form = gr.Markdown()
+
+            request_hf_button.click(
+                fn=add_new_eval_hf_to_hub,  # Use the new function
+                inputs=[
+                    model_name_textbox_hf,
+                    revision_name_textbox_hf,
+                ],
+                outputs=submission_result_hf_form,
+            )
+
+        gr.Markdown(option2_email_guidance)
+
+    return submit_tab_interface
+
+# For direct testing of this file:
+if __name__ == '__main__':
+    # You would need to set TARGET_REPO_ID and have a valid HF_TOKEN env var or be logged in.
+    # Example: os.environ["HF_TOKEN"] = "your_hf_write_token"
+    # TARGET_REPO_ID = "your-user/your-test-dataset"  # Make sure this repo exists
+
+    if not TARGET_REPO_ID.startswith("YOUR_"):  # Basic check to prevent running with a placeholder
+        print(f"Testing submission to Hub. Target repo: {TARGET_REPO_ID}")
+        test_interface = render_submit()
+        test_interface.launch(debug=True)
+    else:
+        print("Please update TARGET_REPO_ID in submission.py before running this test.")
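One design note on `add_new_eval_hf_to_hub`: the download-append-upload cycle is not atomic, so two submissions landing at nearly the same moment could race and one row could be overwritten; for the expected traffic of a submission form this is usually an acceptable trade-off. On the consuming side, since every new row carries `status == 'pending_hub_submission'`, a downstream evaluation job only needs pandas to read the queue. A hypothetical consumer (not shipped in this Space; it reuses the repo and filename constants above and assumes the caller is authenticated, e.g. via `HF_TOKEN`):

```python
import pandas as pd
from huggingface_hub import hf_hub_download

def list_pending_submissions() -> pd.DataFrame:
    """Download the shared CSV and return the rows still awaiting evaluation."""
    path = hf_hub_download(
        repo_id="MCINext/submitted-models",   # TARGET_REPO_ID above
        filename="model_submissions.csv",     # FILENAME_IN_REPO above
        repo_type="dataset",                  # TARGET_REPO_TYPE above
    )
    df = pd.read_csv(path)
    return df[df["status"] == "pending_hub_submission"]

if __name__ == "__main__":
    print(list_pending_submissions()[["timestamp", "model_name", "revision"]])
```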