mohalisad committed on
Commit
42e0116
·
verified ·
1 Parent(s): f1d84ae

Update leaderboard_data.jsonl

Browse files
Files changed (1) hide show
  1. leaderboard_data.jsonl +37 -39
leaderboard_data.jsonl CHANGED
@@ -1,40 +1,38 @@
1
- {"Model": "gpt-4o-2024-08-06", "#Params (B)": "unknown", "Precision": "unknown", "model_name_for_query": null, "GeneralKnowledge": 90.82, "GSM8K": 73.1, "DC-Homograph": 87.04, "MC-Homograph": 95.62, "PiQA": 95.1, "Proverb-Quiz": 96.76, "VerbEval": 85.89, "Winogrande": 86.18, "Arc-Challenge": 95.09, "Arc-Easy": 97.22, "Feqh": 46.86, "Hallucination (Truthfulness)": 74.64, "P-Hellaswag": 85.53, "Law": 47.67, "AUT Multiple Choice": 67.7, "Parsi Literature": 45.95, "BoolQA": 94.1, "Reading Comprehension": 10.0, "PartExpert": 57.36, "MMLU Pro": 47.1, "Iranian Social Norms": 76.89, "Model sha": "unknown", "Hub License": "unknown"}
2
- {"Model": "gpt-4.1-2025-04-14", "#Params (B)": "unknown", "Precision": "unknown", "model_name_for_query": null, "GeneralKnowledge": 90.82, "GSM8K": 25.3, "DC-Homograph": 89.81, "MC-Homograph": 95.39, "PiQA": 95.9, "Proverb-Quiz": 95.14, "VerbEval": 83.04, "Winogrande": 85.92, "Arc-Challenge": 95.3, "Arc-Easy": 96.68, "Feqh": 52.0, "Hallucination (Truthfulness)": 77.43, "P-Hellaswag": 85.67, "Law": 53.67, "AUT Multiple Choice": 66.6, "Parsi Literature": 45.82, "BoolQA": 94.7, "Reading Comprehension": 3.6, "PartExpert": 59.92, "MMLU Pro": 50.5, "Iranian Social Norms": 77.56, "Model sha": "unknown", "Hub License": "unknown"}
3
- {"Model": "google__gemini-2.0-flash-001", "#Params (B)": "unknown", "Precision": "unknown", "model_name_for_query": null, "GeneralKnowledge": 87.76, "GSM8K": 53.7, "DC-Homograph": 79.63, "MC-Homograph": 91.71, "PiQA": 90.59, "Proverb-Quiz": 95.14, "VerbEval": 85.15, "Winogrande": 78.74, "Arc-Challenge": 91.35, "Arc-Easy": 97.22, "Feqh": 53.14, "Hallucination (Truthfulness)": 68.87, "P-Hellaswag": 82.95, "Law": 45.67, "AUT Multiple Choice": 60.9, "Parsi Literature": 44.02, "BoolQA": 91.3, "Reading Comprehension": 23.9, "PartExpert": 59.5, "MMLU Pro": 47.8, "Iranian Social Norms": 77.68, "Model sha": "unknown", "Hub License": "unknown"}
4
- {"Model": "deepseek-v3-03-24", "#Params (B)": "unknown", "Precision": "unknown", "model_name_for_query": null, "GeneralKnowledge": 85.71, "GSM8K": 53.1, "DC-Homograph": 83.33, "MC-Homograph": 94.24, "PiQA": 91.39, "Proverb-Quiz": 84.86, "VerbEval": 81.11, "Winogrande": 76.71, "Arc-Challenge": 92.31, "Arc-Easy": 96.58, "Feqh": 42.29, "Hallucination (Truthfulness)": 55.54, "P-Hellaswag": 85.3, "Law": 46.0, "AUT Multiple Choice": 65.6, "Parsi Literature": 44.66, "BoolQA": 95.3, "Reading Comprehension": 20.7, "PartExpert": 58.46, "MMLU Pro": 53.4, "Iranian Social Norms": 71.71, "Model sha": "unknown", "Hub License": "unknown"}
5
- {"Model": "gpt-4.1-mini-2025-04-14", "#Params (B)": "unknown", "Precision": "unknown", "model_name_for_query": null, "GeneralKnowledge": 79.34, "GSM8K": 60.3, "DC-Homograph": 66.67, "MC-Homograph": 94.24, "PiQA": 92.69, "Proverb-Quiz": 82.97, "VerbEval": 77.99, "Winogrande": 80.07, "Arc-Challenge": 91.88, "Arc-Easy": 96.15, "Feqh": 37.71, "Hallucination (Truthfulness)": 66.55, "P-Hellaswag": 84.57, "Law": 44.33, "AUT Multiple Choice": 53.5, "Parsi Literature": 41.18, "BoolQA": 93.7, "Reading Comprehension": 7.7, "PartExpert": 54.37, "MMLU Pro": 47.8, "Iranian Social Norms": 73.35, "Model sha": "unknown", "Hub License": "unknown"}
6
- {"Model": "gpt-4o-mini-2024-07-18", "#Params (B)": "unknown", "Precision": "unknown", "model_name_for_query": null, "GeneralKnowledge": 79.08, "GSM8K": 60.9, "DC-Homograph": 68.52, "MC-Homograph": 90.09, "PiQA": 90.89, "Proverb-Quiz": 84.05, "VerbEval": 74.23, "Winogrande": 75.73, "Arc-Challenge": 86.43, "Arc-Easy": 94.01, "Feqh": 41.71, "Hallucination (Truthfulness)": 82.04, "P-Hellaswag": 83.84, "Law": 34.0, "AUT Multiple Choice": 54.8, "Parsi Literature": 40.93, "BoolQA": 93.3, "Reading Comprehension": 20.5, "PartExpert": 42.54, "MMLU Pro": 34.8, "Iranian Social Norms": 71.59, "Model sha": "unknown", "Hub License": "unknown"}
7
- {"Model": "google__gemini-2.0-flash-lite-001", "#Params (B)": "unknown", "Precision": "unknown", "model_name_for_query": null, "GeneralKnowledge": 84.18, "GSM8K": 39.7, "DC-Homograph": 60.19, "MC-Homograph": 87.79, "PiQA": 85.29, "Proverb-Quiz": 91.35, "VerbEval": 81.39, "Winogrande": 75.64, "Arc-Challenge": 89.64, "Arc-Easy": 93.48, "Feqh": 41.71, "Hallucination (Truthfulness)": 67.32, "P-Hellaswag": 83.54, "Law": 43.0, "AUT Multiple Choice": 58.5, "Parsi Literature": 43.89, "BoolQA": 92.6, "Reading Comprehension": 26.7, "PartExpert": 54.15, "MMLU Pro": 41.2, "Iranian Social Norms": 70.49, "Model sha": "unknown", "Hub License": "unknown"}
8
- {"Model": "Qwen2.5-32B-Instruct", "#Params (B)": 32.76, "Precision": "BF16", "model_name_for_query": "Qwen/Qwen2.5-32B-Instruct", "GeneralKnowledge": 61.73, "GSM8K": 50.1, "DC-Homograph": 67.59, "MC-Homograph": 91.47, "PiQA": 83.98, "Proverb-Quiz": 63.24, "VerbEval": 54.58, "Winogrande": 80.07, "Arc-Challenge": 85.15, "Arc-Easy": 91.87, "Feqh": 38.86, "Hallucination (Truthfulness)": 59.22, "P-Hellaswag": 82.07, "Law": 42.33, "AUT Multiple Choice": 50.4, "Parsi Literature": 40.41, "BoolQA": 93.4, "Reading Comprehension": 7.4, "PartExpert": 46.78, "MMLU Pro": 37.4, "Iranian Social Norms": 70.0, "Model sha": "5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd", "Hub License": "apache-2.0"}
9
- {"Model": "gemma-3-27b-it", "#Params (B)": 27.43, "Precision": "BF16", "model_name_for_query": "google/gemma-3-27b-it", "GeneralKnowledge": 73.72, "GSM8K": 28.3, "DC-Homograph": 63.89, "MC-Homograph": 92.4, "PiQA": 87.29, "Proverb-Quiz": 78.92, "VerbEval": 66.02, "Winogrande": 78.12, "Arc-Challenge": 88.35, "Arc-Easy": 94.22, "Feqh": 24.57, "Hallucination (Truthfulness)": 60.15, "P-Hellaswag": 83.39, "Law": 36.33, "AUT Multiple Choice": 55.2, "Parsi Literature": 40.93, "BoolQA": 91.4, "Reading Comprehension": 1.2, "PartExpert": 49.32, "MMLU Pro": 36.6, "Iranian Social Norms": 70.49, "Model sha": "005ad3404e59d6023443cb575daa05336842228a", "Hub License": "gemma"}
10
- {"Model": "Qwen3-32B", "#Params (B)": 32.76, "Precision": "BF16", "model_name_for_query": "Qwen/Qwen3-32B", "GeneralKnowledge": 67.6, "GSM8K": 37.9, "DC-Homograph": 51.85, "MC-Homograph": 89.63, "PiQA": 87.69, "Proverb-Quiz": 64.59, "VerbEval": 56.35, "Winogrande": 71.48, "Arc-Challenge": 91.13, "Arc-Easy": 94.22, "Feqh": 29.71, "Hallucination (Truthfulness)": 47.5, "P-Hellaswag": 83.47, "Law": 37.0, "AUT Multiple Choice": 48.3, "Parsi Literature": 39.12, "BoolQA": 91.1, "Reading Comprehension": 22.0, "PartExpert": 50.06, "MMLU Pro": 42.8, "Iranian Social Norms": 73.48, "Model sha": "d47b0d4ae4b48fde975756bf360a63a9cca8d470", "Hub License": "apache-2.0"}
11
- {"Model": "QwQ-32B-Preview", "#Params (B)": 32.76, "Precision": "BF16", "model_name_for_query": "Qwen/QwQ-32B-Preview", "GeneralKnowledge": 63.27, "GSM8K": 34.7, "DC-Homograph": 61.11, "MC-Homograph": 88.25, "PiQA": 81.28, "Proverb-Quiz": 58.11, "VerbEval": 51.97, "Winogrande": 75.64, "Arc-Challenge": 85.58, "Arc-Easy": 91.44, "Feqh": 41.14, "Hallucination (Truthfulness)": 38.84, "P-Hellaswag": 84.13, "Law": 43.0, "AUT Multiple Choice": 50.6, "Parsi Literature": 39.77, "BoolQA": 88.5, "Reading Comprehension": 23.6, "PartExpert": 47.39, "MMLU Pro": 37.3, "Iranian Social Norms": 72.26, "Model sha": "91906fe41a48b6a89ce2970abfd1269eefee170e", "Hub License": "apache-2.0"}
12
- {"Model": "gemma-3-12b-it", "#Params (B)": 12.18, "Precision": "BF16", "model_name_for_query": "google/gemma-3-12b-it", "GeneralKnowledge": 68.37, "GSM8K": 20.2, "DC-Homograph": 67.59, "MC-Homograph": 91.24, "PiQA": 87.19, "Proverb-Quiz": 72.97, "VerbEval": 63.39, "Winogrande": 73.96, "Arc-Challenge": 83.33, "Arc-Easy": 93.26, "Feqh": 25.14, "Hallucination (Truthfulness)": 46.1, "P-Hellaswag": 83.17, "Law": 36.33, "AUT Multiple Choice": 49.0, "Parsi Literature": 40.03, "BoolQA": 87.6, "Reading Comprehension": 4.5, "PartExpert": 44.12, "MMLU Pro": 32.6, "Iranian Social Norms": 75.55, "Model sha": "96b6f1eccf38110c56df3a15bffe176da04bfd80", "Hub License": "gemma"}
13
- {"Model": "gemma-2-27b-it", "#Params (B)": 27.22, "Precision": "BF16", "model_name_for_query": "google/gemma-2-27b-it", "GeneralKnowledge": 68.11, "GSM8K": 26.7, "DC-Homograph": 60.19, "MC-Homograph": 91.24, "PiQA": 89.69, "Proverb-Quiz": 73.51, "VerbEval": 61.16, "Winogrande": 76.44, "Arc-Challenge": 86.75, "Arc-Easy": 94.22, "Feqh": 24.0, "Hallucination (Truthfulness)": 13.05, "P-Hellaswag": 83.69, "Law": 34.67, "AUT Multiple Choice": 50.8, "Parsi Literature": 35.91, "BoolQA": 89.8, "Reading Comprehension": 0.1, "PartExpert": 46.6, "MMLU Pro": 36.9, "Iranian Social Norms": 77.38, "Model sha": "aaf20e6b9f4c0fcf043f6fb2a2068419086d77b0", "Hub License": "gemma"}
14
- {"Model": "aya-expanse-32b", "#Params (B)": 32.29, "Precision": "F16", "model_name_for_query": "CohereLabs/aya-expanse-32b", "GeneralKnowledge": 73.72, "GSM8K": 17.5, "DC-Homograph": 62.96, "MC-Homograph": 87.56, "PiQA": 91.19, "Proverb-Quiz": 77.03, "VerbEval": 61.95, "Winogrande": 70.5, "Arc-Challenge": 85.15, "Arc-Easy": 93.37, "Feqh": 37.14, "Hallucination (Truthfulness)": 44.84, "P-Hellaswag": 81.7, "Law": 38.67, "AUT Multiple Choice": 54.7, "Parsi Literature": 34.75, "BoolQA": 89.7, "Reading Comprehension": 24.9, "PartExpert": 44.29, "MMLU Pro": 32.1, "Iranian Social Norms": 74.94, "Model sha": "94bda1dcb97d260f732d230b832c7c685ae91e23", "Hub License": "cc-by-nc-4.0"}
15
- {"Model": "QwQ-32B", "#Params (B)": 32.76, "Precision": "BF16", "model_name_for_query": "Qwen/QwQ-32B", "GeneralKnowledge": 60.71, "GSM8K": 29.3, "DC-Homograph": 58.33, "MC-Homograph": 88.25, "PiQA": 81.68, "Proverb-Quiz": 59.19, "VerbEval": 52.31, "Winogrande": 73.07, "Arc-Challenge": 84.94, "Arc-Easy": 90.8, "Feqh": 41.71, "Hallucination (Truthfulness)": 48.93, "P-Hellaswag": 82.22, "Law": 38.0, "AUT Multiple Choice": 49.3, "Parsi Literature": 37.71, "BoolQA": 88.5, "Reading Comprehension": 17.8, "PartExpert": 46.75, "MMLU Pro": 39.0, "Iranian Social Norms": 70.73, "Model sha": "976055f8c83f394f35dbd3ab09a285a984907bd0", "Hub License": "apache-2.0"}
16
- {"Model": "gpt-4.1-nano-2025-04-14", "#Params (B)": "unknown", "Precision": "unknown", "model_name_for_query": null, "GeneralKnowledge": 68.11, "GSM8K": 58.4, "DC-Homograph": 49.07, "MC-Homograph": 78.11, "PiQA": 84.58, "Proverb-Quiz": 67.84, "VerbEval": 66.21, "Winogrande": 60.32, "Arc-Challenge": 81.41, "Arc-Easy": 91.55, "Feqh": 32.0, "Hallucination (Truthfulness)": 51.24, "P-Hellaswag": 77.96, "Law": 32.67, "AUT Multiple Choice": 46.1, "Parsi Literature": 36.42, "BoolQA": 81.7, "Reading Comprehension": 6.3, "PartExpert": 42.49, "MMLU Pro": 29.9, "Iranian Social Norms": 74.76, "Model sha": "unknown", "Hub License": "unknown"}
17
- {"Model": "Qwen3-14B", "#Params (B)": 14.76, "Precision": "BF16", "model_name_for_query": "Qwen/Qwen3-14B", "GeneralKnowledge": 56.38, "GSM8K": 31.1, "DC-Homograph": 55.56, "MC-Homograph": 87.56, "PiQA": 77.18, "Proverb-Quiz": 53.78, "VerbEval": 54.36, "Winogrande": 67.32, "Arc-Challenge": 84.29, "Arc-Easy": 91.02, "Feqh": 29.14, "Hallucination (Truthfulness)": 44.54, "P-Hellaswag": 80.97, "Law": 34.67, "AUT Multiple Choice": 44.8, "Parsi Literature": 35.39, "BoolQA": 87.6, "Reading Comprehension": 24.4, "PartExpert": 43.22, "MMLU Pro": 35.5, "Iranian Social Norms": 74.51, "Model sha": "8268fe3026cb304910457689366670e803a6fd56", "Hub License": "apache-2.0"}
18
- {"Model": "gemma-2-9b-it", "#Params (B)": 9.24, "Precision": "BF16", "model_name_for_query": "google/gemma-2-9b-it", "GeneralKnowledge": 64.03, "GSM8K": 17.4, "DC-Homograph": 59.26, "MC-Homograph": 90.55, "PiQA": 87.09, "Proverb-Quiz": 69.19, "VerbEval": 58.25, "Winogrande": 72.01, "Arc-Challenge": 84.29, "Arc-Easy": 93.16, "Feqh": 29.71, "Hallucination (Truthfulness)": 50.58, "P-Hellaswag": 80.82, "Law": 33.67, "AUT Multiple Choice": 48.5, "Parsi Literature": 38.1, "BoolQA": 89.7, "Reading Comprehension": 0.1, "PartExpert": 43.03, "MMLU Pro": 33.2, "Iranian Social Norms": 73.84, "Model sha": "11c9b309abf73637e4b6f9a3fa1e92e615547819", "Hub License": "gemma"}
19
- {"Model": "Qwen3-30B-A3B", "#Params (B)": 30.53, "Precision": "BF16", "model_name_for_query": "Qwen/Qwen3-30B-A3B", "GeneralKnowledge": 65.05, "GSM8K": 28.8, "DC-Homograph": 57.41, "MC-Homograph": 86.41, "PiQA": 72.47, "Proverb-Quiz": 50.81, "VerbEval": 48.09, "Winogrande": 65.28, "Arc-Challenge": 87.39, "Arc-Easy": 93.58, "Feqh": 23.43, "Hallucination (Truthfulness)": 3.54, "P-Hellaswag": 83.1, "Law": 35.33, "AUT Multiple Choice": 48.0, "Parsi Literature": 36.55, "BoolQA": 86.2, "Reading Comprehension": 26.4, "PartExpert": 41.13, "MMLU Pro": 36.3, "Iranian Social Norms": 44.21, "Model sha": "ae659febe817e4b3ebd7355f47792725801204c9", "Hub License": "apache-2.0"}
20
- {"Model": "aya-23-35B", "#Params (B)": 34.98, "Precision": "F16", "model_name_for_query": "CohereLabs/aya-23-35B", "GeneralKnowledge": 63.27, "GSM8K": 10.0, "DC-Homograph": 55.56, "MC-Homograph": 83.64, "PiQA": 89.49, "Proverb-Quiz": 67.03, "VerbEval": 47.32, "Winogrande": 65.81, "Arc-Challenge": 77.56, "Arc-Easy": 90.16, "Feqh": 30.29, "Hallucination (Truthfulness)": 11.72, "P-Hellaswag": 79.87, "Law": 32.0, "AUT Multiple Choice": 48.7, "Parsi Literature": 31.92, "BoolQA": 86.2, "Reading Comprehension": 23.7, "PartExpert": 37.44, "MMLU Pro": 24.1, "Iranian Social Norms": 65.0, "Model sha": "5e72bd5ad83e5e1612ee7f56a0c1a439a7cfb887", "Hub License": "cc-by-nc-4.0"}
21
- {"Model": "Qwen3-8B", "#Params (B)": 8.19, "Precision": "BF16", "model_name_for_query": "Qwen/Qwen3-8B", "GeneralKnowledge": 49.23, "GSM8K": 25.7, "DC-Homograph": 50.93, "MC-Homograph": 82.95, "PiQA": 75.98, "Proverb-Quiz": 51.89, "VerbEval": 47.93, "Winogrande": 61.91, "Arc-Challenge": 80.24, "Arc-Easy": 87.38, "Feqh": 28.0, "Hallucination (Truthfulness)": 38.46, "P-Hellaswag": 80.38, "Law": 29.67, "AUT Multiple Choice": 46.0, "Parsi Literature": 33.2, "BoolQA": 86.4, "Reading Comprehension": 25.0, "PartExpert": 38.31, "MMLU Pro": 31.1, "Iranian Social Norms": 63.41, "Model sha": "9c925d64d72725edaf899c6cb9c377fd0709d9c5", "Hub License": "apache-2.0"}
22
- {"Model": "aya-expanse-8b", "#Params (B)": 8.02, "Precision": "F16", "model_name_for_query": "CohereLabs/aya-expanse-8b", "GeneralKnowledge": 58.67, "GSM8K": 9.8, "DC-Homograph": 51.85, "MC-Homograph": 80.65, "PiQA": 80.18, "Proverb-Quiz": 60.0, "VerbEval": 48.06, "Winogrande": 64.04, "Arc-Challenge": 71.47, "Arc-Easy": 84.6, "Feqh": 29.71, "Hallucination (Truthfulness)": 23.52, "P-Hellaswag": 76.49, "Law": 32.33, "AUT Multiple Choice": 45.8, "Parsi Literature": 34.49, "BoolQA": 82.3, "Reading Comprehension": 20.1, "PartExpert": 35.56, "MMLU Pro": 21.9, "Iranian Social Norms": 71.71, "Model sha": "0ad43ec1e309e1351faa4b1d22713c065e37359a", "Hub License": "cc-by-nc-4.0"}
23
- {"Model": "Hormoz-8B", "#Params (B)": 8.02, "Precision": "F32", "model_name_for_query": "mann-e/Hormoz-8B", "GeneralKnowledge": 58.42, "GSM8K": 10.0, "DC-Homograph": 50.93, "MC-Homograph": 80.65, "PiQA": 80.68, "Proverb-Quiz": 60.27, "VerbEval": 47.29, "Winogrande": 64.39, "Arc-Challenge": 70.41, "Arc-Easy": 84.28, "Feqh": 28.57, "Hallucination (Truthfulness)": 23.66, "P-Hellaswag": 76.05, "Law": 30.33, "AUT Multiple Choice": 46.7, "Parsi Literature": 33.08, "BoolQA": 79.8, "Reading Comprehension": 19.6, "PartExpert": 35.68, "MMLU Pro": 21.5, "Iranian Social Norms": 70.3, "Model sha": "c91bcecb236c90523f70db7efa23dd794e9b4cff", "Hub License": "mit"}
24
- {"Model": "Llama-3.1-8B-Instruct", "#Params (B)": 8.03, "Precision": "BF16", "model_name_for_query": "meta-llama/Llama-3.1-8B-Instruct", "GeneralKnowledge": 52.55, "GSM8K": 12.0, "DC-Homograph": 43.52, "MC-Homograph": 79.03, "PiQA": 70.07, "Proverb-Quiz": 47.57, "VerbEval": 42.91, "Winogrande": 54.21, "Arc-Challenge": 68.91, "Arc-Easy": 80.11, "Feqh": 29.71, "Hallucination (Truthfulness)": 6.76, "P-Hellaswag": 79.79, "Law": 32.67, "AUT Multiple Choice": 44.9, "Parsi Literature": 32.3, "BoolQA": 82.7, "Reading Comprehension": 24.5, "PartExpert": 37.62, "MMLU Pro": 25.7, "Iranian Social Norms": 70.98, "Model sha": "0e9e39f249a16976918f6564b8830bc894c89659", "Hub License": "llama3.1"}
25
- {"Model": "Qwen2.5-7B-Instruct", "#Params (B)": 7.61, "Precision": "BF16", "model_name_for_query": "Qwen/Qwen2.5-7B-Instruct", "GeneralKnowledge": 51.02, "GSM8K": 18.0, "DC-Homograph": 52.78, "MC-Homograph": 79.26, "PiQA": 71.07, "Proverb-Quiz": 47.84, "VerbEval": 44.44, "Winogrande": 61.91, "Arc-Challenge": 72.33, "Arc-Easy": 81.5, "Feqh": 36.57, "Hallucination (Truthfulness)": 34.89, "P-Hellaswag": 74.8, "Law": 32.33, "AUT Multiple Choice": 42.6, "Parsi Literature": 31.27, "BoolQA": 82.5, "Reading Comprehension": 17.6, "PartExpert": 37.24, "MMLU Pro": 26.7, "Iranian Social Norms": 64.51, "Model sha": "a09a35458c702b33eeacc393d103063234e8bc28", "Hub License": "apache-2.0"}
26
- {"Model": "aya-23-8B", "#Params (B)": 8.02, "Precision": "F16", "model_name_for_query": "CohereLabs/aya-23-8B", "GeneralKnowledge": 52.3, "GSM8K": 6.1, "DC-Homograph": 52.78, "MC-Homograph": 76.27, "PiQA": 80.78, "Proverb-Quiz": 44.32, "VerbEval": 39.3, "Winogrande": 57.13, "Arc-Challenge": 63.68, "Arc-Easy": 81.39, "Feqh": 29.14, "Hallucination (Truthfulness)": 0.6, "P-Hellaswag": 75.83, "Law": 28.33, "AUT Multiple Choice": 42.9, "Parsi Literature": 31.27, "BoolQA": 72.3, "Reading Comprehension": 23.4, "PartExpert": 33.33, "MMLU Pro": 19.9, "Iranian Social Norms": 70.73, "Model sha": "2a1a63b24af8f591616fdf58936ee576d63ca835", "Hub License": "cc-by-nc-4.0"}
27
- {"Model": "Qwen2-7B-Instruct", "#Params (B)": 7.61, "Precision": "BF16", "model_name_for_query": "Qwen/Qwen2-7B-Instruct", "GeneralKnowledge": 52.04, "GSM8K": 14.5, "DC-Homograph": 54.63, "MC-Homograph": 72.81, "PiQA": 70.97, "Proverb-Quiz": 50.54, "VerbEval": 40.62, "Winogrande": 60.94, "Arc-Challenge": 69.12, "Arc-Easy": 80.75, "Feqh": 28.0, "Hallucination (Truthfulness)": 25.93, "P-Hellaswag": 76.71, "Law": 28.33, "AUT Multiple Choice": 40.4, "Parsi Literature": 31.4, "BoolQA": 79.0, "Reading Comprehension": 10.9, "PartExpert": 36.31, "MMLU Pro": 23.8, "Iranian Social Norms": 62.2, "Model sha": "f2826a00ceef68f0f2b946d945ecc0477ce4450c", "Hub License": "apache-2.0"}
28
- {"Model": "Meta-Llama-3-8B-Instruct", "#Params (B)": 8.03, "Precision": "BF16", "model_name_for_query": "meta-llama/Meta-Llama-3-8B-Instruct", "GeneralKnowledge": 52.04, "GSM8K": 10.4, "DC-Homograph": 41.67, "MC-Homograph": 81.11, "PiQA": 70.97, "Proverb-Quiz": 42.97, "VerbEval": 38.93, "Winogrande": 56.95, "Arc-Challenge": 66.77, "Arc-Easy": 76.47, "Feqh": 33.71, "Hallucination (Truthfulness)": 33.23, "P-Hellaswag": 76.71, "Law": 32.0, "AUT Multiple Choice": 45.0, "Parsi Literature": 29.99, "BoolQA": 82.5, "Reading Comprehension": 19.4, "PartExpert": 36.3, "MMLU Pro": 26.0, "Iranian Social Norms": 70.06, "Model sha": "5f0b02c75b57c5855da9ae460ce51323ea669d8a", "Hub License": "llama3"}
29
- {"Model": "gemma-3-4b-it", "#Params (B)": 4.3, "Precision": "BF16", "model_name_for_query": "google/gemma-3-4b-it", "GeneralKnowledge": 45.92, "GSM8K": 9.6, "DC-Homograph": 42.59, "MC-Homograph": 72.58, "PiQA": 72.77, "Proverb-Quiz": 53.78, "VerbEval": 45.3, "Winogrande": 55.09, "Arc-Challenge": 63.46, "Arc-Easy": 79.57, "Feqh": 21.14, "Hallucination (Truthfulness)": 46.04, "P-Hellaswag": 73.84, "Law": 27.67, "AUT Multiple Choice": 42.5, "Parsi Literature": 30.24, "BoolQA": 78.6, "Reading Comprehension": 5.5, "PartExpert": 34.7, "MMLU Pro": 22.8, "Iranian Social Norms": 65.55, "Model sha": "093f9f388b31de276ce2de164bdc2081324b9767", "Hub License": "gemma"}
30
- {"Model": "Qwen3-4B", "#Params (B)": 4.02, "Precision": "BF16", "model_name_for_query": "Qwen/Qwen3-4B", "GeneralKnowledge": 43.88, "GSM8K": 20.1, "DC-Homograph": 38.89, "MC-Homograph": 76.27, "PiQA": 66.07, "Proverb-Quiz": 45.41, "VerbEval": 41.23, "Winogrande": 54.56, "Arc-Challenge": 73.61, "Arc-Easy": 83.42, "Feqh": 30.29, "Hallucination (Truthfulness)": 25.29, "P-Hellaswag": 78.03, "Law": 30.33, "AUT Multiple Choice": 40.6, "Parsi Literature": 31.79, "BoolQA": 81.9, "Reading Comprehension": 21.3, "PartExpert": 37.28, "MMLU Pro": 28.9, "Iranian Social Norms": 68.72, "Model sha": "531c80e289d6cff3a7cd8c0db8110231d23a6f7a", "Hub License": "apache-2.0"}
31
- {"Model": "Hermes-3-Llama-3.1-8B", "#Params (B)": 8.03, "Precision": "BF16", "model_name_for_query": "NousResearch/Hermes-3-Llama-3.1-8B", "GeneralKnowledge": 49.49, "GSM8K": 10.2, "DC-Homograph": 44.44, "MC-Homograph": 79.72, "PiQA": 70.37, "Proverb-Quiz": 47.84, "VerbEval": 48.94, "Winogrande": 55.18, "Arc-Challenge": 65.28, "Arc-Easy": 78.07, "Feqh": 30.29, "Hallucination (Truthfulness)": 45.2, "P-Hellaswag": 73.99, "Law": 31.67, "AUT Multiple Choice": 42.1, "Parsi Literature": 30.63, "BoolQA": 83.5, "Reading Comprehension": 13.5, "PartExpert": 35.61, "MMLU Pro": 24.1, "Iranian Social Norms": 54.88, "Model sha": "896ea440e5a9e6070e3d8a2774daf2b481ab425b", "Hub License": "llama3"}
32
- {"Model": "Dorna2-Llama3.1-8B-Instruct", "#Params (B)": 8.03, "Precision": "BF16", "model_name_for_query": "PartAI/Dorna2-Llama3.1-8B-Instruct", "GeneralKnowledge": 48.72, "GSM8K": 11.9, "DC-Homograph": 44.44, "MC-Homograph": 72.81, "PiQA": 69.97, "Proverb-Quiz": 42.97, "VerbEval": 42.06, "Winogrande": 54.47, "Arc-Challenge": 67.63, "Arc-Easy": 78.72, "Feqh": 33.71, "Hallucination (Truthfulness)": 33.91, "P-Hellaswag": 78.91, "Law": 29.67, "AUT Multiple Choice": 41.0, "Parsi Literature": 27.28, "BoolQA": 81.8, "Reading Comprehension": 21.9, "PartExpert": 35.65, "MMLU Pro": 22.7, "Iranian Social Norms": 49.82, "Model sha": "b78e4bd261100c96e511ed5090ca0ce0e1f4b340", "Hub License": "llama3.1"}
33
- {"Model": "Llama-3.1-8B", "#Params (B)": 8.03, "Precision": "BF16", "model_name_for_query": "meta-llama/Llama-3.1-8B", "GeneralKnowledge": 49.23, "GSM8K": 10.8, "DC-Homograph": 46.3, "MC-Homograph": 72.12, "PiQA": 66.47, "Proverb-Quiz": 35.95, "VerbEval": 39.91, "Winogrande": 54.92, "Arc-Challenge": 63.35, "Arc-Easy": 75.08, "Feqh": 30.86, "Hallucination (Truthfulness)": 45.02, "P-Hellaswag": 76.34, "Law": 33.0, "AUT Multiple Choice": 42.6, "Parsi Literature": 27.41, "BoolQA": 71.6, "Reading Comprehension": 19.7, "PartExpert": 35.92, "MMLU Pro": 22.2, "Iranian Social Norms": 61.83, "Model sha": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", "Hub License": "llama3.1"}
34
- {"Model": "Meta-Llama-3-8B", "#Params (B)": 8.03, "Precision": "BF16", "model_name_for_query": "meta-llama/Meta-Llama-3-8B", "GeneralKnowledge": 47.7, "GSM8K": 10.3, "DC-Homograph": 41.67, "MC-Homograph": 74.42, "PiQA": 64.16, "Proverb-Quiz": 37.3, "VerbEval": 39.46, "Winogrande": 55.36, "Arc-Challenge": 62.07, "Arc-Easy": 75.83, "Feqh": 27.43, "Hallucination (Truthfulness)": 37.0, "P-Hellaswag": 76.49, "Law": 35.67, "AUT Multiple Choice": 42.5, "Parsi Literature": 28.19, "BoolQA": 75.2, "Reading Comprehension": 19.5, "PartExpert": 35.1, "MMLU Pro": 22.8, "Iranian Social Norms": 54.02, "Model sha": "8cde5ca8380496c9a6cc7ef3a8b46a0372a1d920", "Hub License": "llama3"}
35
- {"Model": "Dorna-Llama3-8B-Instruct", "#Params (B)": 8.03, "Precision": "BF16", "model_name_for_query": "PartAI/Dorna-Llama3-8B-Instruct", "GeneralKnowledge": 41.33, "GSM8K": 10.3, "DC-Homograph": 40.74, "MC-Homograph": 74.65, "PiQA": 66.17, "Proverb-Quiz": 35.41, "VerbEval": 34.74, "Winogrande": 56.16, "Arc-Challenge": 59.94, "Arc-Easy": 70.7, "Feqh": 29.14, "Hallucination (Truthfulness)": 31.49, "P-Hellaswag": 75.68, "Law": 25.33, "AUT Multiple Choice": 36.9, "Parsi Literature": 27.54, "BoolQA": 80.1, "Reading Comprehension": 21.8, "PartExpert": 34.49, "MMLU Pro": 22.0, "Iranian Social Norms": 69.39, "Model sha": "fb268bb51b950b4db5b7c82c1b73d9e803020eed", "Hub License": "llama3"}
36
- {"Model": "gemma-2-2b-it", "#Params (B)": 2.61, "Precision": "BF16", "model_name_for_query": "google/gemma-2-2b-it", "GeneralKnowledge": 32.91, "GSM8K": 6.4, "DC-Homograph": 47.22, "MC-Homograph": 74.65, "PiQA": 66.87, "Proverb-Quiz": 45.68, "VerbEval": 36.18, "Winogrande": 54.74, "Arc-Challenge": 57.91, "Arc-Easy": 70.48, "Feqh": 25.71, "Hallucination (Truthfulness)": 39.02, "P-Hellaswag": 69.88, "Law": 32.67, "AUT Multiple Choice": 36.9, "Parsi Literature": 30.76, "BoolQA": 72.4, "Reading Comprehension": 0.3, "PartExpert": 31.31, "MMLU Pro": 18.2, "Iranian Social Norms": 40.18, "Model sha": "299a8560bedf22ed1c72a8a11e7dce4a7f9f51f8", "Hub License": "gemma"}
37
  {"Model": "PersianMind-v1.0", "#Params (B)": 0.0, "Precision": "F32", "model_name_for_query": "universitytehran/PersianMind-v1.0", "GeneralKnowledge": 30.61, "GSM8K": 2.3, "DC-Homograph": 41.67, "MC-Homograph": 65.9, "PiQA": 59.76, "Proverb-Quiz": 34.32, "VerbEval": 26.26, "Winogrande": 52.17, "Arc-Challenge": 54.59, "Arc-Easy": 69.73, "Feqh": 26.29, "Hallucination (Truthfulness)": 2.37, "P-Hellaswag": 63.78, "Law": 27.33, "AUT Multiple Choice": 36.1, "Parsi Literature": 27.8, "BoolQA": 66.3, "Reading Comprehension": 0.0, "PartExpert": 29.75, "MMLU Pro": 14.5, "Iranian Social Norms": 48.41, "Model sha": "af603eeb074138e2a613fbc95d89f018afbd3041", "Hub License": "cc-by-nc-sa-4.0"}
38
- {"Model": "gemma-3-1b-it", "#Params (B)": 0.99, "Precision": "BF16", "model_name_for_query": "google/gemma-3-1b-it", "GeneralKnowledge": 26.02, "GSM8K": 4.3, "DC-Homograph": 49.07, "MC-Homograph": 51.15, "PiQA": 57.66, "Proverb-Quiz": 28.92, "VerbEval": 27.67, "Winogrande": 50.58, "Arc-Challenge": 36.43, "Arc-Easy": 46.1, "Feqh": 28.0, "Hallucination (Truthfulness)": 54.94, "P-Hellaswag": 63.92, "Law": 20.33, "AUT Multiple Choice": 29.1, "Parsi Literature": 24.97, "BoolQA": 63.9, "Reading Comprehension": 2.1, "PartExpert": 27.22, "MMLU Pro": 13.7, "Iranian Social Norms": 51.22, "Model sha": "dcc83ea841ab6100d6b47a070329e1ba4cf78752", "Hub License": "gemma"}
39
- {"Model": "Llama-3.2-1B-Instruct", "#Params (B)": 1.23, "Precision": "BF16", "model_name_for_query": "meta-llama/Llama-3.2-1B-Instruct", "GeneralKnowledge": 29.59, "GSM8K": 4.1, "DC-Homograph": 50.93, "MC-Homograph": 52.53, "PiQA": 54.05, "Proverb-Quiz": 28.65, "VerbEval": 26.11, "Winogrande": 49.07, "Arc-Challenge": 37.5, "Arc-Easy": 47.38, "Feqh": 31.43, "Hallucination (Truthfulness)": 3.34, "P-Hellaswag": 55.4, "Law": 24.0, "AUT Multiple Choice": 29.9, "Parsi Literature": 27.03, "BoolQA": 64.1, "Reading Comprehension": 7.2, "PartExpert": 28.59, "MMLU Pro": 15.7, "Iranian Social Norms": 37.44, "Model sha": "9213176726f574b556790deb65791e0c5aa438b6", "Hub License": "llama3.2"}
40
- {"Model": "Maral-7B-alpha-1", "#Params (B)": 7.24, "Precision": "BF16", "model_name_for_query": "MaralGPT/Maral-7B-alpha-1", "GeneralKnowledge": 31.63, "GSM8K": 6.1, "DC-Homograph": 43.52, "MC-Homograph": 47.47, "PiQA": 51.95, "Proverb-Quiz": 22.16, "VerbEval": 28.96, "Winogrande": 49.42, "Arc-Challenge": 37.29, "Arc-Easy": 43.1, "Feqh": 26.29, "Hallucination (Truthfulness)": 0.0, "P-Hellaswag": 60.18, "Law": 26.33, "AUT Multiple Choice": 28.4, "Parsi Literature": 26.77, "BoolQA": 62.7, "Reading Comprehension": 10.8, "PartExpert": 27.1, "MMLU Pro": 14.8, "Iranian Social Norms": 24.63, "Model sha": "2ab5ca2a0d1a4454a78b4ca911e595bb9da2fe2f", "Hub License": "mit"}
 
 
 
 
 
 
 
 
1
+ {"Model": "aya-23-35B", "#Params (B)": 34.98, "Precision": "F16", "model_name_for_query": "CohereLabs/aya-23-35B", "GeneralKnowledge": 63.27, "GSM8K": 10.0, "DC-Homograph": 55.56, "MC-Homograph": 83.64, "PiQA": 89.49, "Proverb-Quiz": 67.03, "VerbEval": 47.32, "Winogrande": 65.81, "Arc-Challenge": 77.56, "Arc-Easy": 90.16, "Feqh": 30.29, "Hallucination (Truthfulness)": 11.72, "P-Hellaswag": 79.87, "Law": 32.0, "AUT Multiple Choice": 48.7, "Parsi Literature": 31.92, "BoolQA": 86.2, "Reading Comprehension": 62.82, "PartExpert": 37.44, "MMLU Pro": 24.1, "Iranian Social Norms": 65.0, "Model sha": "5e72bd5ad83e5e1612ee7f56a0c1a439a7cfb887", "Hub License": "cc-by-nc-4.0"}
2
+ {"Model": "aya-23-8B", "#Params (B)": 8.02, "Precision": "F16", "model_name_for_query": "CohereLabs/aya-23-8B", "GeneralKnowledge": 52.3, "GSM8K": 6.1, "DC-Homograph": 52.78, "MC-Homograph": 76.27, "PiQA": 80.78, "Proverb-Quiz": 44.32, "VerbEval": 39.3, "Winogrande": 57.13, "Arc-Challenge": 63.68, "Arc-Easy": 81.39, "Feqh": 29.14, "Hallucination (Truthfulness)": 0.6, "P-Hellaswag": 75.83, "Law": 28.33, "AUT Multiple Choice": 42.9, "Parsi Literature": 31.27, "BoolQA": 72.3, "Reading Comprehension": 60.31, "PartExpert": 33.33, "MMLU Pro": 19.9, "Iranian Social Norms": 70.73, "Model sha": "2a1a63b24af8f591616fdf58936ee576d63ca835", "Hub License": "cc-by-nc-4.0"}
3
+ {"Model": "aya-expanse-32b", "#Params (B)": 32.29, "Precision": "F16", "model_name_for_query": "CohereLabs/aya-expanse-32b", "GeneralKnowledge": 73.72, "GSM8K": 17.5, "DC-Homograph": 62.96, "MC-Homograph": 87.56, "PiQA": 91.19, "Proverb-Quiz": 77.03, "VerbEval": 61.95, "Winogrande": 70.5, "Arc-Challenge": 85.15, "Arc-Easy": 93.37, "Feqh": 37.14, "Hallucination (Truthfulness)": 44.84, "P-Hellaswag": 81.7, "Law": 38.67, "AUT Multiple Choice": 54.7, "Parsi Literature": 34.75, "BoolQA": 89.7, "Reading Comprehension": 67.25, "PartExpert": 44.29, "MMLU Pro": 32.1, "Iranian Social Norms": 74.94, "Model sha": "94bda1dcb97d260f732d230b832c7c685ae91e23", "Hub License": "cc-by-nc-4.0"}
4
+ {"Model": "aya-expanse-8b", "#Params (B)": 8.02, "Precision": "F16", "model_name_for_query": "CohereLabs/aya-expanse-8b", "GeneralKnowledge": 58.67, "GSM8K": 9.8, "DC-Homograph": 51.85, "MC-Homograph": 80.65, "PiQA": 80.18, "Proverb-Quiz": 60.0, "VerbEval": 48.06, "Winogrande": 64.04, "Arc-Challenge": 71.47, "Arc-Easy": 84.6, "Feqh": 29.71, "Hallucination (Truthfulness)": 23.52, "P-Hellaswag": 76.49, "Law": 32.33, "AUT Multiple Choice": 45.8, "Parsi Literature": 34.49, "BoolQA": 82.3, "Reading Comprehension": 61.98, "PartExpert": 35.56, "MMLU Pro": 21.9, "Iranian Social Norms": 71.71, "Model sha": "0ad43ec1e309e1351faa4b1d22713c065e37359a", "Hub License": "cc-by-nc-4.0"}
5
+ {"Model": "deepseek-v3-03-24", "#Params (B)": "unknown", "Precision": "unknown", "model_name_for_query": null, "GeneralKnowledge": 85.71, "GSM8K": 53.1, "DC-Homograph": 83.33, "MC-Homograph": 94.24, "PiQA": 91.39, "Proverb-Quiz": 84.86, "VerbEval": 81.11, "Winogrande": 76.71, "Arc-Challenge": 92.31, "Arc-Easy": 96.58, "Feqh": 42.29, "Hallucination (Truthfulness)": 55.54, "P-Hellaswag": 85.3, "Law": 46.0, "AUT Multiple Choice": 65.6, "Parsi Literature": 44.66, "BoolQA": 95.3, "Reading Comprehension": 63.79, "PartExpert": 58.46, "MMLU Pro": 53.4, "Iranian Social Norms": 71.71, "Model sha": "unknown", "Hub License": "unknown"}
6
{"Model": "Dorna-Llama3-8B-Instruct", "#Params (B)": 8.03, "Precision": "BF16", "model_name_for_query": "PartAI/Dorna-Llama3-8B-Instruct", "GeneralKnowledge": 41.33, "GSM8K": 10.3, "DC-Homograph": 40.74, "MC-Homograph": 74.65, "PiQA": 66.17, "Proverb-Quiz": 35.41, "VerbEval": 34.74, "Winogrande": 56.16, "Arc-Challenge": 59.94, "Arc-Easy": 70.7, "Feqh": 29.14, "Hallucination (Truthfulness)": 31.49, "P-Hellaswag": 75.68, "Law": 25.33, "AUT Multiple Choice": 36.9, "Parsi Literature": 27.54, "BoolQA": 80.1, "Reading Comprehension": 64.85, "PartExpert": 34.49, "MMLU Pro": 22.0, "Iranian Social Norms": 69.39, "Model sha": "fb268bb51b950b4db5b7c82c1b73d9e803020eed", "Hub License": "llama3"}
{"Model": "Dorna2-Llama3.1-8B-Instruct", "#Params (B)": 8.03, "Precision": "BF16", "model_name_for_query": "PartAI/Dorna2-Llama3.1-8B-Instruct", "GeneralKnowledge": 48.72, "GSM8K": 11.9, "DC-Homograph": 44.44, "MC-Homograph": 72.81, "PiQA": 69.97, "Proverb-Quiz": 42.97, "VerbEval": 42.06, "Winogrande": 54.47, "Arc-Challenge": 67.63, "Arc-Easy": 78.72, "Feqh": 33.71, "Hallucination (Truthfulness)": 33.91, "P-Hellaswag": 78.91, "Law": 29.67, "AUT Multiple Choice": 41.0, "Parsi Literature": 27.28, "BoolQA": 81.8, "Reading Comprehension": 56.84, "PartExpert": 35.65, "MMLU Pro": 22.7, "Iranian Social Norms": 49.82, "Model sha": "b78e4bd261100c96e511ed5090ca0ce0e1f4b340", "Hub License": "llama3.1"}
{"Model": "gemma-2-27b-it", "#Params (B)": 27.22, "Precision": "BF16", "model_name_for_query": "google/gemma-2-27b-it", "GeneralKnowledge": 68.11, "GSM8K": 26.7, "DC-Homograph": 60.19, "MC-Homograph": 91.24, "PiQA": 89.69, "Proverb-Quiz": 73.51, "VerbEval": 61.16, "Winogrande": 76.44, "Arc-Challenge": 86.75, "Arc-Easy": 94.22, "Feqh": 24.0, "Hallucination (Truthfulness)": 13.05, "P-Hellaswag": 83.69, "Law": 34.67, "AUT Multiple Choice": 50.8, "Parsi Literature": 35.91, "BoolQA": 89.8, "Reading Comprehension": 56.76, "PartExpert": 46.6, "MMLU Pro": 36.9, "Iranian Social Norms": 77.38, "Model sha": "aaf20e6b9f4c0fcf043f6fb2a2068419086d77b0", "Hub License": "gemma"}
{"Model": "gemma-2-2b-it", "#Params (B)": 2.61, "Precision": "BF16", "model_name_for_query": "google/gemma-2-2b-it", "GeneralKnowledge": 32.91, "GSM8K": 6.4, "DC-Homograph": 47.22, "MC-Homograph": 74.65, "PiQA": 66.87, "Proverb-Quiz": 45.68, "VerbEval": 36.18, "Winogrande": 54.74, "Arc-Challenge": 57.91, "Arc-Easy": 70.48, "Feqh": 25.71, "Hallucination (Truthfulness)": 39.02, "P-Hellaswag": 69.88, "Law": 32.67, "AUT Multiple Choice": 36.9, "Parsi Literature": 30.76, "BoolQA": 72.4, "Reading Comprehension": 41.79, "PartExpert": 31.31, "MMLU Pro": 18.2, "Iranian Social Norms": 40.18, "Model sha": "299a8560bedf22ed1c72a8a11e7dce4a7f9f51f8", "Hub License": "gemma"}
{"Model": "gemma-2-9b-it", "#Params (B)": 9.24, "Precision": "BF16", "model_name_for_query": "google/gemma-2-9b-it", "GeneralKnowledge": 64.03, "GSM8K": 17.4, "DC-Homograph": 59.26, "MC-Homograph": 90.55, "PiQA": 87.09, "Proverb-Quiz": 69.19, "VerbEval": 58.25, "Winogrande": 72.01, "Arc-Challenge": 84.29, "Arc-Easy": 93.16, "Feqh": 29.71, "Hallucination (Truthfulness)": 50.58, "P-Hellaswag": 80.82, "Law": 33.67, "AUT Multiple Choice": 48.5, "Parsi Literature": 38.1, "BoolQA": 89.7, "Reading Comprehension": 56.43, "PartExpert": 43.03, "MMLU Pro": 33.2, "Iranian Social Norms": 73.84, "Model sha": "11c9b309abf73637e4b6f9a3fa1e92e615547819", "Hub License": "gemma"}
{"Model": "gemma-3-12b-it", "#Params (B)": 12.18, "Precision": "BF16", "model_name_for_query": "google/gemma-3-12b-it", "GeneralKnowledge": 68.37, "GSM8K": 20.2, "DC-Homograph": 67.59, "MC-Homograph": 91.24, "PiQA": 87.19, "Proverb-Quiz": 72.97, "VerbEval": 63.39, "Winogrande": 73.96, "Arc-Challenge": 83.33, "Arc-Easy": 93.26, "Feqh": 25.14, "Hallucination (Truthfulness)": 46.1, "P-Hellaswag": 83.17, "Law": 36.33, "AUT Multiple Choice": 49.0, "Parsi Literature": 40.03, "BoolQA": 87.6, "Reading Comprehension": 55.26, "PartExpert": 44.12, "MMLU Pro": 32.6, "Iranian Social Norms": 75.55, "Model sha": "96b6f1eccf38110c56df3a15bffe176da04bfd80", "Hub License": "gemma"}
{"Model": "gemma-3-1b-it", "#Params (B)": 0.99, "Precision": "BF16", "model_name_for_query": "google/gemma-3-1b-it", "GeneralKnowledge": 26.02, "GSM8K": 4.3, "DC-Homograph": 49.07, "MC-Homograph": 51.15, "PiQA": 57.66, "Proverb-Quiz": 28.92, "VerbEval": 27.67, "Winogrande": 50.58, "Arc-Challenge": 36.43, "Arc-Easy": 46.1, "Feqh": 28.0, "Hallucination (Truthfulness)": 54.94, "P-Hellaswag": 63.92, "Law": 20.33, "AUT Multiple Choice": 29.1, "Parsi Literature": 24.97, "BoolQA": 63.9, "Reading Comprehension": 31.98, "PartExpert": 27.22, "MMLU Pro": 13.7, "Iranian Social Norms": 51.22, "Model sha": "dcc83ea841ab6100d6b47a070329e1ba4cf78752", "Hub License": "gemma"}
{"Model": "gemma-3-27b-it", "#Params (B)": 27.43, "Precision": "BF16", "model_name_for_query": "google/gemma-3-27b-it", "GeneralKnowledge": 73.72, "GSM8K": 28.3, "DC-Homograph": 63.89, "MC-Homograph": 92.4, "PiQA": 87.29, "Proverb-Quiz": 78.92, "VerbEval": 66.02, "Winogrande": 78.12, "Arc-Challenge": 88.35, "Arc-Easy": 94.22, "Feqh": 24.57, "Hallucination (Truthfulness)": 60.15, "P-Hellaswag": 83.39, "Law": 36.33, "AUT Multiple Choice": 55.2, "Parsi Literature": 40.93, "BoolQA": 91.4, "Reading Comprehension": 58.01, "PartExpert": 49.32, "MMLU Pro": 36.6, "Iranian Social Norms": 70.49, "Model sha": "005ad3404e59d6023443cb575daa05336842228a", "Hub License": "gemma"}
{"Model": "gemma-3-4b-it", "#Params (B)": 4.3, "Precision": "BF16", "model_name_for_query": "google/gemma-3-4b-it", "GeneralKnowledge": 45.92, "GSM8K": 9.6, "DC-Homograph": 42.59, "MC-Homograph": 72.58, "PiQA": 72.77, "Proverb-Quiz": 53.78, "VerbEval": 45.3, "Winogrande": 55.09, "Arc-Challenge": 63.46, "Arc-Easy": 79.57, "Feqh": 21.14, "Hallucination (Truthfulness)": 46.04, "P-Hellaswag": 73.84, "Law": 27.67, "AUT Multiple Choice": 42.5, "Parsi Literature": 30.24, "BoolQA": 78.6, "Reading Comprehension": 47.28, "PartExpert": 34.7, "MMLU Pro": 22.8, "Iranian Social Norms": 65.55, "Model sha": "093f9f388b31de276ce2de164bdc2081324b9767", "Hub License": "gemma"}
{"Model": "google__gemini-2.0-flash-001", "#Params (B)": "unknown", "Precision": "unknown", "model_name_for_query": null, "GeneralKnowledge": 87.76, "GSM8K": 53.7, "DC-Homograph": 79.63, "MC-Homograph": 91.71, "PiQA": 90.59, "Proverb-Quiz": 95.14, "VerbEval": 85.15, "Winogrande": 78.74, "Arc-Challenge": 91.35, "Arc-Easy": 97.22, "Feqh": 53.14, "Hallucination (Truthfulness)": 68.87, "P-Hellaswag": 82.95, "Law": 45.67, "AUT Multiple Choice": 60.9, "Parsi Literature": 44.02, "BoolQA": 91.3, "Reading Comprehension": 67.92, "PartExpert": 59.5, "MMLU Pro": 47.8, "Iranian Social Norms": 77.68, "Model sha": "unknown", "Hub License": "unknown"}
{"Model": "google__gemini-2.0-flash-lite-001", "#Params (B)": "unknown", "Precision": "unknown", "model_name_for_query": null, "GeneralKnowledge": 84.18, "GSM8K": 39.7, "DC-Homograph": 60.19, "MC-Homograph": 87.79, "PiQA": 85.29, "Proverb-Quiz": 91.35, "VerbEval": 81.39, "Winogrande": 75.64, "Arc-Challenge": 89.64, "Arc-Easy": 93.48, "Feqh": 41.71, "Hallucination (Truthfulness)": 67.32, "P-Hellaswag": 83.54, "Law": 43.0, "AUT Multiple Choice": 58.5, "Parsi Literature": 43.89, "BoolQA": 92.6, "Reading Comprehension": 65.92, "PartExpert": 54.15, "MMLU Pro": 41.2, "Iranian Social Norms": 70.49, "Model sha": "unknown", "Hub License": "unknown"}
{"Model": "gpt-4.1-2025-04-14", "#Params (B)": "unknown", "Precision": "unknown", "model_name_for_query": null, "GeneralKnowledge": 90.82, "GSM8K": 25.3, "DC-Homograph": 89.81, "MC-Homograph": 95.39, "PiQA": 95.9, "Proverb-Quiz": 95.14, "VerbEval": 83.04, "Winogrande": 85.92, "Arc-Challenge": 95.3, "Arc-Easy": 96.68, "Feqh": 52.0, "Hallucination (Truthfulness)": 77.43, "P-Hellaswag": 85.67, "Law": 53.67, "AUT Multiple Choice": 66.6, "Parsi Literature": 45.82, "BoolQA": 94.7, "Reading Comprehension": 44.82, "PartExpert": 59.92, "MMLU Pro": 50.5, "Iranian Social Norms": 77.56, "Model sha": "unknown", "Hub License": "unknown"}
{"Model": "gpt-4.1-mini-2025-04-14", "#Params (B)": "unknown", "Precision": "unknown", "model_name_for_query": null, "GeneralKnowledge": 79.34, "GSM8K": 60.3, "DC-Homograph": 66.67, "MC-Homograph": 94.24, "PiQA": 92.69, "Proverb-Quiz": 82.97, "VerbEval": 77.99, "Winogrande": 80.07, "Arc-Challenge": 91.88, "Arc-Easy": 96.15, "Feqh": 37.71, "Hallucination (Truthfulness)": 66.55, "P-Hellaswag": 84.57, "Law": 44.33, "AUT Multiple Choice": 53.5, "Parsi Literature": 41.18, "BoolQA": 93.7, "Reading Comprehension": 51.85, "PartExpert": 54.37, "MMLU Pro": 47.8, "Iranian Social Norms": 73.35, "Model sha": "unknown", "Hub License": "unknown"}
{"Model": "gpt-4.1-nano-2025-04-14", "#Params (B)": "unknown", "Precision": "unknown", "model_name_for_query": null, "GeneralKnowledge": 68.11, "GSM8K": 58.4, "DC-Homograph": 49.07, "MC-Homograph": 78.11, "PiQA": 84.58, "Proverb-Quiz": 67.84, "VerbEval": 66.21, "Winogrande": 60.32, "Arc-Challenge": 81.41, "Arc-Easy": 91.55, "Feqh": 32.0, "Hallucination (Truthfulness)": 51.24, "P-Hellaswag": 77.96, "Law": 32.67, "AUT Multiple Choice": 46.1, "Parsi Literature": 36.42, "BoolQA": 81.7, "Reading Comprehension": 50.66, "PartExpert": 42.49, "MMLU Pro": 29.9, "Iranian Social Norms": 74.76, "Model sha": "unknown", "Hub License": "unknown"}
{"Model": "gpt-4o-2024-08-06", "#Params (B)": "unknown", "Precision": "unknown", "model_name_for_query": null, "GeneralKnowledge": 90.82, "GSM8K": 73.1, "DC-Homograph": 87.04, "MC-Homograph": 95.62, "PiQA": 95.1, "Proverb-Quiz": 96.76, "VerbEval": 85.89, "Winogrande": 86.18, "Arc-Challenge": 95.09, "Arc-Easy": 97.22, "Feqh": 46.86, "Hallucination (Truthfulness)": 74.64, "P-Hellaswag": 85.53, "Law": 47.67, "AUT Multiple Choice": 67.7, "Parsi Literature": 45.95, "BoolQA": 94.1, "Reading Comprehension": 55.34, "PartExpert": 57.36, "MMLU Pro": 47.1, "Iranian Social Norms": 76.89, "Model sha": "unknown", "Hub License": "unknown"}
{"Model": "gpt-4o-mini-2024-07-18", "#Params (B)": "unknown", "Precision": "unknown", "model_name_for_query": null, "GeneralKnowledge": 79.08, "GSM8K": 60.9, "DC-Homograph": 68.52, "MC-Homograph": 90.09, "PiQA": 90.89, "Proverb-Quiz": 84.05, "VerbEval": 74.23, "Winogrande": 75.73, "Arc-Challenge": 86.43, "Arc-Easy": 94.01, "Feqh": 41.71, "Hallucination (Truthfulness)": 82.04, "P-Hellaswag": 83.84, "Law": 34.0, "AUT Multiple Choice": 54.8, "Parsi Literature": 40.93, "BoolQA": 93.3, "Reading Comprehension": 63.29, "PartExpert": 42.54, "MMLU Pro": 34.8, "Iranian Social Norms": 71.59, "Model sha": "unknown", "Hub License": "unknown"}
{"Model": "Hermes-3-Llama-3.1-8B", "#Params (B)": 8.03, "Precision": "BF16", "model_name_for_query": "NousResearch/Hermes-3-Llama-3.1-8B", "GeneralKnowledge": 49.49, "GSM8K": 10.2, "DC-Homograph": 44.44, "MC-Homograph": 79.72, "PiQA": 70.37, "Proverb-Quiz": 47.84, "VerbEval": 48.94, "Winogrande": 55.18, "Arc-Challenge": 65.28, "Arc-Easy": 78.07, "Feqh": 30.29, "Hallucination (Truthfulness)": 45.2, "P-Hellaswag": 73.99, "Law": 31.67, "AUT Multiple Choice": 42.1, "Parsi Literature": 30.63, "BoolQA": 83.5, "Reading Comprehension": 56.4, "PartExpert": 35.61, "MMLU Pro": 24.1, "Iranian Social Norms": 54.88, "Model sha": "896ea440e5a9e6070e3d8a2774daf2b481ab425b", "Hub License": "llama3"}
{"Model": "Hormoz-8B", "#Params (B)": 8.02, "Precision": "F32", "model_name_for_query": "mann-e/Hormoz-8B", "GeneralKnowledge": 58.42, "GSM8K": 10.0, "DC-Homograph": 50.93, "MC-Homograph": 80.65, "PiQA": 80.68, "Proverb-Quiz": 60.27, "VerbEval": 47.29, "Winogrande": 64.39, "Arc-Challenge": 70.41, "Arc-Easy": 84.28, "Feqh": 28.57, "Hallucination (Truthfulness)": 23.66, "P-Hellaswag": 76.05, "Law": 30.33, "AUT Multiple Choice": 46.7, "Parsi Literature": 33.08, "BoolQA": 79.8, "Reading Comprehension": 61.11, "PartExpert": 35.68, "MMLU Pro": 21.5, "Iranian Social Norms": 70.3, "Model sha": "c91bcecb236c90523f70db7efa23dd794e9b4cff", "Hub License": "mit"}
{"Model": "Llama-3.1-8B-Instruct", "#Params (B)": 8.03, "Precision": "BF16", "model_name_for_query": "meta-llama/Llama-3.1-8B-Instruct", "GeneralKnowledge": 52.55, "GSM8K": 12.0, "DC-Homograph": 43.52, "MC-Homograph": 79.03, "PiQA": 70.07, "Proverb-Quiz": 47.57, "VerbEval": 42.91, "Winogrande": 54.21, "Arc-Challenge": 68.91, "Arc-Easy": 80.11, "Feqh": 29.71, "Hallucination (Truthfulness)": 6.76, "P-Hellaswag": 79.79, "Law": 32.67, "AUT Multiple Choice": 44.9, "Parsi Literature": 32.3, "BoolQA": 82.7, "Reading Comprehension": 62.45, "PartExpert": 37.62, "MMLU Pro": 25.7, "Iranian Social Norms": 70.98, "Model sha": "0e9e39f249a16976918f6564b8830bc894c89659", "Hub License": "llama3.1"}
{"Model": "Llama-3.2-1B-Instruct", "#Params (B)": 1.23, "Precision": "BF16", "model_name_for_query": "meta-llama/Llama-3.2-1B-Instruct", "GeneralKnowledge": 29.59, "GSM8K": 4.1, "DC-Homograph": 50.93, "MC-Homograph": 52.53, "PiQA": 54.05, "Proverb-Quiz": 28.65, "VerbEval": 26.11, "Winogrande": 49.07, "Arc-Challenge": 37.5, "Arc-Easy": 47.38, "Feqh": 31.43, "Hallucination (Truthfulness)": 3.34, "P-Hellaswag": 55.4, "Law": 24.0, "AUT Multiple Choice": 29.9, "Parsi Literature": 27.03, "BoolQA": 64.1, "Reading Comprehension": 38.0, "PartExpert": 28.59, "MMLU Pro": 15.7, "Iranian Social Norms": 37.44, "Model sha": "9213176726f574b556790deb65791e0c5aa438b6", "Hub License": "llama3.2"}
{"Model": "Maral-7B-alpha-1", "#Params (B)": 7.24, "Precision": "BF16", "model_name_for_query": "MaralGPT/Maral-7B-alpha-1", "GeneralKnowledge": 31.63, "GSM8K": 6.1, "DC-Homograph": 43.52, "MC-Homograph": 47.47, "PiQA": 51.95, "Proverb-Quiz": 22.16, "VerbEval": 28.96, "Winogrande": 49.42, "Arc-Challenge": 37.29, "Arc-Easy": 43.1, "Feqh": 26.29, "Hallucination (Truthfulness)": 0.0, "P-Hellaswag": 60.18, "Law": 26.33, "AUT Multiple Choice": 28.4, "Parsi Literature": 26.77, "BoolQA": 62.7, "Reading Comprehension": 42.04, "PartExpert": 27.1, "MMLU Pro": 14.8, "Iranian Social Norms": 24.63, "Model sha": "2ab5ca2a0d1a4454a78b4ca911e595bb9da2fe2f", "Hub License": "mit"}
{"Model": "Meta-Llama-3-8B-Instruct", "#Params (B)": 8.03, "Precision": "BF16", "model_name_for_query": "meta-llama/Meta-Llama-3-8B-Instruct", "GeneralKnowledge": 52.04, "GSM8K": 10.4, "DC-Homograph": 41.67, "MC-Homograph": 81.11, "PiQA": 70.97, "Proverb-Quiz": 42.97, "VerbEval": 38.93, "Winogrande": 56.95, "Arc-Challenge": 66.77, "Arc-Easy": 76.47, "Feqh": 33.71, "Hallucination (Truthfulness)": 33.23, "P-Hellaswag": 76.71, "Law": 32.0, "AUT Multiple Choice": 45.0, "Parsi Literature": 29.99, "BoolQA": 82.5, "Reading Comprehension": 54.79, "PartExpert": 36.3, "MMLU Pro": 26.0, "Iranian Social Norms": 70.06, "Model sha": "5f0b02c75b57c5855da9ae460ce51323ea669d8a", "Hub License": "llama3"}
{"Model": "PersianMind-v1.0", "#Params (B)": 0.0, "Precision": "F32", "model_name_for_query": "universitytehran/PersianMind-v1.0", "GeneralKnowledge": 30.61, "GSM8K": 2.3, "DC-Homograph": 41.67, "MC-Homograph": 65.9, "PiQA": 59.76, "Proverb-Quiz": 34.32, "VerbEval": 26.26, "Winogrande": 52.17, "Arc-Challenge": 54.59, "Arc-Easy": 69.73, "Feqh": 26.29, "Hallucination (Truthfulness)": 2.37, "P-Hellaswag": 63.78, "Law": 27.33, "AUT Multiple Choice": 36.1, "Parsi Literature": 27.8, "BoolQA": 66.3, "Reading Comprehension": 0.0, "PartExpert": 29.75, "MMLU Pro": 14.5, "Iranian Social Norms": 48.41, "Model sha": "af603eeb074138e2a613fbc95d89f018afbd3041", "Hub License": "cc-by-nc-sa-4.0"}
{"Model": "Qwen2-7B-Instruct", "#Params (B)": 7.61, "Precision": "BF16", "model_name_for_query": "Qwen/Qwen2-7B-Instruct", "GeneralKnowledge": 52.04, "GSM8K": 14.5, "DC-Homograph": 54.63, "MC-Homograph": 72.81, "PiQA": 70.97, "Proverb-Quiz": 50.54, "VerbEval": 40.62, "Winogrande": 60.94, "Arc-Challenge": 69.12, "Arc-Easy": 80.75, "Feqh": 28.0, "Hallucination (Truthfulness)": 25.93, "P-Hellaswag": 76.71, "Law": 28.33, "AUT Multiple Choice": 40.4, "Parsi Literature": 31.4, "BoolQA": 79.0, "Reading Comprehension": 50.14, "PartExpert": 36.31, "MMLU Pro": 23.8, "Iranian Social Norms": 62.2, "Model sha": "f2826a00ceef68f0f2b946d945ecc0477ce4450c", "Hub License": "apache-2.0"}
{"Model": "Qwen2.5-32B-Instruct", "#Params (B)": 32.76, "Precision": "BF16", "model_name_for_query": "Qwen/Qwen2.5-32B-Instruct", "GeneralKnowledge": 61.73, "GSM8K": 50.1, "DC-Homograph": 67.59, "MC-Homograph": 91.47, "PiQA": 83.98, "Proverb-Quiz": 63.24, "VerbEval": 54.58, "Winogrande": 80.07, "Arc-Challenge": 85.15, "Arc-Easy": 91.87, "Feqh": 38.86, "Hallucination (Truthfulness)": 59.22, "P-Hellaswag": 82.07, "Law": 42.33, "AUT Multiple Choice": 50.4, "Parsi Literature": 40.41, "BoolQA": 93.4, "Reading Comprehension": 28.11, "PartExpert": 46.78, "MMLU Pro": 37.4, "Iranian Social Norms": 70.0, "Model sha": "5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd", "Hub License": "apache-2.0"}
{"Model": "Qwen2.5-7B-Instruct", "#Params (B)": 7.61, "Precision": "BF16", "model_name_for_query": "Qwen/Qwen2.5-7B-Instruct", "GeneralKnowledge": 51.02, "GSM8K": 18.0, "DC-Homograph": 52.78, "MC-Homograph": 79.26, "PiQA": 71.07, "Proverb-Quiz": 47.84, "VerbEval": 44.44, "Winogrande": 61.91, "Arc-Challenge": 72.33, "Arc-Easy": 81.5, "Feqh": 36.57, "Hallucination (Truthfulness)": 34.89, "P-Hellaswag": 74.8, "Law": 32.33, "AUT Multiple Choice": 42.6, "Parsi Literature": 31.27, "BoolQA": 82.5, "Reading Comprehension": 58.43, "PartExpert": 37.24, "MMLU Pro": 26.7, "Iranian Social Norms": 64.51, "Model sha": "a09a35458c702b33eeacc393d103063234e8bc28", "Hub License": "apache-2.0"}
{"Model": "Qwen3-14B", "#Params (B)": 14.76, "Precision": "BF16", "model_name_for_query": "Qwen/Qwen3-14B", "GeneralKnowledge": 56.38, "GSM8K": 31.1, "DC-Homograph": 55.56, "MC-Homograph": 87.56, "PiQA": 77.18, "Proverb-Quiz": 53.78, "VerbEval": 54.36, "Winogrande": 67.32, "Arc-Challenge": 84.29, "Arc-Easy": 91.02, "Feqh": 29.14, "Hallucination (Truthfulness)": 44.54, "P-Hellaswag": 80.97, "Law": 34.67, "AUT Multiple Choice": 44.8, "Parsi Literature": 35.39, "BoolQA": 87.6, "Reading Comprehension": 44.36, "PartExpert": 43.22, "MMLU Pro": 35.5, "Iranian Social Norms": 74.51, "Model sha": "8268fe3026cb304910457689366670e803a6fd56", "Hub License": "apache-2.0"}
{"Model": "Qwen3-30B-A3B", "#Params (B)": 30.53, "Precision": "BF16", "model_name_for_query": "Qwen/Qwen3-30B-A3B", "GeneralKnowledge": 65.05, "GSM8K": 28.8, "DC-Homograph": 57.41, "MC-Homograph": 86.41, "PiQA": 72.47, "Proverb-Quiz": 50.81, "VerbEval": 48.09, "Winogrande": 65.28, "Arc-Challenge": 87.39, "Arc-Easy": 93.58, "Feqh": 23.43, "Hallucination (Truthfulness)": 3.54, "P-Hellaswag": 83.1, "Law": 35.33, "AUT Multiple Choice": 48.0, "Parsi Literature": 36.55, "BoolQA": 86.2, "Reading Comprehension": 66.24, "PartExpert": 41.13, "MMLU Pro": 36.3, "Iranian Social Norms": 44.21, "Model sha": "ae659febe817e4b3ebd7355f47792725801204c9", "Hub License": "apache-2.0"}
{"Model": "Qwen3-32B", "#Params (B)": 32.76, "Precision": "BF16", "model_name_for_query": "Qwen/Qwen3-32B", "GeneralKnowledge": 67.6, "GSM8K": 37.9, "DC-Homograph": 51.85, "MC-Homograph": 89.63, "PiQA": 87.69, "Proverb-Quiz": 64.59, "VerbEval": 56.35, "Winogrande": 71.48, "Arc-Challenge": 91.13, "Arc-Easy": 94.22, "Feqh": 29.71, "Hallucination (Truthfulness)": 47.5, "P-Hellaswag": 83.47, "Law": 37.0, "AUT Multiple Choice": 48.3, "Parsi Literature": 39.12, "BoolQA": 91.1, "Reading Comprehension": 63.96, "PartExpert": 50.06, "MMLU Pro": 42.8, "Iranian Social Norms": 73.48, "Model sha": "d47b0d4ae4b48fde975756bf360a63a9cca8d470", "Hub License": "apache-2.0"}
{"Model": "Qwen3-4B", "#Params (B)": 4.02, "Precision": "BF16", "model_name_for_query": "Qwen/Qwen3-4B", "GeneralKnowledge": 43.88, "GSM8K": 20.1, "DC-Homograph": 38.89, "MC-Homograph": 76.27, "PiQA": 66.07, "Proverb-Quiz": 45.41, "VerbEval": 41.23, "Winogrande": 54.56, "Arc-Challenge": 73.61, "Arc-Easy": 83.42, "Feqh": 30.29, "Hallucination (Truthfulness)": 25.29, "P-Hellaswag": 78.03, "Law": 30.33, "AUT Multiple Choice": 40.6, "Parsi Literature": 31.79, "BoolQA": 81.9, "Reading Comprehension": 63.43, "PartExpert": 37.28, "MMLU Pro": 28.9, "Iranian Social Norms": 68.72, "Model sha": "531c80e289d6cff3a7cd8c0db8110231d23a6f7a", "Hub License": "apache-2.0"}
{"Model": "Qwen3-8B", "#Params (B)": 8.19, "Precision": "BF16", "model_name_for_query": "Qwen/Qwen3-8B", "GeneralKnowledge": 49.23, "GSM8K": 25.7, "DC-Homograph": 50.93, "MC-Homograph": 82.95, "PiQA": 75.98, "Proverb-Quiz": 51.89, "VerbEval": 47.93, "Winogrande": 61.91, "Arc-Challenge": 80.24, "Arc-Easy": 87.38, "Feqh": 28.0, "Hallucination (Truthfulness)": 38.46, "P-Hellaswag": 80.38, "Law": 29.67, "AUT Multiple Choice": 46.0, "Parsi Literature": 33.2, "BoolQA": 86.4, "Reading Comprehension": 66.38, "PartExpert": 38.31, "MMLU Pro": 31.1, "Iranian Social Norms": 63.41, "Model sha": "9c925d64d72725edaf899c6cb9c377fd0709d9c5", "Hub License": "apache-2.0"}
{"Model": "QwQ-32B", "#Params (B)": 32.76, "Precision": "BF16", "model_name_for_query": "Qwen/QwQ-32B", "GeneralKnowledge": 60.71, "GSM8K": 29.3, "DC-Homograph": 58.33, "MC-Homograph": 88.25, "PiQA": 81.68, "Proverb-Quiz": 59.19, "VerbEval": 52.31, "Winogrande": 73.07, "Arc-Challenge": 84.94, "Arc-Easy": 90.8, "Feqh": 41.71, "Hallucination (Truthfulness)": 48.93, "P-Hellaswag": 82.22, "Law": 38.0, "AUT Multiple Choice": 49.3, "Parsi Literature": 37.71, "BoolQA": 88.5, "Reading Comprehension": 50.25, "PartExpert": 46.75, "MMLU Pro": 39.0, "Iranian Social Norms": 70.73, "Model sha": "976055f8c83f394f35dbd3ab09a285a984907bd0", "Hub License": "apache-2.0"}
{"Model": "QwQ-32B-Preview", "#Params (B)": 32.76, "Precision": "BF16", "model_name_for_query": "Qwen/QwQ-32B-Preview", "GeneralKnowledge": 63.27, "GSM8K": 34.7, "DC-Homograph": 61.11, "MC-Homograph": 88.25, "PiQA": 81.28, "Proverb-Quiz": 58.11, "VerbEval": 51.97, "Winogrande": 75.64, "Arc-Challenge": 85.58, "Arc-Easy": 91.44, "Feqh": 41.14, "Hallucination (Truthfulness)": 38.84, "P-Hellaswag": 84.13, "Law": 43.0, "AUT Multiple Choice": 50.6, "Parsi Literature": 39.77, "BoolQA": 88.5, "Reading Comprehension": 65.38, "PartExpert": 47.39, "MMLU Pro": 37.3, "Iranian Social Norms": 72.26, "Model sha": "91906fe41a48b6a89ce2970abfd1269eefee170e", "Hub License": "apache-2.0"}