Update leaderboard_data.jsonl
leaderboard_data.jsonl CHANGED (+40 -40)
@@ -1,40 +1,40 @@
Removed rows 1–40 (the previous records, without the "#Params (B)" field):
{"Model": "gpt-4o-2024-08-06", "model_name_for_query": null, "GeneralKnowledge": "90.82", "GSM8K": "73.10", "DC-Homograph": "87.04", "MC-Homograph": "95.62", "PiQA": "95.10", "Proverb-Quiz": "96.76", "VerbEval": "85.89", "Winogrande": "86.18", "Arc-Challenge": "95.09", "Arc-Easy": "97.22", "Feqh": "46.86", "halluc_final": "74.64", "P-Hellaswag": "85.53", "Law": "47.67", "AUT Multiple Choice": "67.70", "Parsi Literature": "45.95", "BoolQA": "94.10", "Reading Comprehension": "10.00", "PartExpert": "57.36", "MMLU Pro": "47.10", "Iranian Social Norms": "76.89", "Precision": "unknown", "Model sha": "unknown", "Hub License": "unknown"}
{"Model": "gpt-4.1-2025-04-14", "model_name_for_query": null, "GeneralKnowledge": "90.82", "GSM8K": "25.30", "DC-Homograph": "89.81", "MC-Homograph": "95.39", "PiQA": "95.90", "Proverb-Quiz": "95.14", "VerbEval": "83.04", "Winogrande": "85.92", "Arc-Challenge": "95.30", "Arc-Easy": "96.68", "Feqh": "52.00", "halluc_final": "77.43", "P-Hellaswag": "85.67", "Law": "53.67", "AUT Multiple Choice": "66.60", "Parsi Literature": "45.82", "BoolQA": "94.70", "Reading Comprehension": "3.60", "PartExpert": "59.92", "MMLU Pro": "50.50", "Iranian Social Norms": "77.56", "Precision": "unknown", "Model sha": "unknown", "Hub License": "unknown"}
{"Model": "google__gemini-2.0-flash-001", "model_name_for_query": null, "GeneralKnowledge": "87.76", "GSM8K": "53.70", "DC-Homograph": "79.63", "MC-Homograph": "91.71", "PiQA": "90.59", "Proverb-Quiz": "95.14", "VerbEval": "85.15", "Winogrande": "78.74", "Arc-Challenge": "91.35", "Arc-Easy": "97.22", "Feqh": "53.14", "halluc_final": "68.87", "P-Hellaswag": "82.95", "Law": "45.67", "AUT Multiple Choice": "60.90", "Parsi Literature": "44.02", "BoolQA": "91.30", "Reading Comprehension": "23.90", "PartExpert": "59.50", "MMLU Pro": "47.80", "Iranian Social Norms": "77.68", "Precision": "unknown", "Model sha": "unknown", "Hub License": "unknown"}
{"Model": "deepseek-v3-03-24", "model_name_for_query": null, "GeneralKnowledge": "85.71", "GSM8K": "53.10", "DC-Homograph": "83.33", "MC-Homograph": "94.24", "PiQA": "91.39", "Proverb-Quiz": "84.86", "VerbEval": "81.11", "Winogrande": "76.71", "Arc-Challenge": "92.31", "Arc-Easy": "96.58", "Feqh": "42.29", "halluc_final": "55.54", "P-Hellaswag": "85.30", "Law": "46.00", "AUT Multiple Choice": "65.60", "Parsi Literature": "44.66", "BoolQA": "95.30", "Reading Comprehension": "20.70", "PartExpert": "58.46", "MMLU Pro": "53.40", "Iranian Social Norms": "71.71", "Precision": "unknown", "Model sha": "unknown", "Hub License": "unknown"}
{"Model": "gpt-4.1-mini-2025-04-14", "model_name_for_query": null, "GeneralKnowledge": "79.34", "GSM8K": "60.30", "DC-Homograph": "66.67", "MC-Homograph": "94.24", "PiQA": "92.69", "Proverb-Quiz": "82.97", "VerbEval": "77.99", "Winogrande": "80.07", "Arc-Challenge": "91.88", "Arc-Easy": "96.15", "Feqh": "37.71", "halluc_final": "66.55", "P-Hellaswag": "84.57", "Law": "44.33", "AUT Multiple Choice": "53.50", "Parsi Literature": "41.18", "BoolQA": "93.70", "Reading Comprehension": "7.70", "PartExpert": "54.37", "MMLU Pro": "47.80", "Iranian Social Norms": "73.35", "Precision": "unknown", "Model sha": "unknown", "Hub License": "unknown"}
{"Model": "gpt-4o-mini-2024-07-18", "model_name_for_query": null, "GeneralKnowledge": "79.08", "GSM8K": "60.90", "DC-Homograph": "68.52", "MC-Homograph": "90.09", "PiQA": "90.89", "Proverb-Quiz": "84.05", "VerbEval": "74.23", "Winogrande": "75.73", "Arc-Challenge": "86.43", "Arc-Easy": "94.01", "Feqh": "41.71", "halluc_final": "82.04", "P-Hellaswag": "83.84", "Law": "34.00", "AUT Multiple Choice": "54.80", "Parsi Literature": "40.93", "BoolQA": "93.30", "Reading Comprehension": "20.50", "PartExpert": "42.54", "MMLU Pro": "34.80", "Iranian Social Norms": "71.59", "Precision": "unknown", "Model sha": "unknown", "Hub License": "unknown"}
{"Model": "google__gemini-2.0-flash-lite-001", "model_name_for_query": null, "GeneralKnowledge": "84.18", "GSM8K": "39.70", "DC-Homograph": "60.19", "MC-Homograph": "87.79", "PiQA": "85.29", "Proverb-Quiz": "91.35", "VerbEval": "81.39", "Winogrande": "75.64", "Arc-Challenge": "89.64", "Arc-Easy": "93.48", "Feqh": "41.71", "halluc_final": "67.32", "P-Hellaswag": "83.54", "Law": "43.00", "AUT Multiple Choice": "58.50", "Parsi Literature": "43.89", "BoolQA": "92.60", "Reading Comprehension": "26.70", "PartExpert": "54.15", "MMLU Pro": "41.20", "Iranian Social Norms": "70.49", "Precision": "unknown", "Model sha": "unknown", "Hub License": "unknown"}
{"Model": "Qwen2.5-32B-Instruct", "model_name_for_query": "Qwen/Qwen2.5-32B-Instruct", "GeneralKnowledge": "61.73", "GSM8K": "50.10", "DC-Homograph": "67.59", "MC-Homograph": "91.47", "PiQA": "83.98", "Proverb-Quiz": "63.24", "VerbEval": "54.58", "Winogrande": "80.07", "Arc-Challenge": "85.15", "Arc-Easy": "91.87", "Feqh": "38.86", "halluc_final": "59.22", "P-Hellaswag": "82.07", "Law": "42.33", "AUT Multiple Choice": "50.40", "Parsi Literature": "40.41", "BoolQA": "93.40", "Reading Comprehension": "7.40", "PartExpert": "46.78", "MMLU Pro": "37.40", "Iranian Social Norms": "70.00", "Precision": "BF16", "Model sha": "5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd", "Hub License": "apache-2.0"}
{"Model": "gemma-3-27b-it", "model_name_for_query": "google/gemma-3-27b-it", "GeneralKnowledge": "73.72", "GSM8K": "28.30", "DC-Homograph": "63.89", "MC-Homograph": "92.40", "PiQA": "87.29", "Proverb-Quiz": "78.92", "VerbEval": "66.02", "Winogrande": "78.12", "Arc-Challenge": "88.35", "Arc-Easy": "94.22", "Feqh": "24.57", "halluc_final": "60.15", "P-Hellaswag": "83.39", "Law": "36.33", "AUT Multiple Choice": "55.20", "Parsi Literature": "40.93", "BoolQA": "91.40", "Reading Comprehension": "1.20", "PartExpert": "49.32", "MMLU Pro": "36.60", "Iranian Social Norms": "70.49", "Precision": "BF16", "Model sha": "005ad3404e59d6023443cb575daa05336842228a", "Hub License": "gemma"}
{"Model": "Qwen3-32B", "model_name_for_query": "Qwen/Qwen3-32B", "GeneralKnowledge": "67.60", "GSM8K": "37.90", "DC-Homograph": "51.85", "MC-Homograph": "89.63", "PiQA": "87.69", "Proverb-Quiz": "64.59", "VerbEval": "56.35", "Winogrande": "71.48", "Arc-Challenge": "91.13", "Arc-Easy": "94.22", "Feqh": "29.71", "halluc_final": "47.50", "P-Hellaswag": "83.47", "Law": "37.00", "AUT Multiple Choice": "48.30", "Parsi Literature": "39.12", "BoolQA": "91.10", "Reading Comprehension": "22.00", "PartExpert": "50.06", "MMLU Pro": "42.80", "Iranian Social Norms": "73.48", "Precision": "BF16", "Model sha": "d47b0d4ae4b48fde975756bf360a63a9cca8d470", "Hub License": "apache-2.0"}
{"Model": "QwQ-32B-Preview", "model_name_for_query": "Qwen/QwQ-32B-Preview", "GeneralKnowledge": "63.27", "GSM8K": "34.70", "DC-Homograph": "61.11", "MC-Homograph": "88.25", "PiQA": "81.28", "Proverb-Quiz": "58.11", "VerbEval": "51.97", "Winogrande": "75.64", "Arc-Challenge": "85.58", "Arc-Easy": "91.44", "Feqh": "41.14", "halluc_final": "38.84", "P-Hellaswag": "84.13", "Law": "43.00", "AUT Multiple Choice": "50.60", "Parsi Literature": "39.77", "BoolQA": "88.50", "Reading Comprehension": "23.60", "PartExpert": "47.39", "MMLU Pro": "37.30", "Iranian Social Norms": "72.26", "Precision": "BF16", "Model sha": "91906fe41a48b6a89ce2970abfd1269eefee170e", "Hub License": "apache-2.0"}
{"Model": "gemma-3-12b-it", "model_name_for_query": "google/gemma-3-12b-it", "GeneralKnowledge": "68.37", "GSM8K": "20.20", "DC-Homograph": "67.59", "MC-Homograph": "91.24", "PiQA": "87.19", "Proverb-Quiz": "72.97", "VerbEval": "63.39", "Winogrande": "73.96", "Arc-Challenge": "83.33", "Arc-Easy": "93.26", "Feqh": "25.14", "halluc_final": "46.10", "P-Hellaswag": "83.17", "Law": "36.33", "AUT Multiple Choice": "49.00", "Parsi Literature": "40.03", "BoolQA": "87.60", "Reading Comprehension": "4.50", "PartExpert": "44.12", "MMLU Pro": "32.60", "Iranian Social Norms": "75.55", "Precision": "BF16", "Model sha": "96b6f1eccf38110c56df3a15bffe176da04bfd80", "Hub License": "gemma"}
{"Model": "gemma-2-27b-it", "model_name_for_query": "google/gemma-2-27b-it", "GeneralKnowledge": "68.11", "GSM8K": "26.70", "DC-Homograph": "60.19", "MC-Homograph": "91.24", "PiQA": "89.69", "Proverb-Quiz": "73.51", "VerbEval": "61.16", "Winogrande": "76.44", "Arc-Challenge": "86.75", "Arc-Easy": "94.22", "Feqh": "24.00", "halluc_final": "13.05", "P-Hellaswag": "83.69", "Law": "34.67", "AUT Multiple Choice": "50.80", "Parsi Literature": "35.91", "BoolQA": "89.80", "Reading Comprehension": "0.10", "PartExpert": "46.60", "MMLU Pro": "36.90", "Iranian Social Norms": "77.38", "Precision": "BF16", "Model sha": "aaf20e6b9f4c0fcf043f6fb2a2068419086d77b0", "Hub License": "gemma"}
{"Model": "aya-expanse-32b", "model_name_for_query": "CohereLabs/aya-expanse-32b", "GeneralKnowledge": "73.72", "GSM8K": "17.50", "DC-Homograph": "62.96", "MC-Homograph": "87.56", "PiQA": "91.19", "Proverb-Quiz": "77.03", "VerbEval": "61.95", "Winogrande": "70.50", "Arc-Challenge": "85.15", "Arc-Easy": "93.37", "Feqh": "37.14", "halluc_final": "44.84", "P-Hellaswag": "81.70", "Law": "38.67", "AUT Multiple Choice": "54.70", "Parsi Literature": "34.75", "BoolQA": "89.70", "Reading Comprehension": "24.90", "PartExpert": "44.29", "MMLU Pro": "32.10", "Iranian Social Norms": "74.94", "Precision": "F16", "Model sha": "94bda1dcb97d260f732d230b832c7c685ae91e23", "Hub License": "cc-by-nc-4.0"}
{"Model": "QwQ-32B", "model_name_for_query": "Qwen/QwQ-32B", "GeneralKnowledge": "60.71", "GSM8K": "29.30", "DC-Homograph": "58.33", "MC-Homograph": "88.25", "PiQA": "81.68", "Proverb-Quiz": "59.19", "VerbEval": "52.31", "Winogrande": "73.07", "Arc-Challenge": "84.94", "Arc-Easy": "90.80", "Feqh": "41.71", "halluc_final": "48.93", "P-Hellaswag": "82.22", "Law": "38.00", "AUT Multiple Choice": "49.30", "Parsi Literature": "37.71", "BoolQA": "88.50", "Reading Comprehension": "17.80", "PartExpert": "46.75", "MMLU Pro": "39.00", "Iranian Social Norms": "70.73", "Precision": "BF16", "Model sha": "976055f8c83f394f35dbd3ab09a285a984907bd0", "Hub License": "apache-2.0"}
{"Model": "gpt-4.1-nano-2025-04-14", "model_name_for_query": null, "GeneralKnowledge": "68.11", "GSM8K": "58.40", "DC-Homograph": "49.07", "MC-Homograph": "78.11", "PiQA": "84.58", "Proverb-Quiz": "67.84", "VerbEval": "66.21", "Winogrande": "60.32", "Arc-Challenge": "81.41", "Arc-Easy": "91.55", "Feqh": "32.00", "halluc_final": "51.24", "P-Hellaswag": "77.96", "Law": "32.67", "AUT Multiple Choice": "46.10", "Parsi Literature": "36.42", "BoolQA": "81.70", "Reading Comprehension": "6.30", "PartExpert": "42.49", "MMLU Pro": "29.90", "Iranian Social Norms": "74.76", "Precision": "unknown", "Model sha": "unknown", "Hub License": "unknown"}
{"Model": "Qwen3-14B", "model_name_for_query": "Qwen/Qwen3-14B", "GeneralKnowledge": "56.38", "GSM8K": "31.10", "DC-Homograph": "55.56", "MC-Homograph": "87.56", "PiQA": "77.18", "Proverb-Quiz": "53.78", "VerbEval": "54.36", "Winogrande": "67.32", "Arc-Challenge": "84.29", "Arc-Easy": "91.02", "Feqh": "29.14", "halluc_final": "44.54", "P-Hellaswag": "80.97", "Law": "34.67", "AUT Multiple Choice": "44.80", "Parsi Literature": "35.39", "BoolQA": "87.60", "Reading Comprehension": "24.40", "PartExpert": "43.22", "MMLU Pro": "35.50", "Iranian Social Norms": "74.51", "Precision": "BF16", "Model sha": "8268fe3026cb304910457689366670e803a6fd56", "Hub License": "apache-2.0"}
{"Model": "gemma-2-9b-it", "model_name_for_query": "google/gemma-2-9b-it", "GeneralKnowledge": "64.03", "GSM8K": "17.40", "DC-Homograph": "59.26", "MC-Homograph": "90.55", "PiQA": "87.09", "Proverb-Quiz": "69.19", "VerbEval": "58.25", "Winogrande": "72.01", "Arc-Challenge": "84.29", "Arc-Easy": "93.16", "Feqh": "29.71", "halluc_final": "50.58", "P-Hellaswag": "80.82", "Law": "33.67", "AUT Multiple Choice": "48.50", "Parsi Literature": "38.10", "BoolQA": "89.70", "Reading Comprehension": "0.10", "PartExpert": "43.03", "MMLU Pro": "33.20", "Iranian Social Norms": "73.84", "Precision": "BF16", "Model sha": "11c9b309abf73637e4b6f9a3fa1e92e615547819", "Hub License": "gemma"}
{"Model": "Qwen3-30B-A3B", "model_name_for_query": "Qwen/Qwen3-30B-A3B", "GeneralKnowledge": "65.05", "GSM8K": "28.80", "DC-Homograph": "57.41", "MC-Homograph": "86.41", "PiQA": "72.47", "Proverb-Quiz": "50.81", "VerbEval": "48.09", "Winogrande": "65.28", "Arc-Challenge": "87.39", "Arc-Easy": "93.58", "Feqh": "23.43", "halluc_final": "3.54", "P-Hellaswag": "83.10", "Law": "35.33", "AUT Multiple Choice": "48.00", "Parsi Literature": "36.55", "BoolQA": "86.20", "Reading Comprehension": "26.40", "PartExpert": "41.13", "MMLU Pro": "36.30", "Iranian Social Norms": "44.21", "Precision": "BF16", "Model sha": "ae659febe817e4b3ebd7355f47792725801204c9", "Hub License": "apache-2.0"}
{"Model": "aya-23-35B", "model_name_for_query": "CohereLabs/aya-23-35B", "GeneralKnowledge": "63.27", "GSM8K": "10.00", "DC-Homograph": "55.56", "MC-Homograph": "83.64", "PiQA": "89.49", "Proverb-Quiz": "67.03", "VerbEval": "47.32", "Winogrande": "65.81", "Arc-Challenge": "77.56", "Arc-Easy": "90.16", "Feqh": "30.29", "halluc_final": "11.72", "P-Hellaswag": "79.87", "Law": "32.00", "AUT Multiple Choice": "48.70", "Parsi Literature": "31.92", "BoolQA": "86.20", "Reading Comprehension": "23.70", "PartExpert": "37.44", "MMLU Pro": "24.10", "Iranian Social Norms": "65.00", "Precision": "F16", "Model sha": "5e72bd5ad83e5e1612ee7f56a0c1a439a7cfb887", "Hub License": "cc-by-nc-4.0"}
{"Model": "Qwen3-8B", "model_name_for_query": "Qwen/Qwen3-8B", "GeneralKnowledge": "49.23", "GSM8K": "25.70", "DC-Homograph": "50.93", "MC-Homograph": "82.95", "PiQA": "75.98", "Proverb-Quiz": "51.89", "VerbEval": "47.93", "Winogrande": "61.91", "Arc-Challenge": "80.24", "Arc-Easy": "87.38", "Feqh": "28.00", "halluc_final": "38.46", "P-Hellaswag": "80.38", "Law": "29.67", "AUT Multiple Choice": "46.00", "Parsi Literature": "33.20", "BoolQA": "86.40", "Reading Comprehension": "25.00", "PartExpert": "38.31", "MMLU Pro": "31.10", "Iranian Social Norms": "63.41", "Precision": "BF16", "Model sha": "9c925d64d72725edaf899c6cb9c377fd0709d9c5", "Hub License": "apache-2.0"}
{"Model": "aya-expanse-8b", "model_name_for_query": "CohereLabs/aya-expanse-8b", "GeneralKnowledge": "58.67", "GSM8K": "9.80", "DC-Homograph": "51.85", "MC-Homograph": "80.65", "PiQA": "80.18", "Proverb-Quiz": "60.00", "VerbEval": "48.06", "Winogrande": "64.04", "Arc-Challenge": "71.47", "Arc-Easy": "84.60", "Feqh": "29.71", "halluc_final": "23.52", "P-Hellaswag": "76.49", "Law": "32.33", "AUT Multiple Choice": "45.80", "Parsi Literature": "34.49", "BoolQA": "82.30", "Reading Comprehension": "20.10", "PartExpert": "35.56", "MMLU Pro": "21.90", "Iranian Social Norms": "71.71", "Precision": "F16", "Model sha": "0ad43ec1e309e1351faa4b1d22713c065e37359a", "Hub License": "cc-by-nc-4.0"}
{"Model": "Hormoz-8B", "model_name_for_query": "mann-e/Hormoz-8B", "GeneralKnowledge": "58.42", "GSM8K": "10.00", "DC-Homograph": "50.93", "MC-Homograph": "80.65", "PiQA": "80.68", "Proverb-Quiz": "60.27", "VerbEval": "47.29", "Winogrande": "64.39", "Arc-Challenge": "70.41", "Arc-Easy": "84.28", "Feqh": "28.57", "halluc_final": "23.66", "P-Hellaswag": "76.05", "Law": "30.33", "AUT Multiple Choice": "46.70", "Parsi Literature": "33.08", "BoolQA": "79.80", "Reading Comprehension": "19.60", "PartExpert": "35.68", "MMLU Pro": "21.50", "Iranian Social Norms": "70.30", "Precision": "F32", "Model sha": "c91bcecb236c90523f70db7efa23dd794e9b4cff", "Hub License": "mit"}
{"Model": "Llama-3.1-8B-Instruct", "model_name_for_query": "meta-llama/Llama-3.1-8B-Instruct", "GeneralKnowledge": "52.55", "GSM8K": "12.00", "DC-Homograph": "43.52", "MC-Homograph": "79.03", "PiQA": "70.07", "Proverb-Quiz": "47.57", "VerbEval": "42.91", "Winogrande": "54.21", "Arc-Challenge": "68.91", "Arc-Easy": "80.11", "Feqh": "29.71", "halluc_final": "6.76", "P-Hellaswag": "79.79", "Law": "32.67", "AUT Multiple Choice": "44.90", "Parsi Literature": "32.30", "BoolQA": "82.70", "Reading Comprehension": "24.50", "PartExpert": "37.62", "MMLU Pro": "25.70", "Iranian Social Norms": "70.98", "Precision": "BF16", "Model sha": "0e9e39f249a16976918f6564b8830bc894c89659", "Hub License": "llama3.1"}
{"Model": "Qwen2.5-7B-Instruct", "model_name_for_query": "Qwen/Qwen2.5-7B-Instruct", "GeneralKnowledge": "51.02", "GSM8K": "18.00", "DC-Homograph": "52.78", "MC-Homograph": "79.26", "PiQA": "71.07", "Proverb-Quiz": "47.84", "VerbEval": "44.44", "Winogrande": "61.91", "Arc-Challenge": "72.33", "Arc-Easy": "81.50", "Feqh": "36.57", "halluc_final": "34.89", "P-Hellaswag": "74.80", "Law": "32.33", "AUT Multiple Choice": "42.60", "Parsi Literature": "31.27", "BoolQA": "82.50", "Reading Comprehension": "17.60", "PartExpert": "37.24", "MMLU Pro": "26.70", "Iranian Social Norms": "64.51", "Precision": "BF16", "Model sha": "a09a35458c702b33eeacc393d103063234e8bc28", "Hub License": "apache-2.0"}
{"Model": "aya-23-8B", "model_name_for_query": "CohereLabs/aya-23-8B", "GeneralKnowledge": "52.30", "GSM8K": "6.10", "DC-Homograph": "52.78", "MC-Homograph": "76.27", "PiQA": "80.78", "Proverb-Quiz": "44.32", "VerbEval": "39.30", "Winogrande": "57.13", "Arc-Challenge": "63.68", "Arc-Easy": "81.39", "Feqh": "29.14", "halluc_final": "0.60", "P-Hellaswag": "75.83", "Law": "28.33", "AUT Multiple Choice": "42.90", "Parsi Literature": "31.27", "BoolQA": "72.30", "Reading Comprehension": "23.40", "PartExpert": "33.33", "MMLU Pro": "19.90", "Iranian Social Norms": "70.73", "Precision": "F16", "Model sha": "2a1a63b24af8f591616fdf58936ee576d63ca835", "Hub License": "cc-by-nc-4.0"}
{"Model": "Qwen2-7B-Instruct", "model_name_for_query": "Qwen/Qwen2-7B-Instruct", "GeneralKnowledge": "52.04", "GSM8K": "14.50", "DC-Homograph": "54.63", "MC-Homograph": "72.81", "PiQA": "70.97", "Proverb-Quiz": "50.54", "VerbEval": "40.62", "Winogrande": "60.94", "Arc-Challenge": "69.12", "Arc-Easy": "80.75", "Feqh": "28.00", "halluc_final": "25.93", "P-Hellaswag": "76.71", "Law": "28.33", "AUT Multiple Choice": "40.40", "Parsi Literature": "31.40", "BoolQA": "79.00", "Reading Comprehension": "10.90", "PartExpert": "36.31", "MMLU Pro": "23.80", "Iranian Social Norms": "62.20", "Precision": "BF16", "Model sha": "f2826a00ceef68f0f2b946d945ecc0477ce4450c", "Hub License": "apache-2.0"}
{"Model": "Meta-Llama-3-8B-Instruct", "model_name_for_query": "meta-llama/Meta-Llama-3-8B-Instruct", "GeneralKnowledge": "52.04", "GSM8K": "10.40", "DC-Homograph": "41.67", "MC-Homograph": "81.11", "PiQA": "70.97", "Proverb-Quiz": "42.97", "VerbEval": "38.93", "Winogrande": "56.95", "Arc-Challenge": "66.77", "Arc-Easy": "76.47", "Feqh": "33.71", "halluc_final": "33.23", "P-Hellaswag": "76.71", "Law": "32.00", "AUT Multiple Choice": "45.00", "Parsi Literature": "29.99", "BoolQA": "82.50", "Reading Comprehension": "19.40", "PartExpert": "36.30", "MMLU Pro": "26.00", "Iranian Social Norms": "70.06", "Precision": "BF16", "Model sha": "5f0b02c75b57c5855da9ae460ce51323ea669d8a", "Hub License": "llama3"}
{"Model": "gemma-3-4b-it", "model_name_for_query": "google/gemma-3-4b-it", "GeneralKnowledge": "45.92", "GSM8K": "9.60", "DC-Homograph": "42.59", "MC-Homograph": "72.58", "PiQA": "72.77", "Proverb-Quiz": "53.78", "VerbEval": "45.30", "Winogrande": "55.09", "Arc-Challenge": "63.46", "Arc-Easy": "79.57", "Feqh": "21.14", "halluc_final": "46.04", "P-Hellaswag": "73.84", "Law": "27.67", "AUT Multiple Choice": "42.50", "Parsi Literature": "30.24", "BoolQA": "78.60", "Reading Comprehension": "5.50", "PartExpert": "34.70", "MMLU Pro": "22.80", "Iranian Social Norms": "65.55", "Precision": "BF16", "Model sha": "093f9f388b31de276ce2de164bdc2081324b9767", "Hub License": "gemma"}
{"Model": "Qwen3-4B", "model_name_for_query": "Qwen/Qwen3-4B", "GeneralKnowledge": "43.88", "GSM8K": "20.10", "DC-Homograph": "38.89", "MC-Homograph": "76.27", "PiQA": "66.07", "Proverb-Quiz": "45.41", "VerbEval": "41.23", "Winogrande": "54.56", "Arc-Challenge": "73.61", "Arc-Easy": "83.42", "Feqh": "30.29", "halluc_final": "25.29", "P-Hellaswag": "78.03", "Law": "30.33", "AUT Multiple Choice": "40.60", "Parsi Literature": "31.79", "BoolQA": "81.90", "Reading Comprehension": "21.30", "PartExpert": "37.28", "MMLU Pro": "28.90", "Iranian Social Norms": "68.72", "Precision": "BF16", "Model sha": "531c80e289d6cff3a7cd8c0db8110231d23a6f7a", "Hub License": "apache-2.0"}
{"Model": "Hermes-3-Llama-3.1-8B", "model_name_for_query": "NousResearch/Hermes-3-Llama-3.1-8B", "GeneralKnowledge": "49.49", "GSM8K": "10.20", "DC-Homograph": "44.44", "MC-Homograph": "79.72", "PiQA": "70.37", "Proverb-Quiz": "47.84", "VerbEval": "48.94", "Winogrande": "55.18", "Arc-Challenge": "65.28", "Arc-Easy": "78.07", "Feqh": "30.29", "halluc_final": "45.20", "P-Hellaswag": "73.99", "Law": "31.67", "AUT Multiple Choice": "42.10", "Parsi Literature": "30.63", "BoolQA": "83.50", "Reading Comprehension": "13.50", "PartExpert": "35.61", "MMLU Pro": "24.10", "Iranian Social Norms": "54.88", "Precision": "BF16", "Model sha": "896ea440e5a9e6070e3d8a2774daf2b481ab425b", "Hub License": "llama3"}
{"Model": "Dorna2-Llama3.1-8B-Instruct", "model_name_for_query": "PartAI/Dorna2-Llama3.1-8B-Instruct", "GeneralKnowledge": "48.72", "GSM8K": "11.90", "DC-Homograph": "44.44", "MC-Homograph": "72.81", "PiQA": "69.97", "Proverb-Quiz": "42.97", "VerbEval": "42.06", "Winogrande": "54.47", "Arc-Challenge": "67.63", "Arc-Easy": "78.72", "Feqh": "33.71", "halluc_final": "33.91", "P-Hellaswag": "78.91", "Law": "29.67", "AUT Multiple Choice": "41.00", "Parsi Literature": "27.28", "BoolQA": "81.80", "Reading Comprehension": "21.90", "PartExpert": "35.65", "MMLU Pro": "22.70", "Iranian Social Norms": "49.82", "Precision": "BF16", "Model sha": "b78e4bd261100c96e511ed5090ca0ce0e1f4b340", "Hub License": "llama3.1"}
{"Model": "Llama-3.1-8B", "model_name_for_query": "meta-llama/Llama-3.1-8B", "GeneralKnowledge": "49.23", "GSM8K": "10.80", "DC-Homograph": "46.30", "MC-Homograph": "72.12", "PiQA": "66.47", "Proverb-Quiz": "35.95", "VerbEval": "39.91", "Winogrande": "54.92", "Arc-Challenge": "63.35", "Arc-Easy": "75.08", "Feqh": "30.86", "halluc_final": "45.02", "P-Hellaswag": "76.34", "Law": "33.00", "AUT Multiple Choice": "42.60", "Parsi Literature": "27.41", "BoolQA": "71.60", "Reading Comprehension": "19.70", "PartExpert": "35.92", "MMLU Pro": "22.20", "Iranian Social Norms": "61.83", "Precision": "BF16", "Model sha": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", "Hub License": "llama3.1"}
{"Model": "Meta-Llama-3-8B", "model_name_for_query": "meta-llama/Meta-Llama-3-8B", "GeneralKnowledge": "47.70", "GSM8K": "10.30", "DC-Homograph": "41.67", "MC-Homograph": "74.42", "PiQA": "64.16", "Proverb-Quiz": "37.30", "VerbEval": "39.46", "Winogrande": "55.36", "Arc-Challenge": "62.07", "Arc-Easy": "75.83", "Feqh": "27.43", "halluc_final": "37.00", "P-Hellaswag": "76.49", "Law": "35.67", "AUT Multiple Choice": "42.50", "Parsi Literature": "28.19", "BoolQA": "75.20", "Reading Comprehension": "19.50", "PartExpert": "35.10", "MMLU Pro": "22.80", "Iranian Social Norms": "54.02", "Precision": "BF16", "Model sha": "8cde5ca8380496c9a6cc7ef3a8b46a0372a1d920", "Hub License": "llama3"}
{"Model": "Dorna-Llama3-8B-Instruct", "model_name_for_query": "PartAI/Dorna-Llama3-8B-Instruct", "GeneralKnowledge": "41.33", "GSM8K": "10.30", "DC-Homograph": "40.74", "MC-Homograph": "74.65", "PiQA": "66.17", "Proverb-Quiz": "35.41", "VerbEval": "34.74", "Winogrande": "56.16", "Arc-Challenge": "59.94", "Arc-Easy": "70.70", "Feqh": "29.14", "halluc_final": "31.49", "P-Hellaswag": "75.68", "Law": "25.33", "AUT Multiple Choice": "36.90", "Parsi Literature": "27.54", "BoolQA": "80.10", "Reading Comprehension": "21.80", "PartExpert": "34.49", "MMLU Pro": "22.00", "Iranian Social Norms": "69.39", "Precision": "BF16", "Model sha": "fb268bb51b950b4db5b7c82c1b73d9e803020eed", "Hub License": "llama3"}
{"Model": "gemma-2-2b-it", "model_name_for_query": "google/gemma-2-2b-it", "GeneralKnowledge": "32.91", "GSM8K": "6.40", "DC-Homograph": "47.22", "MC-Homograph": "74.65", "PiQA": "66.87", "Proverb-Quiz": "45.68", "VerbEval": "36.18", "Winogrande": "54.74", "Arc-Challenge": "57.91", "Arc-Easy": "70.48", "Feqh": "25.71", "halluc_final": "39.02", "P-Hellaswag": "69.88", "Law": "32.67", "AUT Multiple Choice": "36.90", "Parsi Literature": "30.76", "BoolQA": "72.40", "Reading Comprehension": "0.30", "PartExpert": "31.31", "MMLU Pro": "18.20", "Iranian Social Norms": "40.18", "Precision": "BF16", "Model sha": "299a8560bedf22ed1c72a8a11e7dce4a7f9f51f8", "Hub License": "gemma"}
{"Model": "PersianMind-v1.0", "model_name_for_query": "universitytehran/PersianMind-v1.0", "GeneralKnowledge": "30.61", "GSM8K": "2.30", "DC-Homograph": "41.67", "MC-Homograph": "65.90", "PiQA": "59.76", "Proverb-Quiz": "34.32", "VerbEval": "26.26", "Winogrande": "52.17", "Arc-Challenge": "54.59", "Arc-Easy": "69.73", "Feqh": "26.29", "halluc_final": "2.37", "P-Hellaswag": "63.78", "Law": "27.33", "AUT Multiple Choice": "36.10", "Parsi Literature": "27.80", "BoolQA": "66.30", "Reading Comprehension": "-", "PartExpert": "29.75", "MMLU Pro": "14.50", "Iranian Social Norms": "48.41", "Precision": "F32", "Model sha": "af603eeb074138e2a613fbc95d89f018afbd3041", "Hub License": "cc-by-nc-sa-4.0"}
{"Model": "gemma-3-1b-it", "model_name_for_query": "google/gemma-3-1b-it", "GeneralKnowledge": "26.02", "GSM8K": "4.30", "DC-Homograph": "49.07", "MC-Homograph": "51.15", "PiQA": "57.66", "Proverb-Quiz": "28.92", "VerbEval": "27.67", "Winogrande": "50.58", "Arc-Challenge": "36.43", "Arc-Easy": "46.10", "Feqh": "28.00", "halluc_final": "54.94", "P-Hellaswag": "63.92", "Law": "20.33", "AUT Multiple Choice": "29.10", "Parsi Literature": "24.97", "BoolQA": "63.90", "Reading Comprehension": "2.10", "PartExpert": "27.22", "MMLU Pro": "13.70", "Iranian Social Norms": "51.22", "Precision": "BF16", "Model sha": "dcc83ea841ab6100d6b47a070329e1ba4cf78752", "Hub License": "gemma"}
{"Model": "Llama-3.2-1B-Instruct", "model_name_for_query": "meta-llama/Llama-3.2-1B-Instruct", "GeneralKnowledge": "29.59", "GSM8K": "4.10", "DC-Homograph": "50.93", "MC-Homograph": "52.53", "PiQA": "54.05", "Proverb-Quiz": "28.65", "VerbEval": "26.11", "Winogrande": "49.07", "Arc-Challenge": "37.50", "Arc-Easy": "47.38", "Feqh": "31.43", "halluc_final": "3.34", "P-Hellaswag": "55.40", "Law": "24.00", "AUT Multiple Choice": "29.90", "Parsi Literature": "27.03", "BoolQA": "64.10", "Reading Comprehension": "7.20", "PartExpert": "28.59", "MMLU Pro": "15.70", "Iranian Social Norms": "37.44", "Precision": "BF16", "Model sha": "9213176726f574b556790deb65791e0c5aa438b6", "Hub License": "llama3.2"}
{"Model": "Maral-7B-alpha-1", "model_name_for_query": "MaralGPT/Maral-7B-alpha-1", "GeneralKnowledge": "31.63", "GSM8K": "6.10", "DC-Homograph": "43.52", "MC-Homograph": "47.47", "PiQA": "51.95", "Proverb-Quiz": "22.16", "VerbEval": "28.96", "Winogrande": "49.42", "Arc-Challenge": "37.29", "Arc-Easy": "43.10", "Feqh": "26.29", "halluc_final": "0.00", "P-Hellaswag": "60.18", "Law": "26.33", "AUT Multiple Choice": "28.40", "Parsi Literature": "26.77", "BoolQA": "62.70", "Reading Comprehension": "10.80", "PartExpert": "27.10", "MMLU Pro": "14.80", "Iranian Social Norms": "24.63", "Precision": "BF16", "Model sha": "2ab5ca2a0d1a4454a78b4ca911e595bb9da2fe2f", "Hub License": "mit"}

Added rows 1–40 (the same records, each with a new "#Params (B)" field):
{"Model": "gpt-4o-2024-08-06", "model_name_for_query": null, "GeneralKnowledge": "90.82", "GSM8K": "73.10", "DC-Homograph": "87.04", "MC-Homograph": "95.62", "PiQA": "95.10", "Proverb-Quiz": "96.76", "VerbEval": "85.89", "Winogrande": "86.18", "Arc-Challenge": "95.09", "Arc-Easy": "97.22", "Feqh": "46.86", "halluc_final": "74.64", "P-Hellaswag": "85.53", "Law": "47.67", "AUT Multiple Choice": "67.70", "Parsi Literature": "45.95", "BoolQA": "94.10", "Reading Comprehension": "10.00", "PartExpert": "57.36", "MMLU Pro": "47.10", "Iranian Social Norms": "76.89", "#Params (B)": "unknown", "Precision": "unknown", "Model sha": "unknown", "Hub License": "unknown"}
{"Model": "gpt-4.1-2025-04-14", "model_name_for_query": null, "GeneralKnowledge": "90.82", "GSM8K": "25.30", "DC-Homograph": "89.81", "MC-Homograph": "95.39", "PiQA": "95.90", "Proverb-Quiz": "95.14", "VerbEval": "83.04", "Winogrande": "85.92", "Arc-Challenge": "95.30", "Arc-Easy": "96.68", "Feqh": "52.00", "halluc_final": "77.43", "P-Hellaswag": "85.67", "Law": "53.67", "AUT Multiple Choice": "66.60", "Parsi Literature": "45.82", "BoolQA": "94.70", "Reading Comprehension": "3.60", "PartExpert": "59.92", "MMLU Pro": "50.50", "Iranian Social Norms": "77.56", "#Params (B)": "unknown", "Precision": "unknown", "Model sha": "unknown", "Hub License": "unknown"}
{"Model": "google__gemini-2.0-flash-001", "model_name_for_query": null, "GeneralKnowledge": "87.76", "GSM8K": "53.70", "DC-Homograph": "79.63", "MC-Homograph": "91.71", "PiQA": "90.59", "Proverb-Quiz": "95.14", "VerbEval": "85.15", "Winogrande": "78.74", "Arc-Challenge": "91.35", "Arc-Easy": "97.22", "Feqh": "53.14", "halluc_final": "68.87", "P-Hellaswag": "82.95", "Law": "45.67", "AUT Multiple Choice": "60.90", "Parsi Literature": "44.02", "BoolQA": "91.30", "Reading Comprehension": "23.90", "PartExpert": "59.50", "MMLU Pro": "47.80", "Iranian Social Norms": "77.68", "#Params (B)": "unknown", "Precision": "unknown", "Model sha": "unknown", "Hub License": "unknown"}
{"Model": "deepseek-v3-03-24", "model_name_for_query": null, "GeneralKnowledge": "85.71", "GSM8K": "53.10", "DC-Homograph": "83.33", "MC-Homograph": "94.24", "PiQA": "91.39", "Proverb-Quiz": "84.86", "VerbEval": "81.11", "Winogrande": "76.71", "Arc-Challenge": "92.31", "Arc-Easy": "96.58", "Feqh": "42.29", "halluc_final": "55.54", "P-Hellaswag": "85.30", "Law": "46.00", "AUT Multiple Choice": "65.60", "Parsi Literature": "44.66", "BoolQA": "95.30", "Reading Comprehension": "20.70", "PartExpert": "58.46", "MMLU Pro": "53.40", "Iranian Social Norms": "71.71", "#Params (B)": "unknown", "Precision": "unknown", "Model sha": "unknown", "Hub License": "unknown"}
{"Model": "gpt-4.1-mini-2025-04-14", "model_name_for_query": null, "GeneralKnowledge": "79.34", "GSM8K": "60.30", "DC-Homograph": "66.67", "MC-Homograph": "94.24", "PiQA": "92.69", "Proverb-Quiz": "82.97", "VerbEval": "77.99", "Winogrande": "80.07", "Arc-Challenge": "91.88", "Arc-Easy": "96.15", "Feqh": "37.71", "halluc_final": "66.55", "P-Hellaswag": "84.57", "Law": "44.33", "AUT Multiple Choice": "53.50", "Parsi Literature": "41.18", "BoolQA": "93.70", "Reading Comprehension": "7.70", "PartExpert": "54.37", "MMLU Pro": "47.80", "Iranian Social Norms": "73.35", "#Params (B)": "unknown", "Precision": "unknown", "Model sha": "unknown", "Hub License": "unknown"}
{"Model": "gpt-4o-mini-2024-07-18", "model_name_for_query": null, "GeneralKnowledge": "79.08", "GSM8K": "60.90", "DC-Homograph": "68.52", "MC-Homograph": "90.09", "PiQA": "90.89", "Proverb-Quiz": "84.05", "VerbEval": "74.23", "Winogrande": "75.73", "Arc-Challenge": "86.43", "Arc-Easy": "94.01", "Feqh": "41.71", "halluc_final": "82.04", "P-Hellaswag": "83.84", "Law": "34.00", "AUT Multiple Choice": "54.80", "Parsi Literature": "40.93", "BoolQA": "93.30", "Reading Comprehension": "20.50", "PartExpert": "42.54", "MMLU Pro": "34.80", "Iranian Social Norms": "71.59", "#Params (B)": "unknown", "Precision": "unknown", "Model sha": "unknown", "Hub License": "unknown"}
{"Model": "google__gemini-2.0-flash-lite-001", "model_name_for_query": null, "GeneralKnowledge": "84.18", "GSM8K": "39.70", "DC-Homograph": "60.19", "MC-Homograph": "87.79", "PiQA": "85.29", "Proverb-Quiz": "91.35", "VerbEval": "81.39", "Winogrande": "75.64", "Arc-Challenge": "89.64", "Arc-Easy": "93.48", "Feqh": "41.71", "halluc_final": "67.32", "P-Hellaswag": "83.54", "Law": "43.00", "AUT Multiple Choice": "58.50", "Parsi Literature": "43.89", "BoolQA": "92.60", "Reading Comprehension": "26.70", "PartExpert": "54.15", "MMLU Pro": "41.20", "Iranian Social Norms": "70.49", "#Params (B)": "unknown", "Precision": "unknown", "Model sha": "unknown", "Hub License": "unknown"}
{"Model": "Qwen2.5-32B-Instruct", "model_name_for_query": "Qwen/Qwen2.5-32B-Instruct", "GeneralKnowledge": "61.73", "GSM8K": "50.10", "DC-Homograph": "67.59", "MC-Homograph": "91.47", "PiQA": "83.98", "Proverb-Quiz": "63.24", "VerbEval": "54.58", "Winogrande": "80.07", "Arc-Challenge": "85.15", "Arc-Easy": "91.87", "Feqh": "38.86", "halluc_final": "59.22", "P-Hellaswag": "82.07", "Law": "42.33", "AUT Multiple Choice": "50.40", "Parsi Literature": "40.41", "BoolQA": "93.40", "Reading Comprehension": "7.40", "PartExpert": "46.78", "MMLU Pro": "37.40", "Iranian Social Norms": "70.00", "#Params (B)": 32.76, "Precision": "BF16", "Model sha": "5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd", "Hub License": "apache-2.0"}
{"Model": "gemma-3-27b-it", "model_name_for_query": "google/gemma-3-27b-it", "GeneralKnowledge": "73.72", "GSM8K": "28.30", "DC-Homograph": "63.89", "MC-Homograph": "92.40", "PiQA": "87.29", "Proverb-Quiz": "78.92", "VerbEval": "66.02", "Winogrande": "78.12", "Arc-Challenge": "88.35", "Arc-Easy": "94.22", "Feqh": "24.57", "halluc_final": "60.15", "P-Hellaswag": "83.39", "Law": "36.33", "AUT Multiple Choice": "55.20", "Parsi Literature": "40.93", "BoolQA": "91.40", "Reading Comprehension": "1.20", "PartExpert": "49.32", "MMLU Pro": "36.60", "Iranian Social Norms": "70.49", "#Params (B)": 27.43, "Precision": "BF16", "Model sha": "005ad3404e59d6023443cb575daa05336842228a", "Hub License": "gemma"}
{"Model": "Qwen3-32B", "model_name_for_query": "Qwen/Qwen3-32B", "GeneralKnowledge": "67.60", "GSM8K": "37.90", "DC-Homograph": "51.85", "MC-Homograph": "89.63", "PiQA": "87.69", "Proverb-Quiz": "64.59", "VerbEval": "56.35", "Winogrande": "71.48", "Arc-Challenge": "91.13", "Arc-Easy": "94.22", "Feqh": "29.71", "halluc_final": "47.50", "P-Hellaswag": "83.47", "Law": "37.00", "AUT Multiple Choice": "48.30", "Parsi Literature": "39.12", "BoolQA": "91.10", "Reading Comprehension": "22.00", "PartExpert": "50.06", "MMLU Pro": "42.80", "Iranian Social Norms": "73.48", "#Params (B)": 32.76, "Precision": "BF16", "Model sha": "d47b0d4ae4b48fde975756bf360a63a9cca8d470", "Hub License": "apache-2.0"}
{"Model": "QwQ-32B-Preview", "model_name_for_query": "Qwen/QwQ-32B-Preview", "GeneralKnowledge": "63.27", "GSM8K": "34.70", "DC-Homograph": "61.11", "MC-Homograph": "88.25", "PiQA": "81.28", "Proverb-Quiz": "58.11", "VerbEval": "51.97", "Winogrande": "75.64", "Arc-Challenge": "85.58", "Arc-Easy": "91.44", "Feqh": "41.14", "halluc_final": "38.84", "P-Hellaswag": "84.13", "Law": "43.00", "AUT Multiple Choice": "50.60", "Parsi Literature": "39.77", "BoolQA": "88.50", "Reading Comprehension": "23.60", "PartExpert": "47.39", "MMLU Pro": "37.30", "Iranian Social Norms": "72.26", "#Params (B)": 32.76, "Precision": "BF16", "Model sha": "91906fe41a48b6a89ce2970abfd1269eefee170e", "Hub License": "apache-2.0"}
{"Model": "gemma-3-12b-it", "model_name_for_query": "google/gemma-3-12b-it", "GeneralKnowledge": "68.37", "GSM8K": "20.20", "DC-Homograph": "67.59", "MC-Homograph": "91.24", "PiQA": "87.19", "Proverb-Quiz": "72.97", "VerbEval": "63.39", "Winogrande": "73.96", "Arc-Challenge": "83.33", "Arc-Easy": "93.26", "Feqh": "25.14", "halluc_final": "46.10", "P-Hellaswag": "83.17", "Law": "36.33", "AUT Multiple Choice": "49.00", "Parsi Literature": "40.03", "BoolQA": "87.60", "Reading Comprehension": "4.50", "PartExpert": "44.12", "MMLU Pro": "32.60", "Iranian Social Norms": "75.55", "#Params (B)": 12.18, "Precision": "BF16", "Model sha": "96b6f1eccf38110c56df3a15bffe176da04bfd80", "Hub License": "gemma"}
{"Model": "gemma-2-27b-it", "model_name_for_query": "google/gemma-2-27b-it", "GeneralKnowledge": "68.11", "GSM8K": "26.70", "DC-Homograph": "60.19", "MC-Homograph": "91.24", "PiQA": "89.69", "Proverb-Quiz": "73.51", "VerbEval": "61.16", "Winogrande": "76.44", "Arc-Challenge": "86.75", "Arc-Easy": "94.22", "Feqh": "24.00", "halluc_final": "13.05", "P-Hellaswag": "83.69", "Law": "34.67", "AUT Multiple Choice": "50.80", "Parsi Literature": "35.91", "BoolQA": "89.80", "Reading Comprehension": "0.10", "PartExpert": "46.60", "MMLU Pro": "36.90", "Iranian Social Norms": "77.38", "#Params (B)": 27.22, "Precision": "BF16", "Model sha": "aaf20e6b9f4c0fcf043f6fb2a2068419086d77b0", "Hub License": "gemma"}
{"Model": "aya-expanse-32b", "model_name_for_query": "CohereLabs/aya-expanse-32b", "GeneralKnowledge": "73.72", "GSM8K": "17.50", "DC-Homograph": "62.96", "MC-Homograph": "87.56", "PiQA": "91.19", "Proverb-Quiz": "77.03", "VerbEval": "61.95", "Winogrande": "70.50", "Arc-Challenge": "85.15", "Arc-Easy": "93.37", "Feqh": "37.14", "halluc_final": "44.84", "P-Hellaswag": "81.70", "Law": "38.67", "AUT Multiple Choice": "54.70", "Parsi Literature": "34.75", "BoolQA": "89.70", "Reading Comprehension": "24.90", "PartExpert": "44.29", "MMLU Pro": "32.10", "Iranian Social Norms": "74.94", "#Params (B)": 32.29, "Precision": "F16", "Model sha": "94bda1dcb97d260f732d230b832c7c685ae91e23", "Hub License": "cc-by-nc-4.0"}
{"Model": "QwQ-32B", "model_name_for_query": "Qwen/QwQ-32B", "GeneralKnowledge": "60.71", "GSM8K": "29.30", "DC-Homograph": "58.33", "MC-Homograph": "88.25", "PiQA": "81.68", "Proverb-Quiz": "59.19", "VerbEval": "52.31", "Winogrande": "73.07", "Arc-Challenge": "84.94", "Arc-Easy": "90.80", "Feqh": "41.71", "halluc_final": "48.93", "P-Hellaswag": "82.22", "Law": "38.00", "AUT Multiple Choice": "49.30", "Parsi Literature": "37.71", "BoolQA": "88.50", "Reading Comprehension": "17.80", "PartExpert": "46.75", "MMLU Pro": "39.00", "Iranian Social Norms": "70.73", "#Params (B)": 32.76, "Precision": "BF16", "Model sha": "976055f8c83f394f35dbd3ab09a285a984907bd0", "Hub License": "apache-2.0"}
{"Model": "gpt-4.1-nano-2025-04-14", "model_name_for_query": null, "GeneralKnowledge": "68.11", "GSM8K": "58.40", "DC-Homograph": "49.07", "MC-Homograph": "78.11", "PiQA": "84.58", "Proverb-Quiz": "67.84", "VerbEval": "66.21", "Winogrande": "60.32", "Arc-Challenge": "81.41", "Arc-Easy": "91.55", "Feqh": "32.00", "halluc_final": "51.24", "P-Hellaswag": "77.96", "Law": "32.67", "AUT Multiple Choice": "46.10", "Parsi Literature": "36.42", "BoolQA": "81.70", "Reading Comprehension": "6.30", "PartExpert": "42.49", "MMLU Pro": "29.90", "Iranian Social Norms": "74.76", "#Params (B)": "unknown", "Precision": "unknown", "Model sha": "unknown", "Hub License": "unknown"}
{"Model": "Qwen3-14B", "model_name_for_query": "Qwen/Qwen3-14B", "GeneralKnowledge": "56.38", "GSM8K": "31.10", "DC-Homograph": "55.56", "MC-Homograph": "87.56", "PiQA": "77.18", "Proverb-Quiz": "53.78", "VerbEval": "54.36", "Winogrande": "67.32", "Arc-Challenge": "84.29", "Arc-Easy": "91.02", "Feqh": "29.14", "halluc_final": "44.54", "P-Hellaswag": "80.97", "Law": "34.67", "AUT Multiple Choice": "44.80", "Parsi Literature": "35.39", "BoolQA": "87.60", "Reading Comprehension": "24.40", "PartExpert": "43.22", "MMLU Pro": "35.50", "Iranian Social Norms": "74.51", "#Params (B)": 14.76, "Precision": "BF16", "Model sha": "8268fe3026cb304910457689366670e803a6fd56", "Hub License": "apache-2.0"}
{"Model": "gemma-2-9b-it", "model_name_for_query": "google/gemma-2-9b-it", "GeneralKnowledge": "64.03", "GSM8K": "17.40", "DC-Homograph": "59.26", "MC-Homograph": "90.55", "PiQA": "87.09", "Proverb-Quiz": "69.19", "VerbEval": "58.25", "Winogrande": "72.01", "Arc-Challenge": "84.29", "Arc-Easy": "93.16", "Feqh": "29.71", "halluc_final": "50.58", "P-Hellaswag": "80.82", "Law": "33.67", "AUT Multiple Choice": "48.50", "Parsi Literature": "38.10", "BoolQA": "89.70", "Reading Comprehension": "0.10", "PartExpert": "43.03", "MMLU Pro": "33.20", "Iranian Social Norms": "73.84", "#Params (B)": 9.24, "Precision": "BF16", "Model sha": "11c9b309abf73637e4b6f9a3fa1e92e615547819", "Hub License": "gemma"}
{"Model": "Qwen3-30B-A3B", "model_name_for_query": "Qwen/Qwen3-30B-A3B", "GeneralKnowledge": "65.05", "GSM8K": "28.80", "DC-Homograph": "57.41", "MC-Homograph": "86.41", "PiQA": "72.47", "Proverb-Quiz": "50.81", "VerbEval": "48.09", "Winogrande": "65.28", "Arc-Challenge": "87.39", "Arc-Easy": "93.58", "Feqh": "23.43", "halluc_final": "3.54", "P-Hellaswag": "83.10", "Law": "35.33", "AUT Multiple Choice": "48.00", "Parsi Literature": "36.55", "BoolQA": "86.20", "Reading Comprehension": "26.40", "PartExpert": "41.13", "MMLU Pro": "36.30", "Iranian Social Norms": "44.21", "#Params (B)": 30.53, "Precision": "BF16", "Model sha": "ae659febe817e4b3ebd7355f47792725801204c9", "Hub License": "apache-2.0"}
{"Model": "aya-23-35B", "model_name_for_query": "CohereLabs/aya-23-35B", "GeneralKnowledge": "63.27", "GSM8K": "10.00", "DC-Homograph": "55.56", "MC-Homograph": "83.64", "PiQA": "89.49", "Proverb-Quiz": "67.03", "VerbEval": "47.32", "Winogrande": "65.81", "Arc-Challenge": "77.56", "Arc-Easy": "90.16", "Feqh": "30.29", "halluc_final": "11.72", "P-Hellaswag": "79.87", "Law": "32.00", "AUT Multiple Choice": "48.70", "Parsi Literature": "31.92", "BoolQA": "86.20", "Reading Comprehension": "23.70", "PartExpert": "37.44", "MMLU Pro": "24.10", "Iranian Social Norms": "65.00", "#Params (B)": 34.98, "Precision": "F16", "Model sha": "5e72bd5ad83e5e1612ee7f56a0c1a439a7cfb887", "Hub License": "cc-by-nc-4.0"}
{"Model": "Qwen3-8B", "model_name_for_query": "Qwen/Qwen3-8B", "GeneralKnowledge": "49.23", "GSM8K": "25.70", "DC-Homograph": "50.93", "MC-Homograph": "82.95", "PiQA": "75.98", "Proverb-Quiz": "51.89", "VerbEval": "47.93", "Winogrande": "61.91", "Arc-Challenge": "80.24", "Arc-Easy": "87.38", "Feqh": "28.00", "halluc_final": "38.46", "P-Hellaswag": "80.38", "Law": "29.67", "AUT Multiple Choice": "46.00", "Parsi Literature": "33.20", "BoolQA": "86.40", "Reading Comprehension": "25.00", "PartExpert": "38.31", "MMLU Pro": "31.10", "Iranian Social Norms": "63.41", "#Params (B)": 8.19, "Precision": "BF16", "Model sha": "9c925d64d72725edaf899c6cb9c377fd0709d9c5", "Hub License": "apache-2.0"}
{"Model": "aya-expanse-8b", "model_name_for_query": "CohereLabs/aya-expanse-8b", "GeneralKnowledge": "58.67", "GSM8K": "9.80", "DC-Homograph": "51.85", "MC-Homograph": "80.65", "PiQA": "80.18", "Proverb-Quiz": "60.00", "VerbEval": "48.06", "Winogrande": "64.04", "Arc-Challenge": "71.47", "Arc-Easy": "84.60", "Feqh": "29.71", "halluc_final": "23.52", "P-Hellaswag": "76.49", "Law": "32.33", "AUT Multiple Choice": "45.80", "Parsi Literature": "34.49", "BoolQA": "82.30", "Reading Comprehension": "20.10", "PartExpert": "35.56", "MMLU Pro": "21.90", "Iranian Social Norms": "71.71", "#Params (B)": 8.02, "Precision": "F16", "Model sha": "0ad43ec1e309e1351faa4b1d22713c065e37359a", "Hub License": "cc-by-nc-4.0"}
{"Model": "Hormoz-8B", "model_name_for_query": "mann-e/Hormoz-8B", "GeneralKnowledge": "58.42", "GSM8K": "10.00", "DC-Homograph": "50.93", "MC-Homograph": "80.65", "PiQA": "80.68", "Proverb-Quiz": "60.27", "VerbEval": "47.29", "Winogrande": "64.39", "Arc-Challenge": "70.41", "Arc-Easy": "84.28", "Feqh": "28.57", "halluc_final": "23.66", "P-Hellaswag": "76.05", "Law": "30.33", "AUT Multiple Choice": "46.70", "Parsi Literature": "33.08", "BoolQA": "79.80", "Reading Comprehension": "19.60", "PartExpert": "35.68", "MMLU Pro": "21.50", "Iranian Social Norms": "70.30", "#Params (B)": 8.02, "Precision": "F32", "Model sha": "c91bcecb236c90523f70db7efa23dd794e9b4cff", "Hub License": "mit"}
{"Model": "Llama-3.1-8B-Instruct", "model_name_for_query": "meta-llama/Llama-3.1-8B-Instruct", "GeneralKnowledge": "52.55", "GSM8K": "12.00", "DC-Homograph": "43.52", "MC-Homograph": "79.03", "PiQA": "70.07", "Proverb-Quiz": "47.57", "VerbEval": "42.91", "Winogrande": "54.21", "Arc-Challenge": "68.91", "Arc-Easy": "80.11", "Feqh": "29.71", "halluc_final": "6.76", "P-Hellaswag": "79.79", "Law": "32.67", "AUT Multiple Choice": "44.90", "Parsi Literature": "32.30", "BoolQA": "82.70", "Reading Comprehension": "24.50", "PartExpert": "37.62", "MMLU Pro": "25.70", "Iranian Social Norms": "70.98", "#Params (B)": 8.03, "Precision": "BF16", "Model sha": "0e9e39f249a16976918f6564b8830bc894c89659", "Hub License": "llama3.1"}
{"Model": "Qwen2.5-7B-Instruct", "model_name_for_query": "Qwen/Qwen2.5-7B-Instruct", "GeneralKnowledge": "51.02", "GSM8K": "18.00", "DC-Homograph": "52.78", "MC-Homograph": "79.26", "PiQA": "71.07", "Proverb-Quiz": "47.84", "VerbEval": "44.44", "Winogrande": "61.91", "Arc-Challenge": "72.33", "Arc-Easy": "81.50", "Feqh": "36.57", "halluc_final": "34.89", "P-Hellaswag": "74.80", "Law": "32.33", "AUT Multiple Choice": "42.60", "Parsi Literature": "31.27", "BoolQA": "82.50", "Reading Comprehension": "17.60", "PartExpert": "37.24", "MMLU Pro": "26.70", "Iranian Social Norms": "64.51", "#Params (B)": 7.61, "Precision": "BF16", "Model sha": "a09a35458c702b33eeacc393d103063234e8bc28", "Hub License": "apache-2.0"}
{"Model": "aya-23-8B", "model_name_for_query": "CohereLabs/aya-23-8B", "GeneralKnowledge": "52.30", "GSM8K": "6.10", "DC-Homograph": "52.78", "MC-Homograph": "76.27", "PiQA": "80.78", "Proverb-Quiz": "44.32", "VerbEval": "39.30", "Winogrande": "57.13", "Arc-Challenge": "63.68", "Arc-Easy": "81.39", "Feqh": "29.14", "halluc_final": "0.60", "P-Hellaswag": "75.83", "Law": "28.33", "AUT Multiple Choice": "42.90", "Parsi Literature": "31.27", "BoolQA": "72.30", "Reading Comprehension": "23.40", "PartExpert": "33.33", "MMLU Pro": "19.90", "Iranian Social Norms": "70.73", "#Params (B)": 8.02, "Precision": "F16", "Model sha": "2a1a63b24af8f591616fdf58936ee576d63ca835", "Hub License": "cc-by-nc-4.0"}
{"Model": "Qwen2-7B-Instruct", "model_name_for_query": "Qwen/Qwen2-7B-Instruct", "GeneralKnowledge": "52.04", "GSM8K": "14.50", "DC-Homograph": "54.63", "MC-Homograph": "72.81", "PiQA": "70.97", "Proverb-Quiz": "50.54", "VerbEval": "40.62", "Winogrande": "60.94", "Arc-Challenge": "69.12", "Arc-Easy": "80.75", "Feqh": "28.00", "halluc_final": "25.93", "P-Hellaswag": "76.71", "Law": "28.33", "AUT Multiple Choice": "40.40", "Parsi Literature": "31.40", "BoolQA": "79.00", "Reading Comprehension": "10.90", "PartExpert": "36.31", "MMLU Pro": "23.80", "Iranian Social Norms": "62.20", "#Params (B)": 7.61, "Precision": "BF16", "Model sha": "f2826a00ceef68f0f2b946d945ecc0477ce4450c", "Hub License": "apache-2.0"}
{"Model": "Meta-Llama-3-8B-Instruct", "model_name_for_query": "meta-llama/Meta-Llama-3-8B-Instruct", "GeneralKnowledge": "52.04", "GSM8K": "10.40", "DC-Homograph": "41.67", "MC-Homograph": "81.11", "PiQA": "70.97", "Proverb-Quiz": "42.97", "VerbEval": "38.93", "Winogrande": "56.95", "Arc-Challenge": "66.77", "Arc-Easy": "76.47", "Feqh": "33.71", "halluc_final": "33.23", "P-Hellaswag": "76.71", "Law": "32.00", "AUT Multiple Choice": "45.00", "Parsi Literature": "29.99", "BoolQA": "82.50", "Reading Comprehension": "19.40", "PartExpert": "36.30", "MMLU Pro": "26.00", "Iranian Social Norms": "70.06", "#Params (B)": 8.03, "Precision": "BF16", "Model sha": "5f0b02c75b57c5855da9ae460ce51323ea669d8a", "Hub License": "llama3"}
{"Model": "gemma-3-4b-it", "model_name_for_query": "google/gemma-3-4b-it", "GeneralKnowledge": "45.92", "GSM8K": "9.60", "DC-Homograph": "42.59", "MC-Homograph": "72.58", "PiQA": "72.77", "Proverb-Quiz": "53.78", "VerbEval": "45.30", "Winogrande": "55.09", "Arc-Challenge": "63.46", "Arc-Easy": "79.57", "Feqh": "21.14", "halluc_final": "46.04", "P-Hellaswag": "73.84", "Law": "27.67", "AUT Multiple Choice": "42.50", "Parsi Literature": "30.24", "BoolQA": "78.60", "Reading Comprehension": "5.50", "PartExpert": "34.70", "MMLU Pro": "22.80", "Iranian Social Norms": "65.55", "#Params (B)": 4.3, "Precision": "BF16", "Model sha": "093f9f388b31de276ce2de164bdc2081324b9767", "Hub License": "gemma"}
{"Model": "Qwen3-4B", "model_name_for_query": "Qwen/Qwen3-4B", "GeneralKnowledge": "43.88", "GSM8K": "20.10", "DC-Homograph": "38.89", "MC-Homograph": "76.27", "PiQA": "66.07", "Proverb-Quiz": "45.41", "VerbEval": "41.23", "Winogrande": "54.56", "Arc-Challenge": "73.61", "Arc-Easy": "83.42", "Feqh": "30.29", "halluc_final": "25.29", "P-Hellaswag": "78.03", "Law": "30.33", "AUT Multiple Choice": "40.60", "Parsi Literature": "31.79", "BoolQA": "81.90", "Reading Comprehension": "21.30", "PartExpert": "37.28", "MMLU Pro": "28.90", "Iranian Social Norms": "68.72", "#Params (B)": 4.02, "Precision": "BF16", "Model sha": "531c80e289d6cff3a7cd8c0db8110231d23a6f7a", "Hub License": "apache-2.0"}
{"Model": "Hermes-3-Llama-3.1-8B", "model_name_for_query": "NousResearch/Hermes-3-Llama-3.1-8B", "GeneralKnowledge": "49.49", "GSM8K": "10.20", "DC-Homograph": "44.44", "MC-Homograph": "79.72", "PiQA": "70.37", "Proverb-Quiz": "47.84", "VerbEval": "48.94", "Winogrande": "55.18", "Arc-Challenge": "65.28", "Arc-Easy": "78.07", "Feqh": "30.29", "halluc_final": "45.20", "P-Hellaswag": "73.99", "Law": "31.67", "AUT Multiple Choice": "42.10", "Parsi Literature": "30.63", "BoolQA": "83.50", "Reading Comprehension": "13.50", "PartExpert": "35.61", "MMLU Pro": "24.10", "Iranian Social Norms": "54.88", "#Params (B)": 8.03, "Precision": "BF16", "Model sha": "896ea440e5a9e6070e3d8a2774daf2b481ab425b", "Hub License": "llama3"}
{"Model": "Dorna2-Llama3.1-8B-Instruct", "model_name_for_query": "PartAI/Dorna2-Llama3.1-8B-Instruct", "GeneralKnowledge": "48.72", "GSM8K": "11.90", "DC-Homograph": "44.44", "MC-Homograph": "72.81", "PiQA": "69.97", "Proverb-Quiz": "42.97", "VerbEval": "42.06", "Winogrande": "54.47", "Arc-Challenge": "67.63", "Arc-Easy": "78.72", "Feqh": "33.71", "halluc_final": "33.91", "P-Hellaswag": "78.91", "Law": "29.67", "AUT Multiple Choice": "41.00", "Parsi Literature": "27.28", "BoolQA": "81.80", "Reading Comprehension": "21.90", "PartExpert": "35.65", "MMLU Pro": "22.70", "Iranian Social Norms": "49.82", "#Params (B)": 8.03, "Precision": "BF16", "Model sha": "b78e4bd261100c96e511ed5090ca0ce0e1f4b340", "Hub License": "llama3.1"}
{"Model": "Llama-3.1-8B", "model_name_for_query": "meta-llama/Llama-3.1-8B", "GeneralKnowledge": "49.23", "GSM8K": "10.80", "DC-Homograph": "46.30", "MC-Homograph": "72.12", "PiQA": "66.47", "Proverb-Quiz": "35.95", "VerbEval": "39.91", "Winogrande": "54.92", "Arc-Challenge": "63.35", "Arc-Easy": "75.08", "Feqh": "30.86", "halluc_final": "45.02", "P-Hellaswag": "76.34", "Law": "33.00", "AUT Multiple Choice": "42.60", "Parsi Literature": "27.41", "BoolQA": "71.60", "Reading Comprehension": "19.70", "PartExpert": "35.92", "MMLU Pro": "22.20", "Iranian Social Norms": "61.83", "#Params (B)": 8.03, "Precision": "BF16", "Model sha": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", "Hub License": "llama3.1"}
{"Model": "Meta-Llama-3-8B", "model_name_for_query": "meta-llama/Meta-Llama-3-8B", "GeneralKnowledge": "47.70", "GSM8K": "10.30", "DC-Homograph": "41.67", "MC-Homograph": "74.42", "PiQA": "64.16", "Proverb-Quiz": "37.30", "VerbEval": "39.46", "Winogrande": "55.36", "Arc-Challenge": "62.07", "Arc-Easy": "75.83", "Feqh": "27.43", "halluc_final": "37.00", "P-Hellaswag": "76.49", "Law": "35.67", "AUT Multiple Choice": "42.50", "Parsi Literature": "28.19", "BoolQA": "75.20", "Reading Comprehension": "19.50", "PartExpert": "35.10", "MMLU Pro": "22.80", "Iranian Social Norms": "54.02", "#Params (B)": 8.03, "Precision": "BF16", "Model sha": "8cde5ca8380496c9a6cc7ef3a8b46a0372a1d920", "Hub License": "llama3"}
{"Model": "Dorna-Llama3-8B-Instruct", "model_name_for_query": "PartAI/Dorna-Llama3-8B-Instruct", "GeneralKnowledge": "41.33", "GSM8K": "10.30", "DC-Homograph": "40.74", "MC-Homograph": "74.65", "PiQA": "66.17", "Proverb-Quiz": "35.41", "VerbEval": "34.74", "Winogrande": "56.16", "Arc-Challenge": "59.94", "Arc-Easy": "70.70", "Feqh": "29.14", "halluc_final": "31.49", "P-Hellaswag": "75.68", "Law": "25.33", "AUT Multiple Choice": "36.90", "Parsi Literature": "27.54", "BoolQA": "80.10", "Reading Comprehension": "21.80", "PartExpert": "34.49", "MMLU Pro": "22.00", "Iranian Social Norms": "69.39", "#Params (B)": 8.03, "Precision": "BF16", "Model sha": "fb268bb51b950b4db5b7c82c1b73d9e803020eed", "Hub License": "llama3"}
{"Model": "gemma-2-2b-it", "model_name_for_query": "google/gemma-2-2b-it", "GeneralKnowledge": "32.91", "GSM8K": "6.40", "DC-Homograph": "47.22", "MC-Homograph": "74.65", "PiQA": "66.87", "Proverb-Quiz": "45.68", "VerbEval": "36.18", "Winogrande": "54.74", "Arc-Challenge": "57.91", "Arc-Easy": "70.48", "Feqh": "25.71", "halluc_final": "39.02", "P-Hellaswag": "69.88", "Law": "32.67", "AUT Multiple Choice": "36.90", "Parsi Literature": "30.76", "BoolQA": "72.40", "Reading Comprehension": "0.30", "PartExpert": "31.31", "MMLU Pro": "18.20", "Iranian Social Norms": "40.18", "#Params (B)": 2.61, "Precision": "BF16", "Model sha": "299a8560bedf22ed1c72a8a11e7dce4a7f9f51f8", "Hub License": "gemma"}
{"Model": "PersianMind-v1.0", "model_name_for_query": "universitytehran/PersianMind-v1.0", "GeneralKnowledge": "30.61", "GSM8K": "2.30", "DC-Homograph": "41.67", "MC-Homograph": "65.90", "PiQA": "59.76", "Proverb-Quiz": "34.32", "VerbEval": "26.26", "Winogrande": "52.17", "Arc-Challenge": "54.59", "Arc-Easy": "69.73", "Feqh": "26.29", "halluc_final": "2.37", "P-Hellaswag": "63.78", "Law": "27.33", "AUT Multiple Choice": "36.10", "Parsi Literature": "27.80", "BoolQA": "66.30", "Reading Comprehension": "-", "PartExpert": "29.75", "MMLU Pro": "14.50", "Iranian Social Norms": "48.41", "#Params (B)": 0.0, "Precision": "F32", "Model sha": "af603eeb074138e2a613fbc95d89f018afbd3041", "Hub License": "cc-by-nc-sa-4.0"}
{"Model": "gemma-3-1b-it", "model_name_for_query": "google/gemma-3-1b-it", "GeneralKnowledge": "26.02", "GSM8K": "4.30", "DC-Homograph": "49.07", "MC-Homograph": "51.15", "PiQA": "57.66", "Proverb-Quiz": "28.92", "VerbEval": "27.67", "Winogrande": "50.58", "Arc-Challenge": "36.43", "Arc-Easy": "46.10", "Feqh": "28.00", "halluc_final": "54.94", "P-Hellaswag": "63.92", "Law": "20.33", "AUT Multiple Choice": "29.10", "Parsi Literature": "24.97", "BoolQA": "63.90", "Reading Comprehension": "2.10", "PartExpert": "27.22", "MMLU Pro": "13.70", "Iranian Social Norms": "51.22", "#Params (B)": 0.99, "Precision": "BF16", "Model sha": "dcc83ea841ab6100d6b47a070329e1ba4cf78752", "Hub License": "gemma"}
{"Model": "Llama-3.2-1B-Instruct", "model_name_for_query": "meta-llama/Llama-3.2-1B-Instruct", "GeneralKnowledge": "29.59", "GSM8K": "4.10", "DC-Homograph": "50.93", "MC-Homograph": "52.53", "PiQA": "54.05", "Proverb-Quiz": "28.65", "VerbEval": "26.11", "Winogrande": "49.07", "Arc-Challenge": "37.50", "Arc-Easy": "47.38", "Feqh": "31.43", "halluc_final": "3.34", "P-Hellaswag": "55.40", "Law": "24.00", "AUT Multiple Choice": "29.90", "Parsi Literature": "27.03", "BoolQA": "64.10", "Reading Comprehension": "7.20", "PartExpert": "28.59", "MMLU Pro": "15.70", "Iranian Social Norms": "37.44", "#Params (B)": 1.23, "Precision": "BF16", "Model sha": "9213176726f574b556790deb65791e0c5aa438b6", "Hub License": "llama3.2"}
{"Model": "Maral-7B-alpha-1", "model_name_for_query": "MaralGPT/Maral-7B-alpha-1", "GeneralKnowledge": "31.63", "GSM8K": "6.10", "DC-Homograph": "43.52", "MC-Homograph": "47.47", "PiQA": "51.95", "Proverb-Quiz": "22.16", "VerbEval": "28.96", "Winogrande": "49.42", "Arc-Challenge": "37.29", "Arc-Easy": "43.10", "Feqh": "26.29", "halluc_final": "0.00", "P-Hellaswag": "60.18", "Law": "26.33", "AUT Multiple Choice": "28.40", "Parsi Literature": "26.77", "BoolQA": "62.70", "Reading Comprehension": "10.80", "PartExpert": "27.10", "MMLU Pro": "14.80", "Iranian Social Norms": "24.63", "#Params (B)": 7.24, "Precision": "BF16", "Model sha": "2ab5ca2a0d1a4454a78b4ca911e595bb9da2fe2f", "Hub License": "mit"}
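
Every record in the new file gains a "#Params (B)" key: a number (parameter count in billions) for the open-weight models, and the string "unknown" for the API-only entries whose Precision, Model sha, and Hub License are likewise "unknown". Below is a minimal sketch of how a consumer might read the updated file; the local file path and the parse_params helper are illustrative assumptions, not part of this repository.

```python
import json

def parse_params(value):
    """Coerce "#Params (B)" to a float where possible (hypothetical helper).

    This commit stores the field as a number for open-weight models
    (e.g. 32.76) but as the string "unknown" for API-only models,
    so both cases must be handled.
    """
    try:
        return float(value)
    except (TypeError, ValueError):
        return None  # "unknown" or missing

# Assumes a local copy of the file updated by this commit.
with open("leaderboard_data.jsonl", encoding="utf-8") as f:
    rows = [json.loads(line) for line in f if line.strip()]

for row in rows:
    params = parse_params(row.get("#Params (B)"))
    label = f"{params:g}B" if params is not None else "size unknown"
    print(f'{row["Model"]}: {label}, precision={row["Precision"]}')
```

Because the field mixes numbers with the sentinel string "unknown", downstream code should normalize it (as above) before sorting or filtering the leaderboard by model size.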