mizan-llm-leaderboard / leaderboard /boards_data /keyword-extraction_SynKeywords.jsonl
mehran
update gpt-oss results
9221caa
{"Model Name":"claude-3-7-sonnet-20250219","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.2115068728,"keyword-extraction_SynKeywords_precision_mean":0.1912410205,"keyword-extraction_SynKeywords_recall_mean":0.2483695652,"nlu_score":0.7143086066}
{"Model Name":"gpt-oss:20b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.1297217026,"keyword-extraction_SynKeywords_precision_mean":0.1052290945,"keyword-extraction_SynKeywords_recall_mean":0.1816123188,"nlu_score":0.628506628}
{"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.229921048,"keyword-extraction_SynKeywords_precision_mean":0.21147343,"keyword-extraction_SynKeywords_recall_mean":0.2634963768,"nlu_score":0.6241793507}
{"Model Name":"c4ai-command-r-plus","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.0860842686,"keyword-extraction_SynKeywords_precision_mean":0.0757882818,"keyword-extraction_SynKeywords_recall_mean":0.1065217391,"nlu_score":0.6297634971}
{"Model Name":"gpt-5-mini","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.1547160783,"keyword-extraction_SynKeywords_precision_mean":0.1275089966,"keyword-extraction_SynKeywords_recall_mean":0.2111413043,"nlu_score":0.7144353486}
{"Model Name":"gpt-5-nano","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.1186662307,"keyword-extraction_SynKeywords_precision_mean":0.1013265485,"keyword-extraction_SynKeywords_recall_mean":0.1581521739,"nlu_score":0.6749652797}
{"Model Name":"gpt-oss:120b","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.1543389439,"keyword-extraction_SynKeywords_precision_mean":0.1301371778,"keyword-extraction_SynKeywords_recall_mean":0.2038949275,"nlu_score":0.6458443785}
{"Model Name":"gemma-3n-E4B-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.2645652392,"keyword-extraction_SynKeywords_precision_mean":0.2349391249,"keyword-extraction_SynKeywords_recall_mean":0.3166666667,"nlu_score":0.6552152029}
{"Model Name":"gpt-4.1","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.1217550899,"keyword-extraction_SynKeywords_precision_mean":0.1020894964,"keyword-extraction_SynKeywords_recall_mean":0.1608695652,"nlu_score":0.6758278127}
{"Model Name":"o4-mini","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":null,"keyword-extraction_SynKeywords_precision_mean":null,"keyword-extraction_SynKeywords_recall_mean":null,"nlu_score":null}
{"Model Name":"gemma-3-12b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.3352048805,"keyword-extraction_SynKeywords_precision_mean":0.2914121808,"keyword-extraction_SynKeywords_recall_mean":0.4166666667,"nlu_score":0.699116864}
{"Model Name":"gemma-3-27b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.2384077673,"keyword-extraction_SynKeywords_precision_mean":0.2041836259,"keyword-extraction_SynKeywords_recall_mean":0.3015398551,"nlu_score":0.6898261633}
{"Model Name":"Qwen3-14B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.233766167,"keyword-extraction_SynKeywords_precision_mean":0.1893302534,"keyword-extraction_SynKeywords_recall_mean":0.3297101449,"nlu_score":0.6460328733}
{"Model Name":"Qwen3-32B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.1369232983,"keyword-extraction_SynKeywords_precision_mean":0.1117212542,"keyword-extraction_SynKeywords_recall_mean":0.1863224638,"nlu_score":0.6714091535}
{"Model Name":"llama4:scout","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"109000000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.0169533238,"keyword-extraction_SynKeywords_precision_mean":0.015422274,"keyword-extraction_SynKeywords_recall_mean":0.0206521739,"nlu_score":0.4086928082}
{"Model Name":"claude-3-5-haiku-20241022","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.2568096145,"keyword-extraction_SynKeywords_precision_mean":0.2483731877,"keyword-extraction_SynKeywords_recall_mean":0.2765873016,"nlu_score":0.3749414991}
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.1942845429,"keyword-extraction_SynKeywords_precision_mean":0.168197784,"keyword-extraction_SynKeywords_recall_mean":0.2451992754,"nlu_score":0.5661558794}
{"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.1409784417,"keyword-extraction_SynKeywords_precision_mean":0.1216706248,"keyword-extraction_SynKeywords_recall_mean":0.1832427536,"nlu_score":0.456845738}
{"Model Name":"deepseek-chat","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.261926093,"keyword-extraction_SynKeywords_precision_mean":0.2173028298,"keyword-extraction_SynKeywords_recall_mean":0.3492753623,"nlu_score":0.6752949557}
{"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.2175605056,"keyword-extraction_SynKeywords_precision_mean":0.1768294437,"keyword-extraction_SynKeywords_recall_mean":0.3029891304,"nlu_score":0.5121418762}
{"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.2097414246,"keyword-extraction_SynKeywords_precision_mean":0.1802822781,"keyword-extraction_SynKeywords_recall_mean":0.2621376812,"nlu_score":0.3619547874}
{"Model Name":"aya-expanse-32b","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.2199224821,"keyword-extraction_SynKeywords_precision_mean":0.1924904051,"keyword-extraction_SynKeywords_recall_mean":0.2695652174,"nlu_score":0.3928685253}
{"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.2876907753,"keyword-extraction_SynKeywords_precision_mean":0.2733133111,"keyword-extraction_SynKeywords_recall_mean":0.322192029,"nlu_score":0.6800109206}
{"Model Name":"gpt-4.1-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.1856116909,"keyword-extraction_SynKeywords_precision_mean":0.157770465,"keyword-extraction_SynKeywords_recall_mean":0.2412137681,"nlu_score":0.6833497104}
{"Model Name":"o3","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.1555238683,"keyword-extraction_SynKeywords_precision_mean":0.1317069998,"keyword-extraction_SynKeywords_recall_mean":0.2076992754,"nlu_score":0.7207167537}
{"Model Name":"gpt-4o-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.2783400189,"keyword-extraction_SynKeywords_precision_mean":0.2250927598,"keyword-extraction_SynKeywords_recall_mean":0.3842391304,"nlu_score":0.6459120734}
{"Model Name":"c4ai-command-a-03-2025","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.1901147976,"keyword-extraction_SynKeywords_precision_mean":0.1676428493,"keyword-extraction_SynKeywords_recall_mean":0.2307065217,"nlu_score":0.4824528512}
{"Model Name":"gemini-2.0-flash","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.205802897,"keyword-extraction_SynKeywords_precision_mean":0.1860666658,"keyword-extraction_SynKeywords_recall_mean":0.2421195652,"nlu_score":0.7050532433}
{"Model Name":"gemini-2.5-flash","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.1986622723,"keyword-extraction_SynKeywords_precision_mean":0.1812999953,"keyword-extraction_SynKeywords_recall_mean":0.2295289855,"nlu_score":0.6944128198}
{"Model Name":"gemini-2.0-flash-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.1470867235,"keyword-extraction_SynKeywords_precision_mean":0.1387418439,"keyword-extraction_SynKeywords_recall_mean":0.1666666667,"nlu_score":0.6914202844}
{"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.0012077295,"keyword-extraction_SynKeywords_precision_mean":0.0013586957,"keyword-extraction_SynKeywords_recall_mean":0.0010869565,"nlu_score":0.531045981}
{"Model Name":"gpt-4.1-nano","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.1690961,"keyword-extraction_SynKeywords_precision_mean":0.1495665943,"keyword-extraction_SynKeywords_recall_mean":0.2049818841,"nlu_score":0.6262096694}
{"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.2080704644,"keyword-extraction_SynKeywords_precision_mean":0.1673321849,"keyword-extraction_SynKeywords_recall_mean":0.294384058,"nlu_score":0.5968415875}
{"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.2510623051,"keyword-extraction_SynKeywords_precision_mean":0.1899292026,"keyword-extraction_SynKeywords_recall_mean":0.4099637681,"nlu_score":0.3916645306}
{"Model Name":"gpt-4o","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.2160808904,"keyword-extraction_SynKeywords_precision_mean":0.1901842722,"keyword-extraction_SynKeywords_recall_mean":0.2683876812,"nlu_score":0.7146808531}
{"Model Name":"deepseek-reasoner","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.3267802104,"keyword-extraction_SynKeywords_precision_mean":0.2985915051,"keyword-extraction_SynKeywords_recall_mean":0.3825181159,"nlu_score":0.6361186163}
{"Model Name":"Qwen3-30B-A3B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.2600489896,"keyword-extraction_SynKeywords_precision_mean":0.2150796745,"keyword-extraction_SynKeywords_recall_mean":0.3497282609,"nlu_score":0.6255818412}
{"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.0224485659,"keyword-extraction_SynKeywords_precision_mean":0.0230331263,"keyword-extraction_SynKeywords_recall_mean":0.022826087,"nlu_score":0.1368924446}
{"Model Name":"Llama-3.2-1B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.0,"keyword-extraction_SynKeywords_precision_mean":0.0,"keyword-extraction_SynKeywords_recall_mean":0.0,"nlu_score":0.046805056}
{"Model Name":"gemini-2.5-pro","thinking_method":"βœ”οΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.1559734933,"keyword-extraction_SynKeywords_precision_mean":0.1449240072,"keyword-extraction_SynKeywords_recall_mean":0.1766304348,"nlu_score":0.6992555201}