mehran
update gpt-oss results
9221caa
{"Model Name":"claude-3-7-sonnet-20250219","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8606070195,"sts_FarSICK_corrcoef":0.8606070195,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.7143086066}
{"Model Name":"gpt-oss:20b","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8438423833,"sts_FarSICK_corrcoef":0.8438423833,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.628506628}
{"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8471466571,"sts_FarSICK_corrcoef":0.8471466571,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6241793507}
{"Model Name":"c4ai-command-r-plus","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8332013424,"sts_FarSICK_corrcoef":0.8332013424,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6297634971}
{"Model Name":"gpt-5-mini","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8521163575,"sts_FarSICK_corrcoef":0.8521163575,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.7144353486}
{"Model Name":"gpt-5-nano","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8404353896,"sts_FarSICK_corrcoef":0.8404353896,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6749652797}
{"Model Name":"gpt-oss:120b","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"20000000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8412365875,"sts_FarSICK_corrcoef":0.8412365875,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6458443785}
{"Model Name":"gemma-3n-E4B-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8602460091,"sts_FarSICK_corrcoef":0.8602460091,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6552152029}
{"Model Name":"gpt-4.1","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8624442565,"sts_FarSICK_corrcoef":0.8624442565,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6758278127}
{"Model Name":"o4-mini","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":null,"sts_FarSICK_corrcoef":null,"sts_FarSICK_valid_output_ratio":null,"nlu_score":null}
{"Model Name":"gemma-3-12b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8767598269,"sts_FarSICK_corrcoef":0.8767598269,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.699116864}
{"Model Name":"gemma-3-27b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8796836219,"sts_FarSICK_corrcoef":0.8796836219,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6898261633}
{"Model Name":"Qwen3-14B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8509006434,"sts_FarSICK_corrcoef":0.8509006434,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6460328733}
{"Model Name":"Qwen3-32B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8550824218,"sts_FarSICK_corrcoef":0.8550824218,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6714091535}
{"Model Name":"llama4:scout","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"109000000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.0982656524,"sts_FarSICK_corrcoef":0.9633887492,"sts_FarSICK_valid_output_ratio":0.102,"nlu_score":0.4086928082}
{"Model Name":"claude-3-5-haiku-20241022","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.2533632205,"sts_FarSICK_corrcoef":0.8617796616,"sts_FarSICK_valid_output_ratio":0.294,"nlu_score":0.3749414991}
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8346099969,"sts_FarSICK_corrcoef":0.8346099969,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.5661558794}
{"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8020636156,"sts_FarSICK_corrcoef":0.8020636156,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.456845738}
{"Model Name":"deepseek-chat","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8641781993,"sts_FarSICK_corrcoef":0.8641781993,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6752949557}
{"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8363152655,"sts_FarSICK_corrcoef":0.8430597434,"sts_FarSICK_valid_output_ratio":0.992,"nlu_score":0.5121418762}
{"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.6678492429,"sts_FarSICK_corrcoef":0.6913553239,"sts_FarSICK_valid_output_ratio":0.966,"nlu_score":0.3619547874}
{"Model Name":"aya-expanse-32b","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.0,"sts_FarSICK_corrcoef":0.0,"sts_FarSICK_valid_output_ratio":0.0,"nlu_score":0.3928685253}
{"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8274969834,"sts_FarSICK_corrcoef":0.8274969834,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6800109206}
{"Model Name":"gpt-4.1-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.86471356,"sts_FarSICK_corrcoef":0.86471356,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6833497104}
{"Model Name":"o3","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8663758584,"sts_FarSICK_corrcoef":0.8663758584,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.7207167537}
{"Model Name":"gpt-4o-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8671704383,"sts_FarSICK_corrcoef":0.8671704383,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6459120734}
{"Model Name":"c4ai-command-a-03-2025","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.0,"sts_FarSICK_corrcoef":0.0,"sts_FarSICK_valid_output_ratio":0.058,"nlu_score":0.4824528512}
{"Model Name":"gemini-2.0-flash","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8492628764,"sts_FarSICK_corrcoef":0.8492628764,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.7050532433}
{"Model Name":"gemini-2.5-flash","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8367188896,"sts_FarSICK_corrcoef":0.8367188896,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6944128198}
{"Model Name":"gemini-2.0-flash-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8461251715,"sts_FarSICK_corrcoef":0.8461251715,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6914202844}
{"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8375953381,"sts_FarSICK_corrcoef":0.8375953381,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.531045981}
{"Model Name":"gpt-4.1-nano","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8720703866,"sts_FarSICK_corrcoef":0.8720703866,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6262096694}
{"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8497629768,"sts_FarSICK_corrcoef":0.8497629768,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.5968415875}
{"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.5531047251,"sts_FarSICK_corrcoef":0.8039312865,"sts_FarSICK_valid_output_ratio":0.688,"nlu_score":0.3916645306}
{"Model Name":"gpt-4o","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8612153956,"sts_FarSICK_corrcoef":0.8612153956,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.7146808531}
{"Model Name":"deepseek-reasoner","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8547994421,"sts_FarSICK_corrcoef":0.8547994421,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6361186163}
{"Model Name":"Qwen3-30B-A3B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8643540763,"sts_FarSICK_corrcoef":0.8643540763,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6255818412}
{"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.0,"sts_FarSICK_corrcoef":0.0,"sts_FarSICK_valid_output_ratio":0.0,"nlu_score":0.1368924446}
{"Model Name":"Llama-3.2-1B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.0,"sts_FarSICK_corrcoef":0.0,"sts_FarSICK_valid_output_ratio":0.018,"nlu_score":0.046805056}
{"Model Name":"gemini-2.5-pro","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8357730413,"sts_FarSICK_corrcoef":0.8357730413,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6992555201}