Upload organize_model_results.json with huggingface_hub
Browse files- organize_model_results.json +76 -0
organize_model_results.json
CHANGED
|
@@ -6,11 +6,13 @@
|
|
| 6 |
"Qwen2-Audio-7B-Instruct": 29.187525646286417,
|
| 7 |
"cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 24.640951990151827,
|
| 8 |
"phi_4_multimodal_instruct": 26.815757078375054,
|
|
|
|
| 9 |
"WavLLM_fairseq": 39.96717275338531,
|
| 10 |
"SALMONN_7B": 34.222404595814524,
|
| 11 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 39.32704144439885
|
| 12 |
},
|
| 13 |
"gpt4o_judge": {
|
|
|
|
| 14 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 39.462453836684446
|
| 15 |
}
|
| 16 |
},
|
|
@@ -51,6 +53,7 @@
|
|
| 51 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 3.445086705202312
|
| 52 |
},
|
| 53 |
"gpt4o_judge": {
|
|
|
|
| 54 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 4.61271676300578
|
| 55 |
}
|
| 56 |
},
|
|
@@ -81,6 +84,7 @@
|
|
| 81 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 64.0
|
| 82 |
},
|
| 83 |
"gpt4o_judge": {
|
|
|
|
| 84 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 67.0
|
| 85 |
}
|
| 86 |
},
|
|
@@ -119,6 +123,9 @@
|
|
| 119 |
"WavLLM_fairseq": 44.3133951137321,
|
| 120 |
"SALMONN_7B": 50.88458298230834,
|
| 121 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 56.44481887110362
|
|
|
|
|
|
|
|
|
|
| 122 |
}
|
| 123 |
},
|
| 124 |
"imda_30s_sqa_human_test": {
|
|
@@ -228,6 +235,7 @@
|
|
| 228 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 63.0
|
| 229 |
},
|
| 230 |
"gpt4o_judge": {
|
|
|
|
| 231 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 75.0
|
| 232 |
}
|
| 233 |
},
|
|
@@ -241,6 +249,9 @@
|
|
| 241 |
"WavLLM_fairseq": 59.76095617529881,
|
| 242 |
"SALMONN_7B": 23.804780876494025,
|
| 243 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 46.713147410358566
|
|
|
|
|
|
|
|
|
|
| 244 |
}
|
| 245 |
},
|
| 246 |
"public_sg_speech_qa_test": {
|
|
@@ -256,6 +267,7 @@
|
|
| 256 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 64.94186046511628
|
| 257 |
},
|
| 258 |
"gpt4o_judge": {
|
|
|
|
| 259 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 73.02325581395348
|
| 260 |
}
|
| 261 |
},
|
|
@@ -292,11 +304,13 @@
|
|
| 292 |
"Qwen2-Audio-7B-Instruct": 64.86264249672958,
|
| 293 |
"cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 88.61894972902262,
|
| 294 |
"phi_4_multimodal_instruct": 77.58549803774996,
|
|
|
|
| 295 |
"WavLLM_fairseq": 77.64903756307233,
|
| 296 |
"SALMONN_7B": 66.39506634273968,
|
| 297 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 83.81984675761541
|
| 298 |
},
|
| 299 |
"gpt4o_judge": {
|
|
|
|
| 300 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 90.12521024107643
|
| 301 |
}
|
| 302 |
},
|
|
@@ -326,6 +340,9 @@
|
|
| 326 |
"WavLLM_fairseq": 51.072796934865906,
|
| 327 |
"SALMONN_7B": 41.7624521072797,
|
| 328 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 45.593869731800766
|
|
|
|
|
|
|
|
|
|
| 329 |
}
|
| 330 |
},
|
| 331 |
"imda_part4_30s_sqa_human_test": {
|
|
@@ -341,6 +358,7 @@
|
|
| 341 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 53.8
|
| 342 |
},
|
| 343 |
"gpt4o_judge": {
|
|
|
|
| 344 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 61.4
|
| 345 |
}
|
| 346 |
},
|
|
@@ -354,6 +372,9 @@
|
|
| 354 |
"WavLLM_fairseq": 69.61427985227739,
|
| 355 |
"SALMONN_7B": 88.79770209273697,
|
| 356 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 42.921624948707425
|
|
|
|
|
|
|
|
|
|
| 357 |
}
|
| 358 |
},
|
| 359 |
"imda_gr_dialogue": {
|
|
@@ -367,6 +388,9 @@
|
|
| 367 |
"WavLLM_fairseq": 46.766666666666666,
|
| 368 |
"SALMONN_7B": 42.733333333333334,
|
| 369 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 25.433333333333337
|
|
|
|
|
|
|
|
|
|
| 370 |
}
|
| 371 |
},
|
| 372 |
"imda_30s_ds_human_test": {
|
|
@@ -521,6 +545,7 @@
|
|
| 521 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 37.400000000000006
|
| 522 |
},
|
| 523 |
"gpt4o_judge": {
|
|
|
|
| 524 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 47.400000000000006
|
| 525 |
}
|
| 526 |
},
|
|
@@ -535,6 +560,9 @@
|
|
| 535 |
"WavLLM_fairseq": 0.23333333333333336,
|
| 536 |
"SALMONN_7B": 0.06666666666666667,
|
| 537 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 9.666666666666666
|
|
|
|
|
|
|
|
|
|
| 538 |
}
|
| 539 |
},
|
| 540 |
"iemocap_gender_test": {
|
|
@@ -548,6 +576,9 @@
|
|
| 548 |
"WavLLM_fairseq": 51.932270916334666,
|
| 549 |
"SALMONN_7B": 81.31474103585658,
|
| 550 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 44.22310756972111
|
|
|
|
|
|
|
|
|
|
| 551 |
}
|
| 552 |
},
|
| 553 |
"ytb_asr_batch2": {
|
|
@@ -586,6 +617,9 @@
|
|
| 586 |
"WavLLM_fairseq": 66.31439894319684,
|
| 587 |
"SALMONN_7B": 50.99075297225891,
|
| 588 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 85.2928225451343
|
|
|
|
|
|
|
|
|
|
| 589 |
}
|
| 590 |
},
|
| 591 |
"dream_tts_mcq_test": {
|
|
@@ -599,6 +633,9 @@
|
|
| 599 |
"WavLLM_fairseq": 66.5446941975954,
|
| 600 |
"SALMONN_7B": 56.455828541557764,
|
| 601 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 86.4610559330894
|
|
|
|
|
|
|
|
|
|
| 602 |
}
|
| 603 |
},
|
| 604 |
"imda_part5_30s_ds_human_test": {
|
|
@@ -614,6 +651,7 @@
|
|
| 614 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 49.0
|
| 615 |
},
|
| 616 |
"gpt4o_judge": {
|
|
|
|
| 617 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 56.8
|
| 618 |
}
|
| 619 |
},
|
|
@@ -644,6 +682,7 @@
|
|
| 644 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 49.0
|
| 645 |
},
|
| 646 |
"gpt4o_judge": {
|
|
|
|
| 647 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 52.800000000000004
|
| 648 |
}
|
| 649 |
},
|
|
@@ -660,6 +699,7 @@
|
|
| 660 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 36.0
|
| 661 |
},
|
| 662 |
"gpt4o_judge": {
|
|
|
|
| 663 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 48.2
|
| 664 |
}
|
| 665 |
},
|
|
@@ -682,6 +722,7 @@
|
|
| 682 |
"Qwen2-Audio-7B-Instruct": 40.77727272727273,
|
| 683 |
"cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 3.0954545454545457,
|
| 684 |
"phi_4_multimodal_instruct": 26.386363636363637,
|
|
|
|
| 685 |
"WavLLM_fairseq": 5.5,
|
| 686 |
"SALMONN_7B": 37.445454545454545,
|
| 687 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 2.4727272727272727
|
|
@@ -698,6 +739,7 @@
|
|
| 698 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 0.07953048457785493
|
| 699 |
},
|
| 700 |
"gpt4o_judge": {
|
|
|
|
| 701 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 4.868181818181818
|
| 702 |
}
|
| 703 |
},
|
|
@@ -712,6 +754,9 @@
|
|
| 712 |
"WavLLM_fairseq": 2.6833333333333336,
|
| 713 |
"SALMONN_7B": 2.5166666666666666,
|
| 714 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 12.416666666666666
|
|
|
|
|
|
|
|
|
|
| 715 |
}
|
| 716 |
},
|
| 717 |
"imda_part6_30s_sqa_test": {
|
|
@@ -748,6 +793,7 @@
|
|
| 748 |
"whisper_large_v3": 1.600581653970121,
|
| 749 |
"cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 27.620150160643625,
|
| 750 |
"phi_4_multimodal_instruct": 15.012558278964478,
|
|
|
|
| 751 |
"WavLLM_fairseq": 13.841886973016162,
|
| 752 |
"SALMONN_7B": 14.102682915273142,
|
| 753 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 10.930203684508578
|
|
@@ -766,6 +812,7 @@
|
|
| 766 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 29.47134606841404
|
| 767 |
},
|
| 768 |
"gpt4o_judge": {
|
|
|
|
| 769 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 28.076410484229232
|
| 770 |
}
|
| 771 |
},
|
|
@@ -809,6 +856,7 @@
|
|
| 809 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 70.8
|
| 810 |
},
|
| 811 |
"gpt4o_judge": {
|
|
|
|
| 812 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 77.8
|
| 813 |
}
|
| 814 |
},
|
|
@@ -848,6 +896,7 @@
|
|
| 848 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 17.380191693290733
|
| 849 |
},
|
| 850 |
"gpt4o_judge": {
|
|
|
|
| 851 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 14.63258785942492
|
| 852 |
}
|
| 853 |
},
|
|
@@ -886,6 +935,7 @@
|
|
| 886 |
"whisper_large_v3": 0.02107778621423822,
|
| 887 |
"cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 8.433062902024755,
|
| 888 |
"phi_4_multimodal_instruct": 0.19835914151649442,
|
|
|
|
| 889 |
"WavLLM_fairseq": 0.0033159224040994286,
|
| 890 |
"SALMONN_7B": 0.00046745670226766583,
|
| 891 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 1.0368044741318085
|
|
@@ -904,6 +954,7 @@
|
|
| 904 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 16.710526315789473
|
| 905 |
},
|
| 906 |
"gpt4o_judge": {
|
|
|
|
| 907 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 14.736842105263158
|
| 908 |
}
|
| 909 |
},
|
|
@@ -927,6 +978,7 @@
|
|
| 927 |
"whisper_large_v3": 0.16408986541757878,
|
| 928 |
"cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 35.274306071307024,
|
| 929 |
"phi_4_multimodal_instruct": 45.295964957544776,
|
|
|
|
| 930 |
"WavLLM_fairseq": 31.96381187282953,
|
| 931 |
"SALMONN_7B": 33.88941292215531,
|
| 932 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 5.987143868370054
|
|
@@ -1051,6 +1103,9 @@
|
|
| 1051 |
"WavLLM_fairseq": 49.06666666666666,
|
| 1052 |
"SALMONN_7B": 59.766666666666666,
|
| 1053 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 36.016666666666666
|
|
|
|
|
|
|
|
|
|
| 1054 |
}
|
| 1055 |
},
|
| 1056 |
"slue_p2_sqa5_test": {
|
|
@@ -1066,6 +1121,7 @@
|
|
| 1066 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 82.99019607843137
|
| 1067 |
},
|
| 1068 |
"gpt4o_judge": {
|
|
|
|
| 1069 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 87.79411764705883
|
| 1070 |
}
|
| 1071 |
},
|
|
@@ -1094,6 +1150,9 @@
|
|
| 1094 |
"WavLLM_fairseq": 41.57088122605364,
|
| 1095 |
"SALMONN_7B": 30.536398467432953,
|
| 1096 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 36.81992337164751
|
|
|
|
|
|
|
|
|
|
| 1097 |
}
|
| 1098 |
},
|
| 1099 |
"tedlium3_test": {
|
|
@@ -1150,6 +1209,7 @@
|
|
| 1150 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 57.800000000000004
|
| 1151 |
},
|
| 1152 |
"gpt4o_judge": {
|
|
|
|
| 1153 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 64.80000000000001
|
| 1154 |
}
|
| 1155 |
},
|
|
@@ -1166,6 +1226,7 @@
|
|
| 1166 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 57.199999999999996
|
| 1167 |
},
|
| 1168 |
"gpt4o_judge": {
|
|
|
|
| 1169 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 64.4
|
| 1170 |
}
|
| 1171 |
},
|
|
@@ -1218,6 +1279,21 @@
|
|
| 1218 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 63.68000000000001
|
| 1219 |
}
|
| 1220 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1221 |
"imda_30s_ar_test": {
|
| 1222 |
"llama3_70b_judge": {
|
| 1223 |
"Qwen2-Audio-7B-Instruct": 5.106666666666667,
|
|
|
|
| 6 |
"Qwen2-Audio-7B-Instruct": 29.187525646286417,
|
| 7 |
"cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 24.640951990151827,
|
| 8 |
"phi_4_multimodal_instruct": 26.815757078375054,
|
| 9 |
+
"seallms_audio_7b": 8.658186294624539,
|
| 10 |
"WavLLM_fairseq": 39.96717275338531,
|
| 11 |
"SALMONN_7B": 34.222404595814524,
|
| 12 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 39.32704144439885
|
| 13 |
},
|
| 14 |
"gpt4o_judge": {
|
| 15 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 14.813295034878948,
|
| 16 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 39.462453836684446
|
| 17 |
}
|
| 18 |
},
|
|
|
|
| 53 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 3.445086705202312
|
| 54 |
},
|
| 55 |
"gpt4o_judge": {
|
| 56 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 31.641618497109825,
|
| 57 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 4.61271676300578
|
| 58 |
}
|
| 59 |
},
|
|
|
|
| 84 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 64.0
|
| 85 |
},
|
| 86 |
"gpt4o_judge": {
|
| 87 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 71.6,
|
| 88 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 67.0
|
| 89 |
}
|
| 90 |
},
|
|
|
|
| 123 |
"WavLLM_fairseq": 44.3133951137321,
|
| 124 |
"SALMONN_7B": 50.88458298230834,
|
| 125 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 56.44481887110362
|
| 126 |
+
},
|
| 127 |
+
"gpt4o_judge": {
|
| 128 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 57.87700084245998
|
| 129 |
}
|
| 130 |
},
|
| 131 |
"imda_30s_sqa_human_test": {
|
|
|
|
| 235 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 63.0
|
| 236 |
},
|
| 237 |
"gpt4o_judge": {
|
| 238 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 74.2,
|
| 239 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 75.0
|
| 240 |
}
|
| 241 |
},
|
|
|
|
| 249 |
"WavLLM_fairseq": 59.76095617529881,
|
| 250 |
"SALMONN_7B": 23.804780876494025,
|
| 251 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 46.713147410358566
|
| 252 |
+
},
|
| 253 |
+
"gpt4o_judge": {
|
| 254 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 37.45019920318725
|
| 255 |
}
|
| 256 |
},
|
| 257 |
"public_sg_speech_qa_test": {
|
|
|
|
| 267 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 64.94186046511628
|
| 268 |
},
|
| 269 |
"gpt4o_judge": {
|
| 270 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 64.18604651162791,
|
| 271 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 73.02325581395348
|
| 272 |
}
|
| 273 |
},
|
|
|
|
| 304 |
"Qwen2-Audio-7B-Instruct": 64.86264249672958,
|
| 305 |
"cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 88.61894972902262,
|
| 306 |
"phi_4_multimodal_instruct": 77.58549803774996,
|
| 307 |
+
"seallms_audio_7b": 67.73313399364605,
|
| 308 |
"WavLLM_fairseq": 77.64903756307233,
|
| 309 |
"SALMONN_7B": 66.39506634273968,
|
| 310 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 83.81984675761541
|
| 311 |
},
|
| 312 |
"gpt4o_judge": {
|
| 313 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 74.99159035694262,
|
| 314 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 90.12521024107643
|
| 315 |
}
|
| 316 |
},
|
|
|
|
| 340 |
"WavLLM_fairseq": 51.072796934865906,
|
| 341 |
"SALMONN_7B": 41.7624521072797,
|
| 342 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 45.593869731800766
|
| 343 |
+
},
|
| 344 |
+
"gpt4o_judge": {
|
| 345 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 47.356321839080465
|
| 346 |
}
|
| 347 |
},
|
| 348 |
"imda_part4_30s_sqa_human_test": {
|
|
|
|
| 358 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 53.8
|
| 359 |
},
|
| 360 |
"gpt4o_judge": {
|
| 361 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 60.0,
|
| 362 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 61.4
|
| 363 |
}
|
| 364 |
},
|
|
|
|
| 372 |
"WavLLM_fairseq": 69.61427985227739,
|
| 373 |
"SALMONN_7B": 88.79770209273697,
|
| 374 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 42.921624948707425
|
| 375 |
+
},
|
| 376 |
+
"gpt4o_judge": {
|
| 377 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 99.46655724251129
|
| 378 |
}
|
| 379 |
},
|
| 380 |
"imda_gr_dialogue": {
|
|
|
|
| 388 |
"WavLLM_fairseq": 46.766666666666666,
|
| 389 |
"SALMONN_7B": 42.733333333333334,
|
| 390 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 25.433333333333337
|
| 391 |
+
},
|
| 392 |
+
"gpt4o_judge": {
|
| 393 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 93.86666666666666
|
| 394 |
}
|
| 395 |
},
|
| 396 |
"imda_30s_ds_human_test": {
|
|
|
|
| 545 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 37.400000000000006
|
| 546 |
},
|
| 547 |
"gpt4o_judge": {
|
| 548 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 59.2,
|
| 549 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 47.400000000000006
|
| 550 |
}
|
| 551 |
},
|
|
|
|
| 560 |
"WavLLM_fairseq": 0.23333333333333336,
|
| 561 |
"SALMONN_7B": 0.06666666666666667,
|
| 562 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 9.666666666666666
|
| 563 |
+
},
|
| 564 |
+
"gpt4o_judge": {
|
| 565 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 77.13333333333333
|
| 566 |
}
|
| 567 |
},
|
| 568 |
"iemocap_gender_test": {
|
|
|
|
| 576 |
"WavLLM_fairseq": 51.932270916334666,
|
| 577 |
"SALMONN_7B": 81.31474103585658,
|
| 578 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 44.22310756972111
|
| 579 |
+
},
|
| 580 |
+
"gpt4o_judge": {
|
| 581 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 98.20717131474103
|
| 582 |
}
|
| 583 |
},
|
| 584 |
"ytb_asr_batch2": {
|
|
|
|
| 617 |
"WavLLM_fairseq": 66.31439894319684,
|
| 618 |
"SALMONN_7B": 50.99075297225891,
|
| 619 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 85.2928225451343
|
| 620 |
+
},
|
| 621 |
+
"gpt4o_judge": {
|
| 622 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 88.77146631439894
|
| 623 |
}
|
| 624 |
},
|
| 625 |
"dream_tts_mcq_test": {
|
|
|
|
| 633 |
"WavLLM_fairseq": 66.5446941975954,
|
| 634 |
"SALMONN_7B": 56.455828541557764,
|
| 635 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 86.4610559330894
|
| 636 |
+
},
|
| 637 |
+
"gpt4o_judge": {
|
| 638 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 84.31782540512285
|
| 639 |
}
|
| 640 |
},
|
| 641 |
"imda_part5_30s_ds_human_test": {
|
|
|
|
| 651 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 49.0
|
| 652 |
},
|
| 653 |
"gpt4o_judge": {
|
| 654 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 64.0,
|
| 655 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 56.8
|
| 656 |
}
|
| 657 |
},
|
|
|
|
| 682 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 49.0
|
| 683 |
},
|
| 684 |
"gpt4o_judge": {
|
| 685 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 60.599999999999994,
|
| 686 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 52.800000000000004
|
| 687 |
}
|
| 688 |
},
|
|
|
|
| 699 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 36.0
|
| 700 |
},
|
| 701 |
"gpt4o_judge": {
|
| 702 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 55.199999999999996,
|
| 703 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 48.2
|
| 704 |
}
|
| 705 |
},
|
|
|
|
| 722 |
"Qwen2-Audio-7B-Instruct": 40.77727272727273,
|
| 723 |
"cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 3.0954545454545457,
|
| 724 |
"phi_4_multimodal_instruct": 26.386363636363637,
|
| 725 |
+
"seallms_audio_7b": 53.20909090909091,
|
| 726 |
"WavLLM_fairseq": 5.5,
|
| 727 |
"SALMONN_7B": 37.445454545454545,
|
| 728 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 2.4727272727272727
|
|
|
|
| 739 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 0.07953048457785493
|
| 740 |
},
|
| 741 |
"gpt4o_judge": {
|
| 742 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 39.29545454545455,
|
| 743 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 4.868181818181818
|
| 744 |
}
|
| 745 |
},
|
|
|
|
| 754 |
"WavLLM_fairseq": 2.6833333333333336,
|
| 755 |
"SALMONN_7B": 2.5166666666666666,
|
| 756 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 12.416666666666666
|
| 757 |
+
},
|
| 758 |
+
"gpt4o_judge": {
|
| 759 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 10.116666666666667
|
| 760 |
}
|
| 761 |
},
|
| 762 |
"imda_part6_30s_sqa_test": {
|
|
|
|
| 793 |
"whisper_large_v3": 1.600581653970121,
|
| 794 |
"cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 27.620150160643625,
|
| 795 |
"phi_4_multimodal_instruct": 15.012558278964478,
|
| 796 |
+
"seallms_audio_7b": 27.583542512329426,
|
| 797 |
"WavLLM_fairseq": 13.841886973016162,
|
| 798 |
"SALMONN_7B": 14.102682915273142,
|
| 799 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 10.930203684508578
|
|
|
|
| 812 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 29.47134606841404
|
| 813 |
},
|
| 814 |
"gpt4o_judge": {
|
| 815 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 64.09333981526495,
|
| 816 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 28.076410484229232
|
| 817 |
}
|
| 818 |
},
|
|
|
|
| 856 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 70.8
|
| 857 |
},
|
| 858 |
"gpt4o_judge": {
|
| 859 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 78.60000000000001,
|
| 860 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 77.8
|
| 861 |
}
|
| 862 |
},
|
|
|
|
| 896 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 17.380191693290733
|
| 897 |
},
|
| 898 |
"gpt4o_judge": {
|
| 899 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 50.60702875399361,
|
| 900 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 14.63258785942492
|
| 901 |
}
|
| 902 |
},
|
|
|
|
| 935 |
"whisper_large_v3": 0.02107778621423822,
|
| 936 |
"cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 8.433062902024755,
|
| 937 |
"phi_4_multimodal_instruct": 0.19835914151649442,
|
| 938 |
+
"seallms_audio_7b": 0.012334972259958572,
|
| 939 |
"WavLLM_fairseq": 0.0033159224040994286,
|
| 940 |
"SALMONN_7B": 0.00046745670226766583,
|
| 941 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 1.0368044741318085
|
|
|
|
| 954 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 16.710526315789473
|
| 955 |
},
|
| 956 |
"gpt4o_judge": {
|
| 957 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 46.31578947368421,
|
| 958 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 14.736842105263158
|
| 959 |
}
|
| 960 |
},
|
|
|
|
| 978 |
"whisper_large_v3": 0.16408986541757878,
|
| 979 |
"cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 35.274306071307024,
|
| 980 |
"phi_4_multimodal_instruct": 45.295964957544776,
|
| 981 |
+
"seallms_audio_7b": 36.4496678966979,
|
| 982 |
"WavLLM_fairseq": 31.96381187282953,
|
| 983 |
"SALMONN_7B": 33.88941292215531,
|
| 984 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 5.987143868370054
|
|
|
|
| 1103 |
"WavLLM_fairseq": 49.06666666666666,
|
| 1104 |
"SALMONN_7B": 59.766666666666666,
|
| 1105 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 36.016666666666666
|
| 1106 |
+
},
|
| 1107 |
+
"gpt4o_judge": {
|
| 1108 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 66.9
|
| 1109 |
}
|
| 1110 |
},
|
| 1111 |
"slue_p2_sqa5_test": {
|
|
|
|
| 1121 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 82.99019607843137
|
| 1122 |
},
|
| 1123 |
"gpt4o_judge": {
|
| 1124 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 88.23529411764707,
|
| 1125 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 87.79411764705883
|
| 1126 |
}
|
| 1127 |
},
|
|
|
|
| 1150 |
"WavLLM_fairseq": 41.57088122605364,
|
| 1151 |
"SALMONN_7B": 30.536398467432953,
|
| 1152 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 36.81992337164751
|
| 1153 |
+
},
|
| 1154 |
+
"gpt4o_judge": {
|
| 1155 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 36.206896551724135
|
| 1156 |
}
|
| 1157 |
},
|
| 1158 |
"tedlium3_test": {
|
|
|
|
| 1209 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 57.800000000000004
|
| 1210 |
},
|
| 1211 |
"gpt4o_judge": {
|
| 1212 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 66.8,
|
| 1213 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 64.80000000000001
|
| 1214 |
}
|
| 1215 |
},
|
|
|
|
| 1226 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 57.199999999999996
|
| 1227 |
},
|
| 1228 |
"gpt4o_judge": {
|
| 1229 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 70.0,
|
| 1230 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 64.4
|
| 1231 |
}
|
| 1232 |
},
|
|
|
|
| 1279 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 63.68000000000001
|
| 1280 |
}
|
| 1281 |
},
|
| 1282 |
+
"gigaspeech2_viet": {
|
| 1283 |
+
"wer": {
|
| 1284 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 0.4960741822016732
|
| 1285 |
+
}
|
| 1286 |
+
},
|
| 1287 |
+
"gigaspeech2_thai": {
|
| 1288 |
+
"wer": {
|
| 1289 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 0.8901628256099774
|
| 1290 |
+
}
|
| 1291 |
+
},
|
| 1292 |
+
"gigaspeech2_indo": {
|
| 1293 |
+
"wer": {
|
| 1294 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 0.5087211232500294
|
| 1295 |
+
}
|
| 1296 |
+
},
|
| 1297 |
"imda_30s_ar_test": {
|
| 1298 |
"llama3_70b_judge": {
|
| 1299 |
"Qwen2-Audio-7B-Instruct": 5.106666666666667,
|