Spaces:
Running
Running
File size: 33,672 Bytes
977c84d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 |
Task Name,Metric,HigherBetter,Whisper-LLaMA,LTU-AS,SALMONN-7B,SALMONN-13B,Qwen-Audio-Chat,Qwen2-Audio-7B-Instruct,WavLLM,MU-LLaMA,GAMA-IT,Taxonomy
ChordClassification_AcousticGuitarAndPiano,LLM-C,TRUE,4.00%,10.00%,0.50%,9.00%,44.00%,28.50%,44.50%,51.00%,0.00%,Audio/Harmony & Pitch/Harmony
MARBLEKeyDetection_Giantstepskey,LLM-C,TRUE,3.00%,0.50%,1.50%,1.00%,3.00%,17.00%,2.00%,0.50%,0.00%,Audio/Harmony & Pitch/Harmony
HEARMusicTranscription_MAESTRO-5hr,X,FALSE,92.43%,100.00%,99.46%,100.00%,96.76%,96.76%,96.76%,100.00%,100.00%,Audio/Harmony & Pitch/Pitch
HEARMusicTranscription_MAESTRO-5hr,X,FALSE,1.0065,-,36.0000,-,8.3438,1.6875,6.25,-,-,Audio/Harmony & Pitch/Pitch
HEARPercussionInstrumentsTonicClassification_MridangamTonic,LLM-C,TRUE,2.00%,2.50%,12.70%,13.40%,11.30%,11.80%,12.80%,12.50%,5.70%,Audio/Harmony & Pitch/Pitch
InstrumentPitchClassification_Nsynth,LLM-C,TRUE,0.56%,0.56%,0.11%,0.22%,0.67%,18.11%,0.78%,0.33%,0.78%,Audio/Harmony & Pitch/Pitch
PitchExtractionByLyrics_CSD,NAR,FALSE,37.50%,94.00%,47.50%,73.50%,65.50%,84.50%,76.00%,98.00%,68.50%,Audio/Harmony & Pitch/Pitch
PitchExtractionByLyrics_CSD,TER,FALSE,146.28%,116.67%,226.21%,639.24%,106.45%,165.96%,136.11%,100.00%,195.18%,Audio/Harmony & Pitch/Pitch
EmotionClassificaitonInSongs_EMOTIFY,LLM-C,TRUE,10.00%,0.00%,1.67%,3.33%,13.33%,8.33%,8.33%,1.67%,0.00%,Audio/Music Classification/Emotion
MARBLEEmotionDetection_MTGMoodTheme,LLM-C,TRUE,0.00%,1.50%,0.00%,0.00%,2.70%,0.80%,0.80%,0.80%,0.10%,Audio/Music Classification/Emotion
HEARMusicGenreClassification_ISMIR04,LLM-C,TRUE,15.58%,7.04%,31.16%,44.22%,33.67%,37.69%,16.58%,27.64%,2.51%,Audio/Music Classification/Genre
MARBLEGenreClassification_MTG-Genre,LLM-C,TRUE,0.00%,1.60%,0.10%,0.20%,3.20%,0.10%,0.20%,0.20%,0.00%,Audio/Music Classification/Genre
MARBLEMusicTagging_MagnaTagATune,LLM-C,TRUE,5.50%,1.00%,3.50%,9.00%,4.50%,1.50%,1.00%,3.50%,7.50%,Audio/Music Classification/Genre
MARBLEMusicTagging_MTGTop50,LLM-C,TRUE,0.10%,0.30%,0.00%,0.00%,2.90%,0.30%,0.10%,0.50%,0.10%,Audio/Music Classification/Genre
MusicGenreClassification_FMA,LLM-C,TRUE,10.71%,1.79%,9.82%,16.07%,8.04%,15.18%,9.82%,9.82%,4.46%,Audio/Music Classification/Genre
HEARPercussionInstrumentsClassification_BeijingOperaPercussion,LLM-C,TRUE,3.39%,4.24%,10.17%,20.76%,16.10%,27.12%,24.15%,22.03%,11.44%,Audio/Music Classification/Instrument
HEARPercussionInstrumentsStrokeClassification_MridangamStroke,LLM-C,TRUE,0.10%,3.80%,7.60%,4.90%,11.50%,10.80%,13.70%,10.30%,0.10%,Audio/Music Classification/Instrument
InstrumentClassification_Nsynth,LLM-C,TRUE,1.23%,9.56%,13.97%,14.58%,17.89%,54.41%,11.89%,6.99%,10.17%,Audio/Music Classification/Instrument
InstrumentCombinationRecognition-OpenMIC-2018,LLM-C,TRUE,4.20%,13.51%,17.72%,24.02%,25.83%,24.92%,20.42%,19.52%,8.11%,Audio/Music Classification/Instrument
InstrumentSourceClassification_Nsynth,LLM-C,TRUE,7.22%,28.33%,37.33%,43.56%,32.67%,35.22%,30.89%,38.00%,35.89%,Audio/Music Classification/Instrument
MARBLEInstrumentClassification_MTGInstrument,LLM-C,TRUE,0.40%,2.30%,0.00%,0.00%,6.40%,1.40%,0.20%,0.60%,0.20%,Audio/Music Classification/Instrument
SingingAutomaticMOSPrediction_SingMOS,NAR,FALSE,99.50%,67.72%,0.33%,11.15%,17.30%,0.00%,22.96%,97.34%,98.34%,Audio/Quality Assessment/Singing
SingingAutomaticMOSPrediction_SingMOS,MSE,FALSE,12.17,2.3872,10.0777,11.2308,14.8895,1.4485,11.421,4.53,2.08,Audio/Quality Assessment/Singing
SingingAutomaticMOSPrediction_SingMOS,KTAU,TRUE,-0.3333,0.0126,-0.1105,0.0166,0.0712,0.0099,0.0103,0.1705,0.3539,Audio/Quality Assessment/Singing
SingingAutomaticMOSPrediction_SingMOS,LCC,TRUE,-0.6747,-0.0706,-0.1354,0.036,0.0824,0.0255,-0.0216,0.1866,0.3846,Audio/Quality Assessment/Singing
SingingAutomaticMOSPrediction_SingMOS,SRCC,TRUE,-0.5,0.0183,-0.1325,0.0231,0.0872,0.0124,0.0118,0.2281,0.4247,Audio/Quality Assessment/Singing
MARBLEBeatTracking_ASAP_MAESTRO,X,FALSE,100.00%,100.00%,100.00%,100.00%,100.00%,100.00%,100.00%,100.00%,100.00%,Audio/Rhythm Analysis
MARBLEBeatTracking_ASAP_MAESTRO,X,FALSE,-,-,-,-,-,-,-,-,-,Audio/Rhythm Analysis
MusicBeatTracking_ASAP,NAR,FALSE,54.55%,74.55%,51.82%,63.64%,62.73%,40.91%,47.27%,80.00%,84.55%,Audio/Rhythm Analysis
MusicBeatTracking_ASAP,Miss Time,FALSE,135.6077,183.6786,118.4047,77.9687,157.5917,76.0904,111.7759,240,93.8751,Audio/Rhythm Analysis
AudioEditingIdentification_PeoplesSpeech,NAR,FALSE,84.38%,87.50%,62.50%,75.00%,75.00%,68.75%,96.88%,65.62%,37.50%,Audio/Safety/Audio Integrity
AudioEditingIdentification_PeoplesSpeech,ACC,TRUE,204.3094,53.6938,165.0617,43.1693,184.695,154.6202,100,164.0558,154.8122,Audio/Safety/Audio Integrity
SceneFakeDetection_SceneFake_ASPIRE,LLM-C,TRUE,22.00%,46.00%,48.00%,37.00%,49.00%,54.00%,49.00%,20.00%,41.00%,Audio/Safety/Audio Integrity
AudioDeepFakeDetection_LJSpeech_WaveFake_MUSDB18HQ,LLM-C,TRUE,8.25%,24.56%,22.79%,38.31%,31.43%,27.90%,30.84%,39.10%,25.54%,Audio/Safety/Deepfake
SingingVoiceDeepfakeDetection_CtrSVDD_ACEKiSing_M4Singer,LLM-C,TRUE,5.56%,22.06%,22.06%,26.19%,24.44%,21.43%,9.21%,20.95%,9.84%,Audio/Safety/Deepfake
AudioDurationPrediction_NTUML2021,NAR,FALSE,33.00%,37.00%,10.50%,55.00%,0.00%,0.00%,0.50%,46.50%,14.00%,Audio/Signal Characteristics Analysis
AudioDurationPrediction_NTUML2021,MSE,FALSE,13.3657,39.1111,31652.7877,2216.7111,28.985,62.515,3581.0352,1527.785,62.7674,Audio/Signal Characteristics Analysis
HEARMusicSpeechClassification_MAESTRO_Librispeech,LLM-C,TRUE,72.50%,77.50%,91.50%,99.50%,98.00%,89.50%,59.50%,51.50%,88.00%,Audio/Signal Characteristics Analysis
SoundEffectDetection_RemFx,LLM-C,TRUE,2.00%,6.50%,14.17%,17.67%,15.83%,19.83%,17.50%,17.00%,15.83%,Audio/Signal Characteristics Analysis
SpeechDetection_LibriSpeech-TestClean,LLM-C,TRUE,48.00%,29.00%,46.00%,66.00%,45.50%,56.50%,35.00%,46.50%,38.50%,Audio/Signal Characteristics Analysis
SpeechDetection_LibriSpeech-TestOther,LLM-C,TRUE,52.00%,31.50%,46.00%,66.50%,44.50%,55.50%,37.00%,45.50%,36.00%,Audio/Signal Characteristics Analysis
SpeechDetection_LJSpeech,LLM-C,TRUE,54.00%,44.50%,60.50%,74.00%,57.00%,45.00%,53.00%,49.00%,55.50%,Audio/Signal Characteristics Analysis
Children_Song_Transcript_Verification_CSD,WER,FALSE,58.68%,102.68%,104.02%,100.99%,128.67%,94.93%,155.48%,100.00%,100.12%,Audio/Singing Analysis/Lyrics
LyricTranslation_SingSet,Sacre Bleu,TRUE,2.1093,1.0357,2.8126,0.9941,3.3975,3.9155,1.8969,0.1593,0.0739,Audio/Singing Analysis/Lyrics
SongLyricRecognition_SingSet,MER,FALSE,101.93%,164.53%,356.36%,266.00%,118.41%,115.62%,481.13%,117.53%,168.52%,Audio/Singing Analysis/Lyrics
MARBLEVocalTechniqueDetection_VocalSet,LLM-C,TRUE,8.00%,1.00%,11.00%,6.50%,6.50%,15.50%,8.00%,8.50%,2.50%,Audio/Singing Analysis/Vocal Techniques
AudioSegmentRetrieval_Clotho,NAR,FALSE,80.00%,78.00%,2.00%,4.00%,12.00%,4.00%,14.00%,82.00%,6.00%,Audio/Sound Event/Advanced Understanding
AudioSegmentRetrieval_Clotho,IoU,TRUE,8.17%,22.17%,8.25%,8.68%,4.58%,7.07%,6.90%,0.00%,10.45%,Audio/Sound Event/Advanced Understanding
HEARSoundEventDetection_DCASE2016Task2,LLM-C,TRUE,0.00%,0.00%,21.43%,35.71%,7.14%,14.29%,21.43%,21.43%,7.14%,Audio/Sound Event/Advanced Understanding
MultichannelSoundEventUnderstanding_STARSS23,LLM-C,TRUE,38.73%,4.23%,37.32%,26.06%,38.03%,41.55%,21.83%,4.93%,11.27%,Audio/Sound Event/Advanced Understanding
AnimalClassification_WaveSource-Test,LLM-C,TRUE,9.25%,40.50%,58.25%,67.00%,75.75%,34.00%,12.00%,4.25%,15.25%,Audio/Sound Event/Animal
BirdSoundDetection_Warblrb10k,LLM-C,TRUE,28.50%,43.00%,74.50%,75.00%,78.50%,79.00%,75.00%,40.50%,33.00%,Audio/Sound Event/Animal
CatEmotionClassification_CatSoundClassificationDataset-V2,LLM-C,TRUE,6.00%,0.00%,2.00%,4.00%,6.00%,14.00%,8.00%,2.00%,14.00%,Audio/Sound Event/Animal
CornellBirdcallIdentification,LLM-C,TRUE,0.00%,0.00%,0.00%,3.33%,10.00%,10.00%,13.33%,3.33%,0.00%,Audio/Sound Event/Animal
EnvironmentalSoundClassification_ESC50-Animals,LLM-C,TRUE,14.50%,8.00%,14.00%,3.00%,82.00%,36.00%,5.00%,0.00%,2.00%,Audio/Sound Event/Animal
HEARBeehiveStatesClassification_BeehiveStates,LLM-C,TRUE,43.00%,35.00%,54.00%,42.50%,19.00%,42.50%,37.00%,37.50%,19.00%,Audio/Sound Event/Animal
DomesticEnvironmentSoundEventDetection_DESED-PublicEval,NAR,FALSE,91.39%,99.44%,99.72%,99.72%,99.17%,99.72%,99.72%,100.00%,99.72%,Audio/Sound Event/Environment
DomesticEnvironmentSoundEventDetection_DESED-PublicEval,Event-based F1,TRUE,0,0.6,0,0,0,0.01,0,0,0,Audio/Sound Event/Environment
EmergencyTrafficDetection_Large-Scale-Audio-dataset,LLM-C,TRUE,39.00%,34.00%,55.50%,7.25%,65.50%,74.50%,48.00%,43.50%,69.50%,Audio/Sound Event/Environment
EnvironmentalSoundClassification_ESC50-ExteriorAndUrbanNoises,LLM-C,TRUE,4.50%,14.50%,0.50%,0.50%,77.00%,25.00%,6.50%,0.00%,6.50%,Audio/Sound Event/Environment
EnvironmentalSoundClassification_ESC50-InteriorAndDomesticSounds,LLM-C,TRUE,4.50%,3.50%,0.50%,0.00%,59.00%,2.50%,4.00%,0.00%,0.00%,Audio/Sound Event/Environment
EnvironmentalSoundClassification_ESC50-NaturalSoundscapesAndWaterSounds,LLM-C,TRUE,3.00%,7.50%,6.50%,1.00%,78.00%,5.00%,7.00%,0.00%,7.00%,Audio/Sound Event/Environment
EnvironmentalSoundClassification_UrbanSound8K-UrbanNoises,LLM-C,TRUE,10.00%,3.26%,1.40%,0.00%,39.53%,4.88%,10.47%,0.00%,0.70%,Audio/Sound Event/Environment
EnvironmentRecognition_ESC50,LLM-C,TRUE,7.02%,38.60%,42.11%,19.30%,56.14%,63.16%,40.35%,10.53%,31.58%,Audio/Sound Event/Environment
HEAREnvironmentalSoundClassification_ESC50,LLM-C,TRUE,4.80%,25.30%,61.00%,73.40%,68.80%,43.90%,9.80%,0.40%,35.30%,Audio/Sound Event/Environment
HEARVocalImitationClassification_VocalImitations,LLM-C,TRUE,15.17%,5.50%,1.17%,1.83%,18.67%,11.17%,17.50%,3.83%,5.83%,Audio/Sound Event/Human
AudioSpatialDistancePrediction_SpatialLibriSpeech,X,FALSE,50.50%,59.00%,62.50%,100.00%,85.50%,58.00%,44.00%,97.00%,71.00%,Audio/Spatial Audio
AudioSpatialDistancePrediction_SpatialLibriSpeech,X,FALSE,0.6651,0.933,0.449,-,1.5011,0.6915,0.9011,0.9212,0.5,Audio/Spatial Audio
HowFarAreYou_3DSpeaker,LLM-C,TRUE,26.00%,13.00%,4.50%,8.00%,30.00%,26.00%,32.00%,18.00%,2.50%,Audio/Spatial Audio
SoundPositionPrediction_Dataset,X,FALSE,81.25%,100.00%,100.00%,100.00%,100.00%,25.00%,93.75%,100.00%,100.00%,Audio/Spatial Audio
SoundPositionPrediction_Dataset,X,FALSE,1.4006,-,-,-,-,1.1063,0.8681,-,-,Audio/Spatial Audio
DialogueEmotionClassification_DailyTalk,LLM-C,TRUE,33.50%,16.50%,27.00%,11.50%,33.00%,54.00%,18.50%,19.00%,33.00%,Speech/Paralinguistics/Emotion Analysis
EmojiGroundedSpeechEmotionRecognition_RAVDESS,LLM-C,TRUE,1.10%,0.00%,0.00%,0.10%,1.30%,1.20%,0.10%,1.90%,48.50%,Speech/Paralinguistics/Emotion Analysis
EmotionChangeDetection_Ravdess,LLM-C,TRUE,0.08%,0.42%,0.58%,0.96%,0.04%,2.96%,0.08%,0.58%,0.04%,Speech/Paralinguistics/Emotion Analysis
EmotionRecognition_MultimodalEmotionlinesDataset,LLM-C,TRUE,22.50%,14.50%,19.00%,9.50%,43.50%,36.50%,40.50%,0.50%,2.50%,Speech/Paralinguistics/Emotion Analysis
HEAREmotionRecognition_CREMAD,LLM-C,TRUE,6.70%,9.30%,18.80%,18.10%,62.50%,61.10%,25.60%,12.40%,13.20%,Speech/Paralinguistics/Emotion Analysis
SuperbER_RAVDESS,LLM-C,TRUE,12.50%,25.83%,12.08%,12.50%,70.42%,75.83%,12.08%,12.92%,0.00%,Speech/Paralinguistics/Emotion Analysis
Covid19CoughAudioClassification_CoughVid,LLM-C,TRUE,0.93%,1.03%,0.00%,0.00%,4.12%,0.72%,4.22%,21.91%,0.21%,Speech/Paralinguistics/Vocal Event Detection
EnvironmentalSoundClassification_ESC50-HumanAndNonSpeechSounds,LLM-C,TRUE,17.50%,11.00%,8.50%,3.00%,82.00%,29.00%,16.50%,0.00%,2.00%,Speech/Paralinguistics/Vocal Event Detection
HumanNonSpeechSoundRecognition_Nonspeech7k-test_CommonVoice-DeltaSegment-15,LLM-C,TRUE,2.86%,5.00%,44.29%,30.71%,27.14%,23.57%,14.29%,17.14%,32.86%,Speech/Paralinguistics/Vocal Event Detection
HumanScreamingDetection_Environmentdb,LLM-C,TRUE,52.50%,25.00%,52.50%,65.00%,62.50%,87.50%,47.50%,42.50%,60.00%,Speech/Paralinguistics/Vocal Event Detection
VocalSoundRecognition_VocalSound,LLM-C,TRUE,29.86%,1.53%,11.53%,4.31%,75.56%,30.97%,14.44%,0.28%,2.78%,Speech/Paralinguistics/Vocal Event Detection
PhonemeSegmentCounting_Librispeech-words,NAR,FALSE,16.29%,48.64%,39.09%,11.62%,5.99%,1.73%,0.50%,2.67%,72.22%,"Speech/Phonetics, Phonology, Prosody/Phonetics and Phoneme Processing/Phoneme Recognition Tasks"
PhonemeSegmentCounting_Librispeech-words,ACC,TRUE,13.56%,11.77%,11.84%,8.25%,5.42%,13.46%,9.55%,0.47%,2.29%,"Speech/Phonetics, Phonology, Prosody/Phonetics and Phoneme Processing/Phoneme Recognition Tasks"
PhonemeSegmentCounting_Librispeech-words,Abs Diff,FALSE,2.5378,1012.8103,6.5285,50.7724,7.1857,3.0622,7.9109,15.6777,5.0491,"Speech/Phonetics, Phonology, Prosody/Phonetics and Phoneme Processing/Phoneme Recognition Tasks"
PhoneSegmentCounting_VoxAngeles,NAR,FALSE,19.19%,57.24%,41.88%,12.56%,0.41%,16.11%,15.04%,41.47%,18.79%,"Speech/Phonetics, Phonology, Prosody/Phonetics and Phoneme Processing/Phoneme Recognition Tasks"
PhoneSegmentCounting_VoxAngeles,ACC,TRUE,13.20%,1.69%,15.81%,20.04%,1.77%,6.60%,10.11%,9.14%,3.80%,"Speech/Phonetics, Phonology, Prosody/Phonetics and Phoneme Processing/Phoneme Recognition Tasks"
PhoneSegmentCounting_VoxAngeles,Abs Diff,FALSE,41.236,3759.3878,5.4857,4.9331,3.1286,2.8706,45754.9952,5.4242,2.8694,"Speech/Phonetics, Phonology, Prosody/Phonetics and Phoneme Processing/Phoneme Recognition Tasks"
SuperbPR_LibriSpeech-TestClean,PER,FALSE,100.12%,102.75%,25.36%,24.60%,100.58%,100.96%,99.99%,110.27%,100.37%,"Speech/Phonetics, Phonology, Prosody/Phonetics and Phoneme Processing/Phoneme Recognition Tasks"
SuperbPR_LibriSpeech-TestOther,PER,FALSE,100.17%,101.08%,22.79%,22.91%,100.62%,100.88%,99.99%,111.18%,100.61%,"Speech/Phonetics, Phonology, Prosody/Phonetics and Phoneme Processing/Phoneme Recognition Tasks"
PhonologicalFeatureClassification_VoxAngeles-ConsonantPlaceOfArticulation,LLM-C,TRUE,25.77%,1.37%,1.88%,1.37%,1.54%,3.24%,1.88%,1.54%,0.51%,"Speech/Phonetics, Phonology, Prosody/Phonetics and Phoneme Processing/Phonological Feature Analysis"
PhonologicalFeatureClassification_VoxAngeles-MannerOfArticulation,LLM-C,TRUE,17.42%,9.36%,2.06%,1.50%,5.62%,6.74%,6.84%,7.96%,8.99%,"Speech/Phonetics, Phonology, Prosody/Phonetics and Phoneme Processing/Phonological Feature Analysis"
PhonologicalFeatureClassification_VoxAngeles-Phone,X,TRUE,5.73%,0.09%,1.76%,-,3.60%,3.97%,0.00%,0.00%,0.09%,"Speech/Phonetics, Phonology, Prosody/Phonetics and Phoneme Processing/Phonological Feature Analysis"
PhonologicalFeatureClassification_VoxAngeles-VowelFrontness,LLM-C,TRUE,50.71%,17.52%,41.14%,38.09%,48.47%,57.23%,36.46%,42.97%,10.39%,"Speech/Phonetics, Phonology, Prosody/Phonetics and Phoneme Processing/Phonological Feature Analysis"
PhonologicalFeatureClassification_VoxAngeles-VowelHeight,LLM-C,TRUE,24.44%,17.11%,31.77%,35.23%,29.53%,36.86%,37.88%,39.10%,24.44%,"Speech/Phonetics, Phonology, Prosody/Phonetics and Phoneme Processing/Phonological Feature Analysis"
PhonologicalFeatureClassification_VoxAngeles-VowelRoundedness,LLM-C,TRUE,43.38%,21.18%,27.29%,61.30%,46.44%,69.65%,38.90%,21.79%,18.94%,"Speech/Phonetics, Phonology, Prosody/Phonetics and Phoneme Processing/Phonological Feature Analysis"
HeteronymDifferentiation_HeteronymEn,LLM-C,TRUE,55.00%,26.00%,37.00%,45.00%,52.00%,44.00%,51.00%,30.00%,20.00%,"Speech/Phonetics, Phonology, Prosody/Phonetics and Phoneme Processing/Pronounciation Evaluation"
L2EnglishAccuracy_speechocean762-Ranking,LLM-C,TRUE,24.72%,33.89%,50.00%,49.72%,34.44%,50.00%,41.67%,48.06%,45.56%,"Speech/Phonetics, Phonology, Prosody/Phonetics and Phoneme Processing/Pronounciation Evaluation"
L2EnglishAccuracy_speechocean762-scoring,NAR,FALSE,15.44%,95.03%,0.40%,0.27%,37.72%,0.94%,9.53%,90.47%,50.07%,"Speech/Phonetics, Phonology, Prosody/Phonetics and Phoneme Processing/Pronounciation Evaluation"
L2EnglishAccuracy_speechocean762-scoring,PCC,TRUE,0.0185,-0.183,0.0633,0.0438,0.0293,-0.0159,0.0439,0.0727,0.0151,"Speech/Phonetics, Phonology, Prosody/Phonetics and Phoneme Processing/Pronounciation Evaluation"
MultilingualPronunciationSimilarity_VoxAngeles,LLM-C,TRUE,38.40%,13.50%,47.20%,48.50%,23.70%,44.70%,48.00%,25.10%,40.60%,"Speech/Phonetics, Phonology, Prosody/Phonetics and Phoneme Processing/Pronounciation Evaluation"
AccentClassification_AccentdbExtended,LLM-C,TRUE,17.50%,5.50%,3.00%,4.50%,26.50%,14.00%,7.00%,27.00%,4.00%,"Speech/Phonetics, Phonology, Prosody/Prosody/Prosodic Feature Classification"
StressDetection_MIRSD,LLM-C,TRUE,15.50%,3.00%,2.00%,13.00%,16.50%,23.50%,25.50%,0.00%,1.00%,"Speech/Phonetics, Phonology, Prosody/Prosody/Prosodic Feature Classification"
ThirdToneSandhiRecognition_NCCUCorpusofSpokenTaiwanMandarin,NAR,FALSE,59.38%,93.75%,100.00%,87.50%,84.38%,84.38%,28.12%,96.88%,100.00%,"Speech/Phonetics, Phonology, Prosody/Prosody/Prosodic Feature Classification"
ThirdToneSandhiRecognition_NCCUCorpusofSpokenTaiwanMandarin,IoU,TRUE,0.2179,0.5,0,0.75,0,0,0.1304,0,0,"Speech/Phonetics, Phonology, Prosody/Prosody/Prosodic Feature Classification"
L2EnglishFluency_speechocean762-Ranking,LLM-C,TRUE,31.39%,21.94%,50.00%,50.00%,40.28%,50.56%,27.78%,49.44%,45.00%,"Speech/Phonetics, Phonology, Prosody/Prosody/Prosodic Quality Assessment"
L2EnglishFluency_speechocean762-Scoring,NAR,FALSE,14.67%,88.16%,0.00%,0.67%,8.75%,0.81%,41.86%,63.53%,55.72%,"Speech/Phonetics, Phonology, Prosody/Prosody/Prosodic Quality Assessment"
L2EnglishFluency_speechocean762-Scoring,PCC,TRUE,0.0055,0.0332,0.0292,0.0183,-0.0422,-0.0858,0.0222,0.0505,0.0532,"Speech/Phonetics, Phonology, Prosody/Prosody/Prosodic Quality Assessment"
L2EnglishProsodic_speechocean762-Ranking,LLM-C,TRUE,26.39%,35.00%,46.67%,52.50%,34.44%,51.11%,34.44%,49.72%,42.22%,"Speech/Phonetics, Phonology, Prosody/Prosody/Prosodic Quality Assessment"
L2EnglishProsodic_speechocean762-Scoring,NAR,FALSE,39.43%,94.75%,0.27%,1.21%,71.47%,3.10%,11.98%,94.62%,82.91%,"Speech/Phonetics, Phonology, Prosody/Prosody/Prosodic Quality Assessment"
L2EnglishProsodic_speechocean762-Scoring,PCC,TRUE,0.0435,-0.1747,0.0427,0.0775,0.1446,0.0201,0.0461,-0.1535,0.0973,"Speech/Phonetics, Phonology, Prosody/Prosody/Prosodic Quality Assessment"
ProsodyNaturalness_ProsAudit-Lexical,LLM-C,TRUE,49.81%,32.82%,48.26%,47.10%,21.62%,51.74%,54.83%,47.10%,5.41%,"Speech/Phonetics, Phonology, Prosody/Prosody/Prosodic Quality Assessment"
ProsodyNaturalness_ProsAudit-Protosyntax,LLM-C,TRUE,53.44%,31.68%,46.56%,46.18%,25.95%,50.76%,43.51%,49.24%,4.20%,"Speech/Phonetics, Phonology, Prosody/Prosody/Prosodic Quality Assessment"
SpoofDetection_ASVspoof2015,LLM-C,TRUE,41.00%,19.00%,55.00%,14.00%,13.00%,23.00%,19.50%,69.50%,0.50%,Speech/Safety & Security/Spoofing and Anti-Spoofing
SpoofDetection_ASVspoof2017,LLM-C,TRUE,49.00%,4.50%,63.00%,38.00%,27.00%,32.50%,21.00%,64.00%,0.00%,Speech/Safety & Security/Spoofing and Anti-Spoofing
DeepFakeVoiceRecognition_DEEP-VOICE,LLM-C,TRUE,27.25%,7.75%,50.75%,40.00%,48.75%,50.50%,43.50%,45.00%,19.75%,Speech/Safety & Security/Synthetic Speech Detection
EnhancementDetection_LibriTTS-TestClean_WHAM,LLM-C,TRUE,50.50%,30.50%,30.00%,46.50%,55.00%,56.50%,30.00%,50.00%,21.00%,Speech/Safety & Security/Synthetic Speech Detection
FraudRobocallRecognition_CallHome,LLM-C,TRUE,73.33%,13.33%,0.00%,100.00%,100.00%,100.00%,90.00%,26.67%,100.00%,Speech/Safety & Security/Synthetic Speech Detection
FraudRobocallRecognition_Promo,LLM-C,TRUE,47.37%,10.53%,0.00%,73.68%,57.89%,63.16%,63.16%,5.26%,100.00%,Speech/Safety & Security/Synthetic Speech Detection
FraudRobocallRecognition_Robocall,LLM-C,TRUE,94.87%,51.28%,100.00%,20.51%,51.28%,61.54%,30.77%,30.77%,0.00%,Speech/Safety & Security/Synthetic Speech Detection
HEARLanguageIdentification_VoxLingua107Top10,LLM-C,TRUE,92.18%,1.95%,6.28%,11.01%,18.00%,36.11%,1.85%,0.00%,0.10%,Speech/Speaker & Language/Language/Language Identification
LanguageIdentification_VoxForge,LLM-C,TRUE,95.50%,13.50%,22.00%,8.50%,84.50%,93.00%,18.00%,6.00%,0.00%,Speech/Speaker & Language/Language/Language Identification
AgeClassification_CommonVoiceCorpus-Test,LLM-C,TRUE,0.50%,19.75%,22.75%,21.00%,26.00%,35.50%,21.00%,23.50%,17.75%,Speech/Speaker & Language/Speaker/Speaker Characteristics
GenderRecognitionbyVoice-CommonVoice-DeltaSegment-15,LLM-C,TRUE,1.00%,81.50%,63.00%,57.00%,53.50%,97.50%,35.00%,51.00%,63.00%,Speech/Speaker & Language/Speaker/Speaker Characteristics
HEARSpeakerCountIdentification_LibriCount,NAR,FALSE,30.40%,42.10%,35.90%,42.50%,3.50%,0.40%,22.00%,23.20%,26.20%,Speech/Speaker & Language/Speaker/Speaker Recognition
HEARSpeakerCountIdentification_LibriCount,ACC,TRUE,17.53%,16.93%,13.26%,17.22%,10.26%,13.76%,5.51%,13.80%,17.48%,Speech/Speaker & Language/Speaker/Speaker Recognition
MultiSpeakerDetection_VCTK,LLM-C,TRUE,46.50%,23.50%,36.00%,30.50%,57.00%,51.00%,32.50%,39.00%,48.50%,Speech/Speaker & Language/Speaker/Speaker Recognition
SpeakerCounting_LibriTTS-TestClean,LLM-C,TRUE,18.50%,6.00%,14.50%,13.50%,17.50%,26.50%,14.00%,4.00%,12.00%,Speech/Speaker & Language/Speaker/Speaker Recognition
SpeakerVerification_LibriSpeech-TestClean,LLM-C,TRUE,48.00%,37.50%,45.00%,54.50%,43.50%,52.00%,19.00%,24.00%,13.00%,Speech/Speaker & Language/Speaker/Speaker Recognition
SpeakerVerification_LibriSpeech-TestOther,LLM-C,TRUE,41.00%,36.00%,51.00%,43.00%,49.00%,50.00%,20.00%,27.00%,23.00%,Speech/Speaker & Language/Speaker/Speaker Recognition
SpeakerVerification_VCTK,LLM-C,TRUE,48.00%,38.50%,54.00%,58.50%,32.50%,51.00%,24.00%,19.50%,7.00%,Speech/Speaker & Language/Speaker/Speaker Recognition
SuperbSD_Libri2Mix-Test,X,FALSE,93.00%,99.50%,100.00%,100.00%,99.50%,98.00%,97.50%,100.00%,60.50%,Speech/Speaker & Language/Speaker/Speaker Recognition
SuperbSD_Libri2Mix-Test,X,FALSE,74.81%,74.62%,-,-,44.26%,93.29%,90.53%,-,74.12%,Speech/Speaker & Language/Speaker/Speaker Recognition
SuperbSV_SuperbHiddenSet,LLM-C,TRUE,48.00%,47.00%,45.00%,49.00%,51.00%,50.00%,55.00%,44.00%,45.00%,Speech/Speaker & Language/Speaker/Speaker Recognition
NoiseDetection_LJSpeech_MUSAN-Gaussian,LLM-C,TRUE,45.50%,18.50%,50.00%,47.50%,42.00%,49.00%,39.00%,41.00%,21.50%,Speech/Speech Enhancement/Degradation Detection
NoiseDetection_LJSpeech_MUSAN-Music,LLM-C,TRUE,52.00%,8.50%,52.00%,47.00%,50.50%,47.00%,44.50%,33.00%,6.00%,Speech/Speech Enhancement/Degradation Detection
NoiseDetection_LJSpeech_MUSAN-Noise,LLM-C,TRUE,47.00%,13.50%,50.50%,49.00%,50.50%,49.50%,44.50%,48.00%,9.00%,Speech/Speech Enhancement/Degradation Detection
NoiseDetection_LJSpeech_MUSAN-Speech,LLM-C,TRUE,48.00%,13.50%,53.00%,36.00%,50.50%,47.00%,45.00%,50.50%,51.00%,Speech/Speech Enhancement/Degradation Detection
NoiseDetection_VCTK_MUSAN-Music,LLM-C,TRUE,46.00%,32.50%,45.50%,57.50%,42.00%,54.50%,52.00%,37.50%,3.50%,Speech/Speech Enhancement/Degradation Detection
NoiseDetection_VCTK_MUSAN-Noise,LLM-C,TRUE,44.50%,26.50%,45.50%,55.00%,44.50%,61.00%,47.00%,54.00%,4.50%,Speech/Speech Enhancement/Degradation Detection
NoiseDetection_VCTK_MUSAN-Speech,LLM-C,TRUE,45.00%,51.50%,43.50%,52.50%,46.50%,57.00%,44.50%,48.50%,56.50%,Speech/Speech Enhancement/Degradation Detection
NoiseDetection_VCTK-MUSAN-Gaussian,LLM-C,TRUE,53.00%,15.50%,49.00%,56.50%,45.50%,54.50%,46.00%,38.50%,11.50%,Speech/Speech Enhancement/Degradation Detection
NoiseSNRLevelPrediction_VCTK_MUSAN-Gaussian,LLM-C,TRUE,24.00%,8.00%,13.50%,15.50%,13.00%,17.50%,17.50%,15.00%,1.50%,Speech/Speech Enhancement/Degradation Detection
NoiseSNRLevelPrediction_VCTK_MUSAN-Music,LLM-C,TRUE,23.00%,9.00%,10.00%,8.50%,10.00%,14.00%,15.50%,9.00%,1.50%,Speech/Speech Enhancement/Degradation Detection
NoiseSNRLevelPrediction_VCTK_MUSAN-Noise,LLM-C,TRUE,23.00%,12.50%,9.50%,16.00%,7.50%,19.00%,14.00%,11.00%,0.50%,Speech/Speech Enhancement/Degradation Detection
NoiseSNRLevelPrediction_VCTK_MUSAN-Speech,LLM-C,TRUE,23.50%,11.50%,13.00%,1.50%,14.00%,20.50%,15.00%,10.50%,0.50%,Speech/Speech Enhancement/Degradation Detection
ReverberationDetection_LJSpeech_RirsNoises-LargeRoom,LLM-C,TRUE,44.50%,11.00%,19.00%,34.00%,28.00%,48.00%,42.50%,17.50%,6.00%,Speech/Speech Enhancement/Degradation Detection
ReverberationDetection_LJSpeech_RirsNoises-MediumRoom,LLM-C,TRUE,40.00%,4.50%,14.50%,25.00%,31.00%,48.00%,41.00%,10.50%,5.00%,Speech/Speech Enhancement/Degradation Detection
ReverberationDetection_LJSpeech_RirsNoises-SmallRoom,LLM-C,TRUE,45.00%,4.50%,9.00%,19.50%,24.00%,48.00%,42.50%,14.00%,6.50%,Speech/Speech Enhancement/Degradation Detection
ReverberationDetection_VCTK_RirsNoises-LargeRoom,LLM-C,TRUE,43.00%,24.50%,18.50%,25.00%,18.00%,46.00%,37.00%,14.50%,7.50%,Speech/Speech Enhancement/Degradation Detection
ReverberationDetection_VCTK_RirsNoises-MediumRoom,LLM-C,TRUE,46.50%,22.50%,8.50%,19.50%,20.00%,46.00%,38.50%,14.00%,6.50%,Speech/Speech Enhancement/Degradation Detection
ReverberationDetection_VCTK_RirsNoises-SmallRoom,LLM-C,TRUE,47.00%,18.50%,8.00%,13.00%,17.50%,46.00%,36.00%,10.50%,11.00%,Speech/Speech Enhancement/Degradation Detection
NBestCorrection_Librispeech-TestOther,LLM-C,TRUE,31.80%,22.80%,23.00%,29.20%,32.80%,29.00%,32.60%,20.80%,30.80%,Speech/Speech Recognition/ASR Post-Processing
AAVESpeechRecognition_CORAAL,WER,FALSE,21.73%,97.34%,23.99%,31.56%,96.81%,38.55%,34.91%,136.50%,102.92%,Speech/Speech Recognition/Language
Code-switchSpeechRecognition_NTUML2021,MER,FALSE,424.58%,215.51%,293.50%,185.27%,165.54%,116.88%,172.03%,130.18%,193.47%,Speech/Speech Recognition/Language
CodeSwitchingSpeechRecognition_ASCEND,NAR,FALSE,68.20%,12.20%,5.40%,88.80%,1.60%,0.00%,30.40%,69.40%,14.60%,Speech/Speech Recognition/Language
CodeSwitchingSpeechRecognition_ASCEND,ACC,TRUE,28.30%,50.57%,58.14%,14.29%,44.31%,60.80%,10.92%,13.73%,9.84%,Speech/Speech Recognition/Language
MultiLingualSpeechRecognition_MLS-de,WER,FALSE,73.80%,132.35%,34.46%,25.21%,79.60%,24.96%,49.56%,99.37%,105.81%,Speech/Speech Recognition/Language
MultiLingualSpeechRecognition_MLS-en,WER,FALSE,65.03%,100.29%,9.37%,9.47%,27.97%,17.47%,17.23%,96.03%,97.09%,Speech/Speech Recognition/Language
MultiLingualSpeechRecognition_MLS-es,WER,FALSE,72.49%,123.26%,23.45%,16.10%,58.82%,18.19%,44.90%,103.19%,101.75%,Speech/Speech Recognition/Language
MultiLingualSpeechRecognition_MLS-fr,WER,FALSE,71.09%,133.86%,26.52%,21.27%,46.27%,19.01%,42.35%,102.25%,102.01%,Speech/Speech Recognition/Language
MultiLingualSpeechRecognition_MLS-it,WER,FALSE,85.38%,125.39%,39.11%,31.54%,56.55%,33.16%,46.14%,102.72%,108.84%,Speech/Speech Recognition/Language
MultiLingualSpeechRecognition_MLS-nl,WER,FALSE,74.96%,120.29%,32.11%,29.37%,140.01%,42.94%,55.56%,102.05%,101.22%,Speech/Speech Recognition/Language
MultiLingualSpeechRecognition_MLS-pl,WER,FALSE,76.79%,139.04%,73.83%,51.53%,278.19%,101.41%,90.11%,100.23%,105.43%,Speech/Speech Recognition/Language
MultiLingualSpeechRecognition_MLS-pt,WER,FALSE,73.63%,136.66%,31.88%,25.07%,117.33%,24.72%,46.26%,99.71%,103.48%,Speech/Speech Recognition/Language
PTBRSpeechRecognition_CommonVoice17-Test,WER,FALSE,45.75%,248.05%,69.88%,73.91%,233.85%,38.57%,62.93%,639.12%,270.64%,Speech/Speech Recognition/Language
SuperbASR_LibriSpeech-TestClean,WER,FALSE,33.96%,96.28%,15.11%,2.79%,69.35%,36.70%,6.87%,103.67%,116.66%,Speech/Speech Recognition/Language
SuperbASR_LibriSpeech-TestOther,WER,FALSE,42.14%,91.89%,11.44%,4.31%,79.53%,40.28%,9.17%,111.03%,130.00%,Speech/Speech Recognition/Language
SuperbOODAsrAr_CommonVoice7-Test,WER,FALSE,51.04%,245.85%,216.51%,178.02%,289.25%,178.21%,149.15%,225.28%,504.53%,Speech/Speech Recognition/Language
SuperbOODAsrEs_CommonVoice7-Test,WER,FALSE,10.93%,150.00%,98.96%,99.22%,100.83%,75.08%,99.38%,141.68%,303.28%,Speech/Speech Recognition/Language
SuperbOODAsrSpon_CHIME6-Test,WER,FALSE,65.71%,122.86%,61.11%,62.80%,92.17%,80.21%,136.24%,137.25%,266.72%,Speech/Speech Recognition/Language
SuperbOODAsrZh_CommonVoice7-Test,CER,FALSE,29.67%,508.02%,609.28%,310.33%,449.32%,270.63%,445.36%,435.33%,1097.41%,Speech/Speech Recognition/Language
TargetSpeaker-ASR_AMItest,WER,FALSE,143.02%,133.86%,273.90%,187.15%,207.01%,132.13%,266.26%,108.32%,140.37%,Speech/Speech Recognition/Speaker
MultiSpeakerDetection_LibriSpeech-TestClean,LLM-C,TRUE,46.00%,29.00%,16.50%,22.00%,58.00%,52.00%,22.00%,42.50%,53.00%,Speech/Speech Recognition/Specific Recognition Tasks
SpeechCommandRecognition_AudioMNIST,NAR,FALSE,4.80%,76.40%,3.07%,0.40%,0.67%,0.00%,0.67%,1.60%,76.40%,Speech/Speech Recognition/Specific Recognition Tasks
SpeechCommandRecognition_AudioMNIST,ACC,TRUE,88.80%,77.40%,96.70%,74.43%,96.24%,77.07%,93.42%,9.89%,8.47%,Speech/Speech Recognition/Specific Recognition Tasks
SpeechTextMatching_LibriSpeech-TestClean,LLM-C,TRUE,86.50%,43.50%,57.00%,59.50%,64.00%,92.00%,52.00%,44.50%,42.00%,Speech/Speech Recognition/Specific Recognition Tasks
SpeechTextMatching_LibriSpeech-TestOther,LLM-C,TRUE,80.50%,42.50%,56.50%,60.00%,66.00%,92.50%,53.00%,47.00%,40.00%,Speech/Speech Recognition/Specific Recognition Tasks
SpeechTextMatching_LJSpeech,LLM-C,TRUE,83.50%,44.50%,57.00%,60.00%,67.00%,90.00%,52.00%,37.50%,41.00%,Speech/Speech Recognition/Specific Recognition Tasks
SpokenTermDetection_LibriSpeech-TestClean,LLM-C,TRUE,76.50%,28.00%,60.00%,54.50%,77.00%,61.50%,51.50%,24.00%,37.50%,Speech/Speech Recognition/Specific Recognition Tasks
SpokenTermDetection_LibriSpeech-TestOther,LLM-C,TRUE,75.50%,23.00%,54.50%,50.50%,72.50%,64.00%,46.50%,25.50%,34.00%,Speech/Speech Recognition/Specific Recognition Tasks
SpokenTermDetection_LJSpeech,LLM-C,TRUE,83.50%,33.50%,57.00%,53.50%,74.50%,79.50%,51.00%,25.00%,25.00%,Speech/Speech Recognition/Specific Recognition Tasks
SuperbKS_SpeechCommandsV1-Test,LLM-C,TRUE,36.50%,1.00%,30.50%,2.00%,60.50%,47.00%,43.00%,4.00%,2.00%,Speech/Speech Recognition/Specific Recognition Tasks
SuperbQbE_Quesst14-Eval,LLM-C,TRUE,46.50%,45.00%,49.00%,51.50%,48.00%,53.50%,49.50%,51.00%,2.50%,Speech/Speech Recognition/Specific Recognition Tasks
StutteringDetection_SEP28k,LLM-C,TRUE,49.10%,51.00%,50.50%,50.30%,50.50%,52.40%,55.00%,49.60%,47.40%,"Speech/Speech, Voice, Hearing Disorder/Disorder Detection and Classification"
VoiceDisorderClassification_VOICED,LLM-C,TRUE,13.46%,1.92%,13.46%,17.31%,13.46%,16.35%,18.27%,21.15%,6.73%,"Speech/Speech, Voice, Hearing Disorder/Disorder Detection and Classification"
ConversationMatching_EnShortConversation,LLM-C,TRUE,77.78%,5.56%,57.41%,37.04%,51.85%,66.67%,62.96%,3.70%,24.07%,Speech/Spoken Language Understanding/Intent & Meaning
Dialogue_Act_Classification_SLUE-HVB,LLM-C,TRUE,11.08%,0.00%,36.00%,0.92%,5.00%,15.83%,13.17%,1.83%,1.33%,Speech/Spoken Language Understanding/Intent & Meaning
DialogueActClassification_DailyTalk,LLM-C,TRUE,29.00%,10.50%,34.00%,40.50%,42.00%,30.00%,36.50%,18.50%,4.00%,Speech/Spoken Language Understanding/Intent & Meaning
DialogueActPairing_DailyTalk,LLM-C,TRUE,50.00%,3.50%,51.00%,48.00%,43.50%,46.00%,37.00%,40.00%,25.00%,Speech/Spoken Language Understanding/Intent & Meaning
IntentClassification_SLURP_MINDS14,LLM-C,TRUE,36.50%,0.06%,21.00%,5.50%,26.50%,26.00%,28.00%,3.50%,1.00%,Speech/Spoken Language Understanding/Intent & Meaning
IntentClassification_SLURP_MINDS14-Action,LLM-C,TRUE,27.00%,14.00%,28.50%,28.50%,44.00%,68.50%,37.50%,6.50%,1.50%,Speech/Spoken Language Understanding/Intent & Meaning
IntentClassification_SLURP_MINDS14-Intent,LLM-C,TRUE,45.50%,11.50%,27.00%,19.00%,68.50%,56.00%,56.50%,4.00%,0.50%,Speech/Spoken Language Understanding/Intent & Meaning
Named_Entity_Localization_SLUE-VoxPopuli,X,FALSE,81.25%,100.00%,100.00%,100.00%,100.00%,25.00%,93.75%,100.00%,100.00%,Speech/Spoken Language Understanding/Intent & Meaning
Named_Entity_Localization_SLUE-VoxPopuli,X,TRUE,1.4006,-,-,-,-,1.1063,0.8681,-,-,Speech/Spoken Language Understanding/Intent & Meaning
Named_Entity_Recognition_SLUE-VoxPopuli,NAR,FALSE,90.76%,98.37%,95.65%,98.37%,95.11%,91.30%,91.85%,97.83%,96.74%,Speech/Spoken Language Understanding/Intent & Meaning
Named_Entity_Recognition_SLUE-VoxPopuli,IoU,TRUE,0,0.3333,0,0,0,0.0625,0.0222,0,0,Speech/Spoken Language Understanding/Intent & Meaning
SarcasmDetection_Mustard,LLM-C,TRUE,44.50%,15.00%,38.50%,46.00%,42.50%,48.00%,46.50%,43.00%,8.00%,Speech/Spoken Language Understanding/Intent & Meaning
SemanticTextualSimilarity_SpokenSTS,LLM-C,TRUE,48.80%,47.20%,47.20%,50.40%,42.80%,46.80%,50.40%,51.20%,38.80%,Speech/Spoken Language Understanding/Intent & Meaning
SpeechSentimentAnalysis_MELD,LLM-C,TRUE,38.50%,37.50%,33.50%,35.09%,38.50%,49.00%,44.00%,27.50%,27.00%,Speech/Spoken Language Understanding/Intent & Meaning
SpokenDigitArithmetic_AudioMNIST,LLM-C,TRUE,43.50%,13.00%,31.00%,33.00%,15.00%,43.50%,40.50%,4.50%,13.00%,Speech/Spoken Language Understanding/Intent & Meaning
SuperbSF_AudioSnips-Test,NAR,FALSE,38.00%,88.50%,34.00%,68.50%,51.00%,45.00%,44.00%,99.00%,97.00%,Speech/Spoken Language Understanding/Intent & Meaning
SuperbSF_AudioSnips-Test,Slot Type F1,TRUE,0.8637,0.8864,0.9325,0.9396,0.8616,0.7704,0.907,1,0.8939,Speech/Spoken Language Understanding/Intent & Meaning
SuperbSF_AudioSnips-Test,Slot Value CER,FALSE,0.3178,0.6432,0.4059,0.3349,0.4417,0.3065,0.4752,1.587,1.425,Speech/Spoken Language Understanding/Intent & Meaning
CodeSwitchingSemanticGrammarAcceptabilityComparison_CSZS-zh-en,LLM-C,TRUE,51.50%,45.00%,50.00%,49.50%,27.50%,49.50%,27.50%,28.00%,19.00%,Speech/Spoken Language Understanding/Linguistic Structure & Grammar
NonceWordDetection_sWUGGY,LLM-C,TRUE,48.43%,30.20%,48.43%,48.43%,21.08%,50.14%,48.15%,35.04%,19.37%,Speech/Spoken Language Understanding/Linguistic Structure & Grammar
PoS_Estimation_LibriTTS_PoS,POS,FALSE,2.5722,1.2792,2.9675,1.199,3.062,2.3091,3.1126,1.5327,1.9635,Speech/Spoken Language Understanding/Linguistic Structure & Grammar
PoS_Estimation_LibriTTS_PoS_with_transcription,POS,FALSE,0.9081,1.3507,1.7583,1.0601,2.158,1.7514,2.0463,1.2394,1.3911,Speech/Spoken Language Understanding/Linguistic Structure & Grammar
SentenceGrammarAcceptability_sBLIMP,LLM-C,TRUE,52.78%,6.75%,27.38%,42.06%,28.97%,49.40%,49.80%,50.79%,3.57%,Speech/Spoken Language Understanding/Linguistic Structure & Grammar
SuperbST_CoVoST2-Test,Sacre Bleu,TRUE,17.372,0.107,18.7884,16.7835,6.9846,23.3458,21.7423,0.0566,0.0219,Speech/Spoken Language Understanding/Speech Translation |