Spaces:

MERaLiON
/

AudioBench-Leaderboard

Running

App Files Community

zhuohan-7 committed 23 days ago

Commit

067fc9d

1 Parent(s): 16943d7

Upload folder using huggingface_hub

Browse files

Files changed (2) hide show

app/content.py +5 -5
app/show_examples.py +0 -8

app/content.py CHANGED Viewed

@@ -146,18 +146,18 @@ dataset_diaplay_information = {
     'YouTube ASR: English with Strong Emotion'  : 'YouTube Evaluation Dataset for ASR Task: <br> This dataset contains English and some unknown languages audio clips, featuring speech with strong emotional expression. <br> It includes approximately 3.9 hours of audio, with each clip lasting 30 seconds.',
-    'YouTube ASR: Malay English Prompt': 'YouTube Evaluation Dataset for ASR Task: <br> This dataset mainly contains Malay and some English audio clips, featuring with English prompts. <br> It includes approximately 2.55 hours of audio, with indicidual clips ranging form 30 seconds to 95 seconds in length.',
-    'YouTube ASR: Malay with Malay Prompt': 'YouTube Evaluation Dataset for ASR Task: <br> This dataset use the same audio from <i>YouTube ASR: Malay English Prompt</i>, except featuring with Malay prompts. <br> It includes approximately 2.55 hours of audio, with indicidual clips ranging form 30 seconds to 95 seconds in length.',
     'SEAME-Dev-Mandarin'   : 'Under Development',
     'SEAME-Dev-Singlish'   : 'Under Development',
-    'YouTube SQA: English with Singapore Content': 'YouTube Evaluation Dataset for Speech-QA Task: <br> This dataset use the same audio from <i>YouTube ASR: English Singapore Content</i>, featuring Singapore-related content. <br> It includes approximately 2.5 hours of audio, with individual clips ranging from 2 seconds to 30 seconds in length.',
-    'YouTube SDS: English with Singapore Content': 'YouTube Evaluation Dataset for Summary Task: <br> This dataset use the same audio from <i>YouTube ASR: English Singapore Content</i>, featuring Singapore-related content. <br> It includes approximately 2.5 hours of audio, with individual clips ranging from 2 seconds to 30 seconds in length.',
-    'YouTube PQA: English with Singapore Content': 'YouTube Evaluation Dataset for Paralinguistics Task: <br> This dataset use the same audio from <i>YouTube ASR: English Singapore Content</i>, featuring Singapore-related content. <br> It includes approximately 2.5 hours of audio, with individual clips ranging from 2 seconds to 30 seconds in length.',
                 }

     'YouTube ASR: English with Strong Emotion'  : 'YouTube Evaluation Dataset for ASR Task: <br> This dataset contains English and some unknown languages audio clips, featuring speech with strong emotional expression. <br> It includes approximately 3.9 hours of audio, with each clip lasting 30 seconds.',
+    'YouTube ASR: Malay English Prompt': 'YouTube Evaluation Dataset for ASR Task: <br> This dataset mainly contains Malay and some Malay-English codeswitch audio clips, featuring with English prompts. <br> It includes approximately 2.55 hours of audio, with indicidual clips ranging form 30 seconds to 95 seconds in length.',
+    # 'YouTube ASR: Malay with Malay Prompt': 'YouTube Evaluation Dataset for ASR Task: <br> This dataset use the same audio from <i>YouTube ASR: Malay English Prompt</i>, except featuring with Malay prompts. <br> It includes approximately 2.55 hours of audio, with indicidual clips ranging form 30 seconds to 95 seconds in length.',
     'SEAME-Dev-Mandarin'   : 'Under Development',
     'SEAME-Dev-Singlish'   : 'Under Development',
+    'YouTube SQA: English with Singapore Content': 'YouTube Evaluation Dataset for Speech-QA Task: <br> This dataset contains English and Singlish audio clips, featuring Singapore-related content. <br> It includes approximately 7.6 hours of audio, with individual clips ranging from 8 seconds to 32 seconds in length.',
+    'YouTube SDS: English with Singapore Content': 'YouTube Evaluation Dataset for Summary Task: <br> This dataset contains English and Singlish audio clips, featuring Singapore-related content. <br> It includes approximately 5.4 hours of audio, with individual clips ranging from 8 seconds to 32 seconds in length.',
+    'YouTube PQA: English with Singapore Content': 'YouTube Evaluation Dataset for Paralinguistics QA Task: <br> This dataset contains English and Singlish audio clips, featuring Singapore-related content. <br> It includes approximately 41.4 hours of audio, with individual clips ranging from 41 seconds to 83 seconds in length.',
                 }

app/show_examples.py CHANGED Viewed

@@ -60,14 +60,6 @@ def show_dataset_examples(display_name):
                         """
             st.markdown(custom_css, unsafe_allow_html=True)
-            # s = f"""<tr>
-            #         <td><b>{html.escape(question_text.replace('(A)', '<br>(A)').replace('(B)', '<br>(B)').replace('(C)', '<br>(C)'))}
-            #         </td>
-            #         <td><b>{html.escape(dataset[index]['answer']['text'])}
-            #         </td>
-            # </tr>
-            # """
             body_details = f"""<table style="table-layout: fixed; width:100%">
             <thead>
                 <tr style="text-align: center;">

                         """
             st.markdown(custom_css, unsafe_allow_html=True)
             body_details = f"""<table style="table-layout: fixed; width:100%">
             <thead>
                 <tr style="text-align: center;">