zhuohan-7 committed on
Commit
067fc9d
·
1 Parent(s): 16943d7

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. app/content.py +5 -5
  2. app/show_examples.py +0 -8
app/content.py CHANGED
@@ -146,18 +146,18 @@ dataset_diaplay_information = {
146
 
147
  'YouTube ASR: English with Strong Emotion' : 'YouTube Evaluation Dataset for ASR Task: <br> This dataset contains English and some unknown languages audio clips, featuring speech with strong emotional expression. <br> It includes approximately 3.9 hours of audio, with each clip lasting 30 seconds.',
148
 
149
- 'YouTube ASR: Malay English Prompt': 'YouTube Evaluation Dataset for ASR Task: <br> This dataset mainly contains Malay and some English audio clips, featuring with English prompts. <br> It includes approximately 2.55 hours of audio, with indicidual clips ranging form 30 seconds to 95 seconds in length.',
150
 
151
- 'YouTube ASR: Malay with Malay Prompt': 'YouTube Evaluation Dataset for ASR Task: <br> This dataset use the same audio from <i>YouTube ASR: Malay English Prompt</i>, except featuring with Malay prompts. <br> It includes approximately 2.55 hours of audio, with indicidual clips ranging form 30 seconds to 95 seconds in length.',
152
 
153
  'SEAME-Dev-Mandarin' : 'Under Development',
154
  'SEAME-Dev-Singlish' : 'Under Development',
155
 
156
- 'YouTube SQA: English with Singapore Content': 'YouTube Evaluation Dataset for Speech-QA Task: <br> This dataset use the same audio from <i>YouTube ASR: English Singapore Content</i>, featuring Singapore-related content. <br> It includes approximately 2.5 hours of audio, with individual clips ranging from 2 seconds to 30 seconds in length.',
157
 
158
- 'YouTube SDS: English with Singapore Content': 'YouTube Evaluation Dataset for Summary Task: <br> This dataset use the same audio from <i>YouTube ASR: English Singapore Content</i>, featuring Singapore-related content. <br> It includes approximately 2.5 hours of audio, with individual clips ranging from 2 seconds to 30 seconds in length.',
159
 
160
- 'YouTube PQA: English with Singapore Content': 'YouTube Evaluation Dataset for Paralinguistics Task: <br> This dataset use the same audio from <i>YouTube ASR: English Singapore Content</i>, featuring Singapore-related content. <br> It includes approximately 2.5 hours of audio, with individual clips ranging from 2 seconds to 30 seconds in length.',
161
 
162
 
163
  }
 
146
 
147
  'YouTube ASR: English with Strong Emotion' : 'YouTube Evaluation Dataset for ASR Task: <br> This dataset contains English and some unknown languages audio clips, featuring speech with strong emotional expression. <br> It includes approximately 3.9 hours of audio, with each clip lasting 30 seconds.',
148
 
149
+ 'YouTube ASR: Malay English Prompt': 'YouTube Evaluation Dataset for ASR Task: <br> This dataset mainly contains Malay and some Malay-English codeswitch audio clips, featuring with English prompts. <br> It includes approximately 2.55 hours of audio, with indicidual clips ranging form 30 seconds to 95 seconds in length.',
150
 
151
+ # 'YouTube ASR: Malay with Malay Prompt': 'YouTube Evaluation Dataset for ASR Task: <br> This dataset use the same audio from <i>YouTube ASR: Malay English Prompt</i>, except featuring with Malay prompts. <br> It includes approximately 2.55 hours of audio, with indicidual clips ranging form 30 seconds to 95 seconds in length.',
152
 
153
  'SEAME-Dev-Mandarin' : 'Under Development',
154
  'SEAME-Dev-Singlish' : 'Under Development',
155
 
156
+ 'YouTube SQA: English with Singapore Content': 'YouTube Evaluation Dataset for Speech-QA Task: <br> This dataset contains English and Singlish audio clips, featuring Singapore-related content. <br> It includes approximately 7.6 hours of audio, with individual clips ranging from 8 seconds to 32 seconds in length.',
157
 
158
+ 'YouTube SDS: English with Singapore Content': 'YouTube Evaluation Dataset for Summary Task: <br> This dataset contains English and Singlish audio clips, featuring Singapore-related content. <br> It includes approximately 5.4 hours of audio, with individual clips ranging from 8 seconds to 32 seconds in length.',
159
 
160
+ 'YouTube PQA: English with Singapore Content': 'YouTube Evaluation Dataset for Paralinguistics QA Task: <br> This dataset contains English and Singlish audio clips, featuring Singapore-related content. <br> It includes approximately 41.4 hours of audio, with individual clips ranging from 41 seconds to 83 seconds in length.',
161
 
162
 
163
  }
app/show_examples.py CHANGED
@@ -60,14 +60,6 @@ def show_dataset_examples(display_name):
60
  """
61
  st.markdown(custom_css, unsafe_allow_html=True)
62
 
63
- # s = f"""<tr>
64
- # <td><b>{html.escape(question_text.replace('(A)', '<br>(A)').replace('(B)', '<br>(B)').replace('(C)', '<br>(C)'))}
65
- # </td>
66
- # <td><b>{html.escape(dataset[index]['answer']['text'])}
67
- # </td>
68
- # </tr>
69
- # """
70
-
71
  body_details = f"""<table style="table-layout: fixed; width:100%">
72
  <thead>
73
  <tr style="text-align: center;">
 
60
  """
61
  st.markdown(custom_css, unsafe_allow_html=True)
62
 
 
 
 
 
 
 
 
 
63
  body_details = f"""<table style="table-layout: fixed; width:100%">
64
  <thead>
65
  <tr style="text-align: center;">