Safetensors
English
omni_speech2s_llama
SandO114 committed on
Commit
b67b102
·
verified ·
1 Parent(s): 916c867

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +124 -0
README.md CHANGED
@@ -122,6 +122,75 @@ VocalNet-8B was evaluated on [OpenAudioBench](https://huggingface.co/datasets/ba
122
  <th style="padding: 10px; border: 1px solid #ddd;">Web Questions</th>
123
  </tr>
124
  </thead>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
  <tbody>
126
  <tr>
127
  <td colspan="7" style="padding: 10px; border: 1px solid #ddd; font-weight: bold; background-color: #e6f3ff;">Base Models</td>
@@ -298,6 +367,61 @@ VocalNet-8B was evaluated on [OpenAudioBench](https://huggingface.co/datasets/ba
298
  <td style="padding: 10px; border: 1px solid #ddd;">WER</td>
299
  <td style="padding: 10px; border: 1px solid #ddd;">UTMOS</td>
300
  </tr>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
301
  <tr>
302
  <td colspan="11" style="padding: 10px; border: 1px solid #ddd; font-weight: bold; background-color: #e6f3ff;">Base Models</td>
303
  </tr>
 
122
  <th style="padding: 10px; border: 1px solid #ddd;">Web Questions</th>
123
  </tr>
124
  </thead>
125
+ <tbody>
126
+ <tr>
127
+ <td colspan="7" style="padding: 10px; border: 1px solid #ddd; font-weight: bold; background-color: #e6f3ff;">Tiny Models</td>
128
+ </tr>
129
+ <tr>
130
+ <td rowspan="2" style="padding: 10px; border: 1px solid #ddd;">Mini-Omni</td>
131
+ <td rowspan="2" style="padding: 10px; border: 1px solid #ddd;">0.5B</td>
132
+ <td style="padding: 10px; border: 1px solid #ddd;">s→t</td>
133
+ <td style="padding: 10px; border: 1px solid #ddd;">1.84</td>
134
+ <td style="padding: 10px; border: 1px solid #ddd;">2.7</td>
135
+ <td style="padding: 10px; border: 1px solid #ddd;">0.12</td>
136
+ <td style="padding: 10px; border: 1px solid #ddd;">0.22</td>
137
+ </tr>
138
+ <tr>
139
+ <td style="padding: 10px; border: 1px solid #ddd;">s→s</td>
140
+ <td style="padding: 10px; border: 1px solid #ddd;">1.80</td>
141
+ <td style="padding: 10px; border: 1px solid #ddd;">2.7</td>
142
+ <td style="padding: 10px; border: 1px solid #ddd;">0.08</td>
143
+ <td style="padding: 10px; border: 1px solid #ddd;">0.20</td>
144
+ </tr>
145
+ <tr>
146
+ <td rowspan="2" style="padding: 10px; border: 1px solid #ddd;">SLAM-Omni</td>
147
+ <td rowspan="2" style="padding: 10px; border: 1px solid #ddd;">0.5B</td>
148
+ <td style="padding: 10px; border: 1px solid #ddd;">s→t</td>
149
+ <td style="padding: 10px; border: 1px solid #ddd;">3.50</td>
150
+ <td style="padding: 10px; border: 1px solid #ddd;">29.4</td>
151
+ <td style="padding: 10px; border: 1px solid #ddd;">0.39</td>
152
+ <td style="padding: 10px; border: 1px solid #ddd;">0.84</td>
153
+ </tr>
154
+ <tr>
155
+ <td style="padding: 10px; border: 1px solid #ddd;">s→s</td>
156
+ <td style="padding: 10px; border: 1px solid #ddd;">3.01</td>
157
+ <td style="padding: 10px; border: 1px solid #ddd;">26.7</td>
158
+ <td style="padding: 10px; border: 1px solid #ddd;">0.34</td>
159
+ <td style="padding: 10px; border: 1px solid #ddd;">0.69</td>
160
+ </tr>
161
+ <tr>
162
+ <td rowspan="2" style="padding: 10px; border: 1px solid #ddd;">VocalNet-1B (VA)</td>
163
+ <td rowspan="2" style="padding: 10px; border: 1px solid #ddd;">1B</td>
164
+ <td style="padding: 10px; border: 1px solid #ddd;">s→t</td>
165
+ <td style="padding: 10px; border: 1px solid #ddd;">5.38</td>
166
+ <td style="padding: 10px; border: 1px solid #ddd;">70.3</td>
167
+ <td style="padding: 10px; border: 1px solid #ddd;">3.38</td>
168
+ <td style="padding: 10px; border: 1px solid #ddd;">4.93</td>
169
+ </tr>
170
+ <tr>
171
+ <td style="padding: 10px; border: 1px solid #ddd;">s→s</td>
172
+ <td style="padding: 10px; border: 1px solid #ddd;">4.83</td>
173
+ <td style="padding: 10px; border: 1px solid #ddd;">61.0</td>
174
+ <td style="padding: 10px; border: 1px solid #ddd;">2.78</td>
175
+ <td style="padding: 10px; border: 1px solid #ddd;">4.47</td>
176
+ </tr>
177
+ <tr>
178
+ <td rowspan="2" style="padding: 10px; border: 1px solid #ddd;">VocalNet-1B</td>
179
+ <td rowspan="2" style="padding: 10px; border: 1px solid #ddd;">1B</td>
180
+ <td style="padding: 10px; border: 1px solid #ddd;">s→t</td>
181
+ <td style="padding: 10px; border: 1px solid #ddd;"><b>5.79</b></td>
182
+ <td style="padding: 10px; border: 1px solid #ddd;"><b>71.7</b></td>
183
+ <td style="padding: 10px; border: 1px solid #ddd;"><b>3.60</b></td>
184
+ <td style="padding: 10px; border: 1px solid #ddd;"><b>5.16</b></td>
185
+ </tr>
186
+ <tr>
187
+ <td style="padding: 10px; border: 1px solid #ddd;">s→s</td>
188
+ <td style="padding: 10px; border: 1px solid #ddd;"><b>5.03</b></td>
189
+ <td style="padding: 10px; border: 1px solid #ddd;"><b>63.7</b></td>
190
+ <td style="padding: 10px; border: 1px solid #ddd;"><b>3.06</b></td>
191
+ <td style="padding: 10px; border: 1px solid #ddd;"><b>4.68</b></td>
192
+ </tr>
193
+ </tbody>
194
  <tbody>
195
  <tr>
196
  <td colspan="7" style="padding: 10px; border: 1px solid #ddd; font-weight: bold; background-color: #e6f3ff;">Base Models</td>
 
367
  <td style="padding: 10px; border: 1px solid #ddd;">WER</td>
368
  <td style="padding: 10px; border: 1px solid #ddd;">UTMOS</td>
369
  </tr>
370
+ <tr>
371
+ <td colspan="11" style="padding: 10px; border: 1px solid #ddd; font-weight: bold; background-color: #e6f3ff;">Tiny Models</td>
372
+ </tr>
373
+ <tr>
374
+ <td style="padding: 10px; border: 1px solid #ddd;">Mini-Omni</td>
375
+ <td style="padding: 10px; border: 1px solid #ddd;">20.78</td>
376
+ <td style="padding: 10px; border: 1px solid #ddd;">4.429</td>
377
+ <td style="padding: 10px; border: 1px solid #ddd;">5.20</td>
378
+ <td style="padding: 10px; border: 1px solid #ddd;">4.428</td>
379
+ <td style="padding: 10px; border: 1px solid #ddd;">7.43</td>
380
+ <td style="padding: 10px; border: 1px solid #ddd;">4.428</td>
381
+ <td style="padding: 10px; border: 1px solid #ddd;">8.51</td>
382
+ <td style="padding: 10px; border: 1px solid #ddd;">4.433</td>
383
+ <td style="padding: 10px; border: 1px solid #ddd;">8.66</td>
384
+ <td style="padding: 10px; border: 1px solid #ddd;">4.430</td>
385
+ </tr>
386
+ <tr>
387
+ <td style="padding: 10px; border: 1px solid #ddd;">SLAM-Omni</td>
388
+ <td style="padding: 10px; border: 1px solid #ddd;">5.52</td>
389
+ <td style="padding: 10px; border: 1px solid #ddd;">4.439</td>
390
+ <td style="padding: 10px; border: 1px solid #ddd;">5.55</td>
391
+ <td style="padding: 10px; border: 1px solid #ddd;">4.467</td>
392
+ <td style="padding: 10px; border: 1px solid #ddd;">6.16</td>
393
+ <td style="padding: 10px; border: 1px solid #ddd;">4.470</td>
394
+ <td style="padding: 10px; border: 1px solid #ddd;">6.50</td>
395
+ <td style="padding: 10px; border: 1px solid #ddd;">4.461</td>
396
+ <td style="padding: 10px; border: 1px solid #ddd;">6.17</td>
397
+ <td style="padding: 10px; border: 1px solid #ddd;">4.464</td>
398
+ </tr>
399
+ <tr>
400
+ <td style="padding: 10px; border: 1px solid #ddd;">VocalNet-1B (VA)</td>
401
+ <td style="padding: 10px; border: 1px solid #ddd;"><b>3.43</b></td>
402
+ <td style="padding: 10px; border: 1px solid #ddd;"><b>4.495</b></td>
403
+ <td style="padding: 10px; border: 1px solid #ddd;">3.65</td>
404
+ <td style="padding: 10px; border: 1px solid #ddd;"><b>4.498</b></td>
405
+ <td style="padding: 10px; border: 1px solid #ddd;"><b>5.97</b></td>
406
+ <td style="padding: 10px; border: 1px solid #ddd;"><b>4.499</b></td>
407
+ <td style="padding: 10px; border: 1px solid #ddd;">6.40</td>
408
+ <td style="padding: 10px; border: 1px solid #ddd;">4.489</td>
409
+ <td style="padding: 10px; border: 1px solid #ddd;">5.66</td>
410
+ <td style="padding: 10px; border: 1px solid #ddd;"><b>4.495</b></td>
411
+ </tr>
412
+ <tr>
413
+ <td style="padding: 10px; border: 1px solid #ddd;">VocalNet-1B</td>
414
+ <td style="padding: 10px; border: 1px solid #ddd;"><b>3.43</b></td>
415
+ <td style="padding: 10px; border: 1px solid #ddd;">4.491</td>
416
+ <td style="padding: 10px; border: 1px solid #ddd;"><b>3.27</b></td>
417
+ <td style="padding: 10px; border: 1px solid #ddd;">4.497</td>
418
+ <td style="padding: 10px; border: 1px solid #ddd;">6.73</td>
419
+ <td style="padding: 10px; border: 1px solid #ddd;">4.486</td>
420
+ <td style="padding: 10px; border: 1px solid #ddd;"><b>4.88</b></td>
421
+ <td style="padding: 10px; border: 1px solid #ddd;"><b>4.493</b></td>
422
+ <td style="padding: 10px; border: 1px solid #ddd;"><b>5.31</b></td>
423
+ <td style="padding: 10px; border: 1px solid #ddd;">4.491</td>
424
+ </tr>
425
  <tr>
426
  <td colspan="11" style="padding: 10px; border: 1px solid #ddd; font-weight: bold; background-color: #e6f3ff;">Base Models</td>
427
  </tr>