Update librispeech_asr.py
Browse files
- librispeech_asr.py +3 -8
librispeech_asr.py
CHANGED
@@ -3,15 +3,10 @@ import datasets
|
|
3 |
import glob
|
4 |
|
5 |
# hugging face sucks
|
|
|
6 |
|
7 |
-
#
|
8 |
-
#
|
9 |
-
# "librispeech_asr",
|
10 |
-
# "clean",
|
11 |
-
# split=["train_clean_100", "test_clean"],
|
12 |
-
# cache_dir=CACHE_DIR,
|
13 |
-
# storage_options={'client_kwargs': {'timeout': aiohttp.ClientTimeout(total=3600, connect=60)}}
|
14 |
-
# ).rename_column("text", "transcription").cast_column("audio", Audio(sampling_rate=sample_rate))
|
15 |
|
16 |
_URL = "http://www.openslr.org/12"
|
17 |
_DL_URL = "http://www.openslr.org/resources/12/"
|
|
|
3 |
import glob
|
4 |
|
5 |
# hugging face sucks
|
6 |
+
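# Workaround: load this script from a local path with trust_remote_code=True and a longer aiohttp timeout (works until they break something else):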
|
7 |
|
8 |
+
# train = load_dataset("./local_path_to/librispeech_asr.py", "clean", split="train_clean_100", trust_remote_code=True, storage_options={'client_kwargs': {'timeout': aiohttp.ClientTimeout(total=3600, connect=60)}}).rename_column("text", "transcription")
|
9 |
+
# test = load_dataset("./local_path_to/librispeech_asr.py", "clean", split="test_clean", trust_remote_code=True, storage_options={'client_kwargs': {'timeout': aiohttp.ClientTimeout(total=3600, connect=60)}}).rename_column("text", "transcription")
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
|
11 |
_URL = "http://www.openslr.org/12"
|
12 |
_DL_URL = "http://www.openslr.org/resources/12/"
|