Update app.py
Browse files
app.py
CHANGED
@@ -20,10 +20,10 @@ miso_tokenizer = WhisperTokenizer.from_pretrained("mskov/whisper_miso", use_auth
|
|
20 |
|
21 |
dataset = load_dataset("mskov/miso_test", split="test").cast_column("audio", Audio(sampling_rate=16000))
|
22 |
|
23 |
-
print(dataset, "and at 0 ", dataset[0])
|
24 |
|
25 |
inputs = feature_extractor(dataset[0]["audio"]["array"], return_tensors="pt")
|
26 |
-
tokenized_dataset = miso_tokenizer(
|
27 |
|
28 |
input_ids = features.input_ids
|
29 |
attention_mask = features.attention_mask
|
|
|
20 |
|
21 |
dataset = load_dataset("mskov/miso_test", split="test").cast_column("audio", Audio(sampling_rate=16000))
|
22 |
|
23 |
+
# Debug output: show the dataset summary, then the first sample's raw audio
# array, that array's type, and the full audio entry it came from.
# The first sample is looked up once and reused instead of re-indexing the
# dataset for every printed value.
first_audio = dataset[0]["audio"]
first_array = first_audio["array"]
print(dataset, "and at 0[audio][array] ", first_array, type(first_array), "and at audio : ", first_audio)
|
24 |
|
25 |
inputs = feature_extractor(dataset[0]["audio"]["array"], return_tensors="pt")
|
26 |
+
# Calls the tokenizer on the `array` entry of the dataset's `audio` column and
# keeps the result in `tokenized_dataset`.
# NOTE(review): the surrounding lines index a row first
# (`dataset[0]["audio"]["array"]`), whereas this indexes the whole column and
# then applies the string key "array". If column access returns a list of
# per-row entries, that string index raises TypeError - confirm.
# NOTE(review): this hands audio data to a tokenizer; presumably the tokenizer
# expects text (e.g. transcripts) - verify the intended input.
# NOTE(review): the following visible lines read `features`, not
# `tokenized_dataset` - confirm which name is intended.
tokenized_dataset = miso_tokenizer(dataset["audio"]["array"]) # Tokenize the dataset
|
27 |
|
28 |
input_ids = features.input_ids
|
29 |
attention_mask = features.attention_mask
|