Update app.py
Browse files
app.py
CHANGED
@@ -19,7 +19,7 @@ huggingface_token = os.environ["huggingface_token"]
|
|
19 |
pipe = pipeline(model="mskov/whisper-small-esc50")
|
20 |
print(pipe)
|
21 |
processor = WhisperProcessor.from_pretrained("mskov/whisper-small-esc50")
|
22 |
-
dataset = load_dataset("mskov/
|
23 |
|
24 |
print(dataset, "and at 0[audio][array] ", dataset[0]["audio"]["array"], type(dataset[0]["audio"]["array"]), "and at audio : ", dataset[0]["audio"])
|
25 |
|
@@ -31,7 +31,7 @@ model = WhisperForConditionalGeneration.from_pretrained("mskov/whisper-small-esc
|
|
31 |
def map_to_pred(batch):
|
32 |
audio = batch["audio"]
|
33 |
input_features = processor(audio["array"], sampling_rate=audio["sampling_rate"], return_tensors="pt").input_features
|
34 |
-
batch["reference"] = processor.tokenizer._normalize(batch['
|
35 |
|
36 |
|
37 |
with torch.no_grad():
|
|
|
19 |
pipe = pipeline(model="mskov/whisper-small-esc50")
|
20 |
print(pipe)
|
21 |
processor = WhisperProcessor.from_pretrained("mskov/whisper-small-esc50")
|
22 |
+
dataset = load_dataset("mskov/misophonia_sounds", split="test").cast_column("audio", Audio(sampling_rate=16000))
|
23 |
|
24 |
print(dataset, "and at 0[audio][array] ", dataset[0]["audio"]["array"], type(dataset[0]["audio"]["array"]), "and at audio : ", dataset[0]["audio"])
|
25 |
|
|
|
31 |
def map_to_pred(batch):
|
32 |
audio = batch["audio"]
|
33 |
input_features = processor(audio["array"], sampling_rate=audio["sampling_rate"], return_tensors="pt").input_features
|
34 |
+
batch["reference"] = processor.tokenizer._normalize(batch['sentence'])
|
35 |
|
36 |
|
37 |
with torch.no_grad():
|