Spaces:

mskov
/

test

Runtime error

File size: 1,617 Bytes

de07127
 
e6f4bc9
79f46c8
256b607
2f1b912
b4725a8
de07127
8a965da
 
de07127
8a965da
79f46c8
2996449
decc59e
664eb76
5625d5f
90d7b9b
0b5b7f4
8a965da
 
3b57b43
3826e01
973bb39
62ac43e
 
dca4d0e
 
db75012
 
75a25c7

import os
import subprocess
import sys

# Packages this script needs at runtime, in the order they are installed.
# Each requirement gets its own pip run so that one failing pin (for example an
# old numba build that has no wheel for the current interpreter) cannot block
# the remaining installs.
_REQUIREMENTS = (
    "transformers==4.27.0",
    "torch",
    "openai",
    "accelerate",
    "evaluate",
    "datasets",
    "scipy==1.8.1",  # the original line spelled this "spicy", a typo for SciPy
    "soundfile",
    "jiwer",
    "datasets[audio]",
    "numba==0.51.2",
)


def _pip_install(requirement: str) -> bool:
    """Install one requirement into the interpreter running this script.

    Uses ``sys.executable -m pip`` so the package lands in the same environment
    that performs the imports below, and passes the arguments as a list so no
    shell is involved (extras such as ``datasets[audio]`` need no quoting).

    Installation stays best-effort: a failure is reported on stderr instead of
    raising, so the script continues exactly as it did with ``os.system``.

    Args:
        requirement: A pip requirement specifier, e.g. ``"scipy==1.8.1"``.

    Returns:
        True if pip exited with status 0, False otherwise.
    """
    result = subprocess.run(
        [sys.executable, "-m", "pip", "install", requirement],
        check=False,
    )
    if result.returncode != 0:
        print(
            f"WARNING: 'pip install {requirement}' failed "
            f"(exit code {result.returncode})",
            file=sys.stderr,
        )
    return result.returncode == 0


for _requirement in _REQUIREMENTS:
    _pip_install(_requirement)

# Third-party imports deliberately come after the installs above: the packages
# may not exist in the environment until pip has run.
from transformers import pipeline, WhisperModel, WhisperTokenizer, WhisperFeatureExtractor, AutoFeatureExtractor, AutoProcessor, WhisperConfig  # noqa: E402
import torch  # noqa: E402
from evaluate import evaluator  # noqa: E402
from datasets import load_dataset, Audio, disable_caching  # noqa: E402

# `set_caching_enabled` is the deprecated spelling of enable/disable_caching.
# NOTE(review): newer `datasets` releases may no longer export it; since the
# install above is unpinned, import it only when available so the name stays
# defined without breaking start-up.
try:
    from datasets import set_caching_enabled  # noqa: E402
except ImportError:
    set_caching_enabled = None

# Turn off the datasets cache. The original also called
# set_caching_enabled(False), which is the same switch, so one call suffices.
disable_caching()

# Access token for the Hugging Face Hub, read from the environment.
# A KeyError here means the `huggingface_token` variable is not set.
huggingface_token = os.environ["huggingface_token"]

# Load the Whisper model (no language-model head) and its matching feature
# extractor from the same Hub repository.
model = WhisperModel.from_pretrained("mskov/whisper_miso", use_auth_token=huggingface_token)
feature_extractor = AutoFeatureExtractor.from_pretrained("mskov/whisper_miso", use_auth_token=huggingface_token)

# Load the test split and have the audio column decoded at 16 kHz.
ds = load_dataset("mskov/miso_test", split="test").cast_column("audio", Audio(sampling_rate=16000))

# Fetch the first example once; every access to ds[0] decodes the audio again.
first_example = ds[0]
print(ds, "and at 0 ", first_example)

# Pass the sample's own sampling rate so the feature extractor can verify it
# matches the rate it was configured for, instead of silently assuming it.
audio = first_example["audio"]
inputs = feature_extractor(
    audio["array"],
    sampling_rate=audio["sampling_rate"],
    return_tensors="pt",
)
print("check check")
print(inputs)
input_features = inputs.input_features

# A 1x2 tensor of decoder start tokens, used as the decoder prompt.
decoder_input_ids = torch.tensor([[1, 1]]) * model.config.decoder_start_token_id

# Only the output shape is inspected, so skip autograd bookkeeping.
with torch.no_grad():
    last_hidden_state = model(input_features, decoder_input_ids=decoder_input_ids).last_hidden_state
print(list(last_hidden_state.shape))