mskov committed on
Commit
5684ca8
·
1 Parent(s): beee031

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -17
app.py CHANGED
@@ -24,22 +24,6 @@ disable_caching()
24
 
25
  huggingface_token = os.environ["huggingface_token"]
26
 
27
- processor = AutoProcessor.from_pretrained("mskov/whisper_miso", use_auth_token=huggingface_token)
28
-
29
- def prepare_dataset(batch):
30
- audio = batch["audio"]
31
- batch["input_values"] = processor(audio["array"], sampling_rate=audio["sampling_rate"]).input_values[0]
32
- batch["input_length"] = len(batch["input_values"])
33
- with processor.as_target_processor():
34
- batch["labels"] = processor(batch["sentence"]).input_ids
35
- return batch
36
- dataset = load_dataset("mskov/miso_test", split="test")
37
- dataset = dataset.cast_column("audio", Audio(sampling_rate=16000))
38
- testRun = dataset.map(prepare_dataset, remove_columns=dataset.column_names)
39
- print(testRun)
40
-
41
- '''
42
-
43
  model = WhisperModel.from_pretrained("mskov/whisper_miso", use_auth_token=huggingface_token)
44
  feature_extractor = AutoFeatureExtractor.from_pretrained("mskov/whisper_miso", use_auth_token=huggingface_token)
45
 
@@ -55,4 +39,3 @@ decoder_input_ids = torch.tensor([[1, 1]]) * model.config.decoder_start_token_id
55
  last_hidden_state = model(input_features, decoder_input_ids=decoder_input_ids).last_hidden_state
56
  list(last_hidden_state.shape)
57
  print(list(last_hidden_state.shape))
58
- '''
 
24
 
25
  huggingface_token = os.environ["huggingface_token"]
26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  model = WhisperModel.from_pretrained("mskov/whisper_miso", use_auth_token=huggingface_token)
28
  feature_extractor = AutoFeatureExtractor.from_pretrained("mskov/whisper_miso", use_auth_token=huggingface_token)
29
 
 
39
  last_hidden_state = model(input_features, decoder_input_ids=decoder_input_ids).last_hidden_state
40
  list(last_hidden_state.shape)
41
  print(list(last_hidden_state.shape))