MusIre committed
Commit d16271b · 1 Parent(s): 27f3b82

Update app.py

Files changed (1):
  1. app.py +9 -8
app.py CHANGED
@@ -1,13 +1,22 @@
  import subprocess
+ import gradio as gr  # Add this import statement

  subprocess.run(["python", "-m", "pip", "install", "--upgrade", "pip"])
  subprocess.run(["pip", "install", "gradio", "--upgrade"])
  subprocess.run(["pip", "install", "datasets"])
  subprocess.run(["pip", "install", "transformers"])
  subprocess.run(["pip", "install", "torch", "torchvision", "torchaudio", "-f", "https://download.pytorch.org/whl/torch_stable.html"])
+
  from transformers import WhisperProcessor, WhisperForConditionalGeneration
  from datasets import load_dataset

+ # Define the transcribe_audio function
+ def transcribe_audio(audio):
+     input_features = processor(audio, return_tensors="pt").input_features
+     predicted_ids = model.generate(input_features)
+     transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
+     return transcription[0]
+
  # load model and processor
  processor = WhisperProcessor.from_pretrained("openai/whisper-small")
  model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")
@@ -16,14 +25,6 @@ forced_decoder_ids = processor.get_decoder_prompt_ids(language="italian", task="
  # load dummy dataset and read audio files
  ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
  sample = ds[0]["audio"]
- input_features = processor(sample["array"], sampling_rate=sample["sampling_rate"], return_tensors="pt").input_features
-
- # generate token ids
- predicted_ids = model.generate(input_features)
- # decode token ids to text
- transcription = processor.batch_decode(predicted_ids, skip_special_tokens=False)
-
- transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)

  # Create Gradio interface
  audio_input = gr.Audio()
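
The diff cuts off just as the Gradio interface is being built (audio_input = gr.Audio()), and the committed transcribe_audio passes the raw Gradio value straight to the processor. By default gr.Audio hands the callback a (sample_rate, numpy_array) tuple, and Whisper's feature extractor expects 16 kHz mono float audio, so the function as committed would likely fail on real microphone input. Below is a minimal sketch of how the remaining wiring could look; the tuple unpacking, resampling, and launch() call are illustrative assumptions, not part of this commit.

import gradio as gr
import torch
import torchaudio
from transformers import WhisperProcessor, WhisperForConditionalGeneration

processor = WhisperProcessor.from_pretrained("openai/whisper-small")
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")

def transcribe_audio(audio):
    # gr.Audio's default numpy output is a (sample_rate, array) tuple
    sampling_rate, array = audio
    waveform = torch.tensor(array, dtype=torch.float32)
    if waveform.ndim > 1:                          # downmix stereo to mono
        waveform = waveform.mean(dim=1)
    # scale int16 PCM samples into [-1, 1]
    waveform = waveform / waveform.abs().max().clamp(min=1e-9)
    if sampling_rate != 16000:                     # Whisper is trained on 16 kHz audio
        waveform = torchaudio.functional.resample(waveform, sampling_rate, 16000)
    input_features = processor(waveform.numpy(), sampling_rate=16000,
                               return_tensors="pt").input_features
    predicted_ids = model.generate(input_features)
    return processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]

audio_input = gr.Audio()
demo = gr.Interface(fn=transcribe_audio, inputs=audio_input, outputs="text")
demo.launch()

As a side note, on Spaces the runtime subprocess pip installs at the top of app.py are usually replaced by a requirements.txt, which the platform installs before the app starts.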