MusIre committed on
Commit
ce7e2d6
·
1 Parent(s): c19f1d0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -12
app.py CHANGED
@@ -8,24 +8,24 @@ subprocess.run(["pip", "install", "transformers"])
8
  subprocess.run(["pip", "install", "librosa", "soundfile"])
9
  subprocess.run(["pip", "install", "torch", "torchvision", "torchaudio", "-f", "https://download.pytorch.org/whl/torch_stable.html"])
10
 
 
11
  from transformers import WhisperProcessor, WhisperForConditionalGeneration
12
- from datasets import load_dataset
13
 
14
- # Define the transcribe_audio function
15
- def transcribe_audio(audio):
16
- input_features = processor(audio, return_tensors="pt").input_features
17
- predicted_ids = model.generate(input_features)
18
- transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
19
- return transcription[0]
20
-
21
- # load model and processor
22
  processor = WhisperProcessor.from_pretrained("openai/whisper-small")
23
  model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")
24
  forced_decoder_ids = processor.get_decoder_prompt_ids(language="italian", task="transcribe")
25
 
26
- # load dummy dataset and read audio files
27
- ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
28
- sample = ds[0]["audio"]
 
 
 
 
 
 
29
 
30
  # Create Gradio interface
31
  audio_input = gr.Audio()
 
8
  subprocess.run(["pip", "install", "librosa", "soundfile"])
9
  subprocess.run(["pip", "install", "torch", "torchvision", "torchaudio", "-f", "https://download.pytorch.org/whl/torch_stable.html"])
10
 
11
+ import gradio as gr
12
  from transformers import WhisperProcessor, WhisperForConditionalGeneration
13
+ import numpy as np
14
 
15
+ # Load model and processor
 
 
 
 
 
 
 
16
# Load the Whisper model and processor once at module import time so every
# transcription request reuses them instead of reloading from disk.
processor = WhisperProcessor.from_pretrained("openai/whisper-small")
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")
# Force Italian transcription output regardless of the language Whisper detects.
forced_decoder_ids = processor.get_decoder_prompt_ids(language="italian", task="transcribe")


def transcribe_audio(audio):
    """Transcribe an audio clip to Italian text with Whisper.

    Parameters
    ----------
    audio : tuple[int, np.ndarray] | np.ndarray
        Audio from the Gradio ``gr.Audio`` component. Gradio passes the
        callback a ``(sample_rate, samples)`` tuple; a bare sample array is
        also accepted for backward compatibility (16 kHz is then assumed).

    Returns
    -------
    str
        The decoded transcription, or an empty string for empty input.
    """
    # Bug fix: gr.Audio() hands the callback a (sample_rate, numpy_array)
    # tuple — the original code ignored this and hard-coded 16 kHz.
    if isinstance(audio, tuple):
        sampling_rate, samples = audio
    else:
        sampling_rate, samples = 16000, audio

    samples = np.asarray(samples, dtype=np.float32)
    if samples.size == 0:
        return ""
    # Down-mix stereo to mono; Whisper expects a 1-D waveform.
    if samples.ndim > 1:
        samples = samples.mean(axis=1)
    # Gradio delivers int16 PCM; normalize to the [-1, 1] float range the
    # Whisper feature extractor expects. A peak > 1 indicates raw PCM.
    peak = np.max(np.abs(samples))
    if peak > 1.0:
        samples = samples / 32768.0
    # The Whisper feature extractor requires 16 kHz input; resample by
    # linear interpolation when the microphone delivered another rate.
    if sampling_rate != 16000:
        target_len = int(samples.shape[0] * 16000 / sampling_rate)
        samples = np.interp(
            np.linspace(0.0, samples.shape[0] - 1, target_len),
            np.arange(samples.shape[0]),
            samples,
        )
        sampling_rate = 16000

    input_features = processor(
        samples, sampling_rate=sampling_rate, return_tensors="pt"
    ).input_features
    # Bug fix: forced_decoder_ids was computed above but never passed, so the
    # Italian/transcribe configuration previously had no effect on generation.
    predicted_ids = model.generate(input_features, forced_decoder_ids=forced_decoder_ids)
    transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
    return transcription[0]
29
 
30
  # Create Gradio interface
31
  audio_input = gr.Audio()