MusIre committed on
Commit
88dc3ba
·
1 Parent(s): 6c09812

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -0
app.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Gradio demo that transcribes Italian speech with the openai/whisper-large model."""

import subprocess
import sys

# Runtime dependencies are installed before the third-party imports below,
# which is why those imports cannot sit at the very top of the file.
# ``sys.executable -m pip`` targets the interpreter running this script rather
# than whichever ``pip`` happens to be first on PATH. As in the original
# script, a failed install is not fatal here; the imports below will surface it.
# NOTE(review): a requirements.txt is the usual way to declare these on a
# hosted app -- confirm whether the runtime install is still needed.
_RUNTIME_REQUIREMENTS = (
    ("datasets",),
    ("transformers",),
    (
        "torch",
        "torchvision",
        "torchaudio",
        "-f",
        "https://download.pytorch.org/whl/torch_stable.html",
    ),
)
for _pip_args in _RUNTIME_REQUIREMENTS:
    subprocess.run([sys.executable, "-m", "pip", "install", *_pip_args])

import gradio as gr
import numpy as np
import torch
from transformers import WhisperForConditionalGeneration, WhisperProcessor

MODEL_NAME = "openai/whisper-large"

# Load model and processor
processor = WhisperProcessor.from_pretrained(MODEL_NAME)
model = WhisperForConditionalGeneration.from_pretrained(MODEL_NAME)
forced_decoder_ids = processor.get_decoder_prompt_ids(language="italian", task="transcribe")

# Sampling rate the processor's feature extractor was configured for.
TARGET_SAMPLE_RATE = processor.feature_extractor.sampling_rate


def _to_model_waveform(sample_rate, samples):
    """Return *samples* as a mono float32 waveform at ``TARGET_SAMPLE_RATE``."""
    waveform = np.asarray(samples)
    if np.issubdtype(waveform.dtype, np.integer):
        # Integer PCM (what Gradio delivers for type="numpy") -> floats in [-1, 1].
        waveform = waveform.astype(np.float32) / np.iinfo(waveform.dtype).max
    else:
        waveform = waveform.astype(np.float32)

    if waveform.ndim == 2:
        # Gradio lays multi-channel audio out as (samples, channels); mix to mono.
        waveform = waveform.mean(axis=1)

    if sample_rate != TARGET_SAMPLE_RATE:
        # Imported lazily: only needed when the input is not already at the
        # target rate. torchaudio is installed by the bootstrap step above.
        import torchaudio

        waveform = torchaudio.functional.resample(
            torch.from_numpy(waveform), sample_rate, TARGET_SAMPLE_RATE
        ).numpy()
    return waveform


# Custom preprocessing function
def preprocess_audio(audio_data):
    """Convert raw audio into Whisper input features.

    Args:
        audio_data: Either a ``(sample_rate, samples)`` tuple as produced by
            ``gr.Audio(type="numpy")``, or raw audio already at the model's
            sampling rate (the original calling convention), which is handed
            to the processor unchanged.

    Returns:
        The ``input_features`` tensor produced by the Whisper processor.
    """
    if isinstance(audio_data, tuple):
        sample_rate, samples = audio_data
        audio_data = _to_model_waveform(sample_rate, samples)
    # Passing the rate explicitly lets the feature extractor reject audio
    # that does not match what the model expects.
    features = processor(
        audio_data, sampling_rate=TARGET_SAMPLE_RATE, return_tensors="pt"
    )
    return features.input_features


# Function to perform ASR on audio data
def transcribe_audio(input_features):
    """Run Whisper on prepared input features and return the transcription.

    Args:
        input_features: Tensor as returned by :func:`preprocess_audio`.

    Returns:
        The decoded text of the first (and only expected) sequence.
    """
    # Generate token ids. The decoder prompt computed at load time is passed
    # here; without it the Italian/transcribe setting is never applied.
    with torch.no_grad():
        predicted_ids = model.generate(
            input_features, forced_decoder_ids=forced_decoder_ids
        )

    # Decode token ids to text
    transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)

    return transcription[0]


def transcribe(audio):
    """Gradio callback: preprocess the recorded/uploaded audio and transcribe it.

    Returns an empty string when no audio was provided.
    """
    if audio is None:
        return ""
    return transcribe_audio(preprocess_audio(audio))


# Create Gradio interface
# gr.Audio has no hook for a custom preprocessing callable, so preprocessing
# is chained inside the callback instead.
audio_input = gr.Audio(type="numpy")
demo = gr.Interface(fn=transcribe, inputs=audio_input, outputs="text")
demo.launch()