Ivan000 committed on
Commit
ff83bcc
·
verified ·
1 Parent(s): 53dacfb

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -0
app.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ # =============
3
+ # This is a complete app.py file for an automatic speech recognition app using the openai/whisper-large-v3-turbo model.
4
+ # The app is built using Gradio and Hugging Face Transformers, and it runs on the CPU to avoid video memory usage.
5
+
6
+ import torch
7
+ from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
8
+ import gradio as gr
9
+
10
# Run everything on the CPU with float32 weights.
device = "cpu"
torch_dtype = torch.float32

# Checkpoint used for both the model weights and the processor.
model_id = "openai/whisper-large-v3-turbo"

# Load the speech-to-text model and move it to the selected device.
model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id,
    torch_dtype=torch_dtype,
    low_cpu_mem_usage=True,
    use_safetensors=True,
).to(device)

# The processor supplies the tokenizer and feature extractor used below.
processor = AutoProcessor.from_pretrained(model_id)

# Build the ASR pipeline from the already-loaded model and processor parts.
pipe = pipeline(
    task="automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    torch_dtype=torch_dtype,
    device=device,
)
33
+
34
def transcribe_audio(audio_file) -> str:
    """
    Transcribe the given audio file using the Whisper model.

    Parameters:
        audio_file (str | None): Path to the audio file. A falsy value
            (``None`` or an empty string) means no audio was supplied.

    Returns:
        str: Transcribed text, or an empty string when no audio was supplied.
    """
    # Nothing to transcribe: skip the pipeline call instead of handing it
    # an empty input, which would surface as an error to the caller.
    if not audio_file:
        return ""

    # The pipeline returns a mapping; only its "text" entry is exposed.
    result = pipe(audio_file)
    return result["text"]
46
+
47
# Define the Gradio interface.
# NOTE: the legacy ``gr.inputs`` namespace and the ``source=`` keyword are
# not available in Gradio 4+, so the top-level ``gr.Audio`` component with
# ``sources=[...]`` is used instead (targets Gradio >= 4).
iface = gr.Interface(
    fn=transcribe_audio,
    inputs=gr.Audio(sources=["upload"], type="filepath"),
    outputs="text",
    title="Whisper ASR Demo",
    description="Upload an audio file and get the transcribed text using the openai/whisper-large-v3-turbo model.",
)

# Launch the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()