Staticaliza committed on
Commit
0bc0be3
·
verified ·
1 Parent(s): a3c1698

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -38
app.py CHANGED
@@ -4,48 +4,42 @@ import spaces
4
  import torch
5
 
6
  from transformers import pipeline
7
- from transformers.pipelines.audio_utils import ffmpeg_read
8
 
9
- import tempfile
10
- import os
 
 
 
11
 
12
- MODEL_NAME = "openai/whisper-large-v3-turbo"
13
  BATCH_SIZE = 8
14
 
15
- device = 0 if torch.cuda.is_available() else "cpu"
16
-
17
- pipe = pipeline(task="automatic-speech-recognition", model=MODEL_NAME, chunk_length_s=30, device=device,)
18
-
19
 
20
  @spaces.GPU
21
  def transcribe(inputs, task):
22
- if inputs is None:
23
- raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
24
-
25
- text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
26
- return text
27
-
28
-
29
- demo = gr.Blocks(theme=gr.themes.Ocean())
30
-
31
-
32
- file_transcribe = gr.Interface(
33
- fn=transcribe,
34
- inputs=[
35
- gr.Audio(sources="upload", type="filepath", label="Audio file"),
36
- gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
37
- ],
38
- outputs="text",
39
- title="Whisper Large V3: Transcribe Audio",
40
- description=(
41
- "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
42
- f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
43
- " of arbitrary length."
44
- ),
45
- allow_flagging="never",
46
- )
47
-
48
- with demo:
49
- gr.TabbedInterface([file_transcribe], ["Audio file"])
50
-
51
- demo.queue().launch(ssr_mode=False)
 
4
  import torch
5
 
6
  from transformers import pipeline
 
7
 
8
# Pre-Initialize
# Resolve the compute device once at import time: the "auto" placeholder is
# replaced by "cuda" when torch reports CUDA as available, otherwise "cpu".
DEVICE = "auto"
if DEVICE == "auto":
    DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(f"[SYSTEM] | Using {DEVICE} type compute device.")

# Variables
# Passed as `batch_size` on every pipeline call in `transcribe`.
BATCH_SIZE = 8

# Speech-recognition pipeline shared by all requests.
# Fix: the device argument previously referenced lowercase `device`, a name
# that is no longer assigned after this change (only `DEVICE` is), which
# raised NameError as soon as the module was imported.
pipe = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-large-v3-turbo",
    chunk_length_s=30,
    device=DEVICE,
)
 
 
 
18
 
19
  @spaces.GPU
20
  def transcribe(inputs, task):
21
+ if inputs is None: raise gr.Error("Invalid input.")
22
+ output = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
23
+ return output
24
+
25
def cloud():
    """Print a fixed status line to stdout; takes no input, returns None."""
    status_line = "[CLOUD] | Space maintained."
    print(status_line)
27
+
28
# Initialize
# Build the UI: a header, an input column (audio upload, task selector and
# two buttons) and an output textbox, then wire the buttons and launch.
# NOTE(review): `css` is not defined in the visible part of this file —
# confirm it exists above, otherwise this line raises NameError.
with gr.Blocks(css=css) as main:
    with gr.Column():
        gr.Markdown("🪄 Transcribe audio to text.")

    with gr.Column():
        # Fix: these two assignments previously ended with a trailing comma,
        # which bound 1-tuples instead of the components themselves; the
        # tuples were then handed to `submit.click(inputs=...)`.
        # They are also renamed from `input` / `type` so the builtins of the
        # same names are no longer shadowed at module level.
        audio_input = gr.Audio(sources="upload", type="filepath", label="Input")
        task_input = gr.Radio(["transcribe", "translate"], label="Task", value="transcribe")
        submit = gr.Button("▶")
        maintain = gr.Button("☁️")

    with gr.Column():
        # NOTE(review): `DEFAULT_INPUT` is not defined in the visible part of
        # this file — confirm it exists above, otherwise this raises NameError.
        output = gr.Textbox(lines=1, value=DEFAULT_INPUT, label="Output")

    # Event listeners are registered while the Blocks context is still open.
    submit.click(transcribe, inputs=[audio_input, task_input], outputs=[output], queue=False)
    maintain.click(cloud, inputs=[], outputs=[], queue=False)

main.launch(show_api=True)