Staticaliza committed on
Commit
4946e6a
·
verified ·
1 Parent(s): 5498c5e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -12
app.py CHANGED
@@ -1,7 +1,7 @@
 
1
  import torch
2
 
3
  import gradio as gr
4
- import spaces
5
  import yt_dlp as youtube_dl
6
  from transformers import pipeline
7
  from transformers.pipelines.audio_utils import ffmpeg_read
@@ -9,7 +9,7 @@ from transformers.pipelines.audio_utils import ffmpeg_read
9
  import tempfile
10
  import os
11
 
12
- MODEL_NAME = "openai/whisper-large-v3"
13
  BATCH_SIZE = 8
14
  FILE_LIMIT_MB = 1000
15
  YT_LENGTH_LIMIT_S = 3600 # limit to 1 hour YouTube files
@@ -23,6 +23,7 @@ pipe = pipeline(
23
  device=device,
24
  )
25
 
 
26
  @spaces.GPU
27
  def transcribe(inputs, task):
28
  if inputs is None:
@@ -71,7 +72,7 @@ def download_yt_audio(yt_url, filename):
71
  except youtube_dl.utils.ExtractorError as err:
72
  raise gr.Error(str(err))
73
 
74
-
75
  def yt_transcribe(yt_url, task, max_filesize=75.0):
76
  html_embed_str = _return_yt_html_embed(yt_url)
77
 
@@ -94,14 +95,13 @@ demo = gr.Blocks()
94
  mf_transcribe = gr.Interface(
95
  fn=transcribe,
96
  inputs=[
97
- gr.Audio(type="filepath"),
98
  gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
99
  ],
100
  outputs="text",
101
- theme="huggingface",
102
- title="Whisper Large V3: Transcribe Audio",
103
  description=(
104
- "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the OpenAI Whisper"
105
  f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and πŸ€— Transformers to transcribe audio files"
106
  " of arbitrary length."
107
  ),
@@ -111,14 +111,13 @@ mf_transcribe = gr.Interface(
111
  file_transcribe = gr.Interface(
112
  fn=transcribe,
113
  inputs=[
114
- gr.Audio(type="filepath", label="Audio file"),
115
  gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
116
  ],
117
  outputs="text",
118
- theme="huggingface",
119
  title="Whisper Large V3: Transcribe Audio",
120
  description=(
121
- "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the OpenAI Whisper"
122
  f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and πŸ€— Transformers to transcribe audio files"
123
  " of arbitrary length."
124
  ),
@@ -132,10 +131,9 @@ yt_transcribe = gr.Interface(
132
  gr.Radio(["transcribe", "translate"], label="Task", value="transcribe")
133
  ],
134
  outputs=["html", "text"],
135
- theme="huggingface",
136
  title="Whisper Large V3: Transcribe YouTube",
137
  description=(
138
- "Transcribe long-form YouTube videos with the click of a button! Demo uses the OpenAI Whisper checkpoint"
139
  f" [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and πŸ€— Transformers to transcribe video files of"
140
  " arbitrary length."
141
  ),
 
1
+ import spaces
2
  import torch
3
 
4
  import gradio as gr
 
5
  import yt_dlp as youtube_dl
6
  from transformers import pipeline
7
  from transformers.pipelines.audio_utils import ffmpeg_read
 
9
  import tempfile
10
  import os
11
 
12
+ MODEL_NAME = "ylacombe/whisper-large-v3-turbo"
13
  BATCH_SIZE = 8
14
  FILE_LIMIT_MB = 1000
15
  YT_LENGTH_LIMIT_S = 3600 # limit to 1 hour YouTube files
 
23
  device=device,
24
  )
25
 
26
+
27
  @spaces.GPU
28
  def transcribe(inputs, task):
29
  if inputs is None:
 
72
  except youtube_dl.utils.ExtractorError as err:
73
  raise gr.Error(str(err))
74
 
75
+ @spaces.GPU
76
  def yt_transcribe(yt_url, task, max_filesize=75.0):
77
  html_embed_str = _return_yt_html_embed(yt_url)
78
 
 
95
  mf_transcribe = gr.Interface(
96
  fn=transcribe,
97
  inputs=[
98
+ gr.Audio(sources="microphone", type="filepath"),
99
  gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
100
  ],
101
  outputs="text",
102
+ title="Whisper Large V3 Turbo: Transcribe Audio",
 
103
  description=(
104
+ "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
105
  f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and πŸ€— Transformers to transcribe audio files"
106
  " of arbitrary length."
107
  ),
 
111
  file_transcribe = gr.Interface(
112
  fn=transcribe,
113
  inputs=[
114
+ gr.Audio(sources="upload", type="filepath", label="Audio file"),
115
  gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
116
  ],
117
  outputs="text",
 
118
  title="Whisper Large V3: Transcribe Audio",
119
  description=(
120
+ "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
121
  f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and πŸ€— Transformers to transcribe audio files"
122
  " of arbitrary length."
123
  ),
 
131
  gr.Radio(["transcribe", "translate"], label="Task", value="transcribe")
132
  ],
133
  outputs=["html", "text"],
 
134
  title="Whisper Large V3: Transcribe YouTube",
135
  description=(
136
+ "Transcribe long-form YouTube videos with the click of a button! Demo uses the checkpoint"
137
  f" [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and πŸ€— Transformers to transcribe video files of"
138
  " arbitrary length."
139
  ),