muhtasham committed on
Commit
cc96a73
·
1 Parent(s): 478eee2
Files changed (2) hide show
  1. app.py +3 -147
  2. requirements.txt +0 -2
app.py CHANGED
@@ -1,19 +1,14 @@
1
  import spaces
2
  import torch
3
  import gradio as gr
4
- import yt_dlp as youtube_dl
5
  from transformers import pipeline
6
  from transformers.pipelines.audio_utils import ffmpeg_read
7
- import tempfile
8
- import os
9
- import time
10
  import subprocess
11
  from loguru import logger
12
 
13
  MODEL_NAME = "muhtasham/whisper-tg"
14
- BATCH_SIZE = 32
15
  FILE_LIMIT_MB = 1000
16
- YT_LENGTH_LIMIT_S = 3600 # limit to 1 hour YouTube files
17
 
18
  # Check if ffmpeg is installed
19
  def check_ffmpeg():
@@ -48,130 +43,10 @@ def transcribe(inputs):
48
  start_time = chunk["timestamp"][0]
49
  end_time = chunk["timestamp"][1]
50
  text = chunk["text"].strip()
51
- timestamps.append(f"[{start_time:.2f}s - {end_time:.2f}s] {text}")
52
 
53
  return result["text"], "\n".join(timestamps)
54
 
55
- def _return_yt_html_embed(yt_url):
56
- try:
57
- video_id = yt_url.split("?v=")[-1]
58
- HTML_str = (
59
- f'<center> <iframe width="500" height="320" src="https://www.youtube.com/embed/{video_id}"> </iframe>'
60
- " </center>"
61
- )
62
- return HTML_str
63
- except Exception as e:
64
- logger.error(f"Error creating embed HTML: {str(e)}")
65
- raise gr.Error("Invalid YouTube URL format")
66
-
67
- def download_yt_audio(yt_url, filename):
68
- logger.info(f"Starting download for URL: {yt_url}")
69
-
70
- # Configure yt-dlp options with anti-bot detection measures
71
- ydl_opts = {
72
- "format": "bestaudio/best",
73
- "postprocessors": [{
74
- "key": "FFmpegExtractAudio",
75
- "preferredcodec": "mp3",
76
- "preferredquality": "192",
77
- }],
78
- "outtmpl": filename,
79
- "quiet": True,
80
- "no_warnings": True,
81
- "extract_flat": False,
82
- "force_generic_extractor": False,
83
- "nocheckcertificate": True,
84
- "ignoreerrors": False,
85
- "logtostderr": False,
86
- "verbose": False,
87
- # Anti-bot detection options
88
- "cookiesfrombrowser": ("chrome",),
89
- "user_agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
90
- "http_headers": {
91
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
92
- "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
93
- "Accept-Language": "en-us,en;q=0.5",
94
- "Sec-Fetch-Mode": "navigate",
95
- },
96
- "socket_timeout": 30,
97
- "retries": 10,
98
- "fragment_retries": 10,
99
- "file_access_retries": 10,
100
- "extractor_retries": 10,
101
- "ignoreerrors": False,
102
- "no_warnings": True,
103
- "quiet": True,
104
- }
105
-
106
- try:
107
- # First, get video info without downloading
108
- with youtube_dl.YoutubeDL({"quiet": True}) as ydl:
109
- logger.info("Extracting video information...")
110
- info = ydl.extract_info(yt_url, download=False)
111
-
112
- # Check video duration
113
- file_length = info.get("duration_string", "0:00:00")
114
- file_h_m_s = file_length.split(":")
115
- file_h_m_s = [int(sub_length) for sub_length in file_h_m_s]
116
-
117
- if len(file_h_m_s) == 1:
118
- file_h_m_s.insert(0, 0)
119
- if len(file_h_m_s) == 2:
120
- file_h_m_s.insert(0, 0)
121
- file_length_s = file_h_m_s[0] * 3600 + file_h_m_s[1] * 60 + file_h_m_s[2]
122
-
123
- if file_length_s > YT_LENGTH_LIMIT_S:
124
- yt_length_limit_hms = time.strftime("%HH:%MM:%SS", time.gmtime(YT_LENGTH_LIMIT_S))
125
- file_length_hms = time.strftime("%HH:%MM:%SS", time.gmtime(file_length_s))
126
- raise gr.Error(f"Maximum YouTube length is {yt_length_limit_hms}, got {file_length_hms} YouTube video.")
127
-
128
- # Check if video is age-restricted or private
129
- if info.get("age_limit") or info.get("is_private"):
130
- raise gr.Error("This video is age-restricted or private and cannot be processed.")
131
-
132
- logger.info("Video information extracted successfully")
133
-
134
- # Now download the audio
135
- logger.info("Starting audio download...")
136
- with youtube_dl.YoutubeDL(ydl_opts) as ydl:
137
- ydl.download([yt_url])
138
- logger.info("Audio download completed successfully")
139
-
140
- except youtube_dl.utils.DownloadError as err:
141
- logger.error(f"Download error: {str(err)}")
142
- raise gr.Error(f"Failed to download video: {str(err)}")
143
- except youtube_dl.utils.ExtractorError as err:
144
- logger.error(f"Extraction error: {str(err)}")
145
- raise gr.Error(f"Failed to extract video information: {str(err)}")
146
- except Exception as e:
147
- logger.error(f"Unexpected error: {str(e)}")
148
- raise gr.Error(f"An unexpected error occurred: {str(e)}")
149
-
150
- @spaces.GPU
151
- def yt_transcribe(yt_url):
152
- html_embed_str = _return_yt_html_embed(yt_url)
153
-
154
- with tempfile.TemporaryDirectory() as tmpdirname:
155
- filepath = os.path.join(tmpdirname, "video.mp4")
156
- download_yt_audio(yt_url, filepath)
157
- with open(filepath, "rb") as f:
158
- inputs = f.read()
159
-
160
- inputs = ffmpeg_read(inputs, pipe.feature_extractor.sampling_rate)
161
- inputs = {"array": inputs, "sampling_rate": pipe.feature_extractor.sampling_rate}
162
-
163
- result = pipe(inputs, batch_size=BATCH_SIZE, return_timestamps=True)
164
-
165
- # Format timestamps with text
166
- timestamps = []
167
- for chunk in result["chunks"]:
168
- start_time = chunk["timestamp"][0]
169
- end_time = chunk["timestamp"][1]
170
- text = chunk["text"].strip()
171
- timestamps.append(f"[{start_time:.2f}s - {end_time:.2f}s] {text}")
172
-
173
- return html_embed_str, result["text"], "\n".join(timestamps)
174
-
175
  demo = gr.Blocks(theme=gr.themes.Ocean())
176
 
177
  mf_transcribe = gr.Interface(
@@ -210,27 +85,8 @@ file_transcribe = gr.Interface(
210
  allow_flagging="never",
211
  )
212
 
213
- yt_transcribe = gr.Interface(
214
- fn=yt_transcribe,
215
- inputs=[
216
- gr.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL"),
217
- ],
218
- outputs=[
219
- gr.HTML(label="Video"),
220
- gr.Textbox(label="Transcription", lines=10),
221
- gr.Textbox(label="Timestamps", lines=10),
222
- ],
223
- title="Whisper Large V3: Transcribe YouTube",
224
- description=(
225
- "Transcribe long-form YouTube videos with the click of a button! Demo uses the checkpoint"
226
- f" [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe video files of"
227
- " arbitrary length."
228
- ),
229
- allow_flagging="never",
230
- )
231
-
232
  with demo:
233
- gr.TabbedInterface([mf_transcribe, file_transcribe, yt_transcribe], ["Microphone", "Audio file", "YouTube"])
234
 
235
  demo.queue().launch(ssr_mode=False)
236
 
 
1
  import spaces
2
  import torch
3
  import gradio as gr
 
4
  from transformers import pipeline
5
  from transformers.pipelines.audio_utils import ffmpeg_read
 
 
 
6
  import subprocess
7
  from loguru import logger
8
 
9
  MODEL_NAME = "muhtasham/whisper-tg"
10
+ BATCH_SIZE = 8
11
  FILE_LIMIT_MB = 1000
 
12
 
13
  # Check if ffmpeg is installed
14
  def check_ffmpeg():
 
43
  start_time = chunk["timestamp"][0]
44
  end_time = chunk["timestamp"][1]
45
  text = chunk["text"].strip()
46
+ timestamps.append(f"[{start_time:.2f}s - {end_time:.2f}s] {text} \n \n")
47
 
48
  return result["text"], "\n".join(timestamps)
49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  demo = gr.Blocks(theme=gr.themes.Ocean())
51
 
52
  mf_transcribe = gr.Interface(
 
85
  allow_flagging="never",
86
  )
87
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
  with demo:
89
+ gr.TabbedInterface([mf_transcribe, file_transcribe], ["Microphone", "Audio file"])
90
 
91
  demo.queue().launch(ssr_mode=False)
92
 
requirements.txt CHANGED
@@ -1,4 +1,2 @@
1
  transformers
2
- yt-dlp
3
  loguru
4
- browser-cookie3
 
1
  transformers
 
2
  loguru