muhtasham committed on
Commit
478eee2
·
1 Parent(s): 7b9063f
Files changed (1) hide show
  1. app.py +56 -41
app.py CHANGED
@@ -9,10 +9,9 @@ import os
9
  import time
10
  import subprocess
11
  from loguru import logger
12
- import browser_cookie3 # Add this import for browser cookies
13
 
14
  MODEL_NAME = "muhtasham/whisper-tg"
15
- BATCH_SIZE = 8
16
  FILE_LIMIT_MB = 1000
17
  YT_LENGTH_LIMIT_S = 3600 # limit to 1 hour YouTube files
18
 
@@ -37,12 +36,21 @@ pipe = pipeline(
37
  )
38
 
39
  @spaces.GPU
40
- def transcribe(inputs, task):
41
  if inputs is None:
42
  raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
43
 
44
- text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)
45
- return text
 
 
 
 
 
 
 
 
 
46
 
47
  def _return_yt_html_embed(yt_url):
48
  try:
@@ -56,33 +64,10 @@ def _return_yt_html_embed(yt_url):
56
  logger.error(f"Error creating embed HTML: {str(e)}")
57
  raise gr.Error("Invalid YouTube URL format")
58
 
59
- def get_youtube_cookies():
60
- """Get YouTube cookies from the browser"""
61
- try:
62
- # Try Chrome first
63
- cookies = browser_cookie3.chrome(domain_name='.youtube.com')
64
- except:
65
- try:
66
- # Try Firefox if Chrome fails
67
- cookies = browser_cookie3.firefox(domain_name='.youtube.com')
68
- except:
69
- try:
70
- # Try Safari if Firefox fails
71
- cookies = browser_cookie3.safari(domain_name='.youtube.com')
72
- except Exception as e:
73
- logger.warning(f"Could not get browser cookies: {str(e)}")
74
- return None
75
-
76
- # Convert cookies to the format yt-dlp expects
77
- return {cookie.name: cookie.value for cookie in cookies}
78
-
79
  def download_yt_audio(yt_url, filename):
80
  logger.info(f"Starting download for URL: {yt_url}")
81
 
82
- # Get YouTube cookies
83
- cookies = get_youtube_cookies()
84
-
85
- # Configure yt-dlp options
86
  ydl_opts = {
87
  "format": "bestaudio/best",
88
  "postprocessors": [{
@@ -99,13 +84,25 @@ def download_yt_audio(yt_url, filename):
99
  "ignoreerrors": False,
100
  "logtostderr": False,
101
  "verbose": False,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
  }
103
 
104
- # Add cookies if available
105
- if cookies:
106
- ydl_opts["cookiesfrombrowser"] = ("chrome",) # or "firefox" or "safari"
107
- logger.info("Using browser cookies for YouTube authentication")
108
-
109
  try:
110
  # First, get video info without downloading
111
  with youtube_dl.YoutubeDL({"quiet": True}) as ydl:
@@ -151,7 +148,7 @@ def download_yt_audio(yt_url, filename):
151
  raise gr.Error(f"An unexpected error occurred: {str(e)}")
152
 
153
  @spaces.GPU
154
- def yt_transcribe(yt_url, task, max_filesize=75.0):
155
  html_embed_str = _return_yt_html_embed(yt_url)
156
 
157
  with tempfile.TemporaryDirectory() as tmpdirname:
@@ -163,9 +160,17 @@ def yt_transcribe(yt_url, task, max_filesize=75.0):
163
  inputs = ffmpeg_read(inputs, pipe.feature_extractor.sampling_rate)
164
  inputs = {"array": inputs, "sampling_rate": pipe.feature_extractor.sampling_rate}
165
 
166
- text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
167
-
168
- return html_embed_str, text
 
 
 
 
 
 
 
 
169
 
170
  demo = gr.Blocks(theme=gr.themes.Ocean())
171
 
@@ -174,7 +179,10 @@ mf_transcribe = gr.Interface(
174
  inputs=[
175
  gr.Audio(sources="microphone", type="filepath"),
176
  ],
177
- outputs="text",
 
 
 
178
  title="Whisper Large V3 Turbo: Transcribe Audio",
179
  description=(
180
  "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
@@ -189,7 +197,10 @@ file_transcribe = gr.Interface(
189
  inputs=[
190
  gr.Audio(sources="upload", type="filepath", label="Audio file"),
191
  ],
192
- outputs="text",
 
 
 
193
  title="Whisper Large V3: Transcribe Audio",
194
  description=(
195
  "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
@@ -204,7 +215,11 @@ yt_transcribe = gr.Interface(
204
  inputs=[
205
  gr.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL"),
206
  ],
207
- outputs=["html", "text"],
 
 
 
 
208
  title="Whisper Large V3: Transcribe YouTube",
209
  description=(
210
  "Transcribe long-form YouTube videos with the click of a button! Demo uses the checkpoint"
 
9
  import time
10
  import subprocess
11
  from loguru import logger
 
12
 
13
  MODEL_NAME = "muhtasham/whisper-tg"
14
+ BATCH_SIZE = 32
15
  FILE_LIMIT_MB = 1000
16
  YT_LENGTH_LIMIT_S = 3600 # limit to 1 hour YouTube files
17
 
 
36
  )
37
 
38
  @spaces.GPU
39
def transcribe(inputs):
    """Transcribe an audio input and return the text with per-chunk timestamps.

    Args:
        inputs: Audio input forwarded unchanged to the ASR pipeline.
            ``None`` means the user submitted nothing.

    Returns:
        A ``(text, timestamps)`` tuple. ``text`` is the full transcription
        reported by the pipeline; ``timestamps`` is a newline-joined string
        with one ``[<start>s - <end>s] <chunk text>`` line per chunk.

    Raises:
        gr.Error: If ``inputs`` is ``None``.
    """
    if inputs is None:
        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")

    result = pipe(inputs, batch_size=BATCH_SIZE, return_timestamps=True)

    def _fmt_seconds(seconds):
        # The pipeline can report a missing boundary (typically the end of the
        # final chunk) as None; formatting None with ":.2f" raises TypeError,
        # so render a placeholder instead of failing the whole request.
        return "?" if seconds is None else f"{seconds:.2f}s"

    timestamp_lines = []
    # Tolerate a result without "chunks" rather than raising KeyError.
    for chunk in result.get("chunks") or []:
        start_time, end_time = chunk["timestamp"]
        chunk_text = chunk["text"].strip()
        timestamp_lines.append(
            f"[{_fmt_seconds(start_time)} - {_fmt_seconds(end_time)}] {chunk_text}"
        )

    return result["text"], "\n".join(timestamp_lines)
54
 
55
  def _return_yt_html_embed(yt_url):
56
  try:
 
64
  logger.error(f"Error creating embed HTML: {str(e)}")
65
  raise gr.Error("Invalid YouTube URL format")
66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  def download_yt_audio(yt_url, filename):
68
  logger.info(f"Starting download for URL: {yt_url}")
69
 
70
+ # Configure yt-dlp options with anti-bot detection measures
 
 
 
71
  ydl_opts = {
72
  "format": "bestaudio/best",
73
  "postprocessors": [{
 
84
  "ignoreerrors": False,
85
  "logtostderr": False,
86
  "verbose": False,
87
+ # Anti-bot detection options
88
+ "cookiesfrombrowser": ("chrome",),
89
+ "user_agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
90
+ "http_headers": {
91
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
92
+ "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
93
+ "Accept-Language": "en-us,en;q=0.5",
94
+ "Sec-Fetch-Mode": "navigate",
95
+ },
96
+ "socket_timeout": 30,
97
+ "retries": 10,
98
+ "fragment_retries": 10,
99
+ "file_access_retries": 10,
100
+ "extractor_retries": 10,
101
+ "ignoreerrors": False,
102
+ "no_warnings": True,
103
+ "quiet": True,
104
  }
105
 
 
 
 
 
 
106
  try:
107
  # First, get video info without downloading
108
  with youtube_dl.YoutubeDL({"quiet": True}) as ydl:
 
148
  raise gr.Error(f"An unexpected error occurred: {str(e)}")
149
 
150
  @spaces.GPU
151
+ def yt_transcribe(yt_url):
152
  html_embed_str = _return_yt_html_embed(yt_url)
153
 
154
  with tempfile.TemporaryDirectory() as tmpdirname:
 
160
  inputs = ffmpeg_read(inputs, pipe.feature_extractor.sampling_rate)
161
  inputs = {"array": inputs, "sampling_rate": pipe.feature_extractor.sampling_rate}
162
 
163
+ result = pipe(inputs, batch_size=BATCH_SIZE, return_timestamps=True)
164
+
165
+ # Format timestamps with text
166
+ timestamps = []
167
+ for chunk in result["chunks"]:
168
+ start_time = chunk["timestamp"][0]
169
+ end_time = chunk["timestamp"][1]
170
+ text = chunk["text"].strip()
171
+ timestamps.append(f"[{start_time:.2f}s - {end_time:.2f}s] {text}")
172
+
173
+ return html_embed_str, result["text"], "\n".join(timestamps)
174
 
175
  demo = gr.Blocks(theme=gr.themes.Ocean())
176
 
 
179
  inputs=[
180
  gr.Audio(sources="microphone", type="filepath"),
181
  ],
182
+ outputs=[
183
+ gr.Textbox(label="Transcription", lines=10),
184
+ gr.Textbox(label="Timestamps", lines=10),
185
+ ],
186
  title="Whisper Large V3 Turbo: Transcribe Audio",
187
  description=(
188
  "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
 
197
  inputs=[
198
  gr.Audio(sources="upload", type="filepath", label="Audio file"),
199
  ],
200
+ outputs=[
201
+ gr.Textbox(label="Transcription", lines=10),
202
+ gr.Textbox(label="Timestamps", lines=10),
203
+ ],
204
  title="Whisper Large V3: Transcribe Audio",
205
  description=(
206
  "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
 
215
  inputs=[
216
  gr.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL"),
217
  ],
218
+ outputs=[
219
+ gr.HTML(label="Video"),
220
+ gr.Textbox(label="Transcription", lines=10),
221
+ gr.Textbox(label="Timestamps", lines=10),
222
+ ],
223
  title="Whisper Large V3: Transcribe YouTube",
224
  description=(
225
  "Transcribe long-form YouTube videos with the click of a button! Demo uses the checkpoint"