developer28 committed on
Commit
a6273c7
·
verified ·
1 Parent(s): d33e944

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -6
app.py CHANGED
@@ -104,12 +104,32 @@ def download_audio(url, cookies_file_path=None):
104
  'outtmpl': output_path + '.%(ext)s',
105
  'quiet': True,
106
  'no_warnings': True,
 
 
 
 
 
 
107
  }
108
 
109
  # Add cookies file if provided
110
  if cookies_file_path and os.path.exists(cookies_file_path):
111
  ydl_opts['cookiefile'] = cookies_file_path
112
  print(f"Using cookies file: {cookies_file_path}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
 
114
  with YoutubeDL(ydl_opts) as ydl:
115
  info_dict = ydl.extract_info(url, download=True)
@@ -124,7 +144,10 @@ def download_audio(url, cookies_file_path=None):
124
  raise FileNotFoundError(f"Downloaded audio file not found")
125
 
126
  except Exception as e:
127
- raise Exception(f"Failed to download audio: {str(e)}")
 
 
 
128
 
129
  def transcribe_audio(file_path):
130
  """Transcribe audio file using Whisper"""
@@ -352,11 +375,17 @@ with gr.Blocks(
352
  )
353
 
354
  gr.Markdown("""
355
- **How to get cookies.txt:**
356
- 1. Install browser extension like "Get cookies.txt LOCALLY"
357
- 2. Visit YouTube in your browser (logged in)
358
- 3. Export cookies for youtube.com
359
  4. Upload the downloaded cookies.txt file here
 
 
 
 
 
 
360
  """)
361
 
362
  url_input = gr.Textbox(
@@ -380,10 +409,11 @@ with gr.Blocks(
380
 
381
  gr.Markdown("""
382
  ### 💡 Tips:
383
- - Upload cookies.txt to avoid bot detection
384
  - Works best with financial YouTube channels
385
  - Ensure video has clear audio
386
  - English content works best
 
387
  """)
388
 
389
  with gr.Row():
 
104
  'outtmpl': output_path + '.%(ext)s',
105
  'quiet': True,
106
  'no_warnings': True,
107
+ # Anti-bot detection measures
108
+ 'user_agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
109
+ 'referer': 'https://www.youtube.com/',
110
+ 'extractor_retries': 3,
111
+ 'fragment_retries': 3,
112
+ 'retry_sleep_functions': {'http': lambda n: 2 ** n},
113
  }
114
 
115
  # Add cookies file if provided
116
  if cookies_file_path and os.path.exists(cookies_file_path):
117
  ydl_opts['cookiefile'] = cookies_file_path
118
  print(f"Using cookies file: {cookies_file_path}")
119
+ else:
120
+ print("No cookies file provided - may encounter bot detection")
121
+ # Additional headers without cookies
122
+ ydl_opts.update({
123
+ 'headers': {
124
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
125
+ 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
126
+ 'Accept-Language': 'en-us,en;q=0.5',
127
+ 'Accept-Encoding': 'gzip,deflate',
128
+ 'DNT': '1',
129
+ 'Connection': 'keep-alive',
130
+ 'Upgrade-Insecure-Requests': '1',
131
+ }
132
+ })
133
 
134
  with YoutubeDL(ydl_opts) as ydl:
135
  info_dict = ydl.extract_info(url, download=True)
 
144
  raise FileNotFoundError(f"Downloaded audio file not found")
145
 
146
  except Exception as e:
147
+ if "403" in str(e) or "Forbidden" in str(e):
148
+ raise Exception(f"YouTube blocked the request (403 Forbidden). Please upload your cookies.txt file to bypass bot detection. Original error: {str(e)}")
149
+ else:
150
+ raise Exception(f"Failed to download audio: {str(e)}")
151
 
152
  def transcribe_audio(file_path):
153
  """Transcribe audio file using Whisper"""
 
375
  )
376
 
377
  gr.Markdown("""
378
+ **How to get cookies.txt to fix 403 Forbidden errors:**
379
+ 1. Install browser extension: "Get cookies.txt LOCALLY"
380
+ 2. Visit YouTube in your browser (while logged in)
381
+ 3. Click the extension icon and export cookies for youtube.com
382
  4. Upload the downloaded cookies.txt file here
383
+
384
+ **Alternative extensions:**
385
+ - "cookies.txt" (Chrome/Firefox)
386
+ - "Export Cookies" (Chrome)
387
+
388
+ ⚠️ **Important**: Without cookies, you'll likely get 403 Forbidden errors
389
  """)
390
 
391
  url_input = gr.Textbox(
 
409
 
410
  gr.Markdown("""
411
  ### 💡 Tips:
412
+ - **MUST upload cookies.txt** to avoid 403 Forbidden errors
413
  - Works best with financial YouTube channels
414
  - Ensure video has clear audio
415
  - English content works best
416
+ - Try shorter videos first (under 10 minutes)
417
  """)
418
 
419
  with gr.Row():