Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -104,12 +104,32 @@ def download_audio(url, cookies_file_path=None):
|
|
104 |
'outtmpl': output_path + '.%(ext)s',
|
105 |
'quiet': True,
|
106 |
'no_warnings': True,
|
|
|
|
|
|
|
|
|
|
|
|
|
107 |
}
|
108 |
|
109 |
# Add cookies file if provided
|
110 |
if cookies_file_path and os.path.exists(cookies_file_path):
|
111 |
ydl_opts['cookiefile'] = cookies_file_path
|
112 |
print(f"Using cookies file: {cookies_file_path}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
113 |
|
114 |
with YoutubeDL(ydl_opts) as ydl:
|
115 |
info_dict = ydl.extract_info(url, download=True)
|
@@ -124,7 +144,10 @@ def download_audio(url, cookies_file_path=None):
|
|
124 |
raise FileNotFoundError(f"Downloaded audio file not found")
|
125 |
|
126 |
except Exception as e:
|
127 |
-
|
|
|
|
|
|
|
128 |
|
129 |
def transcribe_audio(file_path):
|
130 |
"""Transcribe audio file using Whisper"""
|
@@ -352,11 +375,17 @@ with gr.Blocks(
|
|
352 |
)
|
353 |
|
354 |
gr.Markdown("""
|
355 |
-
**How to get cookies.txt:**
|
356 |
-
1. Install browser extension
|
357 |
-
2. Visit YouTube in your browser (logged in)
|
358 |
-
3.
|
359 |
4. Upload the downloaded cookies.txt file here
|
|
|
|
|
|
|
|
|
|
|
|
|
360 |
""")
|
361 |
|
362 |
url_input = gr.Textbox(
|
@@ -380,10 +409,11 @@ with gr.Blocks(
|
|
380 |
|
381 |
gr.Markdown("""
|
382 |
### 💡 Tips:
|
383 |
-
-
|
384 |
- Works best with financial YouTube channels
|
385 |
- Ensure video has clear audio
|
386 |
- English content works best
|
|
|
387 |
""")
|
388 |
|
389 |
with gr.Row():
|
|
|
104 |
'outtmpl': output_path + '.%(ext)s',
|
105 |
'quiet': True,
|
106 |
'no_warnings': True,
|
107 |
+
# Anti-bot detection measures
|
108 |
+
'user_agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
|
109 |
+
'referer': 'https://www.youtube.com/',
|
110 |
+
'extractor_retries': 3,
|
111 |
+
'fragment_retries': 3,
|
112 |
+
'retry_sleep_functions': {'http': lambda n: 2 ** n},
|
113 |
}
|
114 |
|
115 |
# Add cookies file if provided
|
116 |
if cookies_file_path and os.path.exists(cookies_file_path):
|
117 |
ydl_opts['cookiefile'] = cookies_file_path
|
118 |
print(f"Using cookies file: {cookies_file_path}")
|
119 |
+
else:
|
120 |
+
print("No cookies file provided - may encounter bot detection")
|
121 |
+
# Additional headers without cookies
|
122 |
+
ydl_opts.update({
|
123 |
+
'headers': {
|
124 |
+
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
|
125 |
+
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
|
126 |
+
'Accept-Language': 'en-us,en;q=0.5',
|
127 |
+
'Accept-Encoding': 'gzip,deflate',
|
128 |
+
'DNT': '1',
|
129 |
+
'Connection': 'keep-alive',
|
130 |
+
'Upgrade-Insecure-Requests': '1',
|
131 |
+
}
|
132 |
+
})
|
133 |
|
134 |
with YoutubeDL(ydl_opts) as ydl:
|
135 |
info_dict = ydl.extract_info(url, download=True)
|
|
|
144 |
raise FileNotFoundError(f"Downloaded audio file not found")
|
145 |
|
146 |
except Exception as e:
|
147 |
+
if "403" in str(e) or "Forbidden" in str(e):
|
148 |
+
raise Exception(f"YouTube blocked the request (403 Forbidden). Please upload your cookies.txt file to bypass bot detection. Original error: {str(e)}")
|
149 |
+
else:
|
150 |
+
raise Exception(f"Failed to download audio: {str(e)}")
|
151 |
|
152 |
def transcribe_audio(file_path):
|
153 |
"""Transcribe audio file using Whisper"""
|
|
|
375 |
)
|
376 |
|
377 |
gr.Markdown("""
|
378 |
+
**How to get cookies.txt to fix 403 Forbidden errors:**
|
379 |
+
1. Install browser extension: "Get cookies.txt LOCALLY"
|
380 |
+
2. Visit YouTube in your browser (while logged in)
|
381 |
+
3. Click the extension icon and export cookies for youtube.com
|
382 |
4. Upload the downloaded cookies.txt file here
|
383 |
+
|
384 |
+
**Alternative extensions:**
|
385 |
+
- "cookies.txt" (Chrome/Firefox)
|
386 |
+
- "Export Cookies" (Chrome)
|
387 |
+
|
388 |
+
⚠️ **Important**: Without cookies, you'll likely get 403 Forbidden errors
|
389 |
""")
|
390 |
|
391 |
url_input = gr.Textbox(
|
|
|
409 |
|
410 |
gr.Markdown("""
|
411 |
### 💡 Tips:
|
412 |
+
- **MUST upload cookies.txt** to avoid 403 Forbidden errors
|
413 |
- Works best with financial YouTube channels
|
414 |
- Ensure video has clear audio
|
415 |
- English content works best
|
416 |
+
- Try shorter videos first (under 10 minutes)
|
417 |
""")
|
418 |
|
419 |
with gr.Row():
|