developer28 committed on
Commit
24a0a36
·
verified ·
1 Parent(s): 4355e8e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +120 -25
app.py CHANGED
@@ -58,12 +58,25 @@ def download_audio(url):
58
  'ignoreerrors': False,
59
  # Add user agent to avoid bot detection
60
  'http_headers': {
61
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
62
  },
63
  # Add additional options to avoid bot detection
64
  'extractor_retries': 3,
65
  'fragment_retries': 3,
66
- 'retry_sleep_functions': {'http': lambda n: 2 ** n},
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  }
68
 
69
  # Add cookies if available
@@ -74,19 +87,32 @@ def download_audio(url):
74
  print("No cookies.txt found - proceeding without cookies")
75
 
76
  with YoutubeDL(ydl_opts) as ydl:
77
- # Extract info first to check if video is available
78
- info_dict = ydl.extract_info(url, download=False)
79
-
80
- # Check if video is available
81
- if info_dict.get('availability') == 'private':
82
- raise Exception("Video is private")
83
- elif info_dict.get('availability') == 'premium_only':
84
- raise Exception("Video requires premium subscription")
85
- elif info_dict.get('live_status') == 'is_live':
86
- raise Exception("Cannot download live streams")
87
-
88
- # Download the audio
89
- ydl.download([url])
 
 
 
 
 
 
 
 
 
 
 
 
 
90
 
91
  # Find the downloaded file
92
  for ext in ['.m4a', '.webm', '.mp4', '.mp3']:
@@ -100,15 +126,55 @@ def download_audio(url):
100
  except Exception as e:
101
  error_msg = str(e)
102
  if "Sign in to confirm your age" in error_msg:
103
- raise Exception("Video is age-restricted. Please use a different video or update your cookies.")
104
  elif "Private video" in error_msg:
105
- raise Exception("Video is private and cannot be accessed.")
106
- elif "This video is unavailable" in error_msg:
107
- raise Exception("Video is unavailable or has been removed.")
 
 
108
  elif "blocked" in error_msg.lower():
109
- raise Exception("Access to this video is blocked. Try using updated cookies or a different video.")
 
 
 
 
110
  else:
111
- raise Exception(f"Failed to download audio: {error_msg}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
 
113
  def transcribe_audio(file_path):
114
  """Transcribe audio file using Whisper"""
@@ -329,20 +395,49 @@ with gr.Blocks(
329
  lines=2
330
  )
331
 
332
- process_btn = gr.Button(
333
- "🚀 Extract Stock Information",
334
- variant="primary",
335
- size="lg"
 
 
 
 
 
 
 
 
 
 
 
336
  )
337
 
338
  gr.Markdown("""
339
  ### 💡 Tips:
 
340
  - Works best with financial YouTube channels
341
  - Ensure video has clear audio
342
  - English content works best
343
  - If you get bot detection errors, try updating cookies.txt
 
 
 
 
344
  """)
345
 
 
 
 
 
 
 
 
 
 
 
 
 
 
346
  with gr.Row():
347
  with gr.Column():
348
  transcript_output = gr.Textbox(
 
58
  'ignoreerrors': False,
59
  # Add user agent to avoid bot detection
60
  'http_headers': {
61
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
62
  },
63
  # Add additional options to avoid bot detection
64
  'extractor_retries': 3,
65
  'fragment_retries': 3,
66
+ 'retry_sleep_functions': {'http': lambda n: min(2 ** n, 30)},
67
+ # Add geo bypass options
68
+ 'geo_bypass': True,
69
+ 'geo_bypass_country': 'US',
70
+ # Add more headers
71
+ 'http_headers': {
72
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
73
+ 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
74
+ 'Accept-Language': 'en-us,en;q=0.5',
75
+ 'Accept-Encoding': 'gzip,deflate',
76
+ 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
77
+ 'Connection': 'keep-alive',
78
+ 'Upgrade-Insecure-Requests': '1',
79
+ }
80
  }
81
 
82
  # Add cookies if available
 
87
  print("No cookies.txt found - proceeding without cookies")
88
 
89
  with YoutubeDL(ydl_opts) as ydl:
90
+ try:
91
+ # Extract info first to check if video is available
92
+ info_dict = ydl.extract_info(url, download=False)
93
+
94
+ # Check if video is available
95
+ if info_dict.get('availability') == 'private':
96
+ raise Exception("Video is private and cannot be accessed")
97
+ elif info_dict.get('availability') == 'premium_only':
98
+ raise Exception("Video requires premium subscription")
99
+ elif info_dict.get('availability') == 'subscriber_only':
100
+ raise Exception("Video is only available to channel subscribers")
101
+ elif info_dict.get('availability') == 'needs_auth':
102
+ raise Exception("Video requires authentication - try updating cookies")
103
+ elif info_dict.get('live_status') == 'is_live':
104
+ raise Exception("Cannot download live streams")
105
+ elif info_dict.get('live_status') == 'was_live':
106
+ print("Note: This was a live stream, trying to download recorded version...")
107
+
108
+ # Download the audio
109
+ ydl.download([url])
110
+
111
+ except Exception as extract_error:
112
+ # If extract_info fails, try direct download as fallback
113
+ print(f"Info extraction failed: {extract_error}")
114
+ print("Attempting direct download...")
115
+ ydl.download([url])
116
 
117
  # Find the downloaded file
118
  for ext in ['.m4a', '.webm', '.mp4', '.mp3']:
 
126
  except Exception as e:
127
  error_msg = str(e)
128
  if "Sign in to confirm your age" in error_msg:
129
+ raise Exception("❌ Video is age-restricted. Please use a different video or update your cookies with an authenticated session.")
130
  elif "Private video" in error_msg:
131
+ raise Exception("❌ Video is private and cannot be accessed.")
132
+ elif "This video is unavailable" in error_msg or "Video unavailable" in error_msg:
133
+ raise Exception("❌ Video is unavailable. This could be due to:\n• Geographic restrictions\n• Content removed by uploader\n• Copyright issues\n• Try a different video")
134
+ elif "This content isn't available" in error_msg:
135
+ raise Exception("❌ Content not available in your region or has been restricted. Try:\n• Using a VPN\n• Different video\n• Updating cookies")
136
  elif "blocked" in error_msg.lower():
137
+ raise Exception("❌ Access blocked. Try using updated cookies or a different video.")
138
+ elif "HTTP Error 403" in error_msg:
139
+ raise Exception("❌ Access forbidden. Video may be region-locked or require authentication.")
140
+ elif "HTTP Error 404" in error_msg:
141
+ raise Exception("❌ Video not found. It may have been deleted or the URL is incorrect.")
142
  else:
143
+ raise Exception(f"❌ Download failed: {error_msg}")
144
+
145
+ def test_video_access(url):
146
+ """Test if a video is accessible without downloading"""
147
+ try:
148
+ cookies_path = get_cookies_path()
149
+ ydl_opts = {
150
+ 'quiet': True,
151
+ 'no_warnings': True,
152
+ 'extract_flat': False,
153
+ 'skip_download': True,
154
+ }
155
+
156
+ if cookies_path:
157
+ ydl_opts['cookiefile'] = cookies_path
158
+
159
+ with YoutubeDL(ydl_opts) as ydl:
160
+ info_dict = ydl.extract_info(url, download=False)
161
+
162
+ status = "✅ Video accessible"
163
+ details = []
164
+
165
+ if info_dict.get('title'):
166
+ details.append(f"Title: {info_dict['title'][:50]}...")
167
+ if info_dict.get('duration'):
168
+ details.append(f"Duration: {info_dict['duration']} seconds")
169
+ if info_dict.get('availability'):
170
+ details.append(f"Availability: {info_dict['availability']}")
171
+ if info_dict.get('age_limit'):
172
+ details.append(f"Age limit: {info_dict['age_limit']}+")
173
+
174
+ return status + "\n" + "\n".join(details)
175
+
176
+ except Exception as e:
177
+ return f"❌ Video access test failed: {str(e)}"
178
 
179
  def transcribe_audio(file_path):
180
  """Transcribe audio file using Whisper"""
 
395
  lines=2
396
  )
397
 
398
+ with gr.Row():
399
+ process_btn = gr.Button(
400
+ "🚀 Extract Stock Information",
401
+ variant="primary",
402
+ size="lg"
403
+ )
404
+ test_btn = gr.Button(
405
+ "🔍 Test Video Access",
406
+ variant="secondary"
407
+ )
408
+
409
+ test_result = gr.Textbox(
410
+ label="📋 Video Access Test",
411
+ lines=4,
412
+ visible=False
413
  )
414
 
415
  gr.Markdown("""
416
  ### 💡 Tips:
417
+ - **First try "Test Video Access"** to check if video is available
418
  - Works best with financial YouTube channels
419
  - Ensure video has clear audio
420
  - English content works best
421
  - If you get bot detection errors, try updating cookies.txt
422
+
423
+ ### 🎯 Recommended Financial Channels:
424
+ - Ben Felix, The Plain Bagel, Two Cents, Graham Stephan
425
+ - Make sure videos are public and not age-restricted
426
  """)
427
 
428
+ # Add test button functionality
429
+ def test_and_show(url):
430
+ if not url:
431
+ return "Please enter a YouTube URL first", gr.update(visible=False)
432
+ result = test_video_access(url)
433
+ return result, gr.update(visible=True)
434
+
435
+ test_btn.click(
436
+ fn=test_and_show,
437
+ inputs=[url_input],
438
+ outputs=[test_result, test_result]
439
+ )
440
+
441
  with gr.Row():
442
  with gr.Column():
443
  transcript_output = gr.Textbox(