developer28 committed on
Commit
c963386
·
verified ·
1 Parent(s): 9a50f63

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +259 -230
app.py CHANGED
@@ -3,6 +3,44 @@ import tempfile
3
  import gradio as gr
4
  import re
5
  import sys
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
  # Try to import required packages with error handling
8
  try:
@@ -12,30 +50,46 @@ except ImportError as e:
12
  YT_DLP_AVAILABLE = False
13
  print(f"yt-dlp import error: {e}")
14
 
 
 
 
 
15
  try:
16
  import whisper
17
  WHISPER_AVAILABLE = True
 
 
18
  except ImportError as e:
19
- WHISPER_AVAILABLE = False
20
- print(f"whisper import error: {e}")
 
 
 
 
 
 
21
 
22
  print(f"Python version: {sys.version}")
 
23
  print(f"yt-dlp available: {YT_DLP_AVAILABLE}")
24
- print(f"whisper available: {WHISPER_AVAILABLE}")
25
 
26
- def get_cookies_path():
27
- """Get the path to cookies.txt file"""
28
- # Check if cookies.txt exists in the current directory
29
- if os.path.exists('cookies.txt'):
30
- return 'cookies.txt'
31
- # Check in the same directory as the script
32
- script_dir = os.path.dirname(os.path.abspath(__file__))
33
- cookies_path = os.path.join(script_dir, 'cookies.txt')
34
- if os.path.exists(cookies_path):
35
- return cookies_path
36
- return None
 
 
 
37
 
38
- def download_audio(url):
39
  """Download audio from YouTube URL and return the file path"""
40
  if not YT_DLP_AVAILABLE:
41
  raise Exception("yt-dlp is not available. Please check the installation.")
@@ -45,150 +99,82 @@ def download_audio(url):
45
  temp_dir = tempfile.mkdtemp()
46
  output_path = os.path.join(temp_dir, "audio")
47
 
48
- # Get cookies path
49
- cookies_path = get_cookies_path()
50
-
51
- # Base yt-dlp options
52
  ydl_opts = {
53
  'format': 'bestaudio[ext=m4a]/bestaudio/best',
54
  'outtmpl': output_path + '.%(ext)s',
55
  'quiet': True,
56
  'no_warnings': True,
57
- 'extract_flat': False,
58
- 'ignoreerrors': False,
59
- # Add user agent to avoid bot detection
60
- 'http_headers': {
61
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
62
- },
63
- # Add additional options to avoid bot detection
64
  'extractor_retries': 3,
65
  'fragment_retries': 3,
66
- 'retry_sleep_functions': {'http': lambda n: min(2 ** n, 30)},
67
- # Add geo bypass options
68
- 'geo_bypass': True,
69
- 'geo_bypass_country': 'US',
70
- # Add more headers
71
- 'http_headers': {
72
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
73
- 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
74
- 'Accept-Language': 'en-us,en;q=0.5',
75
- 'Accept-Encoding': 'gzip,deflate',
76
- 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
77
- 'Connection': 'keep-alive',
78
- 'Upgrade-Insecure-Requests': '1',
79
- }
80
  }
81
 
82
- # Add cookies if available
83
- if cookies_path:
84
- ydl_opts['cookiefile'] = cookies_path
85
- print(f"Using cookies from: {cookies_path}")
86
  else:
87
- print("No cookies.txt found - proceeding without cookies")
 
 
 
 
 
 
 
 
 
 
 
 
88
 
89
  with YoutubeDL(ydl_opts) as ydl:
90
- try:
91
- # Extract info first to check if video is available
92
- info_dict = ydl.extract_info(url, download=False)
93
-
94
- # Check if video is available
95
- if info_dict.get('availability') == 'private':
96
- raise Exception("Video is private and cannot be accessed")
97
- elif info_dict.get('availability') == 'premium_only':
98
- raise Exception("Video requires premium subscription")
99
- elif info_dict.get('availability') == 'subscriber_only':
100
- raise Exception("Video is only available to channel subscribers")
101
- elif info_dict.get('availability') == 'needs_auth':
102
- raise Exception("Video requires authentication - try updating cookies")
103
- elif info_dict.get('live_status') == 'is_live':
104
- raise Exception("Cannot download live streams")
105
- elif info_dict.get('live_status') == 'was_live':
106
- print("Note: This was a live stream, trying to download recorded version...")
107
-
108
- # Download the audio
109
- ydl.download([url])
110
-
111
- except Exception as extract_error:
112
- # If extract_info fails, try direct download as fallback
113
- print(f"Info extraction failed: {extract_error}")
114
- print("Attempting direct download...")
115
- ydl.download([url])
116
 
117
  # Find the downloaded file
118
  for ext in ['.m4a', '.webm', '.mp4', '.mp3']:
119
  potential_file = output_path + ext
120
  if os.path.exists(potential_file):
121
- print(f"Successfully downloaded: {potential_file}")
122
  return potential_file
123
 
124
  raise FileNotFoundError(f"Downloaded audio file not found")
125
 
126
  except Exception as e:
127
- error_msg = str(e)
128
- if "Sign in to confirm your age" in error_msg:
129
- raise Exception("❌ Video is age-restricted. Please use a different video or update your cookies with an authenticated session.")
130
- elif "Private video" in error_msg:
131
- raise Exception("❌ Video is private and cannot be accessed.")
132
- elif "This video is unavailable" in error_msg or "Video unavailable" in error_msg:
133
- raise Exception("❌ Video is unavailable. This could be due to:\nβ€’ Geographic restrictions\nβ€’ Content removed by uploader\nβ€’ Copyright issues\nβ€’ Try a different video")
134
- elif "This content isn't available" in error_msg:
135
- raise Exception("❌ Content not available in your region or has been restricted. Try:\nβ€’ Using a VPN\nβ€’ Different video\nβ€’ Updating cookies")
136
- elif "blocked" in error_msg.lower():
137
- raise Exception("❌ Access blocked. Try using updated cookies or a different video.")
138
- elif "HTTP Error 403" in error_msg:
139
- raise Exception("❌ Access forbidden. Video may be region-locked or require authentication.")
140
- elif "HTTP Error 404" in error_msg:
141
- raise Exception("❌ Video not found. It may have been deleted or the URL is incorrect.")
142
  else:
143
- raise Exception(f"❌ Download failed: {error_msg}")
144
-
145
- def test_video_access(url):
146
- """Test if a video is accessible without downloading"""
147
- try:
148
- cookies_path = get_cookies_path()
149
- ydl_opts = {
150
- 'quiet': True,
151
- 'no_warnings': True,
152
- 'extract_flat': False,
153
- 'skip_download': True,
154
- }
155
-
156
- if cookies_path:
157
- ydl_opts['cookiefile'] = cookies_path
158
-
159
- with YoutubeDL(ydl_opts) as ydl:
160
- info_dict = ydl.extract_info(url, download=False)
161
-
162
- status = "βœ… Video accessible"
163
- details = []
164
-
165
- if info_dict.get('title'):
166
- details.append(f"Title: {info_dict['title'][:50]}...")
167
- if info_dict.get('duration'):
168
- details.append(f"Duration: {info_dict['duration']} seconds")
169
- if info_dict.get('availability'):
170
- details.append(f"Availability: {info_dict['availability']}")
171
- if info_dict.get('age_limit'):
172
- details.append(f"Age limit: {info_dict['age_limit']}+")
173
-
174
- return status + "\n" + "\n".join(details)
175
-
176
- except Exception as e:
177
- return f"❌ Video access test failed: {str(e)}"
178
 
179
  def transcribe_audio(file_path):
180
  """Transcribe audio file using Whisper"""
181
  if not WHISPER_AVAILABLE:
182
- raise Exception("OpenAI Whisper is not available. Please check the installation.")
183
 
184
  try:
185
- # Use the smallest model to reduce memory usage
186
- model = whisper.load_model("tiny")
187
- result = model.transcribe(file_path)
188
- return result["text"]
 
 
 
 
 
 
 
 
 
 
 
 
189
  except Exception as e:
190
  raise Exception(f"Failed to transcribe audio: {str(e)}")
191
 
 
192
  def extract_stock_info_simple(text):
193
  """Extract stock information using simple pattern matching"""
194
  try:
@@ -261,95 +247,76 @@ def cleanup_file(file_path):
261
  except:
262
  pass
263
 
264
- def system_test():
265
- """Test system components"""
266
- results = []
267
-
268
- # Test yt-dlp
269
- if YT_DLP_AVAILABLE:
270
- results.append("βœ… yt-dlp: Available")
271
- try:
272
- ydl = YoutubeDL({'quiet': True})
273
- results.append("βœ… yt-dlp: Can create YoutubeDL instance")
274
- except Exception as e:
275
- results.append(f"❌ yt-dlp: Cannot create instance - {e}")
276
- else:
277
- results.append("❌ yt-dlp: Not available")
278
 
279
- # Test Whisper
280
- if WHISPER_AVAILABLE:
281
- results.append("βœ… Whisper: Available (Type: openai-whisper)")
282
- try:
283
- import whisper
284
- results.append("βœ… Whisper: OpenAI Whisper can be imported")
285
- except Exception as e:
286
- results.append(f"❌ Whisper: Cannot import - {e}")
287
- else:
288
- results.append("❌ Whisper: Not available")
289
-
290
- # Test file operations
291
  try:
292
- temp_file = tempfile.NamedTemporaryFile(delete=False)
293
- temp_file.write(b"test")
294
- temp_file.close()
295
- os.remove(temp_file.name)
296
- results.append("βœ… File operations: Working")
 
 
297
  except Exception as e:
298
- results.append(f"❌ File operations: Failed - {e}")
299
-
300
- # Test cookies
301
- cookies_path = get_cookies_path()
302
- if cookies_path:
303
- results.append(f"βœ… Cookies: Found at {cookies_path}")
304
- else:
305
- results.append("⚠️ Cookies: Not found (may cause bot detection issues)")
306
-
307
- return "\n".join(results)
308
 
309
- def process_video(url, progress=gr.Progress()):
310
  """Main function to process YouTube video"""
311
 
312
  # Check if required packages are available
313
  if not YT_DLP_AVAILABLE:
314
- return "Error: yt-dlp is not installed properly. Please check the requirements.", ""
315
 
316
  if not WHISPER_AVAILABLE:
317
- return "Error: OpenAI Whisper is not installed properly. Please check the requirements.", ""
318
 
319
  if not url or not url.strip():
320
- return "Please provide a valid YouTube URL", ""
321
 
322
  audio_path = None
 
 
323
  try:
324
  # Validate URL
325
  if not any(domain in url.lower() for domain in ['youtube.com', 'youtu.be']):
326
- return "Please provide a valid YouTube URL", ""
 
 
 
 
 
 
327
 
328
  # Download audio
329
- progress(0.1, desc="Downloading audio...")
330
- audio_path = download_audio(url)
331
 
332
  # Transcribe audio
333
- progress(0.5, desc="Transcribing audio...")
334
  transcript = transcribe_audio(audio_path)
335
 
336
  if not transcript.strip():
337
- return "No speech detected in the video", ""
338
 
339
  # Extract stock information
340
- progress(0.8, desc="Extracting stock information...")
341
  stock_details = extract_stock_info_simple(transcript)
342
 
343
  progress(1.0, desc="Complete!")
344
- return transcript, stock_details
345
 
346
  except Exception as e:
347
  error_msg = f"Error processing video: {str(e)}"
348
- return error_msg, ""
349
 
350
  finally:
351
  # Clean up temporary files
352
  cleanup_file(audio_path)
 
353
 
354
  # Create Gradio interface
355
  with gr.Blocks(
@@ -357,9 +324,14 @@ with gr.Blocks(
357
  theme=gr.themes.Soft(),
358
  css="""
359
  .gradio-container {
360
- max-width: 1200px;
361
  margin: auto;
362
  }
 
 
 
 
 
363
  """
364
  ) as demo:
365
 
@@ -369,75 +341,81 @@ with gr.Blocks(
369
  Extract stock recommendations and trading information from YouTube videos using AI transcription.
370
 
371
  **How it works:**
372
- 1. Downloads audio from YouTube video
373
- 2. Transcribes using OpenAI Whisper
374
- 3. Extracts stock-related information
 
 
375
 
376
  **⚠️ Disclaimer:** This is for educational purposes only. Always do your own research!
377
  """)
378
 
379
- # Add system test section
380
- with gr.Accordion("πŸ§ͺ System Status", open=False):
381
- system_status = gr.Textbox(
382
- value=system_test(),
383
- label="System Test Results",
384
- lines=10,
385
- interactive=False
386
- )
387
- test_btn = gr.Button("πŸ”„ Re-run System Test")
388
- test_btn.click(fn=system_test, outputs=system_status)
389
-
390
  with gr.Row():
391
  with gr.Column(scale=1):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
392
  url_input = gr.Textbox(
393
  label="πŸ“Ί YouTube URL",
394
  placeholder="https://www.youtube.com/watch?v=...",
395
  lines=2
396
  )
397
 
398
- with gr.Row():
399
- process_btn = gr.Button(
400
- "πŸš€ Extract Stock Information",
401
- variant="primary",
402
- size="lg"
403
- )
404
- test_btn = gr.Button(
405
- "πŸ” Test Video Access",
406
- variant="secondary"
407
- )
408
 
409
- test_result = gr.Textbox(
410
- label="πŸ“‹ Video Access Test",
411
- lines=4,
412
- visible=False
 
413
  )
414
 
415
  gr.Markdown("""
416
  ### πŸ’‘ Tips:
417
- - **First try "Test Video Access"** to check if video is available
418
  - Works best with financial YouTube channels
419
  - Ensure video has clear audio
420
  - English content works best
421
- - If you get bot detection errors, try updating cookies.txt
422
-
423
- ### 🎯 Recommended Financial Channels:
424
- - Ben Felix, The Plain Bagel, Two Cents, Graham Stephan
425
- - Make sure videos are public and not age-restricted
426
  """)
427
 
428
- # Add test button functionality
429
- def test_and_show(url):
430
- if not url:
431
- return "Please enter a YouTube URL first", gr.update(visible=False)
432
- result = test_video_access(url)
433
- return result, gr.update(visible=True)
434
-
435
- test_btn.click(
436
- fn=test_and_show,
437
- inputs=[url_input],
438
- outputs=[test_result, test_result]
439
- )
440
-
441
  with gr.Row():
442
  with gr.Column():
443
  transcript_output = gr.Textbox(
@@ -456,10 +434,19 @@ with gr.Blocks(
456
  )
457
 
458
  # Event handlers
 
 
 
 
 
 
 
 
 
459
  process_btn.click(
460
  fn=process_video,
461
- inputs=[url_input],
462
- outputs=[transcript_output, stock_info_output],
463
  show_progress=True
464
  )
465
 
@@ -472,6 +459,48 @@ with gr.Blocks(
472
  inputs=[url_input],
473
  label="Click to try example"
474
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
475
 
476
  if __name__ == "__main__":
477
  demo.launch()
 
3
  import gradio as gr
4
  import re
5
  import sys
6
+ import shutil
7
+ import importlib.util
8
+
9
def check_requirements(packages=None):
    """Report which runtime dependencies can be imported.

    Args:
        packages: Optional iterable of ``(display_name, import_name)`` pairs
            to probe. When omitted, the packages this app depends on are
            checked.

    Returns:
        A newline-joined report with one line per package (its
        ``__version__`` when importable, "Not found", or the error raised
        while importing it), followed by the Python version and the path of
        the running interpreter.
    """
    if packages is None:
        packages = [
            ('gradio', 'gradio'),
            ('yt-dlp', 'yt_dlp'),
            ('openai-whisper', 'whisper'),
            ('torch', 'torch'),
            ('torchaudio', 'torchaudio'),
            ('numpy', 'numpy'),
            ('regex', 'regex'),
        ]

    requirements_status = []

    for package_name, import_name in packages:
        try:
            # Check that the module can be located before importing it, so a
            # missing package is reported as "Not found" rather than as an
            # import error.
            if importlib.util.find_spec(import_name) is None:
                requirements_status.append(f"❌ {package_name}: Not found")
                continue

            module = importlib.import_module(import_name)
            version = getattr(module, '__version__', 'Unknown version')
            requirements_status.append(f"✅ {package_name}: {version}")

        except ImportError as e:
            requirements_status.append(f"❌ {package_name}: Import error - {str(e)}")
        except Exception as e:
            # The package exists but failed while being imported or inspected.
            requirements_status.append(f"⚠️ {package_name}: Found but error - {str(e)}")

    # Add Python info
    requirements_status.append(f"\n🐍 Python: {sys.version}")
    requirements_status.append(f"📍 Python executable: {sys.executable}")

    return "\n".join(requirements_status)
44
 
45
  # Try to import required packages with error handling
46
  try:
 
50
  YT_DLP_AVAILABLE = False
51
  print(f"yt-dlp import error: {e}")
52
 
53
# Probe for a usable Whisper backend: prefer the openai-whisper package and
# fall back to the Hugging Face transformers pipeline when it is missing.
WHISPER_AVAILABLE = False
WHISPER_TYPE = None

try:
    import whisper
    WHISPER_AVAILABLE = True
    WHISPER_TYPE = "openai-whisper"
    print("Using OpenAI Whisper")
except ImportError as e:
    print(f"OpenAI Whisper import error: {e}")
    try:
        from transformers import pipeline
        WHISPER_AVAILABLE = True
        WHISPER_TYPE = "transformers"
        print("Using Transformers Whisper")
    except ImportError as e2:
        print(f"Transformers Whisper import error: {e2}")

print(f"Python version: {sys.version}")
print(f"Python executable: {sys.executable}")
print(f"yt-dlp available: {YT_DLP_AVAILABLE}")
print(f"whisper available: {WHISPER_AVAILABLE} (type: {WHISPER_TYPE})")

# Additional diagnostics: report the installed versions. These are best-effort
# and must never stop the app from starting, so failures are only logged.
if YT_DLP_AVAILABLE:
    try:
        # Read the version string directly; no YoutubeDL instance is needed.
        from yt_dlp.version import __version__ as _yt_dlp_version
        print(f"yt-dlp version: {_yt_dlp_version}")
    except (ImportError, AttributeError) as e:
        print(f"yt-dlp version unavailable: {e}")

if WHISPER_AVAILABLE and WHISPER_TYPE == "openai-whisper":
    # `whisper` is already bound by the successful import above.
    print(f"whisper version: {getattr(whisper, '__version__', 'unknown')}")
91
 
92
def download_audio(url, cookies_file_path=None):
    """Download audio from YouTube URL and return the file path.

    Args:
        url: Video URL handed to yt-dlp.
        cookies_file_path: Optional path to a cookies.txt file. When it
            exists it is passed to yt-dlp as ``cookiefile``; otherwise extra
            browser-like request headers are sent instead.

    Returns:
        Path of the downloaded audio file, located in a freshly created
        temporary directory.

    Raises:
        Exception: If yt-dlp is unavailable, or if the download fails. A
            failure whose message mentions "403"/"Forbidden" is reported with
            a hint to upload cookies. The original error is chained as the
            cause.
    """
    if not YT_DLP_AVAILABLE:
        raise Exception("yt-dlp is not available. Please check the installation.")

    temp_dir = None
    try:
        temp_dir = tempfile.mkdtemp()
        output_path = os.path.join(temp_dir, "audio")

        # Anti-bot detection measures. The YoutubeDL Python API takes request
        # headers through the 'http_headers' option, so the user agent and
        # referer are supplied there.
        http_headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
            'Referer': 'https://www.youtube.com/',
        }

        ydl_opts = {
            'format': 'bestaudio[ext=m4a]/bestaudio/best',
            'outtmpl': output_path + '.%(ext)s',
            'quiet': True,
            'no_warnings': True,
            'http_headers': http_headers,
            'extractor_retries': 3,
            'fragment_retries': 3,
            # Exponential back-off between HTTP retries.
            'retry_sleep_functions': {'http': lambda n: 2 ** n},
        }

        # Add cookies file if provided
        if cookies_file_path and os.path.exists(cookies_file_path):
            ydl_opts['cookiefile'] = cookies_file_path
            print(f"Using cookies file: {cookies_file_path}")
        else:
            print("No cookies file provided - may encounter bot detection")
            # Additional headers without cookies
            http_headers.update({
                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
                'Accept-Language': 'en-us,en;q=0.5',
                'Accept-Encoding': 'gzip,deflate',
                'DNT': '1',
                'Connection': 'keep-alive',
                'Upgrade-Insecure-Requests': '1',
            })

        with YoutubeDL(ydl_opts) as ydl:
            info_dict = ydl.extract_info(url, download=True)
            filename = ydl.prepare_filename(info_dict)

        # Prefer the name yt-dlp derived from the output template; it is
        # correct even when the chosen format has an extension that is not
        # in the fallback list below.
        if filename and os.path.exists(filename):
            return filename

        # Find the downloaded file
        for ext in ['.m4a', '.webm', '.mp4', '.mp3']:
            potential_file = output_path + ext
            if os.path.exists(potential_file):
                return potential_file

        raise FileNotFoundError("Downloaded audio file not found")

    except Exception as e:
        # Nothing usable was produced, so do not leave the temp dir behind.
        if temp_dir is not None:
            shutil.rmtree(temp_dir, ignore_errors=True)
        if "403" in str(e) or "Forbidden" in str(e):
            raise Exception(f"YouTube blocked the request (403 Forbidden). Please upload your cookies.txt file to bypass bot detection. Original error: {str(e)}") from e
        raise Exception(f"Failed to download audio: {str(e)}") from e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
 
152
def transcribe_audio(file_path):
    """Transcribe audio file using Whisper.

    Uses the backend recorded in ``WHISPER_TYPE``: the openai-whisper
    package, or the Hugging Face transformers speech-recognition pipeline.

    Args:
        file_path: Path of the audio file to transcribe.

    Returns:
        The transcribed text.

    Raises:
        Exception: If no Whisper backend is available or transcription
            fails; the underlying error is chained as the cause.
    """
    if not WHISPER_AVAILABLE:
        raise Exception("OpenAI Whisper is not available. Please install it using: pip install openai-whisper")

    try:
        if WHISPER_TYPE == "openai-whisper":
            # Use OpenAI Whisper ("tiny" is the smallest model).
            model = whisper.load_model("tiny")
            result = model.transcribe(file_path)
            return result["text"]

        if WHISPER_TYPE == "transformers":
            # Use Transformers Whisper. Whisper works on 30-second windows,
            # so chunking is enabled to transcribe longer recordings.
            from transformers import pipeline
            transcriber = pipeline(
                "automatic-speech-recognition",
                model="openai/whisper-tiny",
                chunk_length_s=30,
            )
            result = transcriber(file_path)
            return result["text"]

        raise Exception("No compatible Whisper installation found")

    except Exception as e:
        raise Exception(f"Failed to transcribe audio: {str(e)}") from e
176
 
177
+
178
  def extract_stock_info_simple(text):
179
  """Extract stock information using simple pattern matching"""
180
  try:
 
247
  except:
248
  pass
249
 
250
def process_cookies_file(cookies_file):
    """Copy an uploaded cookies file to a private temporary file.

    Args:
        cookies_file: The uploaded file, given either as a filesystem path
            (``str`` / ``os.PathLike``) or as an object whose ``name``
            attribute holds the path; ``None`` when nothing was uploaded.

    Returns:
        Path of the temporary ``.txt`` copy, or ``None`` when no file was
        supplied or the copy failed (the error is printed).
    """
    if cookies_file is None:
        return None

    if isinstance(cookies_file, (str, os.PathLike)):
        source_path = cookies_file
    else:
        source_path = getattr(cookies_file, "name", cookies_file)

    temp_cookies_path = None
    try:
        # mkstemp creates the file itself with owner-only permissions,
        # whereas mktemp only returns a name another process could claim.
        fd, temp_cookies_path = tempfile.mkstemp(suffix='.txt')
        os.close(fd)

        # copyfile copies contents only, so the restrictive permissions set
        # by mkstemp stay in place for the cookie data.
        shutil.copyfile(source_path, temp_cookies_path)

        return temp_cookies_path
    except Exception as e:
        print(f"Error processing cookies file: {e}")
        # Do not leave an empty temp file behind when the copy failed.
        if temp_cookies_path is not None:
            try:
                os.remove(temp_cookies_path)
            except OSError:
                pass
        return None
 
 
 
 
 
 
 
 
266
 
267
def process_video(url, cookies_file, progress=gr.Progress()):
    """Main function to process YouTube video.

    Downloads the video's audio, transcribes it and extracts stock
    information from the transcript.

    Args:
        url: YouTube video URL entered by the user.
        cookies_file: Uploaded cookies file (or None) forwarded to
            ``process_cookies_file``.
        progress: Gradio progress tracker used to report each stage.

    Returns:
        A ``(transcript, stock_details, status)`` tuple of strings. On
        failure the first element carries the error message, the second is
        empty and the third is a short error status.
    """

    # Check if required packages are available
    if not YT_DLP_AVAILABLE:
        return "Error: yt-dlp is not installed properly. Please install it using: pip install yt-dlp", "", "❌ Error: Missing yt-dlp"

    if not WHISPER_AVAILABLE:
        return "Error: OpenAI Whisper is not installed properly. Please install it using: pip install openai-whisper", "", "❌ Error: Missing Whisper"

    if not url or not url.strip():
        return "Please provide a valid YouTube URL", "", "❌ Error: Invalid URL"

    audio_path = None
    cookies_temp_path = None

    try:
        # Validate URL
        if not any(domain in url.lower() for domain in ['youtube.com', 'youtu.be']):
            return "Please provide a valid YouTube URL", "", "❌ Error: Invalid URL"

        # Process cookies file if provided
        progress(0.05, desc="Processing cookies...")
        cookies_temp_path = process_cookies_file(cookies_file)

        # Download audio
        progress(0.2, desc="Downloading audio...")
        audio_path = download_audio(url, cookies_temp_path)

        # Transcribe audio
        progress(0.6, desc="Transcribing audio...")
        transcript = transcribe_audio(audio_path)

        if not transcript.strip():
            return "No speech detected in the video", "", "❌ No speech detected"

        # Extract stock information
        progress(0.9, desc="Extracting stock information...")
        stock_details = extract_stock_info_simple(transcript)

        progress(1.0, desc="Complete!")
        return transcript, stock_details, "✅ Processing completed successfully"

    except Exception as e:
        error_msg = f"Error processing video: {str(e)}"
        return error_msg, "", f"❌ Error: {str(e)}"

    finally:
        # Clean up temporary files; this runs on every exit path, including
        # the early returns inside the try block.
        cleanup_file(audio_path)
        cleanup_file(cookies_temp_path)
320
 
321
  # Create Gradio interface
322
  with gr.Blocks(
 
324
  theme=gr.themes.Soft(),
325
  css="""
326
  .gradio-container {
327
+ max-width: 1400px;
328
  margin: auto;
329
  }
330
+ .status-box {
331
+ padding: 10px;
332
+ border-radius: 5px;
333
+ margin: 10px 0;
334
+ }
335
  """
336
  ) as demo:
337
 
 
341
  Extract stock recommendations and trading information from YouTube videos using AI transcription.
342
 
343
  **How it works:**
344
+ 1. Upload your cookies.txt file (optional but recommended to avoid bot detection)
345
+ 2. Paste YouTube video URL
346
+ 3. Downloads audio from YouTube video
347
+ 4. Transcribes using OpenAI Whisper
348
+ 5. Extracts stock-related information
349
 
350
  **⚠️ Disclaimer:** This is for educational purposes only. Always do your own research!
351
  """)
352
 
 
 
 
 
 
 
 
 
 
 
 
353
  with gr.Row():
354
  with gr.Column(scale=1):
355
+ # Requirements check button
356
+ gr.Markdown("### πŸ” System Check")
357
+ check_req_btn = gr.Button(
358
+ "Check Requirements",
359
+ variant="secondary",
360
+ size="sm"
361
+ )
362
+
363
+ requirements_output = gr.Textbox(
364
+ label="πŸ“‹ Requirements Status",
365
+ lines=10,
366
+ interactive=False,
367
+ visible=False
368
+ )
369
+
370
+ # Cookies file upload
371
+ cookies_input = gr.File(
372
+ label="πŸͺ Upload Cookies File (cookies.txt)",
373
+ file_types=[".txt"],
374
+ file_count="single"
375
+ )
376
+
377
+ gr.Markdown("""
378
+ **How to get cookies.txt to fix 403 Forbidden errors:**
379
+ 1. Install browser extension: "Get cookies.txt LOCALLY"
380
+ 2. Visit YouTube in your browser (while logged in)
381
+ 3. Click the extension icon and export cookies for youtube.com
382
+ 4. Upload the downloaded cookies.txt file here
383
+
384
+ **Alternative extensions:**
385
+ - "cookies.txt" (Chrome/Firefox)
386
+ - "Export Cookies" (Chrome)
387
+
388
+ ⚠️ **Important**: Without cookies, you'll likely get 403 Forbidden errors
389
+ """)
390
+
391
  url_input = gr.Textbox(
392
  label="πŸ“Ί YouTube URL",
393
  placeholder="https://www.youtube.com/watch?v=...",
394
  lines=2
395
  )
396
 
397
+ process_btn = gr.Button(
398
+ "πŸš€ Extract Stock Information",
399
+ variant="primary",
400
+ size="lg"
401
+ )
 
 
 
 
 
402
 
403
+ # Status display
404
+ status_output = gr.Textbox(
405
+ label="πŸ“Š Status",
406
+ lines=1,
407
+ interactive=False
408
  )
409
 
410
  gr.Markdown("""
411
  ### πŸ’‘ Tips:
412
+ - **MUST upload cookies.txt** to avoid 403 Forbidden errors
413
  - Works best with financial YouTube channels
414
  - Ensure video has clear audio
415
  - English content works best
416
+ - Try shorter videos first (under 10 minutes)
 
 
 
 
417
  """)
418
 
 
 
 
 
 
 
 
 
 
 
 
 
 
419
  with gr.Row():
420
  with gr.Column():
421
  transcript_output = gr.Textbox(
 
434
  )
435
 
436
  # Event handlers
437
+ def show_requirements():
438
+ status = check_requirements()
439
+ return gr.update(value=status, visible=True)
440
+
441
+ check_req_btn.click(
442
+ fn=show_requirements,
443
+ outputs=[requirements_output]
444
+ )
445
+
446
  process_btn.click(
447
  fn=process_video,
448
+ inputs=[url_input, cookies_input],
449
+ outputs=[transcript_output, stock_info_output, status_output],
450
  show_progress=True
451
  )
452
 
 
459
  inputs=[url_input],
460
  label="Click to try example"
461
  )
462
+
463
+ gr.Markdown("""
464
+ ### πŸ”§ Installation & Troubleshooting:
465
+
466
+ **Step 1: Click "Check Requirements" button above to see what's missing**
467
+
468
+ **If you get "Whisper Missing" error:**
469
+ ```bash
470
+ pip install openai-whisper
471
+ ```
472
+
473
+ **If you get "yt-dlp Missing" error:**
474
+ ```bash
475
+ pip install yt-dlp
476
+ ```
477
+
478
+ **Install all requirements at once:**
479
+ ```bash
480
+ pip install gradio==4.44.0 yt-dlp==2023.12.30 openai-whisper==20231117 torch==2.1.0 torchaudio==2.1.0 numpy==1.24.3 regex==2023.8.8
481
+ ```
482
+
483
+ **Alternative Whisper installation:**
484
+ ```bash
485
+ pip install transformers torch torchaudio
486
+ ```
487
+
488
+ **If using virtual environment:**
489
+ ```bash
490
+ # Create and activate virtual environment first
491
+ python -m venv myenv
492
+ # Windows: myenv\\Scripts\\activate
493
+ # Mac/Linux: source myenv/bin/activate
494
+ # Then install packages
495
+ pip install -r requirements.txt
496
+ ```
497
+
498
+ **Other Issues:**
499
+ - **Bot Detection Error**: Upload your cookies.txt file
500
+ - **No Audio Found**: Check if video has audio track
501
+ - **Transcription Failed**: Video might be too long or audio quality poor
502
+ - **No Stock Info**: Video might not contain financial content
503
+ """)
504
 
505
  if __name__ == "__main__":
506
  demo.launch()