developer28 committed on
Commit
3a866c5
·
verified ·
1 Parent(s): 381a02c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +434 -237
app.py CHANGED
@@ -5,6 +5,8 @@ import re
5
  import sys
6
  import shutil
7
  import importlib.util
 
 
8
 
9
  def check_requirements():
10
  """Check if all required packages are installed and return status"""
@@ -74,79 +76,172 @@ print(f"Python executable: {sys.executable}")
74
  print(f"yt-dlp available: {YT_DLP_AVAILABLE}")
75
  print(f"whisper available: {WHISPER_AVAILABLE} (type: {WHISPER_TYPE})")
76
 
77
- # Additional diagnostics
78
- if YT_DLP_AVAILABLE:
79
- try:
80
- from yt_dlp import YoutubeDL
81
- print(f"yt-dlp version: {YoutubeDL().__class__.__module__}")
82
- except:
83
- pass
84
-
85
- if WHISPER_AVAILABLE and WHISPER_TYPE == "openai-whisper":
86
- try:
87
- import whisper
88
- print(f"whisper version: {whisper.__version__}")
89
- except:
90
- pass
 
 
 
 
 
 
 
 
 
 
 
 
91
 
92
  def download_audio(url, cookies_file_path=None):
93
- """Download audio from YouTube URL and return the file path"""
94
  if not YT_DLP_AVAILABLE:
95
  raise Exception("yt-dlp is not available. Please check the installation.")
96
 
97
  try:
 
 
 
 
 
 
 
 
 
98
  # Create a temporary directory for downloads
99
  temp_dir = tempfile.mkdtemp()
100
  output_path = os.path.join(temp_dir, "audio")
101
 
102
- # Basic options
103
  ydl_opts = {
104
- 'format': 'bestaudio[ext=m4a]/bestaudio/best',
105
  'outtmpl': output_path + '.%(ext)s',
106
- 'quiet': True,
107
- 'no_warnings': True,
108
- 'extractor_retries': 3,
109
- 'fragment_retries': 3,
110
- 'retry_sleep_functions': {'http': lambda n: 2 ** n},
 
 
 
 
 
 
 
 
 
111
  }
112
 
113
- # If cookies are provided, use them
114
  if cookies_file_path and os.path.exists(cookies_file_path):
115
  ydl_opts['cookiefile'] = cookies_file_path
116
  print(f"βœ… Using cookies file: {cookies_file_path}")
117
  else:
118
- print("⚠️ No cookies file provided - falling back to headers (may trigger bot detection)")
119
- # Only add headers if cookies are not used
120
- ydl_opts.update({
121
- 'user_agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
122
- 'referer': 'https://www.youtube.com/',
123
- 'headers': {
124
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
125
- 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
126
- 'Accept-Language': 'en-us,en;q=0.5',
127
- 'Accept-Encoding': 'gzip,deflate',
128
- 'DNT': '1',
129
- 'Connection': 'keep-alive',
130
- 'Upgrade-Insecure-Requests': '1',
131
- }
132
- })
133
-
 
 
 
 
 
 
 
 
 
 
 
134
  with YoutubeDL(ydl_opts) as ydl:
 
135
  info_dict = ydl.extract_info(url, download=True)
136
- filename = ydl.prepare_filename(info_dict)
137
 
138
  # Find the downloaded file
139
- for ext in ['.m4a', '.webm', '.mp4', '.mp3']:
140
  potential_file = output_path + ext
141
  if os.path.exists(potential_file):
142
  print(f"βœ… Audio downloaded: {potential_file}")
143
  return potential_file
144
 
 
 
145
  raise FileNotFoundError("Downloaded audio file not found")
146
 
147
  except Exception as e:
148
- if "403" in str(e) or "Forbidden" in str(e):
149
- raise Exception(f"YouTube blocked the request (403 Forbidden). Please upload your cookies.txt file to bypass bot detection. Original error: {str(e)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
  else:
151
  raise Exception(f"Failed to download audio: {str(e)}")
152
 
@@ -157,16 +252,27 @@ def transcribe_audio(file_path):
157
 
158
  try:
159
  if WHISPER_TYPE == "openai-whisper":
160
- # Use OpenAI Whisper
161
- model = whisper.load_model("tiny")
162
- result = model.transcribe(file_path)
 
 
 
 
 
 
 
163
  return result["text"]
164
 
165
  elif WHISPER_TYPE == "transformers":
166
  # Use Transformers Whisper
167
  from transformers import pipeline
168
- transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")
169
- result = transcriber(file_path)
 
 
 
 
170
  return result["text"]
171
 
172
  else:
@@ -175,65 +281,88 @@ def transcribe_audio(file_path):
175
  except Exception as e:
176
  raise Exception(f"Failed to transcribe audio: {str(e)}")
177
 
178
-
179
- def extract_stock_info_simple(text):
180
- """Extract stock information using simple pattern matching"""
181
  try:
182
  stock_info = []
183
 
184
- # Simple patterns to look for stock-related information
185
- stock_patterns = [
186
- r'\b[A-Z]{1,5}\b(?:\s+stock|\s+shares|\s+symbol)', # Stock symbols
187
- r'(?:buy|sell|target|price)\s+[A-Z]{1,5}',
188
- r'\$\d+(?:\.\d{2})?', # Dollar amounts
189
- r'\b(?:bullish|bearish|buy|sell|hold)\b',
190
- ]
191
-
192
- # Look for company names and stock mentions
193
- companies = re.findall(r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*(?:\s+(?:Inc|Corp|Company|Ltd)\.?)?', text)
194
- symbols = re.findall(r'\b[A-Z]{2,5}\b', text)
195
- prices = re.findall(r'\$\d+(?:\.\d{2})?', text)
196
- actions = re.findall(r'\b(?:buy|sell|hold|bullish|bearish|target|stop\s+loss)\b', text, re.IGNORECASE)
197
 
198
- # Format the extracted information
199
- result = "=== EXTRACTED STOCK INFORMATION ===\n\n"
 
 
 
 
 
200
 
201
- if companies:
202
- result += f"πŸ“Š Mentioned Companies: {', '.join(set(companies[:10]))}\n\n"
203
 
204
  if symbols:
205
- result += f"πŸ”€ Potential Stock Symbols: {', '.join(set(symbols[:10]))}\n\n"
 
 
 
 
 
206
 
207
  if prices:
208
- result += f"πŸ’° Price Mentions: {', '.join(set(prices[:10]))}\n\n"
 
 
 
209
 
210
  if actions:
211
- result += f"πŸ“ˆ Trading Actions: {', '.join(set(actions[:10]))}\n\n"
 
 
 
212
 
213
- # Look for specific recommendation patterns
 
214
  recommendations = []
215
- sentences = text.split('.')
216
  for sentence in sentences:
217
- if any(word in sentence.lower() for word in ['buy', 'sell', 'target', 'recommendation']):
218
- if any(symbol in sentence for symbol in symbols[:5]):
219
- recommendations.append(sentence.strip())
 
220
 
221
  if recommendations:
222
- result += "🎯 Potential Recommendations:\n"
223
- for rec in recommendations[:5]:
224
- result += f"β€’ {rec}\n"
 
 
 
 
 
 
 
 
 
 
 
 
225
 
226
- if not any([companies, symbols, prices, actions]):
227
- result += "⚠️ No clear stock recommendations found in the transcript.\n"
228
- result += "This might be because:\n"
229
- result += "β€’ The video doesn't contain stock recommendations\n"
230
- result += "β€’ The audio quality was poor\n"
231
- result += "β€’ The content is not in English\n"
232
 
233
  return result
234
 
235
  except Exception as e:
236
- return f"Error extracting stock info: {str(e)}"
237
 
238
  def cleanup_file(file_path):
239
  """Clean up temporary files"""
@@ -257,184 +386,288 @@ def process_cookies_file(cookies_file):
257
  # Create a temporary file for cookies
258
  temp_cookies_path = tempfile.mktemp(suffix='.txt')
259
 
260
- # Copy the uploaded file directly (gradio provides it as a file object)
261
  shutil.copy2(cookies_file, temp_cookies_path)
262
 
263
- print(f"βœ… Cookies file saved at: {temp_cookies_path}")
 
 
 
 
 
 
264
  return temp_cookies_path
265
  except Exception as e:
266
  print(f"❌ Error processing cookies file: {e}")
267
  return None
268
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
269
  def process_video(url, cookies_file, progress=gr.Progress()):
270
  """Main function to process YouTube video"""
271
 
272
  # Check if required packages are available
273
  if not YT_DLP_AVAILABLE:
274
- return "Error: yt-dlp is not installed properly. Please install it using: pip install yt-dlp", "", "❌ Error: Missing yt-dlp"
275
 
276
  if not WHISPER_AVAILABLE:
277
- return "Error: OpenAI Whisper is not installed properly. Please install it using: pip install openai-whisper", "", "❌ Error: Missing Whisper"
278
 
279
- if not url or not url.strip():
280
- return "Please provide a valid YouTube URL", "", "❌ Error: Invalid URL"
 
 
281
 
282
  audio_path = None
283
  cookies_temp_path = None
284
 
285
  try:
286
- # Validate URL
287
- if not any(domain in url.lower() for domain in ['youtube.com', 'youtu.be']):
288
- return "Please provide a valid YouTube URL", "", "❌ Error: Invalid URL"
289
 
290
  # Process cookies file if provided
291
- progress(0.05, desc="Processing cookies...")
292
  cookies_temp_path = process_cookies_file(cookies_file)
293
 
294
- status_msg = "βœ… Cookies loaded" if cookies_temp_path else "⚠️ No cookies (may encounter bot detection)"
295
 
296
  # Download audio
297
- progress(0.2, desc="Downloading audio...")
298
  audio_path = download_audio(url, cookies_temp_path)
299
 
300
  # Transcribe audio
301
- progress(0.6, desc="Transcribing audio...")
302
  transcript = transcribe_audio(audio_path)
303
 
304
  if not transcript.strip():
305
- return "No speech detected in the video", "", "❌ No speech detected"
306
 
307
  # Extract stock information
308
- progress(0.9, desc="Extracting stock information...")
309
- stock_details = extract_stock_info_simple(transcript)
310
 
311
- progress(1.0, desc="Complete!")
312
  return transcript, stock_details, "βœ… Processing completed successfully"
313
 
314
  except Exception as e:
315
- error_msg = f"Error processing video: {str(e)}"
316
- return error_msg, "", f"❌ Error: {str(e)}"
317
 
318
  finally:
319
  # Clean up temporary files
320
  cleanup_file(audio_path)
321
  cleanup_file(cookies_temp_path)
322
 
323
- # Create Gradio interface
324
  with gr.Blocks(
325
- title="Stock Recommendation Extractor",
326
  theme=gr.themes.Soft(),
327
  css="""
328
  .gradio-container {
329
- max-width: 1400px;
330
  margin: auto;
 
331
  }
332
  .status-box {
333
- padding: 10px;
334
- border-radius: 5px;
335
  margin: 10px 0;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
336
  }
337
  """
338
  ) as demo:
339
 
340
  gr.Markdown("""
341
- # πŸ“ˆ Stock Recommendation Extractor from YouTube
342
 
343
- Extract stock recommendations and trading information from YouTube videos using AI transcription.
344
 
345
- **How it works:**
346
- 1. Upload your cookies.txt file (optional but recommended to avoid bot detection)
347
- 2. Paste YouTube video URL
348
- 3. Downloads audio from YouTube video
349
- 4. Transcribes using OpenAI Whisper
350
- 5. Extracts stock-related information
351
 
352
- **⚠️ Disclaimer:** This is for educational purposes only. Always do your own research!
353
  """)
354
 
355
  with gr.Row():
356
  with gr.Column(scale=1):
357
- # Requirements check button
358
- gr.Markdown("### πŸ” System Check")
359
- check_req_btn = gr.Button(
360
- "Check Requirements",
361
- variant="secondary",
362
- size="sm"
363
- )
364
-
365
- requirements_output = gr.Textbox(
366
- label="πŸ“‹ Requirements Status",
367
- lines=10,
368
- interactive=False,
369
- visible=False
370
- )
371
-
372
- # Cookies file upload
373
- cookies_input = gr.File(
374
- label="πŸͺ Upload Cookies File (cookies.txt)",
375
- file_types=[".txt"],
376
- file_count="single"
377
- )
378
-
379
- gr.Markdown("""
380
- **How to get cookies.txt to fix 403 Forbidden errors:**
381
- 1. Install browser extension: "Get cookies.txt LOCALLY"
382
- 2. Visit YouTube in your browser (while logged in)
383
- 3. Click the extension icon and export cookies for youtube.com
384
- 4. Upload the downloaded cookies.txt file here
385
-
386
- **Alternative extensions:**
387
- - "cookies.txt" (Chrome/Firefox)
388
- - "Export Cookies" (Chrome)
389
-
390
- ⚠️ **Important**: Without cookies, you'll likely get 403 Forbidden errors
391
- """)
392
-
393
- url_input = gr.Textbox(
394
- label="πŸ“Ί YouTube URL",
395
- placeholder="https://www.youtube.com/watch?v=...",
396
- lines=2
397
- )
398
-
399
- process_btn = gr.Button(
400
- "πŸš€ Extract Stock Information",
401
- variant="primary",
402
- size="lg"
403
- )
404
-
405
- # Status display
406
- status_output = gr.Textbox(
407
- label="πŸ“Š Status",
408
- lines=1,
409
- interactive=False
410
- )
411
 
412
- gr.Markdown("""
413
- ### πŸ’‘ Tips:
414
- - **MUST upload cookies.txt** to avoid 403 Forbidden errors
415
- - Works best with financial YouTube channels
416
- - Ensure video has clear audio
417
- - English content works best
418
- - Try shorter videos first (under 10 minutes)
419
- """)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
420
 
 
421
  with gr.Row():
422
  with gr.Column():
423
  transcript_output = gr.Textbox(
424
  label="πŸ“ Full Transcript",
425
- lines=15,
426
- max_lines=20,
427
- show_copy_button=True
 
428
  )
429
 
430
  with gr.Column():
431
  stock_info_output = gr.Textbox(
432
  label="πŸ“Š Extracted Stock Information",
433
- lines=15,
434
- max_lines=20,
435
- show_copy_button=True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
436
  )
437
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
438
  # Event handlers
439
  def show_requirements():
440
  status = check_requirements()
@@ -452,57 +685,21 @@ with gr.Blocks(
452
  show_progress=True
453
  )
454
 
455
- # Example section
456
- gr.Markdown("### πŸ“‹ Example URLs (Replace with actual financial videos)")
457
- gr.Examples(
458
- examples=[
459
- ["https://www.youtube.com/watch?v=dQw4w9WgXcQ"],
460
- ],
461
- inputs=[url_input],
462
- label="Click to try example"
463
- )
464
-
465
  gr.Markdown("""
466
- ### πŸ”§ Installation & Troubleshooting:
467
-
468
- **Step 1: Click "Check Requirements" button above to see what's missing**
469
-
470
- **If you get "Whisper Missing" error:**
471
- ```bash
472
- pip install openai-whisper
473
- ```
474
-
475
- **If you get "yt-dlp Missing" error:**
476
- ```bash
477
- pip install yt-dlp
478
- ```
479
-
480
- **Install all requirements at once:**
481
- ```bash
482
- pip install gradio==4.44.0 yt-dlp==2023.12.30 openai-whisper==20231117 torch==2.1.0 torchaudio==2.1.0 numpy==1.24.3 regex==2023.8.8
483
- ```
484
-
485
- **Alternative Whisper installation:**
486
- ```bash
487
- pip install transformers torch torchaudio
488
- ```
489
-
490
- **If using virtual environment:**
491
- ```bash
492
- # Create and activate virtual environment first
493
- python -m venv myenv
494
- # Windows: myenv\\Scripts\\activate
495
- # Mac/Linux: source myenv/bin/activate
496
- # Then install packages
497
- pip install -r requirements.txt
498
- ```
499
-
500
- **Other Issues:**
501
- - **Bot Detection Error**: Upload your cookies.txt file
502
- - **No Audio Found**: Check if video has audio track
503
- - **Transcription Failed**: Video might be too long or audio quality poor
504
- - **No Stock Info**: Video might not contain financial content
505
  """)
506
 
 
507
  if __name__ == "__main__":
508
- demo.launch()
 
 
 
 
 
 
 
 
5
  import sys
6
  import shutil
7
  import importlib.util
8
+ import time
9
+ import random
10
 
11
  def check_requirements():
12
  """Check if all required packages are installed and return status"""
 
76
  print(f"yt-dlp available: {YT_DLP_AVAILABLE}")
77
  print(f"whisper available: {WHISPER_AVAILABLE} (type: {WHISPER_TYPE})")
78
 
79
def get_video_info(url, cookies_file_path=None):
    """Fetch basic metadata for a video without downloading any media.

    Args:
        url: Video URL handed to yt-dlp.
        cookies_file_path: Optional path to a cookies file; it is passed to
            yt-dlp only when the path exists on disk.

    Returns:
        A dict with ``title``, ``duration``, ``availability`` and
        ``live_status`` on success, or ``{'error': <message>}`` when the
        metadata extraction raises.

    Raises:
        Exception: If yt-dlp is not available.
    """
    if not YT_DLP_AVAILABLE:
        raise Exception("yt-dlp is not available.")

    probe_options = dict(
        quiet=True,
        no_warnings=True,
        extract_flat=False,
        skip_download=True,
    )

    has_cookies = bool(cookies_file_path) and os.path.exists(cookies_file_path)
    if has_cookies:
        probe_options['cookiefile'] = cookies_file_path

    # Reported fields paired with the value used when yt-dlp omits them.
    field_defaults = (
        ('title', 'Unknown'),
        ('duration', 0),
        ('availability', 'unknown'),
        ('live_status', 'unknown'),
    )

    with YoutubeDL(probe_options) as downloader:
        try:
            metadata = downloader.extract_info(url, download=False)
            return {
                field: metadata.get(field, fallback)
                for field, fallback in field_defaults
            }
        except Exception as exc:
            # Extraction problems are reported to the caller as data.
            return {'error': str(exc)}
 
106
  def download_audio(url, cookies_file_path=None):
107
+ """Download audio from YouTube URL with enhanced error handling"""
108
  if not YT_DLP_AVAILABLE:
109
  raise Exception("yt-dlp is not available. Please check the installation.")
110
 
111
  try:
112
+ # First, try to get video info
113
+ video_info = get_video_info(url, cookies_file_path)
114
+ if 'error' in video_info:
115
+ raise Exception(f"Video info error: {video_info['error']}")
116
+
117
+ print(f"Video title: {video_info.get('title', 'Unknown')}")
118
+ print(f"Video duration: {video_info.get('duration', 0)} seconds")
119
+ print(f"Video availability: {video_info.get('availability', 'unknown')}")
120
+
121
  # Create a temporary directory for downloads
122
  temp_dir = tempfile.mkdtemp()
123
  output_path = os.path.join(temp_dir, "audio")
124
 
125
+ # Enhanced options for better compatibility
126
  ydl_opts = {
127
+ 'format': 'bestaudio[ext=m4a]/bestaudio[ext=webm]/bestaudio[ext=mp4]/bestaudio/best',
128
  'outtmpl': output_path + '.%(ext)s',
129
+ 'quiet': False, # Enable logging for debugging
130
+ 'no_warnings': False,
131
+ 'extractor_retries': 5,
132
+ 'fragment_retries': 5,
133
+ 'retry_sleep_functions': {'http': lambda n: min(2 ** n, 60)},
134
+ 'socket_timeout': 30,
135
+ 'http_chunk_size': 10485760, # 10MB chunks
136
+ 'writeinfojson': False,
137
+ 'writesubtitles': False,
138
+ 'writeautomaticsub': False,
139
+ 'geo_bypass': True,
140
+ 'geo_bypass_country': 'US',
141
+ 'extract_flat': False,
142
+ 'ignoreerrors': False,
143
  }
144
 
145
+ # Enhanced cookies and headers handling
146
  if cookies_file_path and os.path.exists(cookies_file_path):
147
  ydl_opts['cookiefile'] = cookies_file_path
148
  print(f"βœ… Using cookies file: {cookies_file_path}")
149
  else:
150
+ print("⚠️ No cookies file - using enhanced headers")
151
+
152
+ # Always add enhanced headers
153
+ ydl_opts.update({
154
+ 'user_agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
155
+ 'referer': 'https://www.youtube.com/',
156
+ 'headers': {
157
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
158
+ 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
159
+ 'Accept-Language': 'en-US,en;q=0.9',
160
+ 'Accept-Encoding': 'gzip, deflate, br',
161
+ 'DNT': '1',
162
+ 'Connection': 'keep-alive',
163
+ 'Upgrade-Insecure-Requests': '1',
164
+ 'Sec-Fetch-Dest': 'document',
165
+ 'Sec-Fetch-Mode': 'navigate',
166
+ 'Sec-Fetch-Site': 'none',
167
+ 'Sec-Fetch-User': '?1',
168
+ 'sec-ch-ua': '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
169
+ 'sec-ch-ua-mobile': '?0',
170
+ 'sec-ch-ua-platform': '"Windows"',
171
+ }
172
+ })
173
+
174
+ # Add random delay to avoid rate limiting
175
+ time.sleep(random.uniform(1, 3))
176
+
177
  with YoutubeDL(ydl_opts) as ydl:
178
+ print(f"Attempting to download audio from: {url}")
179
  info_dict = ydl.extract_info(url, download=True)
 
180
 
181
  # Find the downloaded file
182
+ for ext in ['.m4a', '.webm', '.mp4', '.mp3', '.aac', '.opus']:
183
  potential_file = output_path + ext
184
  if os.path.exists(potential_file):
185
  print(f"βœ… Audio downloaded: {potential_file}")
186
  return potential_file
187
 
188
+ # If no file found, list directory contents for debugging
189
+ print(f"Files in temp directory: {os.listdir(temp_dir)}")
190
  raise FileNotFoundError("Downloaded audio file not found")
191
 
192
  except Exception as e:
193
+ error_msg = str(e).lower()
194
+
195
+ # Provide specific error messages and solutions
196
+ if "video unavailable" in error_msg or "content isn't available" in error_msg:
197
+ raise Exception(f"""
198
+ ❌ Video Access Error: The video is unavailable or restricted.
199
+
200
+ Possible reasons:
201
+ β€’ Video is private, unlisted, or deleted
202
+ β€’ Video is geo-blocked in your region
203
+ β€’ Video has age restrictions
204
+ β€’ Video requires sign-in to view
205
+ β€’ Copyright restrictions
206
+
207
+ Solutions to try:
208
+ 1. Verify the video URL is correct and accessible
209
+ 2. Try a different public video
210
+ 3. Check if the video works in your browser
211
+ 4. If using a playlist URL, try the direct video URL instead
212
+ 5. For age-restricted videos, ensure cookies are from a logged-in account
213
+
214
+ Original error: {str(e)}
215
+ """)
216
+ elif "403" in error_msg or "forbidden" in error_msg:
217
+ raise Exception(f"""
218
+ ❌ Access Forbidden (403): YouTube blocked the request.
219
+
220
+ Solutions:
221
+ 1. **Upload fresh cookies.txt file** (most important)
222
+ 2. Get cookies from a logged-in YouTube account
223
+ 3. Try again after a few minutes (rate limiting)
224
+ 4. Use a different network/VPN if possible
225
+
226
+ How to get fresh cookies:
227
+ β€’ Visit YouTube while logged in
228
+ β€’ Use browser extension to export cookies
229
+ β€’ Upload the newest cookies.txt file
230
+
231
+ Original error: {str(e)}
232
+ """)
233
+ elif "429" in error_msg or "rate limit" in error_msg:
234
+ raise Exception(f"""
235
+ ❌ Rate Limited (429): Too many requests.
236
+
237
+ Solutions:
238
+ 1. Wait 10-15 minutes before trying again
239
+ 2. Upload fresh cookies.txt file
240
+ 3. Try a different video
241
+ 4. Use a different network if possible
242
+
243
+ Original error: {str(e)}
244
+ """)
245
  else:
246
  raise Exception(f"Failed to download audio: {str(e)}")
247
 
 
252
 
253
  try:
254
  if WHISPER_TYPE == "openai-whisper":
255
+ # Use OpenAI Whisper with more robust settings
256
+ model = whisper.load_model("base") # Use base model for better accuracy
257
+ result = model.transcribe(
258
+ file_path,
259
+ language="en", # Specify English for better performance
260
+ task="transcribe",
261
+ verbose=False,
262
+ fp16=False, # Better compatibility
263
+ temperature=0.0, # More deterministic
264
+ )
265
  return result["text"]
266
 
267
  elif WHISPER_TYPE == "transformers":
268
  # Use Transformers Whisper
269
  from transformers import pipeline
270
+ transcriber = pipeline(
271
+ "automatic-speech-recognition",
272
+ model="openai/whisper-base",
273
+ device=-1 # Use CPU for better compatibility
274
+ )
275
+ result = transcriber(file_path, return_timestamps=False)
276
  return result["text"]
277
 
278
  else:
 
281
  except Exception as e:
282
  raise Exception(f"Failed to transcribe audio: {str(e)}")
283
 
284
def extract_stock_info_enhanced(text):
    """Extract stock-related mentions from a transcript with regex heuristics.

    Looks for ticker-like symbols, prices, percentages, trading actions,
    company-like names and market terms, then lists sentences that combine an
    action word with a found symbol or price.

    Args:
        text: Transcript text to scan.

    Returns:
        A formatted, human-readable report. If anything raises while scanning
        (for example ``text`` is not a string), an error string is returned
        instead of propagating the exception.
    """
    try:
        patterns = {
            # Tickers must be upper-case; only the keyword that follows is
            # matched case-insensitively. A global IGNORECASE would turn
            # ordinary words ("the stock") into symbols.
            'symbols': r'\b[A-Z]{2,5}\b(?=\s+(?i:stock|shares|ticker|symbol|price|target|buy|sell))',
            'prices': r'\$\d+(?:\.\d{1,2})?(?:\s*(?:per share|each|target|price))?',
            'percentages': r'\d+(?:\.\d{1,2})?%',
            'actions': r'\b(?:buy|sell|hold|long|short|bullish|bearish|target|stop loss|take profit|accumulate|distribute)\b',
            'companies': r'\b[A-Z][a-zA-Z]+(?:\s+[A-Z][a-zA-Z]+){0,2}(?:\s+(?:Inc|Corp|Company|Ltd|LLC)\.?)?',
            'market_terms': r'\b(?:earnings|revenue|profit|loss|growth|dividend|yield|PE ratio|market cap|volume)\b',
        }

        # Deduplicate before truncating, in first-seen order, so the report is
        # deterministic.
        symbols = _unique_in_order(re.findall(patterns['symbols'], text))
        prices = _unique_in_order(re.findall(patterns['prices'], text))
        percentages = _unique_in_order(re.findall(patterns['percentages'], text))
        actions = _unique_in_order(
            re.findall(patterns['actions'], text, re.IGNORECASE), key=str.lower
        )
        market_terms = _unique_in_order(
            re.findall(patterns['market_terms'], text, re.IGNORECASE), key=str.lower
        )
        ignored_words = {'THE', 'AND', 'FOR', 'WITH'}
        companies = _unique_in_order(
            name
            for name in re.findall(patterns['companies'], text)
            if len(name) > 3 and name.upper() not in ignored_words
        )

        parts = ["=== 📊 EXTRACTED STOCK INFORMATION ===\n\n"]

        if symbols:
            parts.append(f"🔤 **Stock Symbols Found**: {', '.join(symbols)}\n\n")
        if companies:
            parts.append(f"🏢 **Companies Mentioned**: {', '.join(companies)}\n\n")
        if prices:
            parts.append(f"💰 **Price Mentions**: {', '.join(prices)}\n\n")
        if percentages:
            parts.append(f"📈 **Percentage Mentions**: {', '.join(percentages)}\n\n")
        if actions:
            parts.append(f"🎯 **Trading Actions**: {', '.join(actions)}\n\n")
        if market_terms:
            parts.append(f"📊 **Market Terms**: {', '.join(market_terms)}\n\n")

        # Split only on punctuation that ends a sentence (followed by
        # whitespace or end of text) so decimal prices like "$150.50" survive.
        sentences = [
            chunk.strip()
            for chunk in re.split(r'[.!?]+(?:\s+|$)', text)
            if chunk.strip()
        ]
        action_words = ('buy', 'sell', 'target', 'recommend', 'suggest')
        anchors = symbols[:5] + prices[:3]
        recommendations = [
            sentence
            for sentence in sentences
            if any(word in sentence.lower() for word in action_words)
            and any(anchor in sentence for anchor in anchors)
        ]

        if recommendations:
            parts.append("🎯 **Potential Recommendations**:\n")
            for index, sentence in enumerate(recommendations[:5], 1):
                parts.append(f"{index}. {sentence}\n")
            parts.append("\n")

        if not any([symbols, prices, actions, recommendations]):
            parts.append("⚠️ **No clear stock recommendations found**\n\n")
            parts.append("**Possible reasons:**\n")
            parts.append("• Video doesn't contain stock/financial content\n")
            parts.append("• Audio quality was poor for transcription\n")
            parts.append("• Content is not in English\n")
            parts.append("• General market discussion without specific recommendations\n")
        else:
            parts.append("✅ **Analysis Complete** - Please verify all information independently!\n")

        parts.append("\n" + "=" * 50 + "\n")
        parts.append("⚠️ **DISCLAIMER**: This is automated extraction for educational purposes only.\n")
        parts.append("Always conduct your own research before making investment decisions!\n")
        parts.append("=" * 50)

        return "".join(parts)

    except Exception as e:
        return f"❌ Error extracting stock info: {str(e)}"


def _unique_in_order(items, limit=10, key=None):
    """Return up to *limit* distinct items, keeping first-seen order."""
    seen = set()
    unique = []
    for item in items:
        marker = key(item) if key is not None else item
        if marker in seen:
            continue
        seen.add(marker)
        unique.append(item)
        if len(unique) >= limit:
            break
    return unique
366
 
367
  def cleanup_file(file_path):
368
  """Clean up temporary files"""
 
386
  # Create a temporary file for cookies
387
  temp_cookies_path = tempfile.mktemp(suffix='.txt')
388
 
389
+ # Copy the uploaded file
390
  shutil.copy2(cookies_file, temp_cookies_path)
391
 
392
+ # Validate cookies file
393
+ with open(temp_cookies_path, 'r', encoding='utf-8') as f:
394
+ content = f.read()
395
+ if 'youtube.com' not in content.lower():
396
+ print("⚠️ Warning: cookies file might not contain YouTube cookies")
397
+
398
+ print(f"βœ… Cookies file processed: {temp_cookies_path}")
399
  return temp_cookies_path
400
  except Exception as e:
401
  print(f"❌ Error processing cookies file: {e}")
402
  return None
403
 
404
def validate_youtube_url(url):
    """Check whether *url* looks like a link to a single YouTube video.

    Accepted shapes (scheme optional): ``youtube.com/watch?...v=ID`` with the
    ``v`` parameter anywhere in the query string, ``youtube.com/embed/ID``,
    ``youtube.com/shorts/ID``, ``youtube.com/live/ID`` (with an optional
    ``www.``, ``m.`` or ``music.`` prefix) and ``youtu.be/ID``.

    Args:
        url: The user-supplied URL; surrounding whitespace is ignored.

    Returns:
        A ``(is_valid, message)`` tuple.
    """
    # Non-string input (e.g. None from an empty form field) counts as missing.
    if not isinstance(url, str) or not url.strip():
        return False, "Please provide a YouTube URL"

    candidate = url.strip()
    video_url_pattern = (
        r'(?:https?://)?'
        r'(?:'
        r'(?:(?:www|m|music)\.)?youtube\.com/'
        # `v=` may follow other query parameters (e.g. ?feature=share&v=ID).
        r'(?:watch\?(?:[^#\s]*&)?v=|embed/|shorts/|live/)'
        r'|'
        r'(?:www\.)?youtu\.be/'
        r')'
        r'[\w-]+'
    )

    if re.match(video_url_pattern, candidate):
        return True, "Valid YouTube URL"

    return False, "Invalid YouTube URL format"
422
+
423
  def process_video(url, cookies_file, progress=gr.Progress()):
424
  """Main function to process YouTube video"""
425
 
426
  # Check if required packages are available
427
  if not YT_DLP_AVAILABLE:
428
+ return "❌ Error: yt-dlp is not installed. Please install it using: pip install yt-dlp", "", "❌ Missing yt-dlp"
429
 
430
  if not WHISPER_AVAILABLE:
431
+ return "❌ Error: OpenAI Whisper is not installed. Please install it using: pip install openai-whisper", "", "❌ Missing Whisper"
432
 
433
+ # Validate URL
434
+ is_valid, validation_msg = validate_youtube_url(url)
435
+ if not is_valid:
436
+ return f"❌ Error: {validation_msg}", "", "❌ Invalid URL"
437
 
438
  audio_path = None
439
  cookies_temp_path = None
440
 
441
  try:
442
+ progress(0.05, desc="πŸ” Validating URL...")
 
 
443
 
444
  # Process cookies file if provided
445
+ progress(0.1, desc="πŸͺ Processing cookies...")
446
  cookies_temp_path = process_cookies_file(cookies_file)
447
 
448
+ status_msg = "βœ… Cookies loaded" if cookies_temp_path else "⚠️ No cookies (may encounter restrictions)"
449
 
450
  # Download audio
451
+ progress(0.2, desc="πŸ“₯ Downloading audio...")
452
  audio_path = download_audio(url, cookies_temp_path)
453
 
454
  # Transcribe audio
455
+ progress(0.6, desc="πŸŽ™οΈ Transcribing audio...")
456
  transcript = transcribe_audio(audio_path)
457
 
458
  if not transcript.strip():
459
+ return "❌ No speech detected in the video", "", "❌ No speech detected"
460
 
461
  # Extract stock information
462
+ progress(0.9, desc="πŸ“Š Analyzing content...")
463
+ stock_details = extract_stock_info_enhanced(transcript)
464
 
465
+ progress(1.0, desc="βœ… Complete!")
466
  return transcript, stock_details, "βœ… Processing completed successfully"
467
 
468
  except Exception as e:
469
+ error_msg = str(e)
470
+ return error_msg, "", f"❌ Error occurred"
471
 
472
  finally:
473
  # Clean up temporary files
474
  cleanup_file(audio_path)
475
  cleanup_file(cookies_temp_path)
476
 
477
+ # Create Gradio interface optimized for Gradio Cloud
478
  with gr.Blocks(
479
+ title="πŸ“ˆ YouTube Stock Extractor",
480
  theme=gr.themes.Soft(),
481
  css="""
482
  .gradio-container {
483
+ max-width: 1200px;
484
  margin: auto;
485
+ font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
486
  }
487
  .status-box {
488
+ padding: 12px;
489
+ border-radius: 8px;
490
  margin: 10px 0;
491
+ border: 1px solid #ddd;
492
+ }
493
+ .warning-box {
494
+ background-color: #fff3cd;
495
+ border-color: #ffeaa7;
496
+ color: #856404;
497
+ }
498
+ .success-box {
499
+ background-color: #d4edda;
500
+ border-color: #c3e6cb;
501
+ color: #155724;
502
+ }
503
+ .error-box {
504
+ background-color: #f8d7da;
505
+ border-color: #f5c6cb;
506
+ color: #721c24;
507
  }
508
  """
509
  ) as demo:
510
 
511
  gr.Markdown("""
512
+ # πŸ“ˆ YouTube Stock Recommendation Extractor
513
 
514
+ **Extract stock analysis and trading recommendations from YouTube videos using AI**
515
 
516
+ πŸ”§ **How it works:**
517
+ 1. **Upload cookies.txt** (essential for avoiding restrictions)
518
+ 2. **Paste YouTube URL** of financial content
519
+ 3. **AI downloads** audio and transcribes using Whisper
520
+ 4. **Extracts** stock symbols, prices, and recommendations
 
521
 
522
+ ⚠️ **Important:** This tool is for educational purposes only. Always do your own research before investing!
523
  """)
524
 
525
  with gr.Row():
526
  with gr.Column(scale=1):
527
+ # System check section
528
+ with gr.Group():
529
+ gr.Markdown("### πŸ” System Status")
530
+ check_req_btn = gr.Button(
531
+ "Check System Requirements",
532
+ variant="secondary",
533
+ size="sm"
534
+ )
535
+
536
+ requirements_output = gr.Textbox(
537
+ label="πŸ“‹ System Requirements Status",
538
+ lines=8,
539
+ max_lines=15,
540
+ interactive=False,
541
+ visible=False
542
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
543
 
544
+ # Input section
545
+ with gr.Group():
546
+ gr.Markdown("### πŸ“₯ Input")
547
+
548
+ # Cookies upload with better instructions
549
+ cookies_input = gr.File(
550
+ label="πŸͺ Upload Cookies File (cookies.txt) - REQUIRED",
551
+ file_types=[".txt"],
552
+ file_count="single"
553
+ )
554
+
555
+ with gr.Accordion("πŸ“‹ How to Get Cookies (Click to expand)", open=False):
556
+ gr.Markdown("""
557
+ **Why cookies are needed:** YouTube blocks most automated requests without proper authentication.
558
+
559
+ **Step-by-step instructions:**
560
+ 1. **Install browser extension:**
561
+ - Chrome: "Get cookies.txt LOCALLY" or "cookies.txt"
562
+ - Firefox: "cookies.txt" or "Export Cookies"
563
+
564
+ 2. **Get cookies:**
565
+ - Visit YouTube.com (log in if needed)
566
+ - Click the extension icon
567
+ - Select "Export for youtube.com"
568
+ - Download the cookies.txt file
569
+
570
+ 3. **Upload here:** Use the file upload above
571
+
572
+ **⚠️ Without cookies, you'll get "403 Forbidden" or "Video unavailable" errors**
573
+ """)
574
+
575
+ url_input = gr.Textbox(
576
+ label="📺 YouTube Video URL",
577
+ placeholder="https://www.youtube.com/watch?v=VIDEO_ID",
578
+ lines=2,
579
+ info="Paste the full YouTube video URL here"
580
+ )
581
+
582
+ process_btn = gr.Button(
583
+ "🚀 Extract Stock Information",
584
+ variant="primary",
585
+ size="lg"
586
+ )
587
+
588
+ # Status display
589
+ status_output = gr.Textbox(
590
+ label="📊 Status",
591
+ lines=1,
592
+ interactive=False,
593
+ info="Current processing status"
594
+ )
595
 
596
+ # Output section
597
  with gr.Row():
598
  with gr.Column():
599
  transcript_output = gr.Textbox(
600
  label="📝 Full Transcript",
601
+ lines=20,
602
+ max_lines=25,
603
+ show_copy_button=True,
604
+ info="Complete transcription of the video audio"
605
  )
606
 
607
  with gr.Column():
608
  stock_info_output = gr.Textbox(
609
  label="📊 Extracted Stock Information",
610
+ lines=20,
611
+ max_lines=25,
612
+ show_copy_button=True,
613
+ info="Parsed stock symbols, prices, and recommendations"
614
+ )
615
+
616
+ # Example and troubleshooting section
617
+ with gr.Row():
618
+ with gr.Column():
619
+ gr.Markdown("### 📋 Example URLs")
620
+ gr.Examples(
621
+ examples=[
622
+ ["https://www.youtube.com/watch?v=dQw4w9WgXcQ"],
623
+ ["https://youtu.be/dQw4w9WgXcQ"],
624
+ ],
625
+ inputs=[url_input],
626
+ label="Click to try example URLs (replace with actual financial videos)"
627
  )
628
 
629
+ # Troubleshooting section
630
+ with gr.Accordion("🔧 Troubleshooting Guide", open=False):
631
+ gr.Markdown("""
632
+ ### Common Issues and Solutions:
633
+
634
+ **❌ "Video unavailable" or "Content isn't available":**
635
+ - Video might be private, deleted, or geo-blocked
636
+ - Try a different public financial video
637
+ - Verify the URL works in your browser
638
+ - Check if video requires age verification
639
+
640
+ **❌ "403 Forbidden" error:**
641
+ - **Upload fresh cookies.txt file** (most common fix)
642
+ - Make sure cookies are from a logged-in YouTube account
643
+ - Try waiting 10-15 minutes (rate limiting)
644
+
645
+ **❌ "No speech detected":**
646
+ - Video might not have clear audio
647
+ - Try videos with clear narration
648
+ - Check if video is in English
649
+
650
+ **❌ "No stock information found":**
651
+ - Video might not contain financial content
652
+ - Try videos from financial YouTube channels
653
+ - Look for videos with stock analysis or recommendations
654
+
655
+ ### Installation Commands:
656
+ ```bash
657
+ # Install all requirements
658
+ pip install gradio yt-dlp openai-whisper torch torchaudio
659
+
660
+ # Alternative whisper installation
661
+ pip install transformers torch torchaudio
662
+ ```
663
+
664
+ ### Best Practices:
665
+ - Use videos from reputable financial channels
666
+ - Prefer videos under 20 minutes for faster processing
667
+ - Ensure clear audio quality
668
+ - Always verify extracted information independently
669
+ """)
670
+
671
  # Event handlers
672
  def show_requirements():
673
  status = check_requirements()
 
685
  show_progress=True
686
  )
687
 
688
+ # Footer
 
 
 
 
 
 
 
 
 
689
  gr.Markdown("""
690
+ ---
691
+ **📒 Disclaimer:** This tool is for educational and research purposes only.
692
+ The extracted information should not be considered as financial advice.
693
+ Always conduct your own research and consult with financial professionals before making investment decisions.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
694
  """)
695
 
696
+ # Launch configuration for Gradio Cloud
697
  if __name__ == "__main__":
698
+ demo.launch(
699
+ server_name="0.0.0.0",
700
+ server_port=7860,
701
+ share=False,
702
+ debug=False,
703
+ show_error=True,
704
+ quiet=False
705
+ )