Spaces:

developer28
/

Youtubedownloader

Sleeping

App Files Community

developer28 committed 14 days ago

Commit

3a866c5

verified ·

1 Parent(s): 381a02c

Update app.py

Browse files

Files changed (1) hide show

app.py +434 -237

app.py CHANGED Viewed

@@ -5,6 +5,8 @@ import re
 import sys
 import shutil
 import importlib.util
 def check_requirements():
     """Check if all required packages are installed and return status"""
@@ -74,79 +76,172 @@ print(f"Python executable: {sys.executable}")
 print(f"yt-dlp available: {YT_DLP_AVAILABLE}")
 print(f"whisper available: {WHISPER_AVAILABLE} (type: {WHISPER_TYPE})")
-# Additional diagnostics
-if YT_DLP_AVAILABLE:
-    try:
-        from yt_dlp import YoutubeDL
-        print(f"yt-dlp version: {YoutubeDL().__class__.__module__}")
-    except:
-        pass
-if WHISPER_AVAILABLE and WHISPER_TYPE == "openai-whisper":
-    try:
-        import whisper
-        print(f"whisper version: {whisper.__version__}")
-    except:
-        pass
 def download_audio(url, cookies_file_path=None):
-    """Download audio from YouTube URL and return the file path"""
     if not YT_DLP_AVAILABLE:
         raise Exception("yt-dlp is not available. Please check the installation.")
     try:
         # Create a temporary directory for downloads
         temp_dir = tempfile.mkdtemp()
         output_path = os.path.join(temp_dir, "audio")
-        # Basic options
         ydl_opts = {
-            'format': 'bestaudio[ext=m4a]/bestaudio/best',
             'outtmpl': output_path + '.%(ext)s',
-            'quiet': True,
-            'no_warnings': True,
-            'extractor_retries': 3,
-            'fragment_retries': 3,
-            'retry_sleep_functions': {'http': lambda n: 2 ** n},
         }
-        # If cookies are provided, use them
         if cookies_file_path and os.path.exists(cookies_file_path):
             ydl_opts['cookiefile'] = cookies_file_path
             print(f"✅ Using cookies file: {cookies_file_path}")
         else:
-            print("⚠️ No cookies file provided - falling back to headers (may trigger bot detection)")
-            # Only add headers if cookies are not used
-            ydl_opts.update({
-                'user_agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
-                'referer': 'https://www.youtube.com/',
-                'headers': {
-                    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
-                    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
-                    'Accept-Language': 'en-us,en;q=0.5',
-                    'Accept-Encoding': 'gzip,deflate',
-                    'DNT': '1',
-                    'Connection': 'keep-alive',
-                    'Upgrade-Insecure-Requests': '1',
-                }
-            })
         with YoutubeDL(ydl_opts) as ydl:
             info_dict = ydl.extract_info(url, download=True)
-            filename = ydl.prepare_filename(info_dict)
             # Find the downloaded file
-            for ext in ['.m4a', '.webm', '.mp4', '.mp3']:
                 potential_file = output_path + ext
                 if os.path.exists(potential_file):
                     print(f"✅ Audio downloaded: {potential_file}")
                     return potential_file
             raise FileNotFoundError("Downloaded audio file not found")
     except Exception as e:
-        if "403" in str(e) or "Forbidden" in str(e):
-            raise Exception(f"YouTube blocked the request (403 Forbidden). Please upload your cookies.txt file to bypass bot detection. Original error: {str(e)}")
         else:
             raise Exception(f"Failed to download audio: {str(e)}")
@@ -157,16 +252,27 @@ def transcribe_audio(file_path):
     try:
         if WHISPER_TYPE == "openai-whisper":
-            # Use OpenAI Whisper
-            model = whisper.load_model("tiny")
-            result = model.transcribe(file_path)
             return result["text"]
         elif WHISPER_TYPE == "transformers":
             # Use Transformers Whisper
             from transformers import pipeline
-            transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")
-            result = transcriber(file_path)
             return result["text"]
         else:
@@ -175,65 +281,88 @@ def transcribe_audio(file_path):
     except Exception as e:
         raise Exception(f"Failed to transcribe audio: {str(e)}")
-def extract_stock_info_simple(text):
-    """Extract stock information using simple pattern matching"""
     try:
         stock_info = []
-        # Simple patterns to look for stock-related information
-        stock_patterns = [
-            r'\b[A-Z]{1,5}\b(?:\s+stock|\s+shares|\s+symbol)',  # Stock symbols
-            r'(?:buy|sell|target|price)\s+[A-Z]{1,5}',
-            r'\$\d+(?:\.\d{2})?',  # Dollar amounts
-            r'\b(?:bullish|bearish|buy|sell|hold)\b',
-        ]
-        # Look for company names and stock mentions
-        companies = re.findall(r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*(?:\s+(?:Inc|Corp|Company|Ltd)\.?)?', text)
-        symbols = re.findall(r'\b[A-Z]{2,5}\b', text)
-        prices = re.findall(r'\$\d+(?:\.\d{2})?', text)
-        actions = re.findall(r'\b(?:buy|sell|hold|bullish|bearish|target|stop\s+loss)\b', text, re.IGNORECASE)
-        # Format the extracted information
-        result = "=== EXTRACTED STOCK INFORMATION ===\n\n"
-        if companies:
-            result += f"📊 Mentioned Companies: {', '.join(set(companies[:10]))}\n\n"
         if symbols:
-            result += f"🔤 Potential Stock Symbols: {', '.join(set(symbols[:10]))}\n\n"
         if prices:
-            result += f"💰 Price Mentions: {', '.join(set(prices[:10]))}\n\n"
         if actions:
-            result += f"📈 Trading Actions: {', '.join(set(actions[:10]))}\n\n"
-        # Look for specific recommendation patterns
         recommendations = []
-        sentences = text.split('.')
         for sentence in sentences:
-            if any(word in sentence.lower() for word in ['buy', 'sell', 'target', 'recommendation']):
-                if any(symbol in sentence for symbol in symbols[:5]):
-                    recommendations.append(sentence.strip())
         if recommendations:
-            result += "🎯 Potential Recommendations:\n"
-            for rec in recommendations[:5]:
-                result += f"• {rec}\n"
-        if not any([companies, symbols, prices, actions]):
-            result += "⚠️ No clear stock recommendations found in the transcript.\n"
-            result += "This might be because:\n"
-            result += "• The video doesn't contain stock recommendations\n"
-            result += "• The audio quality was poor\n"
-            result += "• The content is not in English\n"
         return result
     except Exception as e:
-        return f"Error extracting stock info: {str(e)}"
 def cleanup_file(file_path):
     """Clean up temporary files"""
@@ -257,184 +386,288 @@ def process_cookies_file(cookies_file):
         # Create a temporary file for cookies
         temp_cookies_path = tempfile.mktemp(suffix='.txt')
-        # Copy the uploaded file directly (gradio provides it as a file object)
         shutil.copy2(cookies_file, temp_cookies_path)
-        print(f"✅ Cookies file saved at: {temp_cookies_path}")
         return temp_cookies_path
     except Exception as e:
         print(f"❌ Error processing cookies file: {e}")
         return None
 def process_video(url, cookies_file, progress=gr.Progress()):
     """Main function to process YouTube video"""
     # Check if required packages are available
     if not YT_DLP_AVAILABLE:
-        return "Error: yt-dlp is not installed properly. Please install it using: pip install yt-dlp", "", "❌ Error: Missing yt-dlp"
     if not WHISPER_AVAILABLE:
-        return "Error: OpenAI Whisper is not installed properly. Please install it using: pip install openai-whisper", "", "❌ Error: Missing Whisper"
-    if not url or not url.strip():
-        return "Please provide a valid YouTube URL", "", "❌ Error: Invalid URL"
     audio_path = None
     cookies_temp_path = None
     try:
-        # Validate URL
-        if not any(domain in url.lower() for domain in ['youtube.com', 'youtu.be']):
-            return "Please provide a valid YouTube URL", "", "❌ Error: Invalid URL"
         # Process cookies file if provided
-        progress(0.05, desc="Processing cookies...")
         cookies_temp_path = process_cookies_file(cookies_file)
-        status_msg = "✅ Cookies loaded" if cookies_temp_path else "⚠️ No cookies (may encounter bot detection)"
         # Download audio
-        progress(0.2, desc="Downloading audio...")
         audio_path = download_audio(url, cookies_temp_path)
         # Transcribe audio
-        progress(0.6, desc="Transcribing audio...")
         transcript = transcribe_audio(audio_path)
         if not transcript.strip():
-            return "No speech detected in the video", "", "❌ No speech detected"
         # Extract stock information
-        progress(0.9, desc="Extracting stock information...")
-        stock_details = extract_stock_info_simple(transcript)
-        progress(1.0, desc="Complete!")
         return transcript, stock_details, "✅ Processing completed successfully"
     except Exception as e:
-        error_msg = f"Error processing video: {str(e)}"
-        return error_msg, "", f"❌ Error: {str(e)}"
     finally:
         # Clean up temporary files
         cleanup_file(audio_path)
         cleanup_file(cookies_temp_path)
-# Create Gradio interface
 with gr.Blocks(
-    title="Stock Recommendation Extractor",
     theme=gr.themes.Soft(),
     css="""
     .gradio-container {
-        max-width: 1400px;
         margin: auto;
     }
     .status-box {
-        padding: 10px;
-        border-radius: 5px;
         margin: 10px 0;
     }
     """
 ) as demo:
     gr.Markdown("""
-    # 📈 Stock Recommendation Extractor from YouTube
-    Extract stock recommendations and trading information from YouTube videos using AI transcription.
-    **How it works:**
-    1. Upload your cookies.txt file (optional but recommended to avoid bot detection)
-    2. Paste YouTube video URL
-    3. Downloads audio from YouTube video
-    4. Transcribes using OpenAI Whisper
-    5. Extracts stock-related information
-    **⚠️ Disclaimer:** This is for educational purposes only. Always do your own research!
     """)
     with gr.Row():
         with gr.Column(scale=1):
-            # Requirements check button
-            gr.Markdown("### 🔍 System Check")
-            check_req_btn = gr.Button(
-                "Check Requirements",
-                variant="secondary",
-                size="sm"
-            )
-            requirements_output = gr.Textbox(
-                label="📋 Requirements Status",
-                lines=10,
-                interactive=False,
-                visible=False
-            )
-            # Cookies file upload
-            cookies_input = gr.File(
-                label="🍪 Upload Cookies File (cookies.txt)",
-                file_types=[".txt"],
-                file_count="single"
-            )
-            gr.Markdown("""
-            **How to get cookies.txt to fix 403 Forbidden errors:**
-            1. Install browser extension: "Get cookies.txt LOCALLY"
-            2. Visit YouTube in your browser (while logged in)
-            3. Click the extension icon and export cookies for youtube.com
-            4. Upload the downloaded cookies.txt file here
-            **Alternative extensions:**
-            - "cookies.txt" (Chrome/Firefox)
-            - "Export Cookies" (Chrome)
-            ⚠️ **Important**: Without cookies, you'll likely get 403 Forbidden errors
-            """)
-            url_input = gr.Textbox(
-                label="📺 YouTube URL",
-                placeholder="https://www.youtube.com/watch?v=...",
-                lines=2
-            )
-            process_btn = gr.Button(
-                "🚀 Extract Stock Information",
-                variant="primary",
-                size="lg"
-            )
-            # Status display
-            status_output = gr.Textbox(
-                label="📊 Status",
-                lines=1,
-                interactive=False
-            )
-            gr.Markdown("""
-            ### 💡 Tips:
-            - **MUST upload cookies.txt** to avoid 403 Forbidden errors
-            - Works best with financial YouTube channels
-            - Ensure video has clear audio
-            - English content works best
-            - Try shorter videos first (under 10 minutes)
-            """)
     with gr.Row():
         with gr.Column():
             transcript_output = gr.Textbox(
                 label="📝 Full Transcript",
-                lines=15,
-                max_lines=20,
-                show_copy_button=True
             )
         with gr.Column():
             stock_info_output = gr.Textbox(
                 label="📊 Extracted Stock Information",
-                lines=15,
-                max_lines=20,
-                show_copy_button=True
             )
     # Event handlers
     def show_requirements():
         status = check_requirements()
@@ -452,57 +685,21 @@ with gr.Blocks(
         show_progress=True
     )
-    # Example section
-    gr.Markdown("### 📋 Example URLs (Replace with actual financial videos)")
-    gr.Examples(
-        examples=[
-            ["https://www.youtube.com/watch?v=dQw4w9WgXcQ"],
-        ],
-        inputs=[url_input],
-        label="Click to try example"
-    )
     gr.Markdown("""
-    ### 🔧 Installation & Troubleshooting:
-    **Step 1: Click "Check Requirements" button above to see what's missing**
-    **If you get "Whisper Missing" error:**
-    ```bash
-    pip install openai-whisper
-    ```
-    **If you get "yt-dlp Missing" error:**
-    ```bash
-    pip install yt-dlp
-    ```
-    **Install all requirements at once:**
-    ```bash
-    pip install gradio==4.44.0 yt-dlp==2023.12.30 openai-whisper==20231117 torch==2.1.0 torchaudio==2.1.0 numpy==1.24.3 regex==2023.8.8
-    ```
-    **Alternative Whisper installation:**
-    ```bash
-    pip install transformers torch torchaudio
-    ```
-    **If using virtual environment:**
-    ```bash
-    # Create and activate virtual environment first
-    python -m venv myenv
-    # Windows: myenv\\Scripts\\activate
-    # Mac/Linux: source myenv/bin/activate
-    # Then install packages
-    pip install -r requirements.txt
-    ```
-    **Other Issues:**
-    - **Bot Detection Error**: Upload your cookies.txt file
-    - **No Audio Found**: Check if video has audio track
-    - **Transcription Failed**: Video might be too long or audio quality poor
-    - **No Stock Info**: Video might not contain financial content
     """)
 if __name__ == "__main__":
-    demo.launch()

 import sys
 import shutil
 import importlib.util
+import time
+import random
 def check_requirements():
     """Check if all required packages are installed and return status"""
 print(f"yt-dlp available: {YT_DLP_AVAILABLE}")
 print(f"whisper available: {WHISPER_AVAILABLE} (type: {WHISPER_TYPE})")
+def get_video_info(url, cookies_file_path=None):
+    """Get video information without downloading"""
+    if not YT_DLP_AVAILABLE:
+        raise Exception("yt-dlp is not available.")
+    ydl_opts = {
+        'quiet': True,
+        'no_warnings': True,
+        'extract_flat': False,
+        'skip_download': True,
+    }
+    if cookies_file_path and os.path.exists(cookies_file_path):
+        ydl_opts['cookiefile'] = cookies_file_path
+    with YoutubeDL(ydl_opts) as ydl:
+        try:
+            info = ydl.extract_info(url, download=False)
+            return {
+                'title': info.get('title', 'Unknown'),
+                'duration': info.get('duration', 0),
+                'availability': info.get('availability', 'unknown'),
+                'live_status': info.get('live_status', 'unknown'),
+            }
+        except Exception as e:
+            return {'error': str(e)}
 def download_audio(url, cookies_file_path=None):
+    """Download audio from YouTube URL with enhanced error handling"""
     if not YT_DLP_AVAILABLE:
         raise Exception("yt-dlp is not available. Please check the installation.")
     try:
+        # First, try to get video info
+        video_info = get_video_info(url, cookies_file_path)
+        if 'error' in video_info:
+            raise Exception(f"Video info error: {video_info['error']}")
+        print(f"Video title: {video_info.get('title', 'Unknown')}")
+        print(f"Video duration: {video_info.get('duration', 0)} seconds")
+        print(f"Video availability: {video_info.get('availability', 'unknown')}")
         # Create a temporary directory for downloads
         temp_dir = tempfile.mkdtemp()
         output_path = os.path.join(temp_dir, "audio")
+        # Enhanced options for better compatibility
         ydl_opts = {
+            'format': 'bestaudio[ext=m4a]/bestaudio[ext=webm]/bestaudio[ext=mp4]/bestaudio/best',
             'outtmpl': output_path + '.%(ext)s',
+            'quiet': False,  # Enable logging for debugging
+            'no_warnings': False,
+            'extractor_retries': 5,
+            'fragment_retries': 5,
+            'retry_sleep_functions': {'http': lambda n: min(2 ** n, 60)},
+            'socket_timeout': 30,
+            'http_chunk_size': 10485760,  # 10MB chunks
+            'writeinfojson': False,
+            'writesubtitles': False,
+            'writeautomaticsub': False,
+            'geo_bypass': True,
+            'geo_bypass_country': 'US',
+            'extract_flat': False,
+            'ignoreerrors': False,
         }
+        # Enhanced cookies and headers handling
         if cookies_file_path and os.path.exists(cookies_file_path):
             ydl_opts['cookiefile'] = cookies_file_path
             print(f"✅ Using cookies file: {cookies_file_path}")
         else:
+            print("⚠️ No cookies file - using enhanced headers")
+        # Always add enhanced headers
+        ydl_opts.update({
+            'user_agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
+            'referer': 'https://www.youtube.com/',
+            'headers': {
+                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
+                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
+                'Accept-Language': 'en-US,en;q=0.9',
+                'Accept-Encoding': 'gzip, deflate, br',
+                'DNT': '1',
+                'Connection': 'keep-alive',
+                'Upgrade-Insecure-Requests': '1',
+                'Sec-Fetch-Dest': 'document',
+                'Sec-Fetch-Mode': 'navigate',
+                'Sec-Fetch-Site': 'none',
+                'Sec-Fetch-User': '?1',
+                'sec-ch-ua': '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
+                'sec-ch-ua-mobile': '?0',
+                'sec-ch-ua-platform': '"Windows"',
+            }
+        })
+        # Add random delay to avoid rate limiting
+        time.sleep(random.uniform(1, 3))
         with YoutubeDL(ydl_opts) as ydl:
+            print(f"Attempting to download audio from: {url}")
             info_dict = ydl.extract_info(url, download=True)
             # Find the downloaded file
+            for ext in ['.m4a', '.webm', '.mp4', '.mp3', '.aac', '.opus']:
                 potential_file = output_path + ext
                 if os.path.exists(potential_file):
                     print(f"✅ Audio downloaded: {potential_file}")
                     return potential_file
+            # If no file found, list directory contents for debugging
+            print(f"Files in temp directory: {os.listdir(temp_dir)}")
             raise FileNotFoundError("Downloaded audio file not found")
     except Exception as e:
+        error_msg = str(e).lower()
+        # Provide specific error messages and solutions
+        if "video unavailable" in error_msg or "content isn't available" in error_msg:
+            raise Exception(f"""
+❌ Video Access Error: The video is unavailable or restricted.
+Possible reasons:
+• Video is private, unlisted, or deleted
+• Video is geo-blocked in your region
+• Video has age restrictions
+• Video requires sign-in to view
+• Copyright restrictions
+Solutions to try:
+1. Verify the video URL is correct and accessible
+2. Try a different public video
+3. Check if the video works in your browser
+4. If using a playlist URL, try the direct video URL instead
+5. For age-restricted videos, ensure cookies are from a logged-in account
+Original error: {str(e)}
+""")
+        elif "403" in error_msg or "forbidden" in error_msg:
+            raise Exception(f"""
+❌ Access Forbidden (403): YouTube blocked the request.
+Solutions:
+1. **Upload fresh cookies.txt file** (most important)
+2. Get cookies from a logged-in YouTube account
+3. Try again after a few minutes (rate limiting)
+4. Use a different network/VPN if possible
+How to get fresh cookies:
+• Visit YouTube while logged in
+• Use browser extension to export cookies
+• Upload the newest cookies.txt file
+Original error: {str(e)}
+""")
+        elif "429" in error_msg or "rate limit" in error_msg:
+            raise Exception(f"""
+❌ Rate Limited (429): Too many requests.
+Solutions:
+1. Wait 10-15 minutes before trying again
+2. Upload fresh cookies.txt file
+3. Try a different video
+4. Use a different network if possible
+Original error: {str(e)}
+""")
         else:
             raise Exception(f"Failed to download audio: {str(e)}")
     try:
         if WHISPER_TYPE == "openai-whisper":
+            # Use OpenAI Whisper with more robust settings
+            model = whisper.load_model("base")  # Use base model for better accuracy
+            result = model.transcribe(
+                file_path,
+                language="en",  # Specify English for better performance
+                task="transcribe",
+                verbose=False,
+                fp16=False,  # Better compatibility
+                temperature=0.0,  # More deterministic
+            )
             return result["text"]
         elif WHISPER_TYPE == "transformers":
             # Use Transformers Whisper
             from transformers import pipeline
+            transcriber = pipeline(
+                "automatic-speech-recognition",
+                model="openai/whisper-base",
+                device=-1  # Use CPU for better compatibility
+            )
+            result = transcriber(file_path, return_timestamps=False)
             return result["text"]
         else:
     except Exception as e:
         raise Exception(f"Failed to transcribe audio: {str(e)}")
+def extract_stock_info_enhanced(text):
+    """Enhanced stock information extraction with better patterns"""
     try:
         stock_info = []
+        # Enhanced patterns for stock information
+        stock_patterns = {
+            'symbols': r'\b[A-Z]{2,5}\b(?=\s+(?:stock|shares|ticker|symbol|price|target|buy|sell))',
+            'prices': r'\$\d+(?:\.\d{1,2})?(?:\s*(?:per share|each|target|price))?',
+            'percentages': r'\d+(?:\.\d{1,2})?%',
+            'actions': r'\b(?:buy|sell|hold|long|short|bullish|bearish|target|stop loss|take profit|accumulate|distribute)\b',
+            'companies': r'\b[A-Z][a-zA-Z]+(?:\s+[A-Z][a-zA-Z]+){0,2}(?:\s+(?:Inc|Corp|Company|Ltd|LLC)\.?)?',
+            'market_terms': r'\b(?:earnings|revenue|profit|loss|growth|dividend|yield|PE ratio|market cap|volume)\b',
+        }
+        # Extract information
+        symbols = re.findall(stock_patterns['symbols'], text, re.IGNORECASE)
+        prices = re.findall(stock_patterns['prices'], text)
+        percentages = re.findall(stock_patterns['percentages'], text)
+        actions = re.findall(stock_patterns['actions'], text, re.IGNORECASE)
+        companies = re.findall(stock_patterns['companies'], text)
+        market_terms = re.findall(stock_patterns['market_terms'], text, re.IGNORECASE)
+        # Format results
+        result = "=== 📊 EXTRACTED STOCK INFORMATION ===\n\n"
         if symbols:
+            result += f"🔤 **Stock Symbols Found**: {', '.join(set(symbols[:10]))}\n\n"
+        if companies:
+            filtered_companies = [c for c in set(companies) if len(c) > 3 and c.upper() not in ['THE', 'AND', 'FOR', 'WITH']]
+            if filtered_companies:
+                result += f"🏢 **Companies Mentioned**: {', '.join(filtered_companies[:10])}\n\n"
         if prices:
+            result += f"💰 **Price Mentions**: {', '.join(set(prices[:10]))}\n\n"
+        if percentages:
+            result += f"📈 **Percentage Mentions**: {', '.join(set(percentages[:10]))}\n\n"
         if actions:
+            result += f"🎯 **Trading Actions**: {', '.join(set(actions[:10]))}\n\n"
+        if market_terms:
+            result += f"📊 **Market Terms**: {', '.join(set(market_terms[:10]))}\n\n"
+        # Look for recommendation sentences
+        sentences = [s.strip() for s in text.split('.') if s.strip()]
         recommendations = []
         for sentence in sentences:
+            sentence_lower = sentence.lower()
+            if any(action in sentence_lower for action in ['buy', 'sell', 'target', 'recommend', 'suggest']):
+                if any(symbol in sentence for symbol in symbols[:5]) or any(price in sentence for price in prices[:3]):
+                    recommendations.append(sentence)
         if recommendations:
+            result += "🎯 **Potential Recommendations**:\n"
+            for i, rec in enumerate(recommendations[:5], 1):
+                result += f"{i}. {rec}\n"
+            result += "\n"
+        # Add summary
+        if not any([symbols, prices, actions, recommendations]):
+            result += "⚠️ **No clear stock recommendations found**\n\n"
+            result += "**Possible reasons:**\n"
+            result += "• Video doesn't contain stock/financial content\n"
+            result += "• Audio quality was poor for transcription\n"
+            result += "• Content is not in English\n"
+            result += "• General market discussion without specific recommendations\n"
+        else:
+            result += "✅ **Analysis Complete** - Please verify all information independently!\n"
+        result += "\n" + "="*50 + "\n"
+        result += "⚠️ **DISCLAIMER**: This is automated extraction for educational purposes only.\n"
+        result += "Always conduct your own research before making investment decisions!\n"
+        result += "="*50
         return result
     except Exception as e:
+        return f"❌ Error extracting stock info: {str(e)}"
 def cleanup_file(file_path):
     """Clean up temporary files"""
         # Create a temporary file for cookies
         temp_cookies_path = tempfile.mktemp(suffix='.txt')
+        # Copy the uploaded file
         shutil.copy2(cookies_file, temp_cookies_path)
+        # Validate cookies file
+        with open(temp_cookies_path, 'r', encoding='utf-8') as f:
+            content = f.read()
+            if 'youtube.com' not in content.lower():
+                print("⚠️ Warning: cookies file might not contain YouTube cookies")
+        print(f"✅ Cookies file processed: {temp_cookies_path}")
         return temp_cookies_path
     except Exception as e:
         print(f"❌ Error processing cookies file: {e}")
         return None
+def validate_youtube_url(url):
+    """Validate YouTube URL format"""
+    if not url or not url.strip():
+        return False, "Please provide a YouTube URL"
+    url = url.strip()
+    youtube_patterns = [
+        r'(?:https?://)?(?:www\.)?youtube\.com/watch\?v=[\w-]+',
+        r'(?:https?://)?(?:www\.)?youtu\.be/[\w-]+',
+        r'(?:https?://)?(?:www\.)?youtube\.com/embed/[\w-]+',
+        r'(?:https?://)?(?:m\.)?youtube\.com/watch\?v=[\w-]+',
+    ]
+    for pattern in youtube_patterns:
+        if re.match(pattern, url):
+            return True, "Valid YouTube URL"
+    return False, "Invalid YouTube URL format"
 def process_video(url, cookies_file, progress=gr.Progress()):
     """Main function to process YouTube video"""
     # Check if required packages are available
     if not YT_DLP_AVAILABLE:
+        return "❌ Error: yt-dlp is not installed. Please install it using: pip install yt-dlp", "", "❌ Missing yt-dlp"
     if not WHISPER_AVAILABLE:
+        return "❌ Error: OpenAI Whisper is not installed. Please install it using: pip install openai-whisper", "", "❌ Missing Whisper"
+    # Validate URL
+    is_valid, validation_msg = validate_youtube_url(url)
+    if not is_valid:
+        return f"❌ Error: {validation_msg}", "", "❌ Invalid URL"
     audio_path = None
     cookies_temp_path = None
     try:
+        progress(0.05, desc="🔍 Validating URL...")
         # Process cookies file if provided
+        progress(0.1, desc="🍪 Processing cookies...")
         cookies_temp_path = process_cookies_file(cookies_file)
+        status_msg = "✅ Cookies loaded" if cookies_temp_path else "⚠️ No cookies (may encounter restrictions)"
         # Download audio
+        progress(0.2, desc="📥 Downloading audio...")
         audio_path = download_audio(url, cookies_temp_path)
         # Transcribe audio
+        progress(0.6, desc="🎙️ Transcribing audio...")
         transcript = transcribe_audio(audio_path)
         if not transcript.strip():
+            return "❌ No speech detected in the video", "", "❌ No speech detected"
         # Extract stock information
+        progress(0.9, desc="📊 Analyzing content...")
+        stock_details = extract_stock_info_enhanced(transcript)
+        progress(1.0, desc="✅ Complete!")
         return transcript, stock_details, "✅ Processing completed successfully"
     except Exception as e:
+        error_msg = str(e)
+        return error_msg, "", f"❌ Error occurred"
     finally:
         # Clean up temporary files
         cleanup_file(audio_path)
         cleanup_file(cookies_temp_path)
+# Create Gradio interface optimized for Gradio Cloud
 # Build the whole UI inside one Blocks context; the resulting `demo` object is
 # what the __main__ guard at the bottom of the file launches.
 with gr.Blocks(
+    title="📈 YouTube Stock Extractor",
     theme=gr.themes.Soft(),
     # Custom CSS: caps the page container at 1200px and centres it, and declares
     # the .status-box / .warning-box / .success-box / .error-box classes.
     # NOTE(review): no component in the visible layout references these classes
     # (no elem_classes) — confirm they are still used.
     css="""
     .gradio-container {
+        max-width: 1200px;
         margin: auto;
+        font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
     }
     .status-box {
+        padding: 12px;
+        border-radius: 8px;
         margin: 10px 0;
+        border: 1px solid #ddd;
+    }
+    .warning-box {
+        background-color: #fff3cd;
+        border-color: #ffeaa7;
+        color: #856404;
+    }
+    .success-box {
+        background-color: #d4edda;
+        border-color: #c3e6cb;
+        color: #155724;
+    }
+    .error-box {
+        background-color: #f8d7da;
+        border-color: #f5c6cb;
+        color: #721c24;
     }
     """
 ) as demo:
     # Page header: title, tagline, the four-step "how it works" list and the
     # educational-use warning.
     gr.Markdown("""
+    # 📈 YouTube Stock Recommendation Extractor
+    **Extract stock analysis and trading recommendations from YouTube videos using AI**
+    🔧 **How it works:**
+    1. **Upload cookies.txt** (essential for avoiding restrictions)
+    2. **Paste YouTube URL** of financial content
+    3. **AI downloads** audio and transcribes using Whisper
+    4. **Extracts** stock symbols, prices, and recommendations
+    ⚠️ **Important:** This tool is for educational purposes only. Always do your own research before investing!
     """)
     # Controls row: one column holding the system check and the input widgets.
     with gr.Row():
         with gr.Column(scale=1):
+            # System check section
+            with gr.Group():
+                gr.Markdown("### 🔍 System Status")
+                check_req_btn = gr.Button(
+                    "Check System Requirements",
+                    variant="secondary",
+                    size="sm"
+                )
+                requirements_output = gr.Textbox(
+                    label="📋 System Requirements Status",
+                    lines=8,
+                    max_lines=15,
+                    interactive=False,
+                    visible=False  # hidden at start; presumably revealed by the check button's handler — TODO confirm
+                )
+            # Input section
+            with gr.Group():
+                gr.Markdown("### 📥 Input")
+                # Cookies upload with better instructions
                 # NOTE(review): the label says REQUIRED, but download_audio()
                 # defaults cookies_file_path to None — confirm whether an upload
                 # is really mandatory or the label overstates it.
+                cookies_input = gr.File(
+                    label="🍪 Upload Cookies File (cookies.txt) - REQUIRED",
+                    file_types=[".txt"],  # restrict uploads to .txt files
+                    file_count="single"
+                )
+                with gr.Accordion("📋 How to Get Cookies (Click to expand)", open=False):
+                    gr.Markdown("""
+                    **Why cookies are needed:** YouTube blocks most automated requests without proper authentication.
+                    **Step-by-step instructions:**
+                    1. **Install browser extension:**
+                       - Chrome: "Get cookies.txt LOCALLY" or "cookies.txt"
+                       - Firefox: "cookies.txt" or "Export Cookies"
+                    2. **Get cookies:**
+                       - Visit YouTube.com (log in if needed)
+                       - Click the extension icon
+                       - Select "Export for youtube.com"
+                       - Download the cookies.txt file
+                    3. **Upload here:** Use the file upload above
+                    **⚠️ Without cookies, you'll get "403 Forbidden" or "Video unavailable" errors**
+                    """)
+                url_input = gr.Textbox(
+                    label="📺 YouTube Video URL",
+                    placeholder="https://www.youtube.com/watch?v=VIDEO_ID",
+                    lines=2,
+                    info="Paste the full YouTube video URL here"
+                )
+                process_btn = gr.Button(
+                    "🚀 Extract Stock Information",
+                    variant="primary",
+                    size="lg"
+                )
+                # Status display
                 # Single-line, read-only field for the processing status text.
+                status_output = gr.Textbox(
+                    label="📊 Status",
+                    lines=1,
+                    interactive=False,
+                    info="Current processing status"
+                )
+    # Output section
     # Results row: transcript and extracted stock details in two sibling columns.
     with gr.Row():
         with gr.Column():
             transcript_output = gr.Textbox(
                 label="📝 Full Transcript",
+                lines=20,
+                max_lines=25,
+                show_copy_button=True,
+                info="Complete transcription of the video audio"
             )
         with gr.Column():
             stock_info_output = gr.Textbox(
                 label="📊 Extracted Stock Information",
+                lines=20,
+                max_lines=25,
+                show_copy_button=True,
+                info="Parsed stock symbols, prices, and recommendations"
+            )
+    # Example and troubleshooting section
+    with gr.Row():
+        with gr.Column():
+            gr.Markdown("### 📋 Example URLs")
             # Both examples are the same placeholder video (long and short URL
             # form) and are bound to url_input; the label tells users to
             # substitute real financial videos.
+            gr.Examples(
+                examples=[
+                    ["https://www.youtube.com/watch?v=dQw4w9WgXcQ"],
+                    ["https://youtu.be/dQw4w9WgXcQ"],
+                ],
+                inputs=[url_input],
+                label="Click to try example URLs (replace with actual financial videos)"
             )
+    # Troubleshooting section
     # Collapsed by default (open=False); contains static Markdown help only.
+    with gr.Accordion("🔧 Troubleshooting Guide", open=False):
+        gr.Markdown("""
+        ### Common Issues and Solutions:
+        **❌ "Video unavailable" or "Content isn't available":**
+        - Video might be private, deleted, or geo-blocked
+        - Try a different public financial video
+        - Verify the URL works in your browser
+        - Check if video requires age verification
+        **❌ "403 Forbidden" error:**
+        - **Upload fresh cookies.txt file** (most common fix)
+        - Make sure cookies are from a logged-in YouTube account
+        - Try waiting 10-15 minutes (rate limiting)
+        **❌ "No speech detected":**
+        - Video might not have clear audio
+        - Try videos with clear narration
+        - Check if video is in English
+        **❌ "No stock information found":**
+        - Video might not contain financial content
+        - Try videos from financial YouTube channels
+        - Look for videos with stock analysis or recommendations
+        ### Installation Commands:
+        ```bash
+        # Install all requirements
+        pip install gradio yt-dlp openai-whisper torch torchaudio
+        # Alternative whisper installation
+        pip install transformers torch torchaudio
+        ```
+        ### Best Practices:
+        - Use videos from reputable financial channels
+        - Prefer videos under 20 minutes for faster processing
+        - Ensure clear audio quality
+        - Always verify extracted information independently
+        """)
     # Event handlers
     def show_requirements():
         status = check_requirements()
         show_progress=True
     )
+    # Footer
     # Not-financial-advice disclaimer, added as the last component of the page.
     gr.Markdown("""
+    ---
+    **📢 Disclaimer:** This tool is for educational and research purposes only.
+    The extracted information should not be considered as financial advice.
+    Always conduct your own research and consult with financial professionals before making investment decisions.
     """)
+# Launch configuration for Gradio Cloud
 # Start the Gradio server only when this file is executed as a script
 # (importing the module builds `demo` but does not launch it).
 if __name__ == "__main__":
+    demo.launch(
+        server_name="0.0.0.0",  # listen on all network interfaces, not just localhost
+        server_port=7860,  # fixed port; 7860 is also Gradio's documented default
+        share=False,  # do not create a public share link
+        debug=False,  # debug mode off
+        show_error=True,  # surface handler errors in the browser UI
+        quiet=False  # keep Gradio's startup messages
+    )