Update app.py
app.py
CHANGED
@@ -3,46 +3,6 @@ import tempfile
 import gradio as gr
 import re
 import sys
-import shutil
-import importlib.util
-import time
-import random
-
-def check_requirements():
-    """Check if all required packages are installed and return status"""
-    requirements_status = []
-
-    packages = [
-        ('gradio', 'gradio'),
-        ('yt-dlp', 'yt_dlp'),
-        ('openai-whisper', 'whisper'),
-        ('torch', 'torch'),
-        ('torchaudio', 'torchaudio'),
-        ('numpy', 'numpy'),
-        ('regex', 'regex'),
-    ]
-
-    for package_name, import_name in packages:
-        try:
-            spec = importlib.util.find_spec(import_name)
-            if spec is None:
-                requirements_status.append(f"❌ {package_name}: Not found")
-                continue
-
-            module = importlib.import_module(import_name)
-            version = getattr(module, '__version__', 'Unknown version')
-            requirements_status.append(f"✅ {package_name}: {version}")
-
-        except ImportError as e:
-            requirements_status.append(f"❌ {package_name}: Import error - {str(e)}")
-        except Exception as e:
-            requirements_status.append(f"⚠️ {package_name}: Found but error - {str(e)}")
-
-    # Add Python info
-    requirements_status.append(f"\n🐍 Python: {sys.version}")
-    requirements_status.append(f"📍 Python executable: {sys.executable}")
-
-    return "\n".join(requirements_status)

 # Try to import required packages with error handling
 try:
@@ -52,317 +12,175 @@ except ImportError as e:
     YT_DLP_AVAILABLE = False
     print(f"yt-dlp import error: {e}")

-# Try multiple whisper import methods
-WHISPER_AVAILABLE = False
-WHISPER_TYPE = None
-
 try:
     import whisper
     WHISPER_AVAILABLE = True
-    WHISPER_TYPE = "openai-whisper"
-    print("Using OpenAI Whisper")
 except ImportError as e:
-
-
-        from transformers import pipeline
-        WHISPER_AVAILABLE = True
-        WHISPER_TYPE = "transformers"
-        print("Using Transformers Whisper")
-    except ImportError as e2:
-        print(f"Transformers Whisper import error: {e2}")

 print(f"Python version: {sys.version}")
-print(f"Python executable: {sys.executable}")
 print(f"yt-dlp available: {YT_DLP_AVAILABLE}")
-print(f"whisper available: {WHISPER_AVAILABLE}

-def
-    """Get
-    if
-
-
-
-
-
-
-
-
-
-    if cookies_file_path and os.path.exists(cookies_file_path):
-        ydl_opts['cookiefile'] = cookies_file_path
-
-    with YoutubeDL(ydl_opts) as ydl:
-        try:
-            info = ydl.extract_info(url, download=False)
-            return {
-                'title': info.get('title', 'Unknown'),
-                'duration': info.get('duration', 0),
-                'availability': info.get('availability', 'unknown'),
-                'live_status': info.get('live_status', 'unknown'),
-            }
-        except Exception as e:
-            return {'error': str(e)}

-def download_audio(url
-    """Download audio from YouTube URL
     if not YT_DLP_AVAILABLE:
         raise Exception("yt-dlp is not available. Please check the installation.")

     try:
-        # First, try to get video info
-        video_info = get_video_info(url, cookies_file_path)
-        if 'error' in video_info:
-            raise Exception(f"Video info error: {video_info['error']}")
-
-        print(f"Video title: {video_info.get('title', 'Unknown')}")
-        print(f"Video duration: {video_info.get('duration', 0)} seconds")
-        print(f"Video availability: {video_info.get('availability', 'unknown')}")
-
         # Create a temporary directory for downloads
         temp_dir = tempfile.mkdtemp()
         output_path = os.path.join(temp_dir, "audio")

-        #
         ydl_opts = {
-            'format': 'bestaudio[ext=m4a]/bestaudio
             'outtmpl': output_path + '.%(ext)s',
-            'quiet':
-            'no_warnings':
-            'extractor_retries': 5,
-            'fragment_retries': 5,
-            'retry_sleep_functions': {'http': lambda n: min(2 ** n, 60)},
-            'socket_timeout': 30,
-            'http_chunk_size': 10485760,  # 10MB chunks
-            'writeinfojson': False,
-            'writesubtitles': False,
-            'writeautomaticsub': False,
-            'geo_bypass': True,
-            'geo_bypass_country': 'US',
             'extract_flat': False,
             'ignoreerrors': False,
         }

-        #
-        if
-            ydl_opts['cookiefile'] =
-            print(f"
         else:
-            print("
-
-        # Always add enhanced headers
-        ydl_opts.update({
-            'user_agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
-            'referer': 'https://www.youtube.com/',
-            'headers': {
-                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
-                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
-                'Accept-Language': 'en-US,en;q=0.9',
-                'Accept-Encoding': 'gzip, deflate, br',
-                'DNT': '1',
-                'Connection': 'keep-alive',
-                'Upgrade-Insecure-Requests': '1',
-                'Sec-Fetch-Dest': 'document',
-                'Sec-Fetch-Mode': 'navigate',
-                'Sec-Fetch-Site': 'none',
-                'Sec-Fetch-User': '?1',
-                'sec-ch-ua': '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
-                'sec-ch-ua-mobile': '?0',
-                'sec-ch-ua-platform': '"Windows"',
-            }
-        })
-
-        # Add random delay to avoid rate limiting
-        time.sleep(random.uniform(1, 3))

with YoutubeDL(ydl_opts) as ydl:
|
178 |
-
|
179 |
-
info_dict = ydl.extract_info(url, download=
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
180 |
|
181 |
# Find the downloaded file
|
182 |
-
for ext in ['.m4a', '.webm', '.mp4', '.mp3'
|
183 |
potential_file = output_path + ext
|
184 |
if os.path.exists(potential_file):
|
185 |
-
print(f"
|
186 |
return potential_file
|
187 |
|
188 |
-
|
189 |
-
print(f"Files in temp directory: {os.listdir(temp_dir)}")
|
190 |
-
raise FileNotFoundError("Downloaded audio file not found")
|
191 |
|
192 |
except Exception as e:
|
193 |
-
error_msg = str(e)
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
raise Exception(
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
β’ Video is geo-blocked in your region
|
203 |
-
β’ Video has age restrictions
|
204 |
-
β’ Video requires sign-in to view
|
205 |
-
β’ Copyright restrictions
|
206 |
-
|
207 |
-
Solutions to try:
|
208 |
-
1. Verify the video URL is correct and accessible
|
209 |
-
2. Try a different public video
|
210 |
-
3. Check if the video works in your browser
|
211 |
-
4. If using a playlist URL, try the direct video URL instead
|
212 |
-
5. For age-restricted videos, ensure cookies are from a logged-in account
|
213 |
-
|
214 |
-
Original error: {str(e)}
|
215 |
-
""")
|
216 |
-
elif "403" in error_msg or "forbidden" in error_msg:
|
217 |
-
raise Exception(f"""
|
218 |
-
β Access Forbidden (403): YouTube blocked the request.
|
219 |
-
|
220 |
-
Solutions:
|
221 |
-
1. **Upload fresh cookies.txt file** (most important)
|
222 |
-
2. Get cookies from a logged-in YouTube account
|
223 |
-
3. Try again after a few minutes (rate limiting)
|
224 |
-
4. Use a different network/VPN if possible
|
225 |
-
|
226 |
-
How to get fresh cookies:
|
227 |
-
β’ Visit YouTube while logged in
|
228 |
-
β’ Use browser extension to export cookies
|
229 |
-
β’ Upload the newest cookies.txt file
|
230 |
-
|
231 |
-
Original error: {str(e)}
|
232 |
-
""")
|
233 |
-
elif "429" in error_msg or "rate limit" in error_msg:
|
234 |
-
raise Exception(f"""
|
235 |
-
β Rate Limited (429): Too many requests.
|
236 |
-
|
237 |
-
Solutions:
|
238 |
-
1. Wait 10-15 minutes before trying again
|
239 |
-
2. Upload fresh cookies.txt file
|
240 |
-
3. Try a different video
|
241 |
-
4. Use a different network if possible
|
242 |
-
|
243 |
-
Original error: {str(e)}
|
244 |
-
""")
|
245 |
else:
|
246 |
-
raise Exception(f"Failed to download audio: {
|
247 |
|
 def transcribe_audio(file_path):
     """Transcribe audio file using Whisper"""
     if not WHISPER_AVAILABLE:
-        raise Exception("OpenAI Whisper is not available. Please

     try:
-
-
-
-
-                file_path,
-                language="en",  # Specify English for better performance
-                task="transcribe",
-                verbose=False,
-                fp16=False,  # Better compatibility
-                temperature=0.0,  # More deterministic
-            )
-            return result["text"]
-
-        elif WHISPER_TYPE == "transformers":
-            # Use Transformers Whisper
-            from transformers import pipeline
-            transcriber = pipeline(
-                "automatic-speech-recognition",
-                model="openai/whisper-base",
-                device=-1  # Use CPU for better compatibility
-            )
-            result = transcriber(file_path, return_timestamps=False)
-            return result["text"]
-
-        else:
-            raise Exception("No compatible Whisper installation found")
-
     except Exception as e:
         raise Exception(f"Failed to transcribe audio: {str(e)}")

284 |
-
def
|
285 |
-
"""
|
286 |
try:
|
287 |
stock_info = []
|
288 |
|
289 |
-
#
|
290 |
-
stock_patterns =
|
291 |
-
|
292 |
-
|
293 |
-
|
294 |
-
|
295 |
-
|
296 |
-
'market_terms': r'\b(?:earnings|revenue|profit|loss|growth|dividend|yield|PE ratio|market cap|volume)\b',
|
297 |
-
}
|
298 |
-
|
299 |
-
# Extract information
|
300 |
-
symbols = re.findall(stock_patterns['symbols'], text, re.IGNORECASE)
|
301 |
-
prices = re.findall(stock_patterns['prices'], text)
|
302 |
-
percentages = re.findall(stock_patterns['percentages'], text)
|
303 |
-
actions = re.findall(stock_patterns['actions'], text, re.IGNORECASE)
|
304 |
-
companies = re.findall(stock_patterns['companies'], text)
|
305 |
-
market_terms = re.findall(stock_patterns['market_terms'], text, re.IGNORECASE)
|
306 |
|
307 |
-
#
|
308 |
-
|
|
|
|
|
|
|
309 |
|
310 |
-
|
311 |
-
|
312 |
|
313 |
if companies:
|
314 |
-
|
315 |
-
if filtered_companies:
|
316 |
-
result += f"π’ **Companies Mentioned**: {', '.join(filtered_companies[:10])}\n\n"
|
317 |
|
318 |
-
if
|
319 |
-
result += f"
|
320 |
|
321 |
-
if
|
322 |
-
result += f"
|
323 |
|
324 |
if actions:
|
325 |
-
result += f"
|
326 |
|
327 |
-
|
328 |
-
result += f"π **Market Terms**: {', '.join(set(market_terms[:10]))}\n\n"
|
329 |
-
|
330 |
-
# Look for recommendation sentences
|
331 |
-
sentences = [s.strip() for s in text.split('.') if s.strip()]
|
332 |
recommendations = []
|
333 |
-
|
334 |
for sentence in sentences:
|
335 |
-
|
336 |
-
|
337 |
-
|
338 |
-
recommendations.append(sentence)
|
339 |
|
340 |
if recommendations:
|
341 |
-
result += "π―
|
342 |
-
for
|
343 |
-
result += f"
|
344 |
-
result += "\n"
|
345 |
|
346 |
-
|
347 |
-
|
348 |
-
result += "
|
349 |
-
result += "
|
350 |
-
result += "β’
|
351 |
-
result += "β’
|
352 |
-
result += "β’ Content is not in English\n"
|
353 |
-
result += "β’ General market discussion without specific recommendations\n"
|
354 |
-
else:
|
355 |
-
result += "β
**Analysis Complete** - Please verify all information independently!\n"
|
356 |
-
|
357 |
-
result += "\n" + "="*50 + "\n"
|
358 |
-
result += "β οΈ **DISCLAIMER**: This is automated extraction for educational purposes only.\n"
|
359 |
-
result += "Always conduct your own research before making investment decisions!\n"
|
360 |
-
result += "="*50
|
361 |
|
362 |
return result
|
363 |
|
364 |
except Exception as e:
|
365 |
-
return f"
|
366 |
|
367 |
def cleanup_file(file_path):
|
368 |
"""Clean up temporary files"""
|
@@ -377,487 +195,188 @@ def cleanup_file(file_path):
|
|
377 |
except:
|
378 |
pass
|
379 |
|
380 |
-
def
|
381 |
-
"""
|
382 |
-
|
383 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
384 |
|
|
|
385 |
try:
|
386 |
-
|
387 |
-
|
388 |
-
|
389 |
-
|
390 |
-
|
391 |
-
|
392 |
-
# Validate cookies file
|
393 |
-
with open(temp_cookies_path, 'r', encoding='utf-8') as f:
|
394 |
-
content = f.read()
|
395 |
-
if 'youtube.com' not in content.lower():
|
396 |
-
print("β οΈ Warning: cookies file might not contain YouTube cookies")
|
397 |
-
|
398 |
-
print(f"β
Cookies file processed: {temp_cookies_path}")
|
399 |
-
return temp_cookies_path
|
400 |
except Exception as e:
|
401 |
-
|
402 |
-
return None
|
403 |
-
|
-def validate_youtube_url(url):
-    """Validate YouTube URL format"""
-    if not url or not url.strip():
-        return False, "Please provide a YouTube URL"
-
-    url = url.strip()
-    youtube_patterns = [
-        r'(?:https?://)?(?:www\.)?youtube\.com/watch\?v=[\w-]+',
-        r'(?:https?://)?(?:www\.)?youtu\.be/[\w-]+',
-        r'(?:https?://)?(?:www\.)?youtube\.com/embed/[\w-]+',
-        r'(?:https?://)?(?:m\.)?youtube\.com/watch\?v=[\w-]+',
-    ]

417 |
-
|
418 |
-
|
419 |
-
|
|
|
|
|
|
|
420 |
|
421 |
-
return
|
422 |
|
423 |
-
def process_video(url,
|
424 |
-
"""Main function to process YouTube video
|
425 |
-
|
426 |
-
# Detailed debugging info
|
427 |
-
debug_info = []
|
428 |
-
debug_info.append(f"π Starting process at {time.strftime('%H:%M:%S')}")
|
429 |
-
debug_info.append(f"π‘ Python version: {sys.version.split()[0]}")
|
430 |
-
debug_info.append(f"π¦ yt-dlp available: {YT_DLP_AVAILABLE}")
|
431 |
-
debug_info.append(f"ποΈ Whisper available: {WHISPER_AVAILABLE} (type: {WHISPER_TYPE})")
|
432 |
|
433 |
# Check if required packages are available
|
434 |
if not YT_DLP_AVAILABLE:
|
435 |
-
|
436 |
-
error_msg += "SOLUTION: Install yt-dlp using:\n"
|
437 |
-
error_msg += "pip install yt-dlp\n\n"
|
438 |
-
error_msg += "DEBUG INFO:\n" + "\n".join(debug_info)
|
439 |
-
return error_msg, "", "β Missing yt-dlp"
|
440 |
|
441 |
if not WHISPER_AVAILABLE:
|
442 |
-
|
443 |
-
error_msg += "SOLUTION: Install Whisper using:\n"
|
444 |
-
error_msg += "pip install openai-whisper\n"
|
445 |
-
error_msg += "OR\n"
|
446 |
-
error_msg += "pip install transformers torch torchaudio\n\n"
|
447 |
-
error_msg += "DEBUG INFO:\n" + "\n".join(debug_info)
|
448 |
-
return error_msg, "", "β Missing Whisper"
|
449 |
|
450 |
-
|
451 |
-
|
452 |
-
if not is_valid:
|
453 |
-
error_msg = f"β ERROR: {validation_msg}\n\n"
|
454 |
-
error_msg += f"PROVIDED URL: {url}\n\n"
|
455 |
-
error_msg += "VALID URL FORMATS:\n"
|
456 |
-
error_msg += "οΏ½οΏ½ https://www.youtube.com/watch?v=VIDEO_ID\n"
|
457 |
-
error_msg += "β’ https://youtu.be/VIDEO_ID\n"
|
458 |
-
error_msg += "β’ https://www.youtube.com/embed/VIDEO_ID\n\n"
|
459 |
-
error_msg += "DEBUG INFO:\n" + "\n".join(debug_info)
|
460 |
-
return error_msg, "", "β Invalid URL"
|
461 |
|
462 |
audio_path = None
|
463 |
-
cookies_temp_path = None
|
464 |
-
|
465 |
try:
|
466 |
-
|
467 |
-
|
468 |
-
|
469 |
-
# Process cookies file if provided
|
470 |
-
progress(0.1, desc="πͺ Processing cookies...")
|
471 |
-
cookies_temp_path = process_cookies_file(cookies_file)
|
472 |
-
|
473 |
-
if cookies_temp_path:
|
474 |
-
debug_info.append(f"β
Cookies processed: {cookies_temp_path}")
|
475 |
-
else:
|
476 |
-
debug_info.append("β οΈ No cookies provided - this may cause access errors")
|
477 |
-
|
478 |
-
status_msg = "β
Cookies loaded" if cookies_temp_path else "β οΈ No cookies (may encounter restrictions)"
|
479 |
-
|
480 |
-
# First, try to get video info for debugging
|
481 |
-
progress(0.15, desc="π Checking video accessibility...")
|
482 |
-
try:
|
483 |
-
video_info = get_video_info(url, cookies_temp_path)
|
484 |
-
if 'error' in video_info:
|
485 |
-
debug_info.append(f"β Video info error: {video_info['error']}")
|
486 |
-
raise Exception(f"Video accessibility check failed: {video_info['error']}")
|
487 |
-
else:
|
488 |
-
debug_info.append(f"β
Video info: {video_info}")
|
489 |
-
except Exception as e:
|
490 |
-
debug_info.append(f"β Video info check failed: {str(e)}")
|
491 |
-
# Continue anyway, but log the issue
|
492 |
|
493 |
# Download audio
|
494 |
-
progress(0.
|
495 |
-
|
496 |
-
audio_path = download_audio(url, cookies_temp_path)
|
497 |
-
debug_info.append(f"β
Audio downloaded: {audio_path}")
|
498 |
-
|
499 |
-
# Check if audio file exists and get size
|
500 |
-
if audio_path and os.path.exists(audio_path):
|
501 |
-
file_size = os.path.getsize(audio_path)
|
502 |
-
debug_info.append(f"π Audio file size: {file_size/1024/1024:.2f} MB")
|
503 |
-
else:
|
504 |
-
raise Exception("Audio file not found after download")
|
505 |
|
506 |
# Transcribe audio
|
507 |
-
progress(0.
|
508 |
-
debug_info.append("π Starting transcription...")
|
509 |
transcript = transcribe_audio(audio_path)
|
510 |
-
debug_info.append(f"β
Transcription completed: {len(transcript)} characters")
|
511 |
|
512 |
if not transcript.strip():
|
513 |
-
|
514 |
-
error_msg += "POSSIBLE CAUSES:\n"
|
515 |
-
error_msg += "β’ Video has no audio track\n"
|
516 |
-
error_msg += "β’ Audio is too quiet or unclear\n"
|
517 |
-
error_msg += "β’ Video is not in English\n"
|
518 |
-
error_msg += "β’ Audio file is corrupted\n\n"
|
519 |
-
error_msg += "DEBUG INFO:\n" + "\n".join(debug_info)
|
520 |
-
return error_msg, "", "β No speech detected"
|
521 |
|
522 |
# Extract stock information
|
523 |
-
progress(0.
|
524 |
-
|
525 |
-
stock_details = extract_stock_info_enhanced(transcript)
|
526 |
-
debug_info.append("β
Stock analysis completed")
|
527 |
-
|
528 |
-
progress(1.0, desc="β
Complete!")
|
529 |
|
530 |
-
|
531 |
-
|
532 |
-
debug_section += "π DEBUG INFORMATION\n"
|
533 |
-
debug_section += "="*50 + "\n"
|
534 |
-
debug_section += "\n".join(debug_info)
|
535 |
-
|
536 |
-
return transcript + debug_section, stock_details, "β
Processing completed successfully"
|
537 |
|
538 |
except Exception as e:
|
539 |
-
error_msg = f"
|
540 |
-
error_msg
|
541 |
-
error_msg += f"ERROR TYPE: {type(e).__name__}\n\n"
|
542 |
-
|
543 |
-
# Add context based on where the error occurred
|
544 |
-
if "download" in str(e).lower():
|
545 |
-
error_msg += "π§ DOWNLOAD TROUBLESHOOTING:\n"
|
546 |
-
error_msg += "β’ Check if video URL is accessible in browser\n"
|
547 |
-
error_msg += "β’ Upload fresh cookies.txt file\n"
|
548 |
-
error_msg += "β’ Try a different video\n"
|
549 |
-
error_msg += "β’ Wait 10-15 minutes if rate limited\n\n"
|
550 |
-
elif "transcribe" in str(e).lower():
|
551 |
-
error_msg += "π§ TRANSCRIPTION TROUBLESHOOTING:\n"
|
552 |
-
error_msg += "β’ Check if audio file was downloaded properly\n"
|
553 |
-
error_msg += "β’ Ensure video has clear audio\n"
|
554 |
-
error_msg += "β’ Try a shorter video\n\n"
|
555 |
-
|
556 |
-
error_msg += "π PROCESSING STEPS COMPLETED:\n"
|
557 |
-
error_msg += "\n".join(debug_info)
|
558 |
-
|
559 |
-
return error_msg, "", f"β Error: {type(e).__name__}"
|
560 |
|
561 |
finally:
|
562 |
# Clean up temporary files
|
563 |
-
|
564 |
-
debug_info.append(f"ποΈ Cleaning up: {audio_path}")
|
565 |
-
cleanup_file(audio_path)
|
566 |
-
if cookies_temp_path:
|
567 |
-
debug_info.append(f"ποΈ Cleaning up: {cookies_temp_path}")
|
568 |
-
cleanup_file(cookies_temp_path)
|
569 |
|
570 |
-
# Create Gradio interface
|
571 |
with gr.Blocks(
|
572 |
-
title="
|
573 |
theme=gr.themes.Soft(),
|
574 |
css="""
|
575 |
.gradio-container {
|
576 |
max-width: 1200px;
|
577 |
margin: auto;
|
578 |
-
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
|
579 |
-
}
|
580 |
-
.status-box {
|
581 |
-
padding: 12px;
|
582 |
-
border-radius: 8px;
|
583 |
-
margin: 10px 0;
|
584 |
-
border: 1px solid #ddd;
|
585 |
-
}
|
586 |
-
.warning-box {
|
587 |
-
background-color: #fff3cd;
|
588 |
-
border-color: #ffeaa7;
|
589 |
-
color: #856404;
|
590 |
-
}
|
591 |
-
.success-box {
|
592 |
-
background-color: #d4edda;
|
593 |
-
border-color: #c3e6cb;
|
594 |
-
color: #155724;
|
595 |
-
}
|
596 |
-
.error-box {
|
597 |
-
background-color: #f8d7da;
|
598 |
-
border-color: #f5c6cb;
|
599 |
-
color: #721c24;
|
600 |
}
|
601 |
"""
|
602 |
) as demo:
|
603 |
|
604 |
gr.Markdown("""
|
605 |
-
# π
|
606 |
|
607 |
-
|
608 |
|
609 |
-
|
610 |
-
1.
|
611 |
-
2.
|
612 |
-
3.
|
613 |
-
4. **Extracts** stock symbols, prices, and recommendations
|
614 |
|
615 |
-
|
616 |
""")
|
617 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
618 |
with gr.Row():
|
619 |
with gr.Column(scale=1):
|
620 |
-
|
621 |
-
|
622 |
-
|
623 |
-
|
624 |
-
|
625 |
-
variant="secondary",
|
626 |
-
size="sm"
|
627 |
-
)
|
628 |
-
|
629 |
-
requirements_output = gr.Textbox(
|
630 |
-
label="π System Requirements Status",
|
631 |
-
lines=8,
|
632 |
-
max_lines=15,
|
633 |
-
interactive=False,
|
634 |
-
visible=False
|
635 |
-
)
|
636 |
|
637 |
-
|
638 |
-
|
639 |
-
|
640 |
-
|
641 |
-
|
642 |
-
|
643 |
-
|
644 |
-
|
645 |
-
|
646 |
-
|
647 |
-
|
648 |
-
|
649 |
-
|
650 |
-
lines=5,
|
651 |
-
visible=False,
|
652 |
-
interactive=False
|
653 |
-
)
|
654 |
-
|
655 |
-
# Cookies upload with better instructions
|
656 |
-
cookies_input = gr.File(
|
657 |
-
label="πͺ Upload Cookies File (cookies.txt) - HIGHLY RECOMMENDED",
|
658 |
-
file_types=[".txt"],
|
659 |
-
file_count="single"
|
660 |
-
)
|
661 |
-
|
662 |
-
with gr.Accordion("π How to Get Cookies (Click to expand)", open=False):
|
663 |
-
gr.Markdown("""
|
664 |
-
**Why cookies are needed:** YouTube blocks most automated requests without proper authentication.
|
665 |
-
|
666 |
-
**Step-by-step instructions:**
|
667 |
-
1. **Install browser extension:**
|
668 |
-
- Chrome: "Get cookies.txt LOCALLY" or "cookies.txt"
|
669 |
-
- Firefox: "cookies.txt" or "Export Cookies"
|
670 |
-
|
671 |
-
2. **Get cookies:**
|
672 |
-
- Visit YouTube.com (log in if needed)
|
673 |
-
- Click the extension icon
|
674 |
-
- Select "Export for youtube.com"
|
675 |
-
- Download the cookies.txt file
|
676 |
-
|
677 |
-
3. **Upload here:** Use the file upload above
|
678 |
-
|
679 |
-
**β οΈ Without cookies, you'll get "403 Forbidden" or "Video unavailable" errors**
|
680 |
-
""")
|
681 |
-
|
682 |
-
url_input = gr.Textbox(
|
683 |
-
label="πΊ YouTube Video URL",
|
684 |
-
placeholder="https://www.youtube.com/watch?v=VIDEO_ID",
|
685 |
-
lines=2,
|
686 |
-
info="Paste the full YouTube video URL here"
|
687 |
-
)
|
688 |
-
|
689 |
-
process_btn = gr.Button(
|
690 |
-
"π Extract Stock Information",
|
691 |
-
variant="primary",
|
692 |
-
size="lg"
|
693 |
-
)
|
694 |
-
|
695 |
-
# Status display
|
696 |
-
status_output = gr.Textbox(
|
697 |
-
label="π Status",
|
698 |
-
lines=3,
|
699 |
-
interactive=False,
|
700 |
-
info="Current processing status"
|
701 |
-
)
|
702 |
|
703 |
-
# Output section
|
704 |
with gr.Row():
|
705 |
with gr.Column():
|
706 |
transcript_output = gr.Textbox(
|
707 |
label="π Full Transcript",
|
708 |
-
lines=
|
709 |
-
max_lines=
|
710 |
-
show_copy_button=True
|
711 |
-
info="Complete transcription of the video audio"
|
712 |
)
|
713 |
|
714 |
with gr.Column():
|
715 |
stock_info_output = gr.Textbox(
|
716 |
label="π Extracted Stock Information",
|
717 |
-
lines=
|
718 |
-
max_lines=
|
719 |
-
show_copy_button=True
|
720 |
-
info="Parsed stock symbols, prices, and recommendations"
|
721 |
)
|
722 |
|
723 |
-
# Example and troubleshooting section
|
724 |
-
with gr.Row():
|
725 |
-
with gr.Column():
|
726 |
-
gr.Markdown("### π Example URLs")
|
727 |
-
gr.Examples(
|
728 |
-
examples=[
|
729 |
-
["https://www.youtube.com/watch?v=dQw4w9WgXcQ"],
|
730 |
-
["https://youtu.be/dQw4w9WgXcQ"],
|
731 |
-
],
|
732 |
-
inputs=[url_input],
|
733 |
-
label="Click to try example URLs (replace with actual financial videos)"
|
734 |
-
)
|
735 |
-
|
736 |
-
# Troubleshooting section
|
737 |
-
with gr.Accordion("π§ Troubleshooting Guide", open=False):
|
738 |
-
gr.Markdown("""
|
739 |
-
### Common Issues and Solutions:
|
740 |
-
|
741 |
-
**β "Video unavailable" or "Content isn't available":**
|
742 |
-
- Video might be private, deleted, or geo-blocked
|
743 |
-
- Try a different public financial video
|
744 |
-
- Verify the URL works in your browser
|
745 |
-
- Check if video requires age verification
|
746 |
-
|
747 |
-
**β "403 Forbidden" error:**
|
748 |
-
- **Upload fresh cookies.txt file** (most common fix)
|
749 |
-
- Make sure cookies are from a logged-in YouTube account
|
750 |
-
- Try waiting 10-15 minutes (rate limiting)
|
751 |
-
|
752 |
-
**β "No speech detected":**
|
753 |
-
- Video might not have clear audio
|
754 |
-
- Try videos with clear narration
|
755 |
-
- Check if video is in English
|
756 |
-
|
757 |
-
**β "No stock information found":**
|
758 |
-
- Video might not contain financial content
|
759 |
-
- Try videos from financial YouTube channels
|
760 |
-
- Look for videos with stock analysis or recommendations
|
761 |
-
|
762 |
-
### Installation Commands:
|
763 |
-
```bash
|
764 |
-
# Install all requirements
|
765 |
-
pip install gradio yt-dlp openai-whisper torch torchaudio
|
766 |
-
|
767 |
-
# Alternative whisper installation
|
768 |
-
pip install transformers torch torchaudio
|
769 |
-
```
|
770 |
-
|
771 |
-
### Best Practices:
|
772 |
-
- Use videos from reputable financial channels
|
773 |
-
- Prefer videos under 20 minutes for faster processing
|
774 |
-
- Ensure clear audio quality
|
775 |
-
- Always verify extracted information independently
|
776 |
-
""")
|
777 |
-
|
778 |
# Event handlers
|
779 |
-
def show_requirements():
|
780 |
-
status = check_requirements()
|
781 |
-
return gr.update(value=status, visible=True)
|
782 |
-
|
783 |
-
def test_system():
|
784 |
-
"""Test system components and return detailed status"""
|
785 |
-
test_results = []
|
786 |
-
test_results.append("π§ͺ SYSTEM TEST RESULTS")
|
787 |
-
test_results.append("="*30)
|
788 |
-
|
789 |
-
# Test imports
|
790 |
-
test_results.append(f"β
yt-dlp: {'Available' if YT_DLP_AVAILABLE else 'NOT AVAILABLE'}")
|
791 |
-
test_results.append(f"β
Whisper: {'Available' if WHISPER_AVAILABLE else 'NOT AVAILABLE'} (Type: {WHISPER_TYPE})")
|
792 |
-
|
793 |
-
# Test yt-dlp functionality
|
794 |
-
if YT_DLP_AVAILABLE:
|
795 |
-
try:
|
796 |
-
from yt_dlp import YoutubeDL
|
797 |
-
test_ydl = YoutubeDL({'quiet': True})
|
798 |
-
test_results.append("β
yt-dlp: Can create YoutubeDL instance")
|
799 |
-
except Exception as e:
|
800 |
-
test_results.append(f"β yt-dlp: Error creating instance - {str(e)}")
|
801 |
-
|
802 |
-
# Test Whisper functionality
|
803 |
-
if WHISPER_AVAILABLE:
|
804 |
-
try:
|
805 |
-
if WHISPER_TYPE == "openai-whisper":
|
806 |
-
import whisper
|
807 |
-
test_results.append("β
Whisper: OpenAI Whisper can be imported")
|
808 |
-
elif WHISPER_TYPE == "transformers":
|
809 |
-
from transformers import pipeline
|
810 |
-
test_results.append("β
Whisper: Transformers Whisper can be imported")
|
811 |
-
except Exception as e:
|
812 |
-
test_results.append(f"β Whisper: Error testing - {str(e)}")
|
813 |
-
|
814 |
-
# Test file operations
|
815 |
-
try:
|
816 |
-
temp_file = tempfile.mktemp()
|
817 |
-
with open(temp_file, 'w') as f:
|
818 |
-
f.write("test")
|
819 |
-
os.remove(temp_file)
|
820 |
-
test_results.append("β
File operations: Working")
|
821 |
-
except Exception as e:
|
822 |
-
test_results.append(f"β File operations: Error - {str(e)}")
|
823 |
-
|
824 |
-
test_results.append("\nπ‘ If you see any β errors above, install missing packages:")
|
825 |
-
test_results.append("pip install yt-dlp openai-whisper torch torchaudio")
|
826 |
-
|
827 |
-
return gr.update(value="\n".join(test_results), visible=True)
|
828 |
-
|
829 |
-
check_req_btn.click(
|
830 |
-
fn=show_requirements,
|
831 |
-
outputs=[requirements_output]
|
832 |
-
)
|
833 |
-
|
834 |
-
test_btn.click(
|
835 |
-
fn=test_system,
|
836 |
-
outputs=[test_output]
|
837 |
-
)
|
838 |
-
|
839 |
process_btn.click(
|
840 |
fn=process_video,
|
841 |
-
inputs=[url_input
|
842 |
-
outputs=[transcript_output, stock_info_output
|
843 |
show_progress=True
|
844 |
)
|
845 |
|
846 |
-
#
|
847 |
-
gr.Markdown(""
|
848 |
-
|
849 |
-
|
850 |
-
|
851 |
-
|
852 |
-
|
|
|
|
|
853 |
|
-# Launch configuration for Gradio Cloud
 if __name__ == "__main__":
-    demo.launch(
-        server_name="0.0.0.0",
-        server_port=7860,
-        share=False,
-        debug=False,
-        show_error=True,
-        quiet=False
-    )
 import gradio as gr
 import re
 import sys

 # Try to import required packages with error handling
 try:

     YT_DLP_AVAILABLE = False
     print(f"yt-dlp import error: {e}")

 try:
     import whisper
     WHISPER_AVAILABLE = True
 except ImportError as e:
+    WHISPER_AVAILABLE = False
+    print(f"whisper import error: {e}")

 print(f"Python version: {sys.version}")
 print(f"yt-dlp available: {YT_DLP_AVAILABLE}")
+print(f"whisper available: {WHISPER_AVAILABLE}")

+def get_cookies_path():
+    """Get the path to cookies.txt file"""
+    # Check if cookies.txt exists in the current directory
+    if os.path.exists('cookies.txt'):
+        return 'cookies.txt'
+    # Check in the same directory as the script
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+    cookies_path = os.path.join(script_dir, 'cookies.txt')
+    if os.path.exists(cookies_path):
+        return cookies_path
+    return None

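Note that `get_cookies_path()` only checks that a `cookies.txt` file exists, not that yt-dlp can actually read it; the `cookiefile` option expects a Netscape-format cookie export. As a hedged, standalone sketch (not part of app.py; the helper name is made up), the standard-library `MozillaCookieJar` can sanity-check an export before relying on it:

```python
# Hypothetical helper, not part of app.py: check that cookies.txt parses as a
# Netscape-format cookie jar and contains at least one youtube.com cookie.
from http.cookiejar import LoadError, MozillaCookieJar

def cookies_file_looks_valid(path):
    jar = MozillaCookieJar()
    try:
        jar.load(path, ignore_discard=True, ignore_expires=True)
    except (LoadError, OSError):
        return False
    return any("youtube.com" in cookie.domain for cookie in jar)
```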
|
+def download_audio(url):
+    """Download audio from YouTube URL and return the file path"""
     if not YT_DLP_AVAILABLE:
         raise Exception("yt-dlp is not available. Please check the installation.")

     try:
         # Create a temporary directory for downloads
         temp_dir = tempfile.mkdtemp()
         output_path = os.path.join(temp_dir, "audio")

+        # Get cookies path
+        cookies_path = get_cookies_path()
+
+        # Base yt-dlp options
         ydl_opts = {
+            'format': 'bestaudio[ext=m4a]/bestaudio/best',
             'outtmpl': output_path + '.%(ext)s',
+            'quiet': True,
+            'no_warnings': True,
             'extract_flat': False,
             'ignoreerrors': False,
+            # Add user agent to avoid bot detection
+            'http_headers': {
+                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
+            },
+            # Add additional options to avoid bot detection
+            'extractor_retries': 3,
+            'fragment_retries': 3,
+            'retry_sleep_functions': {'http': lambda n: 2 ** n},
         }

+        # Add cookies if available
+        if cookies_path:
+            ydl_opts['cookiefile'] = cookies_path
+            print(f"Using cookies from: {cookies_path}")
         else:
+            print("No cookies.txt found - proceeding without cookies")

         with YoutubeDL(ydl_opts) as ydl:
+            # Extract info first to check if video is available
+            info_dict = ydl.extract_info(url, download=False)
+
+            # Check if video is available
+            if info_dict.get('availability') == 'private':
+                raise Exception("Video is private")
+            elif info_dict.get('availability') == 'premium_only':
+                raise Exception("Video requires premium subscription")
+            elif info_dict.get('live_status') == 'is_live':
+                raise Exception("Cannot download live streams")
+
+            # Download the audio
+            ydl.download([url])

         # Find the downloaded file
+        for ext in ['.m4a', '.webm', '.mp4', '.mp3']:
             potential_file = output_path + ext
             if os.path.exists(potential_file):
+                print(f"Successfully downloaded: {potential_file}")
                 return potential_file

+        raise FileNotFoundError(f"Downloaded audio file not found")

     except Exception as e:
+        error_msg = str(e)
+        if "Sign in to confirm your age" in error_msg:
+            raise Exception("Video is age-restricted. Please use a different video or update your cookies.")
+        elif "Private video" in error_msg:
+            raise Exception("Video is private and cannot be accessed.")
+        elif "This video is unavailable" in error_msg:
+            raise Exception("Video is unavailable or has been removed.")
+        elif "blocked" in error_msg.lower():
+            raise Exception("Access to this video is blocked. Try using updated cookies or a different video.")
         else:
+            raise Exception(f"Failed to download audio: {error_msg}")

 def transcribe_audio(file_path):
     """Transcribe audio file using Whisper"""
     if not WHISPER_AVAILABLE:
+        raise Exception("OpenAI Whisper is not available. Please check the installation.")

     try:
+        # Use the smallest model to reduce memory usage
+        model = whisper.load_model("tiny")
+        result = model.transcribe(file_path)
+        return result["text"]
     except Exception as e:
         raise Exception(f"Failed to transcribe audio: {str(e)}")

+def extract_stock_info_simple(text):
+    """Extract stock information using simple pattern matching"""
     try:
         stock_info = []

+        # Simple patterns to look for stock-related information
+        stock_patterns = [
+            r'\b[A-Z]{1,5}\b(?:\s+stock|\s+shares|\s+symbol)',  # Stock symbols
+            r'(?:buy|sell|target|price)\s+[A-Z]{1,5}',
+            r'\$\d+(?:\.\d{2})?',  # Dollar amounts
+            r'\b(?:bullish|bearish|buy|sell|hold)\b',
+        ]

+        # Look for company names and stock mentions
+        companies = re.findall(r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*(?:\s+(?:Inc|Corp|Company|Ltd)\.?)?', text)
+        symbols = re.findall(r'\b[A-Z]{2,5}\b', text)
+        prices = re.findall(r'\$\d+(?:\.\d{2})?', text)
+        actions = re.findall(r'\b(?:buy|sell|hold|bullish|bearish|target|stop\s+loss)\b', text, re.IGNORECASE)

+        # Format the extracted information
+        result = "=== EXTRACTED STOCK INFORMATION ===\n\n"

         if companies:
+            result += f"🏢 Mentioned Companies: {', '.join(set(companies[:10]))}\n\n"

+        if symbols:
+            result += f"🔤 Potential Stock Symbols: {', '.join(set(symbols[:10]))}\n\n"

+        if prices:
+            result += f"💰 Price Mentions: {', '.join(set(prices[:10]))}\n\n"

         if actions:
+            result += f"📊 Trading Actions: {', '.join(set(actions[:10]))}\n\n"

+        # Look for specific recommendation patterns
         recommendations = []
+        sentences = text.split('.')
         for sentence in sentences:
+            if any(word in sentence.lower() for word in ['buy', 'sell', 'target', 'recommendation']):
+                if any(symbol in sentence for symbol in symbols[:5]):
+                    recommendations.append(sentence.strip())

         if recommendations:
+            result += "🎯 Potential Recommendations:\n"
+            for rec in recommendations[:5]:
+                result += f"• {rec}\n"

+        if not any([companies, symbols, prices, actions]):
+            result += "⚠️ No clear stock recommendations found in the transcript.\n"
+            result += "This might be because:\n"
+            result += "• The video doesn't contain stock recommendations\n"
+            result += "• The audio quality was poor\n"
+            result += "• The content is not in English\n"

         return result

     except Exception as e:
+        return f"Error extracting stock info: {str(e)}"

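As a quick illustration of what `extract_stock_info_simple` picks up, the snippet below runs the same patterns over a made-up sentence; note that the bare `\b[A-Z]{2,5}\b` symbol pattern also matches ordinary all-caps words such as "CEO", so the output is intentionally noisy (the sample text and the matches in the comments are illustrative, not taken from the app):

```python
import re

sample = "I would buy AAPL at $150.25 and hold MSFT, but the CEO sounded bearish."

symbols = re.findall(r'\b[A-Z]{2,5}\b', sample)    # ['AAPL', 'MSFT', 'CEO']
prices = re.findall(r'\$\d+(?:\.\d{2})?', sample)  # ['$150.25']
actions = re.findall(r'\b(?:buy|sell|hold|bullish|bearish|target|stop\s+loss)\b',
                     sample, re.IGNORECASE)        # ['buy', 'hold', 'bearish']

print(symbols, prices, actions)
```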
|
 def cleanup_file(file_path):
     """Clean up temporary files"""

     except:
         pass

+def system_test():
+    """Test system components"""
+    results = []
+
+    # Test yt-dlp
+    if YT_DLP_AVAILABLE:
+        results.append("✅ yt-dlp: Available")
+        try:
+            ydl = YoutubeDL({'quiet': True})
+            results.append("✅ yt-dlp: Can create YoutubeDL instance")
+        except Exception as e:
+            results.append(f"❌ yt-dlp: Cannot create instance - {e}")
+    else:
+        results.append("❌ yt-dlp: Not available")
+
+    # Test Whisper
+    if WHISPER_AVAILABLE:
+        results.append("✅ Whisper: Available (Type: openai-whisper)")
+        try:
+            import whisper
+            results.append("✅ Whisper: OpenAI Whisper can be imported")
+        except Exception as e:
+            results.append(f"❌ Whisper: Cannot import - {e}")
+    else:
+        results.append("❌ Whisper: Not available")

+    # Test file operations
     try:
+        temp_file = tempfile.NamedTemporaryFile(delete=False)
+        temp_file.write(b"test")
+        temp_file.close()
+        os.remove(temp_file.name)
+        results.append("✅ File operations: Working")
     except Exception as e:
+        results.append(f"❌ File operations: Failed - {e}")

+    # Test cookies
+    cookies_path = get_cookies_path()
+    if cookies_path:
+        results.append(f"✅ Cookies: Found at {cookies_path}")
+    else:
+        results.append("⚠️ Cookies: Not found (may cause bot detection issues)")

+    return "\n".join(results)

+def process_video(url, progress=gr.Progress()):
+    """Main function to process YouTube video"""

     # Check if required packages are available
     if not YT_DLP_AVAILABLE:
+        return "Error: yt-dlp is not installed properly. Please check the requirements.", ""

     if not WHISPER_AVAILABLE:
+        return "Error: OpenAI Whisper is not installed properly. Please check the requirements.", ""

+    if not url or not url.strip():
+        return "Please provide a valid YouTube URL", ""

     audio_path = None
     try:
+        # Validate URL
+        if not any(domain in url.lower() for domain in ['youtube.com', 'youtu.be']):
+            return "Please provide a valid YouTube URL", ""

         # Download audio
+        progress(0.1, desc="Downloading audio...")
+        audio_path = download_audio(url)

         # Transcribe audio
+        progress(0.5, desc="Transcribing audio...")
         transcript = transcribe_audio(audio_path)

         if not transcript.strip():
+            return "No speech detected in the video", ""

         # Extract stock information
+        progress(0.8, desc="Extracting stock information...")
+        stock_details = extract_stock_info_simple(transcript)

+        progress(1.0, desc="Complete!")
+        return transcript, stock_details

     except Exception as e:
+        error_msg = f"Error processing video: {str(e)}"
+        return error_msg, ""

     finally:
         # Clean up temporary files
+        cleanup_file(audio_path)

+# Create Gradio interface
 with gr.Blocks(
+    title="Stock Recommendation Extractor",
     theme=gr.themes.Soft(),
     css="""
     .gradio-container {
         max-width: 1200px;
         margin: auto;
     }
     """
 ) as demo:

     gr.Markdown("""
+    # 📈 Stock Recommendation Extractor from YouTube

+    Extract stock recommendations and trading information from YouTube videos using AI transcription.

+    **How it works:**
+    1. Downloads audio from YouTube video
+    2. Transcribes using OpenAI Whisper
+    3. Extracts stock-related information

+    **⚠️ Disclaimer:** This is for educational purposes only. Always do your own research!
     """)

+    # Add system test section
+    with gr.Accordion("🧪 System Status", open=False):
+        system_status = gr.Textbox(
+            value=system_test(),
+            label="System Test Results",
+            lines=10,
+            interactive=False
+        )
+        test_btn = gr.Button("🔄 Re-run System Test")
+        test_btn.click(fn=system_test, outputs=system_status)
+
     with gr.Row():
         with gr.Column(scale=1):
+            url_input = gr.Textbox(
+                label="📺 YouTube URL",
+                placeholder="https://www.youtube.com/watch?v=...",
+                lines=2
+            )

+            process_btn = gr.Button(
+                "🚀 Extract Stock Information",
+                variant="primary",
+                size="lg"
+            )
+
+            gr.Markdown("""
+            ### 💡 Tips:
+            - Works best with financial YouTube channels
+            - Ensure video has clear audio
+            - English content works best
+            - If you get bot detection errors, try updating cookies.txt
+            """)

     with gr.Row():
         with gr.Column():
             transcript_output = gr.Textbox(
                 label="📝 Full Transcript",
+                lines=15,
+                max_lines=20,
+                show_copy_button=True
             )

         with gr.Column():
             stock_info_output = gr.Textbox(
                 label="📈 Extracted Stock Information",
+                lines=15,
+                max_lines=20,
+                show_copy_button=True
             )

     # Event handlers
     process_btn.click(
         fn=process_video,
+        inputs=[url_input],
+        outputs=[transcript_output, stock_info_output],
         show_progress=True
     )

+    # Example section
+    gr.Markdown("### 📋 Example URLs (Replace with actual financial videos)")
+    gr.Examples(
+        examples=[
+            ["https://www.youtube.com/watch?v=dQw4w9WgXcQ"],
+        ],
+        inputs=[url_input],
+        label="Click to try example"
+    )

 if __name__ == "__main__":
+    demo.launch()
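For debugging outside the Gradio UI, the same pipeline can be exercised directly. This is a minimal sketch assuming app.py is importable as `app` and that the placeholder URL is replaced with a real video:

```python
# Headless run of the pipeline defined above (sketch; the URL is a placeholder).
from app import cleanup_file, download_audio, extract_stock_info_simple, transcribe_audio

url = "https://www.youtube.com/watch?v=VIDEO_ID"
audio_path = None
try:
    audio_path = download_audio(url)              # yt-dlp, uses cookies.txt if present
    transcript = transcribe_audio(audio_path)     # Whisper "tiny" model
    print(extract_stock_info_simple(transcript))  # regex-based summary
finally:
    if audio_path:
        cleanup_file(audio_path)                  # remove the temporary audio file
```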