# Hugging Face Space: developer28/Youtubedownloader (status: Sleeping; file size: 17,785 bytes)

import os
import tempfile
import gradio as gr
import re
import sys
import shutil
import importlib.util

def check_requirements():
    """Build a human-readable report on the app's Python dependencies.

    Every required package contributes one status line: its ``__version__``
    when the module imports cleanly, or a marker describing why it could not
    be used. The interpreter version and executable path are appended last.

    Returns:
        str: All status lines joined with newlines.
    """
    # (pip distribution name, importable module name)
    required = (
        ('gradio', 'gradio'),
        ('yt-dlp', 'yt_dlp'),
        ('openai-whisper', 'whisper'),
        ('torch', 'torch'),
        ('torchaudio', 'torchaudio'),
        ('numpy', 'numpy'),
        ('regex', 'regex'),
    )

    def describe(package_name, import_name):
        """Return the single status line for one package."""
        try:
            if importlib.util.find_spec(import_name) is None:
                return f"❌ {package_name}: Not found"

            # The spec exists; importing may still fail (broken install, etc.)
            module = importlib.import_module(import_name)
            version = getattr(module, '__version__', 'Unknown version')
            return f"✅ {package_name}: {version}"
        except ImportError as e:
            return f"❌ {package_name}: Import error - {str(e)}"
        except Exception as e:
            return f"⚠️  {package_name}: Found but error - {str(e)}"

    report = [describe(dist_name, module_name) for dist_name, module_name in required]

    # Interpreter details help spot "installed into the wrong environment" issues
    report.append(f"\n🐍 Python: {sys.version}")
    report.append(f"📁 Python executable: {sys.executable}")

    return "\n".join(report)

# Optional dependencies are imported defensively so the UI can still start and
# report what is missing instead of crashing at import time.
try:
    from yt_dlp import YoutubeDL
    YT_DLP_AVAILABLE = True
except ImportError as e:
    YT_DLP_AVAILABLE = False
    print(f"yt-dlp import error: {e}")

# Whisper backend selection: prefer openai-whisper, fall back to the
# transformers pipeline. WHISPER_TYPE records which backend was found.
WHISPER_AVAILABLE = False
WHISPER_TYPE = None

try:
    import whisper
    WHISPER_AVAILABLE = True
    WHISPER_TYPE = "openai-whisper"
    print("Using OpenAI Whisper")
except ImportError as e:
    print(f"OpenAI Whisper import error: {e}")
    try:
        from transformers import pipeline
        WHISPER_AVAILABLE = True
        WHISPER_TYPE = "transformers"
        print("Using Transformers Whisper")
    except ImportError as e2:
        print(f"Transformers Whisper import error: {e2}")

print(f"Python version: {sys.version}")
print(f"Python executable: {sys.executable}")
print(f"yt-dlp available: {YT_DLP_AVAILABLE}")
print(f"whisper available: {WHISPER_AVAILABLE} (type: {WHISPER_TYPE})")

# Additional diagnostics: report the installed versions. These are best-effort
# and must never prevent the app from starting, hence the broad handlers; the
# failure is printed rather than silently discarded.
if YT_DLP_AVAILABLE:
    try:
        # Read the real version string instead of the YoutubeDL module path
        from yt_dlp.version import __version__ as _yt_dlp_version
        print(f"yt-dlp version: {_yt_dlp_version}")
    except Exception as e:
        print(f"yt-dlp version unavailable: {e}")

if WHISPER_AVAILABLE and WHISPER_TYPE == "openai-whisper":
    try:
        print(f"whisper version: {whisper.__version__}")
    except Exception as e:
        print(f"whisper version unavailable: {e}")

def download_audio(url, cookies_file_path=None):
    """Download the audio track of a YouTube URL into a fresh temp directory.

    Args:
        url: Video URL handed to yt-dlp.
        cookies_file_path: Optional path to a cookies.txt file. It is passed
            to yt-dlp as ``cookiefile`` only when the path exists.

    Returns:
        str: Path of the downloaded audio file. The file lives alone in its
        own temporary directory; the caller is responsible for removing it.

    Raises:
        Exception: If yt-dlp is unavailable, the download fails, or no output
            file can be located. Messages mentioning 403/Forbidden are
            rewritten to advise uploading cookies. The original error is
            chained as ``__cause__``.
    """
    if not YT_DLP_AVAILABLE:
        raise Exception("yt-dlp is not available. Please check the installation.")

    temp_dir = None
    try:
        # Create a temporary directory for downloads
        temp_dir = tempfile.mkdtemp()
        output_path = os.path.join(temp_dir, "audio")

        # NOTE(review): 'user_agent', 'referer' and (below) 'headers' do not
        # appear to be documented YoutubeDL parameters (the documented key is
        # 'http_headers') - confirm whether they are honoured before relying
        # on them. They are kept unchanged here.
        ydl_opts = {
            'format': 'bestaudio[ext=m4a]/bestaudio/best',
            'outtmpl': output_path + '.%(ext)s',
            'quiet': True,
            'no_warnings': True,
            # Anti-bot detection measures
            'user_agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
            'referer': 'https://www.youtube.com/',
            'extractor_retries': 3,
            'fragment_retries': 3,
            'retry_sleep_functions': {'http': lambda n: 2 ** n},
        }

        # Add cookies file if provided
        if cookies_file_path and os.path.exists(cookies_file_path):
            ydl_opts['cookiefile'] = cookies_file_path
            print(f"Using cookies file: {cookies_file_path}")
        else:
            print("No cookies file provided - may encounter bot detection")
            # Additional headers without cookies
            ydl_opts.update({
                'headers': {
                    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
                    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
                    'Accept-Language': 'en-us,en;q=0.5',
                    'Accept-Encoding': 'gzip,deflate',
                    'DNT': '1',
                    'Connection': 'keep-alive',
                    'Upgrade-Insecure-Requests': '1',
                }
            })

        with YoutubeDL(ydl_opts) as ydl:
            info_dict = ydl.extract_info(url, download=True)
            expected_file = ydl.prepare_filename(info_dict)

        # Prefer the name yt-dlp itself computed from the output template
        if os.path.isfile(expected_file):
            return expected_file

        # Fallback: accept any finished "audio.<ext>" file in the temp dir, so
        # containers outside a fixed extension list (e.g. .opus) are found too
        for entry in sorted(os.listdir(temp_dir)):
            candidate = os.path.join(temp_dir, entry)
            if (
                entry.startswith("audio.")
                and not entry.endswith(('.part', '.ytdl'))
                and os.path.isfile(candidate)
            ):
                return candidate

        raise FileNotFoundError("Downloaded audio file not found")

    except Exception as e:
        # Nothing is returned on failure, so remove the temp dir and any
        # partial download instead of leaking them
        if temp_dir is not None:
            shutil.rmtree(temp_dir, ignore_errors=True)

        message = str(e)
        if "403" in message or "Forbidden" in message:
            raise Exception(f"YouTube blocked the request (403 Forbidden). Please upload your cookies.txt file to bypass bot detection. Original error: {message}") from e
        raise Exception(f"Failed to download audio: {message}") from e

def transcribe_audio(file_path):
    """Transcribe an audio file with whichever Whisper backend was imported.

    Args:
        file_path: Path to the audio file to transcribe.

    Returns:
        str: The transcribed text (the backend result's ``"text"`` field).

    Raises:
        Exception: If no Whisper backend is available or transcription fails.
            The underlying error is chained as ``__cause__``.
    """
    if not WHISPER_AVAILABLE:
        raise Exception("OpenAI Whisper is not available. Please install it using: pip install openai-whisper")

    try:
        if WHISPER_TYPE == "openai-whisper":
            # Use OpenAI Whisper
            model = whisper.load_model("tiny")
            result = model.transcribe(file_path)
            return result["text"]

        if WHISPER_TYPE == "transformers":
            # Use Transformers Whisper
            from transformers import pipeline
            # chunk_length_s makes the pipeline process the audio in 30 s
            # windows so recordings longer than Whisper's 30 s input are
            # transcribed in full.
            transcriber = pipeline(
                "automatic-speech-recognition",
                model="openai/whisper-tiny",
                chunk_length_s=30,
            )
            result = transcriber(file_path)
            return result["text"]

        raise Exception("No compatible Whisper installation found")

    except Exception as e:
        raise Exception(f"Failed to transcribe audio: {str(e)}") from e


def _first_unique(items, limit=10, key=None):
    """Return up to `limit` distinct items from `items` in first-seen order."""
    seen = set()
    unique = []
    for item in items:
        marker = key(item) if key is not None else item
        if marker in seen:
            continue
        seen.add(marker)
        unique.append(item)
        if len(unique) == limit:
            break
    return unique


def extract_stock_info_simple(text):
    """Extract stock-related mentions from a transcript with regex heuristics.

    Looks for capitalised company-like names, 2-5 letter upper-case symbols,
    dollar amounts and trading keywords, then lists sentences that combine a
    buy/sell/target/recommendation keyword with one of the first five symbols.

    Args:
        text: Transcript text to scan.

    Returns:
        str: A formatted report. Each section lists at most ten distinct
        values in the order they first appear, so the output is deterministic.
        If extraction raises, an ``"Error extracting stock info: ..."`` string
        is returned instead of propagating the exception.
    """
    try:
        # Look for company names and stock mentions
        companies = _first_unique(
            re.findall(r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*(?:\s+(?:Inc|Corp|Company|Ltd)\.?)?', text)
        )
        symbols = _first_unique(re.findall(r'\b[A-Z]{2,5}\b', text))
        prices = _first_unique(re.findall(r'\$\d+(?:\.\d{2})?', text))
        # Case-insensitive dedupe so "Buy" and "buy" are listed once
        actions = _first_unique(
            re.findall(r'\b(?:buy|sell|hold|bullish|bearish|target|stop\s+loss)\b', text, re.IGNORECASE),
            key=str.lower,
        )

        # Format the extracted information
        parts = ["=== EXTRACTED STOCK INFORMATION ===\n\n"]

        if companies:
            parts.append(f"📊 Mentioned Companies: {', '.join(companies)}\n\n")

        if symbols:
            parts.append(f"🔤 Potential Stock Symbols: {', '.join(symbols)}\n\n")

        if prices:
            parts.append(f"💰 Price Mentions: {', '.join(prices)}\n\n")

        if actions:
            parts.append(f"📈 Trading Actions: {', '.join(actions)}\n\n")

        # Look for specific recommendation patterns
        keywords = ('buy', 'sell', 'target', 'recommendation')
        focus_symbols = symbols[:5]
        recommendations = []
        # Split on periods that are not decimal points so "$150.50" survives
        for sentence in re.split(r'\.(?!\d)', text):
            lowered = sentence.lower()
            if any(word in lowered for word in keywords) and any(
                symbol in sentence for symbol in focus_symbols
            ):
                recommendations.append(sentence.strip())

        if recommendations:
            parts.append("🎯 Potential Recommendations:\n")
            parts.extend(f"• {rec}\n" for rec in recommendations[:5])

        if not (companies or symbols or prices or actions):
            parts.append("⚠️ No clear stock recommendations found in the transcript.\n")
            parts.append("This might be because:\n")
            parts.append("• The video doesn't contain stock recommendations\n")
            parts.append("• The audio quality was poor\n")
            parts.append("• The content is not in English\n")

        return "".join(parts)

    except Exception as e:
        return f"Error extracting stock info: {str(e)}"

def cleanup_file(file_path):
    """Best-effort removal of a temporary file and its directory.

    Deletes `file_path` and then tries to remove its parent directory, which
    only succeeds when that directory is left empty. Filesystem errors
    (missing file, non-empty directory, permissions) are ignored because
    cleanup must never mask the result of the request being served.

    Args:
        file_path: Path of the file to delete; ``None`` or empty is a no-op.
    """
    if not file_path:
        return

    try:
        os.remove(file_path)
    except (OSError, TypeError, ValueError):
        # Nothing was removed (missing, not a file, invalid path): stop here
        return

    # Also try to remove the directory if it's empty
    try:
        os.rmdir(os.path.dirname(file_path))
    except OSError:
        pass

def process_cookies_file(cookies_file):
    """Copy an uploaded cookies file to a private temporary file.

    Args:
        cookies_file: The uploaded file, or ``None`` when nothing was
            uploaded. A path (str/bytes/PathLike) is used directly; any other
            object is assumed to expose the path via a ``.name`` attribute
            (presumably what some Gradio versions pass - verify against the
            installed version).

    Returns:
        str | None: Path of the temporary copy, or ``None`` when no file was
        supplied or the copy failed (the error is printed).
    """
    if cookies_file is None:
        return None

    if isinstance(cookies_file, (str, bytes, os.PathLike)):
        source_path = cookies_file
    else:
        source_path = getattr(cookies_file, "name", cookies_file)

    temp_cookies_path = None
    try:
        # mkstemp creates the file atomically, avoiding the name race of the
        # deprecated tempfile.mktemp
        fd, temp_cookies_path = tempfile.mkstemp(suffix='.txt')
        os.close(fd)

        # copyfile copies contents only, so the copy keeps the permissions
        # mkstemp gave it rather than inheriting the source file's mode
        shutil.copyfile(source_path, temp_cookies_path)

        return temp_cookies_path
    except Exception as e:
        print(f"Error processing cookies file: {e}")
        # Do not leave an empty/partial temp file behind on failure
        if temp_cookies_path is not None:
            try:
                os.remove(temp_cookies_path)
            except OSError:
                pass
        return None

def _is_youtube_url(url):
    """Return True when the URL's host is youtube.com, youtu.be, or a subdomain of either."""
    from urllib.parse import urlparse

    candidate = url.strip()
    if '://' not in candidate:
        # Allow scheme-less input such as "youtube.com/watch?v=..."
        candidate = 'https://' + candidate.lstrip('/')
    try:
        host = (urlparse(candidate).hostname or '').lower()
    except ValueError:
        return False
    return any(
        host == domain or host.endswith('.' + domain)
        for domain in ('youtube.com', 'youtu.be')
    )


def process_video(url, cookies_file, progress=gr.Progress()):
    """Run the full pipeline for one YouTube URL.

    Validates the input, copies the optional cookies file, downloads the
    audio, transcribes it and extracts stock mentions. Temporary files are
    removed in all cases.

    Args:
        url: YouTube URL entered by the user.
        cookies_file: Uploaded cookies file, or ``None``.
        progress: Gradio progress tracker, called with a fraction and a
            description at each stage.

    Returns:
        tuple[str, str, str]: ``(transcript, stock_details, status)``. On
        failure the first element carries the error message, the second is
        empty and the third is a short status line; exceptions from the
        pipeline are not propagated.
    """

    # Check if required packages are available
    if not YT_DLP_AVAILABLE:
        return "Error: yt-dlp is not installed properly. Please install it using: pip install yt-dlp", "", "❌ Error: Missing yt-dlp"

    if not WHISPER_AVAILABLE:
        return "Error: OpenAI Whisper is not installed properly. Please install it using: pip install openai-whisper", "", "❌ Error: Missing Whisper"

    if not url or not url.strip():
        return "Please provide a valid YouTube URL", "", "❌ Error: Invalid URL"

    # Validate the host itself: a substring test would also accept URLs that
    # merely mention youtube.com in their path or query string
    url = url.strip()
    if not _is_youtube_url(url):
        return "Please provide a valid YouTube URL", "", "❌ Error: Invalid URL"

    audio_path = None
    cookies_temp_path = None

    try:
        # Process cookies file if provided
        progress(0.05, desc="Processing cookies...")
        cookies_temp_path = process_cookies_file(cookies_file)

        # Download audio
        progress(0.2, desc="Downloading audio...")
        audio_path = download_audio(url, cookies_temp_path)

        # Transcribe audio
        progress(0.6, desc="Transcribing audio...")
        transcript = transcribe_audio(audio_path)

        if not transcript.strip():
            return "No speech detected in the video", "", "❌ No speech detected"

        # Extract stock information
        progress(0.9, desc="Extracting stock information...")
        stock_details = extract_stock_info_simple(transcript)

        progress(1.0, desc="Complete!")
        return transcript, stock_details, "✅ Processing completed successfully"

    except Exception as e:
        error_msg = f"Error processing video: {str(e)}"
        return error_msg, "", f"❌ Error: {str(e)}"

    finally:
        # Clean up temporary files (both calls are no-ops for None)
        cleanup_file(audio_path)
        cleanup_file(cookies_temp_path)

# Create Gradio interface.
# Layout: an intro, then one column holding the system check, cookies upload,
# URL box, run button and status line; below it a row with the transcript and
# the extracted stock information side by side; then examples and
# troubleshooting notes. The Blocks object is bound to `demo`.
with gr.Blocks(
    title="Stock Recommendation Extractor",
    theme=gr.themes.Soft(),
    css="""
    .gradio-container {
        max-width: 1400px;
        margin: auto;
    }
    .status-box {
        padding: 10px;
        border-radius: 5px;
        margin: 10px 0;
    }
    """
) as demo:
    
    # Intro text shown at the top of the page
    gr.Markdown("""
    # 📈 Stock Recommendation Extractor from YouTube
    
    Extract stock recommendations and trading information from YouTube videos using AI transcription.
    
    **How it works:**
    1. Upload your cookies.txt file (optional but recommended to avoid bot detection)
    2. Paste YouTube video URL
    3. Downloads audio from YouTube video
    4. Transcribes using OpenAI Whisper
    5. Extracts stock-related information
    
    **⚠️ Disclaimer:** This is for educational purposes only. Always do your own research!
    """)
    
    with gr.Row():
        with gr.Column(scale=1):
            # Requirements check button
            gr.Markdown("### 🔍 System Check")
            check_req_btn = gr.Button(
                "Check Requirements", 
                variant="secondary",
                size="sm"
            )
            
            # Created hidden; show_requirements() below makes it visible
            requirements_output = gr.Textbox(
                label="📋 Requirements Status",
                lines=10,
                interactive=False,
                visible=False
            )
            
            # Cookies file upload
            cookies_input = gr.File(
                label="🍪 Upload Cookies File (cookies.txt)",
                file_types=[".txt"],
                file_count="single"
            )
            
            gr.Markdown("""
            **How to get cookies.txt to fix 403 Forbidden errors:**
            1. Install browser extension: "Get cookies.txt LOCALLY" 
            2. Visit YouTube in your browser (while logged in)
            3. Click the extension icon and export cookies for youtube.com
            4. Upload the downloaded cookies.txt file here
            
            **Alternative extensions:**
            - "cookies.txt" (Chrome/Firefox)
            - "Export Cookies" (Chrome)
            
            ⚠️ **Important**: Without cookies, you'll likely get 403 Forbidden errors
            """)
            
            url_input = gr.Textbox(
                label="📺 YouTube URL",
                placeholder="https://www.youtube.com/watch?v=...",
                lines=2
            )
            
            process_btn = gr.Button(
                "🚀 Extract Stock Information", 
                variant="primary",
                size="lg"
            )
            
            # Status display (third value returned by process_video)
            status_output = gr.Textbox(
                label="📊 Status",
                lines=1,
                interactive=False
            )
            
            gr.Markdown("""
            ### 💡 Tips:
            - **MUST upload cookies.txt** to avoid 403 Forbidden errors
            - Works best with financial YouTube channels
            - Ensure video has clear audio
            - English content works best
            - Try shorter videos first (under 10 minutes)
            """)
    
    # Results row: transcript on the left, extracted information on the right
    with gr.Row():
        with gr.Column():
            transcript_output = gr.Textbox(
                label="📝 Full Transcript",
                lines=15,
                max_lines=20,
                show_copy_button=True
            )
        
        with gr.Column():
            stock_info_output = gr.Textbox(
                label="📊 Extracted Stock Information",
                lines=15,
                max_lines=20,
                show_copy_button=True
            )
    
    # Event handlers
    def show_requirements():
        """Run check_requirements() and reveal the status box with its report."""
        status = check_requirements()
        return gr.update(value=status, visible=True)
    
    check_req_btn.click(
        fn=show_requirements,
        outputs=[requirements_output]
    )
    
    # process_video returns (transcript, stock details, status), matching the
    # order of the three output components listed here
    process_btn.click(
        fn=process_video,
        inputs=[url_input, cookies_input],
        outputs=[transcript_output, stock_info_output, status_output],
        show_progress=True
    )
    
    # Example section
    gr.Markdown("### 📋 Example URLs (Replace with actual financial videos)")
    gr.Examples(
        examples=[
            ["https://www.youtube.com/watch?v=dQw4w9WgXcQ"],
        ],
        inputs=[url_input],
        label="Click to try example"
    )
    
    # Static installation and troubleshooting help
    gr.Markdown("""
    ### 🔧 Installation & Troubleshooting:
    
    **Step 1: Click "Check Requirements" button above to see what's missing**
    
    **If you get "Whisper Missing" error:**
    ```bash
    pip install openai-whisper
    ```
    
    **If you get "yt-dlp Missing" error:**
    ```bash
    pip install yt-dlp
    ```
    
    **Install all requirements at once:**
    ```bash
    pip install gradio==4.44.0 yt-dlp==2023.12.30 openai-whisper==20231117 torch==2.1.0 torchaudio==2.1.0 numpy==1.24.3 regex==2023.8.8
    ```
    
    **Alternative Whisper installation:**
    ```bash
    pip install transformers torch torchaudio
    ```
    
    **If using virtual environment:**
    ```bash
    # Create and activate virtual environment first
    python -m venv myenv
    # Windows: myenv\\Scripts\\activate
    # Mac/Linux: source myenv/bin/activate
    # Then install packages
    pip install -r requirements.txt
    ```
    
    **Other Issues:**
    - **Bot Detection Error**: Upload your cookies.txt file
    - **No Audio Found**: Check if video has audio track
    - **Transcription Failed**: Video might be too long or audio quality poor
    - **No Stock Info**: Video might not contain financial content
    """)

# Start the Gradio app only when this file is run as a script, not on import
if __name__ == "__main__":
    demo.launch()