Update app.py
app.py
CHANGED
@@ -1,512 +1,145 @@
 import os
 import tempfile
-import gradio as gr
-import re
-import sys
 import shutil
-import importlib.util
-
-
-def check_requirements():
-    requirements_status = []
-
-    packages = [
-        ('gradio', 'gradio'),
-        ('yt-dlp', 'yt_dlp'),
-        ('openai-whisper', 'whisper'),
-        ('torch', 'torch'),
-        ('torchaudio', 'torchaudio'),
-        ('numpy', 'numpy'),
-        ('regex', 'regex'),
-    ]
-
-    for package_name, import_name in packages:
-        try:
-            spec = importlib.util.find_spec(import_name)
-            if spec is None:
-                requirements_status.append(f"❌ {package_name}: Not found")
-                continue
-
-            module = importlib.import_module(import_name)
-            version = getattr(module, '__version__', 'Unknown version')
-            requirements_status.append(f"✅ {package_name}: {version}")
-
-        except ImportError as e:
-            requirements_status.append(f"❌ {package_name}: Import error - {str(e)}")
-        except Exception as e:
-            requirements_status.append(f"⚠️ {package_name}: Found but error - {str(e)}")
-
-    # Add Python info
-    requirements_status.append(f"\n🐍 Python: {sys.version}")
-    requirements_status.append(f"🐍 Python executable: {sys.executable}")
-
-    return "\n".join(requirements_status)
-
-# Try to import required packages with error handling
-try:
-    from yt_dlp import YoutubeDL
-    YT_DLP_AVAILABLE = True
-except ImportError as e:
-    YT_DLP_AVAILABLE = False
-    print(f"yt-dlp import error: {e}")
 
-#
 WHISPER_AVAILABLE = False
 WHISPER_TYPE = None
-
 try:
     import whisper
     WHISPER_AVAILABLE = True
     WHISPER_TYPE = "openai-whisper"
-
-except ImportError as e:
-    print(f"OpenAI Whisper import error: {e}")
     try:
         from transformers import pipeline
         WHISPER_AVAILABLE = True
         WHISPER_TYPE = "transformers"
-
-    except ImportError as e2:
-        print(f"Transformers Whisper import error: {e2}")
-
-print(f"Python version: {sys.version}")
-print(f"Python executable: {sys.executable}")
-print(f"yt-dlp available: {YT_DLP_AVAILABLE}")
-print(f"whisper available: {WHISPER_AVAILABLE} (type: {WHISPER_TYPE})")
-
-# Additional diagnostics
-if YT_DLP_AVAILABLE:
-    try:
-        from yt_dlp import YoutubeDL
-        print(f"yt-dlp version: {YoutubeDL().__class__.__module__}")
-    except:
-        pass
-
-if WHISPER_AVAILABLE and WHISPER_TYPE == "openai-whisper":
-    try:
-        import whisper
-        print(f"whisper version: {whisper.__version__}")
-    except:
         pass
 
-
-    """Download audio from YouTube URL and return the file path"""
-    if not YT_DLP_AVAILABLE:
-        raise Exception("yt-dlp is not available. Please check the installation.")
-
-    try:
-        # Create a temporary directory for downloads
-        temp_dir = tempfile.mkdtemp()
-        output_path = os.path.join(temp_dir, "audio")
-
-        # Base yt-dlp options
-        ydl_opts = {
-            'format': 'bestaudio[ext=m4a]/bestaudio/best',
-            'outtmpl': output_path + '.%(ext)s',
-            'quiet': True,
-            'no_warnings': True,
-            'force_ipv4': True,
-            'referer': 'https://www.youtube.com/',
-            'user_agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
-            'extractor_retries': 3,
-            'fragment_retries': 3,
-            'retry_sleep_functions': {'http': lambda n: 2 ** n},
-        }
-
-        # Add cookies file if provided
-        if cookies_file_path and os.path.exists(cookies_file_path):
-            print(f"✅ Using cookies file: {cookies_file_path}")
-            ydl_opts['cookiefile'] = cookies_file_path
-        else:
-            print("⚠️ No valid cookies file provided - likely to hit 403 Forbidden.")
-
-        # Extra headers to mimic real browser
-        ydl_opts['http_headers'] = {
-            'User-Agent': ydl_opts['user_agent'],
-            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
-            'Accept-Language': 'en-US,en;q=0.5',
-            'Accept-Encoding': 'gzip, deflate',
-            'DNT': '1',
-            'Connection': 'keep-alive',
-            'Upgrade-Insecure-Requests': '1',
-            'Referer': 'https://www.youtube.com/',
-        }
-
-        print(f"🔧 yt-dlp options:\n{ydl_opts}")
-
-        with YoutubeDL(ydl_opts) as ydl:
-            info_dict = ydl.extract_info(url, download=True)
-            filename = ydl.prepare_filename(info_dict)
-
-        # Search for the downloaded audio file
-        for ext in ['.m4a', '.webm', '.mp4', '.mp3']:
-            potential_file = output_path + ext
-            if os.path.exists(potential_file):
-                print(f"✅ Audio file downloaded: {potential_file}")
-                return potential_file
-
-        raise FileNotFoundError("Downloaded audio file not found.")
-
-    except Exception as e:
-        import traceback
-        traceback.print_exc()  # For debugging
-        if "403" in str(e) or "Forbidden" in str(e):
-            raise Exception(f"YouTube blocked the request (403 Forbidden). Please upload a valid cookies.txt file. Original error: {str(e)}")
-        else:
-            raise Exception(f"Failed to download audio: {str(e)}")
-
-def transcribe_audio(file_path):
-    """Transcribe audio file using Whisper"""
-    if not WHISPER_AVAILABLE:
-        raise Exception("OpenAI Whisper is not available. Please install it using: pip install openai-whisper")
-
-    try:
-        if WHISPER_TYPE == "openai-whisper":
-            # Use OpenAI Whisper
-            model = whisper.load_model("tiny")
-            result = model.transcribe(file_path)
-            return result["text"]
-
-        elif WHISPER_TYPE == "transformers":
-            # Use Transformers Whisper
-            from transformers import pipeline
-            transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")
-            result = transcriber(file_path)
-            return result["text"]
-
-        else:
-            raise Exception("No compatible Whisper installation found")
-
-    except Exception as e:
-        raise Exception(f"Failed to transcribe audio: {str(e)}")
-
 
 def extract_stock_info_simple(text):
-    """Extract stock information using simple pattern matching"""
     try:
         stock_info = []
-
-        # Simple patterns to look for stock-related information
-        stock_patterns = [
-            r'\b[A-Z]{1,5}\b(?:\s+stock|\s+shares|\s+symbol)',  # Stock symbols
-            r'(?:buy|sell|target|price)\s+[A-Z]{1,5}',
-            r'\$\d+(?:\.\d{2})?',  # Dollar amounts
-            r'\b(?:bullish|bearish|buy|sell|hold)\b',
-        ]
-
-        # Look for company names and stock mentions
         companies = re.findall(r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*(?:\s+(?:Inc|Corp|Company|Ltd)\.?)?', text)
         symbols = re.findall(r'\b[A-Z]{2,5}\b', text)
         prices = re.findall(r'\$\d+(?:\.\d{2})?', text)
-        actions = re.findall(r'\b(?:buy|sell|hold|bullish|bearish|target|stop loss)\b', text, re.IGNORECASE)
-
-        # Format the extracted information
         result = "=== EXTRACTED STOCK INFORMATION ===\n\n"
-
         if companies:
-            result += f"📊 Mentioned Companies: {', '.join(set(companies[:10]))}\n\n"
-
         if symbols:
-            result += f"🔤 Potential Stock Symbols: {', '.join(set(symbols[:10]))}\n\n"
-
         if prices:
-            result += f"💰 Price Mentions: {', '.join(set(prices[:10]))}\n\n"
-
         if actions:
-            result += f"📈 Trading Actions: {', '.join(set(actions[:10]))}\n\n"
-
-        # Look for specific recommendation patterns
         recommendations = []
         sentences = text.split('.')
         for sentence in sentences:
-            if any(word in sentence.lower() for word in ['buy', 'sell', 'target']):
-                if any(sym in sentence for sym in symbols[:5]):
                     recommendations.append(sentence.strip())
-
         if recommendations:
-            result += "🎯 Potential Recommendations:\n"
             for rec in recommendations[:5]:
                 result += f"• {rec}\n"
-
         if not any([companies, symbols, prices, actions]):
-            result += "⚠️ No clear stock recommendations found. This could mean:\n"
-
-            result += "• The video doesn't contain stock recommendations\n"
-            result += "• The audio quality was poor\n"
-            result += "• The content is not in English\n"
-
        return result
-
     except Exception as e:
        return f"Error extracting stock info: {str(e)}"
 
-
-    """Clean up temporary files"""
-    try:
-        if file_path and os.path.exists(file_path):
-            os.remove(file_path)
-            # Also try to remove the directory if it's empty
-            try:
-                os.rmdir(os.path.dirname(file_path))
-            except:
-                pass
-    except:
-        pass
 
-def
-
-
-        return None
-
     try:
-
-
-
-
-
-
-
     except Exception as e:
-
-        return None
 
-
-
-
-    # Check if required packages are available
-    if not YT_DLP_AVAILABLE:
-        return "Error: yt-dlp is not installed properly. Please install it using: pip install yt-dlp", "", "❌ Error: Missing yt-dlp"
-
-    if not WHISPER_AVAILABLE:
-        return "Error: OpenAI Whisper is not installed properly. Please install it using: pip install openai-whisper", "", "❌ Error: Missing Whisper"
-
-    if not url or not url.strip():
-        return "Please provide a valid YouTube URL", "", "❌ Error: Invalid URL"
-
-    audio_path = None
-    cookies_temp_path = None
-
     try:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        if not transcript.strip():
-            return "No speech detected in the video", "", "❌ No speech detected"
-
-        # Extract stock information
-        progress(0.9, desc="Extracting stock information...")
-        stock_details = extract_stock_info_simple(transcript)
-
-        progress(1.0, desc="Complete!")
-        return transcript, stock_details, "✅ Processing completed successfully"
-
     except Exception as e:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        border-radius: 5px;
-        margin: 10px 0;
-    }
-    """
-) as demo:
-
     gr.Markdown("""
-    # 📈 Stock
-
-    Extract stock recommendations and trading information from YouTube videos using AI transcription.
-
-    **How it works:**
-    1. Upload your cookies.txt file (optional but recommended to avoid bot detection)
-    2. Paste YouTube video URL
-    3. Downloads audio from YouTube video
-    4. Transcribes using OpenAI Whisper
-    5. Extracts stock-related information
-
-    **⚠️ Disclaimer:** This is for educational purposes only. Always do your own research!
     """)
-
-    with gr.Row():
-        with gr.Column(scale=1):
-            # Requirements check button
-            gr.Markdown("### 🔍 System Check")
-            check_req_btn = gr.Button(
-                "Check Requirements",
-                variant="secondary",
-                size="sm"
-            )
-
-            requirements_output = gr.Textbox(
-                label="📋 Requirements Status",
-                lines=10,
-                interactive=False,
-                visible=False
-            )
-
-            # Cookies file upload
-            cookies_input = gr.File(
-                label="🍪 Upload Cookies File (cookies.txt)",
-                file_types=[".txt"],
-                file_count="single"
-            )
-
-            gr.Markdown("""
-            **How to get cookies.txt to fix 403 Forbidden errors:**
-            1. Install browser extension: "Get cookies.txt LOCALLY"
-            2. Visit YouTube in your browser (while logged in)
-            3. Click the extension icon and export cookies for youtube.com
-            4. Upload the downloaded cookies.txt file here
-
-            **Alternative extensions:**
-            - "cookies.txt" (Chrome/Firefox)
-            - "Export Cookies" (Chrome)
-
-            ⚠️ **Important**: Without cookies, you'll likely get 403 Forbidden errors
-            """)
-
-            url_input = gr.Textbox(
-                label="📺 YouTube URL",
-                placeholder="https://www.youtube.com/watch?v=...",
-                lines=2
-            )
-
-            process_btn = gr.Button(
-                "🚀 Extract Stock Information",
-                variant="primary",
-                size="lg"
-            )
-
-            # Status display
-            status_output = gr.Textbox(
-                label="📊 Status",
-                lines=1,
-                interactive=False
-            )
-
-            gr.Markdown("""
-            ### 💡 Tips:
-            - **MUST upload cookies.txt** to avoid 403 Forbidden errors
-            - Works best with financial YouTube channels
-            - Ensure video has clear audio
-            - English content works best
-            - Try shorter videos first (under 10 minutes)
-            """)
-
     with gr.Row():
-
-
-
-
-
-
-
-
-
-            stock_info_output = gr.Textbox(
-                label="📊 Extracted Stock Information",
-                lines=15,
-                max_lines=20,
-                show_copy_button=True
-            )
-
-    # Event handlers
-    def show_requirements():
-        status = check_requirements()
-        return gr.update(value=status, visible=True)
-
-    check_req_btn.click(
-        fn=show_requirements,
-        outputs=[requirements_output]
-    )
-
-    process_btn.click(
-        fn=process_video,
-        inputs=[url_input, cookies_input],
-        outputs=[transcript_output, stock_info_output, status_output],
-        show_progress=True
-    )
-
-    # Example section
-    gr.Markdown("### 📋 Example URLs (Replace with actual financial videos)")
-    gr.Examples(
-        examples=[
-            ["https://www.youtube.com/watch?v=dQw4w9WgXcQ"],
-        ],
-        inputs=[url_input],
-        label="Click to try example"
-    )
-
-    gr.Markdown("""
-    ### 🔧 Installation & Troubleshooting:
-
-    **Step 1: Click "Check Requirements" button above to see what's missing**
-
-    **If you get "Whisper Missing" error:**
-    ```bash
-    pip install openai-whisper
-    ```
-
-    **If you get "yt-dlp Missing" error:**
-    ```bash
-    pip install yt-dlp
-    ```
-
-    **Install all requirements at once:**
-    ```bash
-    pip install gradio==4.44.0 yt-dlp==2023.12.30 openai-whisper==20231117 torch==2.1.0 torchaudio==2.1.0 numpy==1.24.3 regex==2023.8.8
-    ```
-
-    **Alternative Whisper installation:**
-    ```bash
-    pip install transformers torch torchaudio
-    ```
-
-    **If using virtual environment:**
-    ```bash
-    # Create and activate virtual environment first
-    python -m venv myenv
-    # Windows: myenv\\Scripts\\activate
-    # Mac/Linux: source myenv/bin/activate
-    # Then install packages
-    pip install -r requirements.txt
-    ```
-
-    **Other Issues:**
-    - **Bot Detection Error**: Upload your cookies.txt file
-    - **No Audio Found**: Check if video has audio track
-    - **Transcription Failed**: Video might be too long or audio quality poor
-    - **No Stock Info**: Video might not contain financial content
-    """)
 
 if __name__ == "__main__":
-    demo.launch()
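The cookies.txt upload described in the old UI text above (and kept in the new version) only works if the file is the Netscape-format export that yt-dlp's `cookiefile` option reads. A rough sketch of that layout, with a made-up cookie row purely for illustration:

```python
# Illustrative only: Netscape cookie-file layout expected by yt-dlp's 'cookiefile'.
# Each data row is tab-separated: domain, include-subdomains flag, path,
# secure flag, expiry (unix time), cookie name, cookie value.
SAMPLE_COOKIES_TXT = (
    "# Netscape HTTP Cookie File\n"
    ".youtube.com\tTRUE\t/\tTRUE\t1767225600\tPREF\tf6=40000000\n"  # fake cookie row
)

with open("cookies.txt", "w", encoding="utf-8") as f:
    f.write(SAMPLE_COOKIES_TXT)
```

In practice this file is exported from a browser extension as the instructions above describe, not written by hand.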
+# ✅ Combined YouTube Analyzer with Stock Info Extractor
+# ⬇️ Based on your working app + whisper + stock extraction
+
+import gradio as gr
 import os
 import tempfile
 import shutil
+import re
+import torch
+import numpy as np
+from yt_dlp import YoutubeDL
 
+# Whisper setup
 WHISPER_AVAILABLE = False
 WHISPER_TYPE = None
 try:
     import whisper
     WHISPER_AVAILABLE = True
     WHISPER_TYPE = "openai-whisper"
+except ImportError:
     try:
         from transformers import pipeline
         WHISPER_AVAILABLE = True
         WHISPER_TYPE = "transformers"
+    except ImportError:
         pass
 
+# Stock Info Extraction
 
 def extract_stock_info_simple(text):
     try:
         stock_info = []
         companies = re.findall(r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*(?:\s+(?:Inc|Corp|Company|Ltd)\.?)?', text)
         symbols = re.findall(r'\b[A-Z]{2,5}\b', text)
         prices = re.findall(r'\$\d+(?:\.\d{2})?', text)
+        actions = re.findall(r'\b(?:buy|sell|hold|bullish|bearish|target|stop loss)\b', text, re.IGNORECASE)
+
         result = "=== EXTRACTED STOCK INFORMATION ===\n\n"
+
         if companies:
+            result += f"\U0001F4CA Mentioned Companies: {', '.join(set(companies[:10]))}\n\n"
         if symbols:
+            result += f"\U0001F524 Potential Stock Symbols: {', '.join(set(symbols[:10]))}\n\n"
         if prices:
+            result += f"\U0001F4B0 Price Mentions: {', '.join(set(prices[:10]))}\n\n"
         if actions:
+            result += f"\U0001F4C8 Trading Actions: {', '.join(set(actions[:10]))}\n\n"
+
         recommendations = []
         sentences = text.split('.')
         for sentence in sentences:
+            if any(word in sentence.lower() for word in ['buy', 'sell', 'target']):
+                if any(sym in sentence for sym in symbols[:5]):
                     recommendations.append(sentence.strip())
+
         if recommendations:
+            result += "\U0001F3AF Potential Recommendations:\n"
             for rec in recommendations[:5]:
                 result += f"• {rec}\n"
+
         if not any([companies, symbols, prices, actions]):
+            result += "⚠️ No clear stock recommendations found.\n"
+
        return result
+
     except Exception as e:
        return f"Error extracting stock info: {str(e)}"
 
+# Whisper Transcription
 
+def transcribe_audio(file_path):
+    if not WHISPER_AVAILABLE:
+        return "❌ Whisper not available", ""
     try:
+        if WHISPER_TYPE == "openai-whisper":
+            model = whisper.load_model("tiny")
+            result = model.transcribe(file_path)
+            return result["text"], "✅ Transcription complete"
+        else:
+            pipe = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")
+            result = pipe(file_path)
+            return result["text"], "✅ Transcription complete"
     except Exception as e:
+        return "❌ Transcription failed", str(e)
 
+# Audio Downloader using yt-dlp
+
+def download_audio_youtube(url, cookies_file=None):
     try:
+        temp_dir = tempfile.mkdtemp()
+        out_path = os.path.join(temp_dir, "audio")
+        ydl_opts = {
+            'format': 'bestaudio[ext=m4a]/bestaudio/best',
+            'outtmpl': out_path + '.%(ext)s',
+            'quiet': True,
+            'noplaylist': True,
+            'cookiefile': cookies_file if cookies_file else None,
+            'user_agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
+            'force_ipv4': True,
+        }
+        with YoutubeDL(ydl_opts) as ydl:
+            ydl.download([url])
+        for ext in ['.m4a', '.mp3', '.webm']:
+            full_path = out_path + ext
+            if os.path.exists(full_path):
+                return full_path, "✅ Audio downloaded"
+        return None, "❌ Audio file not found"
     except Exception as e:
+        return None, f"❌ Download error: {str(e)}"
+
+# Gradio UI
+
+def full_pipeline(url, cookies):
+    if not url:
+        return "❌ Enter a valid YouTube URL", "", ""
+    temp_cookie = cookies.name if cookies else None
+    audio_path, msg = download_audio_youtube(url, temp_cookie)
+    if not audio_path:
+        return msg, "", ""
+    transcript, tmsg = transcribe_audio(audio_path)
+    if "❌" in transcript:
+        return msg, transcript, tmsg
+    stock_data = extract_stock_info_simple(transcript)
+    return "✅ Complete", transcript, stock_data
+
+# Gradio App
+with gr.Blocks(title="📈 Stock Info Extractor from YouTube") as demo:
     gr.Markdown("""
+    # 📈 Extract Stock Mentions from YouTube
+    Upload a YouTube link + cookies.txt, and extract trading mentions using Whisper + AI
     """)
+
     with gr.Row():
+        url_input = gr.Textbox(label="YouTube URL")
+        cookies_input = gr.File(label="cookies.txt (optional)", file_types=[".txt"])
+
+    run_btn = gr.Button("🚀 Run Extraction")
+    status = gr.Textbox(label="Status")
+    transcript_box = gr.Textbox(label="Transcript", lines=10)
+    stock_box = gr.Textbox(label="Stock Info", lines=10)
+
+    run_btn.click(fn=full_pipeline, inputs=[url_input, cookies_input], outputs=[status, transcript_box, stock_box])
 
 if __name__ == "__main__":
+    demo.launch(debug=True)
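For a quick check outside the Gradio UI, the three new helpers can be chained directly. A minimal sketch, assuming the new file is saved as `app.py` and its dependencies are installed (the URL is a placeholder):

```python
# Hypothetical smoke test; function names and return shapes match the new app.py.
from app import download_audio_youtube, transcribe_audio, extract_stock_info_simple

audio_path, status = download_audio_youtube("https://www.youtube.com/watch?v=...", None)
print(status)
if audio_path:
    transcript, note = transcribe_audio(audio_path)
    print(note)
    print(extract_stock_info_simple(transcript))
```

Importing `app` builds the Blocks UI but does not launch it, since `demo.launch(debug=True)` is guarded by the `__main__` check.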