Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,155 +1,156 @@
|
|
1 |
-
# β
|
2 |
|
3 |
-
import gradio as gr
|
4 |
import os
|
|
|
5 |
import tempfile
|
6 |
-
import
|
|
|
7 |
import traceback
|
8 |
-
import google.generativeai as genai
|
9 |
from yt_dlp import YoutubeDL
|
10 |
|
11 |
-
#
|
12 |
-
|
|
|
|
|
|
|
|
|
13 |
|
14 |
-
|
15 |
-
try:
|
16 |
-
genai.configure(api_key=api_key)
|
17 |
-
global GEMINI_MODEL
|
18 |
-
GEMINI_MODEL = genai.GenerativeModel("gemini-1.5-flash-latest")
|
19 |
-
return "β
Gemini API key configured successfully."
|
20 |
-
except Exception as e:
|
21 |
-
return f"β Gemini configuration failed: {str(e)}"
|
22 |
|
23 |
-
|
|
|
|
|
|
|
24 |
|
25 |
-
def extract_metadata(url, cookies_file=None):
|
26 |
-
def run_yt_dlp(with_cookies):
|
27 |
ydl_opts = {
|
28 |
-
'
|
29 |
-
'
|
|
|
30 |
'noplaylist': True,
|
31 |
-
'
|
32 |
-
'force_ipv4': True,
|
33 |
'user_agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
|
34 |
-
'referer': 'https://www.youtube.com/'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
}
|
36 |
-
if with_cookies and cookies_file and os.path.exists(cookies_file):
|
37 |
-
with open(cookies_file, "r", encoding="utf-8", errors="ignore") as f:
|
38 |
-
header = f.readline().strip()
|
39 |
-
if "# Netscape HTTP Cookie File" in header:
|
40 |
-
ydl_opts['cookiefile'] = cookies_file
|
41 |
-
print("β
Using valid cookies file")
|
42 |
-
else:
|
43 |
-
print("β οΈ Invalid cookies format. Skipping cookies.")
|
44 |
-
else:
|
45 |
-
print("π Proceeding without cookies")
|
46 |
|
47 |
with YoutubeDL(ydl_opts) as ydl:
|
48 |
-
|
49 |
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
info = run_yt_dlp(with_cookies=False)
|
57 |
-
|
58 |
-
print("β
Metadata fetched successfully")
|
59 |
-
|
60 |
-
return {
|
61 |
-
'title': info.get("title", ""),
|
62 |
-
'description': info.get("description", ""),
|
63 |
-
'duration': info.get("duration", 0),
|
64 |
-
'uploader': info.get("uploader", ""),
|
65 |
-
'view_count': info.get("view_count", 0),
|
66 |
-
'upload_date': info.get("upload_date", "")
|
67 |
-
}, "β
Video metadata extracted"
|
68 |
|
69 |
except Exception as e:
|
70 |
traceback.print_exc()
|
71 |
-
return None, f"β
|
72 |
-
|
73 |
-
# β
Gemini Prompt for Stock Extraction
|
74 |
|
75 |
-
|
76 |
-
if GEMINI_MODEL is None:
|
77 |
-
return "β Gemini model is not initialized."
|
78 |
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
|
|
|
|
|
|
|
|
|
|
84 |
|
85 |
-
|
86 |
-
- Mentioned companies or stock symbols
|
87 |
-
- Any price targets, buy/sell/hold recommendations
|
88 |
-
- Bullish/bearish sentiments if expressed
|
89 |
-
- If no stock info is present, clearly say "No financial or trading recommendations found."
|
90 |
-
- Keep the output short and to the point
|
91 |
-
"""
|
92 |
|
|
|
93 |
try:
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
99 |
except Exception as e:
|
100 |
-
|
101 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
102 |
|
103 |
-
# β
|
104 |
|
105 |
-
def run_pipeline(
|
106 |
-
|
107 |
-
|
108 |
-
|
|
|
109 |
|
110 |
-
|
111 |
-
|
|
|
112 |
return status, ""
|
113 |
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
print(f"β Failed to save cookies: {e}")
|
124 |
-
|
125 |
-
metadata, meta_status = extract_metadata(url, cookie_path)
|
126 |
-
if not metadata:
|
127 |
-
return meta_status, ""
|
128 |
-
|
129 |
-
print(f"π Title: {metadata['title']}")
|
130 |
-
print(f"π Description length: {len(metadata['description'])} characters")
|
131 |
-
|
132 |
-
result = query_gemini_stock_analysis(metadata)
|
133 |
-
return meta_status, result
|
134 |
-
|
135 |
-
# β
Gradio UI
|
136 |
-
with gr.Blocks(title="Gemini Stock Extractor (Debug Mode)") as demo:
|
137 |
gr.Markdown("""
|
138 |
-
#
|
139 |
-
|
140 |
-
|
141 |
""")
|
142 |
|
143 |
with gr.Row():
|
144 |
-
|
145 |
-
|
146 |
-
cookies_input = gr.File(label="cookies.txt (optional)", file_types=[".txt"])
|
147 |
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
|
152 |
-
|
153 |
|
154 |
if __name__ == "__main__":
|
155 |
demo.launch(debug=True)
|
|
|
1 |
+
# β
Stock Recommendation Extractor from YouTube Audio (Working Pipeline)
|
2 |
|
|
|
3 |
import os
|
4 |
+
import gradio as gr
|
5 |
import tempfile
|
6 |
+
import shutil
|
7 |
+
import re
|
8 |
import traceback
|
|
|
9 |
from yt_dlp import YoutubeDL
|
10 |
|
11 |
+
# Optional: use OpenAI Whisper if available.
# Narrow `except Exception` (not a bare `except:`) so Ctrl-C / SystemExit
# during import are not swallowed; `whisper` is always bound so later
# references never hit a NameError.
try:
    import whisper
    WHISPER_AVAILABLE = True
except Exception:
    whisper = None
    WHISPER_AVAILABLE = False
|
17 |
|
18 |
+
# β
Download audio using working logic
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
|
20 |
+
def download_audio(url, cookies_path=None):
    """Download the best available audio stream of a YouTube video.

    Args:
        url: Video URL understood by yt-dlp.
        cookies_path: Optional path to a Netscape-format cookies.txt file.

    Returns:
        (audio_file_path, status_message) on success, or (None, error_message)
        on failure. The audio file lives in a freshly created temp dir;
        cleanup of that dir on success is left to the caller — TODO confirm
        the pipeline eventually removes it, it currently leaks.
    """
    try:
        temp_dir = tempfile.mkdtemp()
        output_path = os.path.join(temp_dir, "audio")

        ydl_opts = {
            'format': 'bestaudio[ext=m4a]/bestaudio/best',
            'outtmpl': output_path + '.%(ext)s',
            'quiet': True,
            'noplaylist': True,
            'user_agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
            'referer': 'https://www.youtube.com/',
            'force_ipv4': True,
            'http_headers': {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
                'Accept-Language': 'en-US,en;q=0.5',
                'Referer': 'https://www.youtube.com/'
            },
        }
        # Only pass a cookie file that actually exists on disk; handing
        # yt-dlp a bogus/None path is at best useless and at worst an error.
        if cookies_path and os.path.exists(cookies_path):
            ydl_opts['cookiefile'] = cookies_path

        with YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])

        # yt-dlp chooses the container, so look for whatever extension it
        # produced rather than hard-coding a few (.m4a/.webm/.mp3 missed
        # e.g. .opus). The temp dir contains only our "audio.*" output.
        for name in sorted(os.listdir(temp_dir)):
            if name.startswith("audio."):
                return os.path.join(temp_dir, name), "✅ Audio downloaded successfully"

        # Nothing useful was produced — don't leak the empty temp dir.
        shutil.rmtree(temp_dir, ignore_errors=True)
        return None, "❌ Audio file not found"

    except Exception as e:
        traceback.print_exc()
        return None, f"❌ Download error: {str(e)}"
|
|
|
|
|
55 |
|
56 |
+
# β
Transcribe audio using Whisper
|
|
|
|
|
57 |
|
58 |
+
def transcribe_audio(path):
    """Transcribe the audio file at *path* with Whisper's "tiny" model.

    Returns the transcript text on success, or an "❌"-prefixed error
    message when Whisper is missing or transcription blows up.
    """
    if not WHISPER_AVAILABLE:
        return "❌ Whisper not available. Please install openai-whisper."
    try:
        # "tiny" keeps memory/latency low at the cost of accuracy.
        tiny_model = whisper.load_model("tiny")
        transcription = tiny_model.transcribe(path)
        return transcription["text"]
    except Exception as err:
        traceback.print_exc()
        return f"❌ Transcription failed: {str(err)}"
|
68 |
|
69 |
+
# β
Extract stock-related information from transcript
|
|
|
|
|
|
|
|
|
|
|
|
|
70 |
|
71 |
+
def extract_stock_info(text):
    """Heuristically pull stock-related facts out of a transcript.

    Uses simple regexes (capitalised names, ALL-CAPS tickers, $-prices,
    trading verbs) rather than NLP, so false positives are expected.

    Args:
        text: Transcript text to scan.

    Returns:
        A formatted multi-line report string; on any internal failure an
        "❌"-prefixed message is returned instead of raising.
    """
    try:
        companies = re.findall(r'\b[A-Z][a-z]+(?: [A-Z][a-z]+)*\b', text)
        symbols = re.findall(r'\b[A-Z]{2,5}\b', text)
        prices = re.findall(r'\$\d+(?:\.\d{1,2})?', text)
        actions = re.findall(r'\b(buy|sell|hold|target|bullish|bearish|stop loss)\b', text, re.IGNORECASE)

        def _uniq(items, limit=10):
            # Deduplicate while preserving first-seen order. The previous
            # ', '.join(set(items[:10])) was nondeterministic across runs
            # (set iteration order) and deduped only AFTER truncating.
            return list(dict.fromkeys(items))[:limit]

        result = "=== STOCK RECOMMENDATION ANALYSIS ===\n\n"
        if companies:
            result += f"🏢 Companies Mentioned: {', '.join(_uniq(companies))}\n"
        if symbols:
            result += f"📈 Symbols: {', '.join(_uniq(symbols))}\n"
        if prices:
            result += f"💲 Prices: {', '.join(_uniq(prices))}\n"
        if actions:
            result += f"📊 Actions: {', '.join(_uniq(actions))}\n"

        # Surface sentences that look like explicit recommendations.
        recommendations = []
        for sentence in text.split("."):
            if any(word in sentence.lower() for word in ['buy', 'sell', 'target', 'hold']):
                recommendations.append(sentence.strip())

        if recommendations:
            result += "\n🎯 Potential Recommendations:\n"
            for rec in recommendations[:5]:
                result += f"• {rec}\n"

        if not any([companies, symbols, prices, actions]):
            result += "\n⚠️ No stock-related insights detected."

        return result

    except Exception as e:
        return f"❌ Stock info extraction failed: {str(e)}"
|
106 |
+
|
107 |
+
# β
Save uploaded cookies.txt
|
108 |
+
|
109 |
+
def save_cookies(file):
    """Persist an uploaded cookies.txt to a private temp file.

    Args:
        file: The Gradio upload — either a file-like object exposing
              ``.read()`` (older Gradio) or a filesystem path / object with
              a ``.name`` path (newer Gradio hands uploads over as paths).
              ``None`` means nothing was uploaded.

    Returns:
        The path of the saved temp file, or None when *file* is None.
    """
    if file is None:
        return None
    # mkstemp creates the file atomically with a private mode, unlike the
    # race-prone and deprecated tempfile.mktemp used previously.
    fd, temp_path = tempfile.mkstemp(suffix=".txt")
    with os.fdopen(fd, "wb") as out:
        if hasattr(file, "read"):
            data = file.read()
            if isinstance(data, str):
                data = data.encode("utf-8")
            out.write(data)
        else:
            src_path = file if isinstance(file, (str, os.PathLike)) else file.name
            with open(src_path, "rb") as src:
                shutil.copyfileobj(src, out)
    return temp_path
|
116 |
|
117 |
+
# β
Full pipeline
|
118 |
|
119 |
+
def run_pipeline(url, cookies_file):
    """End-to-end pipeline: download audio → transcribe → extract stock info.

    Returns a (status_message, stock_info_text) pair matching the two
    Gradio output textboxes; the second element is "" on any failure.
    """
    # Guard clauses: bail out early on missing prerequisites.
    if not WHISPER_AVAILABLE:
        return "❌ Whisper is not installed. Run: pip install openai-whisper", ""
    if not url:
        return "❌ YouTube URL required", ""

    saved_cookie_path = save_cookies(cookies_file)
    downloaded_path, dl_status = download_audio(url, saved_cookie_path)
    if not downloaded_path:
        return dl_status, ""

    text = transcribe_audio(downloaded_path)
    # transcribe_audio signals failure via an "❌"-prefixed message.
    if text.startswith("❌"):
        return text, ""

    return "✅ Complete", extract_stock_info(text)
|
136 |
+
|
137 |
+
# β
Gradio Interface
|
138 |
+
# Gradio UI: one page with a URL box, optional cookies upload, and two outputs.
with gr.Blocks(title="Stock Insights from YouTube Audio") as demo:
    # Header / description shown at the top of the app.
    gr.Markdown("""
    # 🎧 Extract Stock Recommendations from YouTube Audio
    This app downloads the audio from a YouTube video, transcribes it with Whisper,
    and extracts stock trading recommendations, sentiments, and symbols.
    """)

    # Inputs: the video URL plus an optional cookies.txt (for gated videos).
    with gr.Row():
        url_input = gr.Textbox(label="📥 YouTube Video URL")
        cookie_input = gr.File(label="cookies.txt (optional)", file_types=[".txt"])

    run_btn = gr.Button("🚀 Extract Stock Info")
    status_output = gr.Textbox(label="Status")
    result_output = gr.Textbox(label="Stock Info", lines=12)

    # Wire the button to the pipeline; run_pipeline returns (status, stock_info).
    run_btn.click(fn=run_pipeline, inputs=[url_input, cookie_input], outputs=[status_output, result_output])
|
154 |
|
155 |
# Script entry point: launch the Gradio server (debug=True surfaces
# tracebacks in the browser while developing).
if __name__ == "__main__":
    demo.launch(debug=True)
|