Spaces:

developer28
/

Youtubedownloader

Sleeping

File size: 5,665 Bytes

bb84391
 
 
 
4fde749
a6e9713
c963386
bb84391
 
 
 
0921abd
bb84391
c963386
 
0921abd
 
 
c963386
bb84391
c963386
 
 
 
bb84391
c963386
0921abd
bb84391
c963386
9518e76
0921abd
 
9518e76
 
 
bb84391
 
9518e76
bb84391
3a866c5
bb84391
9518e76
bb84391
9518e76
bb84391
0921abd
bb84391
 
0921abd
9518e76
0921abd
bb84391
 
9518e76
bb84391
0921abd
bb84391
9518e76
 
bb84391
9518e76
bb84391
 
0921abd
bb84391
0921abd
9518e76
16e2e72
bb84391
0921abd
bb84391
 
 
0921abd
bb84391
 
 
 
 
 
 
 
0921abd
bb84391
3a866c5
bb84391
 
 
16e2e72
bb84391
 
7c660a9
bb84391
 
 
 
 
 
7c660a9
bb84391
7c660a9
 
 
 
 
 
 
 
 
 
bb84391
 
 
 
 
 
 
 
16e2e72
bb84391
 
 
 
 
 
 
7c660a9
 
 
bb84391
 
 
7c660a9
bb84391
 
 
7c660a9
bb84391
 
 
7c660a9
bb84391
 
0921abd
bb84391
 
0921abd
bb84391
0921abd
bb84391
 
 
 
 
 
 
 
 
0921abd
16e2e72
bb84391

# ✅ Combined YouTube Analyzer with Stock Info Extractor
# ⬇️ Based on your working app + whisper + stock extraction

import gradio as gr
import os
import tempfile
import shutil
import re
import torch
import numpy as np
from yt_dlp import YoutubeDL

# Speech-to-text backend detection: prefer the openai-whisper package and
# fall back to the Hugging Face transformers pipeline when it is missing.
WHISPER_AVAILABLE = False
WHISPER_TYPE = None
try:
    import whisper
except ImportError:
    try:
        from transformers import pipeline
    except ImportError:
        # Neither backend is installed; the flags keep their "off" defaults.
        pass
    else:
        WHISPER_AVAILABLE, WHISPER_TYPE = True, "transformers"
else:
    WHISPER_AVAILABLE, WHISPER_TYPE = True, "openai-whisper"

# Stock Info Extraction

def _unique_in_order(items, limit=10):
    """Return up to *limit* distinct items, keeping first-seen order."""
    return list(dict.fromkeys(items))[:limit]


def extract_stock_info_simple(text):
    """Build a plain-text report of stock-related mentions found in *text*.

    The report lists, when present: capitalised names (candidate companies),
    2-5 letter upper-case tokens (candidate ticker symbols), dollar amounts,
    and trading keywords. Each list holds at most ten distinct entries in
    order of first appearance, so the same input always yields the same
    report. Trading keywords are lower-cased so that "Buy" and "buy" count as
    one entry. Sentences that contain buy/sell/target together with one of
    the first five symbols are appended as potential recommendations (at
    most five).

    Args:
        text: Transcript to scan.

    Returns:
        The formatted report. If scanning fails (for example *text* is not a
        string) a message starting with "Error extracting stock info:" is
        returned instead of raising, because the result is shown directly in
        the UI.
    """
    try:
        companies = _unique_in_order(re.findall(
            r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*(?:\s+(?:Inc|Corp|Company|Ltd)\.?)?', text))
        symbols = _unique_in_order(re.findall(r'\b[A-Z]{2,5}\b', text))
        prices = _unique_in_order(re.findall(r'\$\d+(?:\.\d{2})?', text))
        actions = _unique_in_order(
            match.lower()
            for match in re.findall(
                r'\b(?:buy|sell|hold|bullish|bearish|target|stop loss)\b', text, re.IGNORECASE)
        )

        parts = ["=== EXTRACTED STOCK INFORMATION ===\n\n"]
        if companies:
            parts.append(f"\U0001F4CA Mentioned Companies: {', '.join(companies)}\n\n")
        if symbols:
            parts.append(f"\U0001F524 Potential Stock Symbols: {', '.join(symbols)}\n\n")
        if prices:
            parts.append(f"\U0001F4B0 Price Mentions: {', '.join(prices)}\n\n")
        if actions:
            parts.append(f"\U0001F4C8 Trading Actions: {', '.join(actions)}\n\n")

        keywords = ('buy', 'sell', 'target')
        top_symbols = symbols[:5]
        recommendations = []
        # Split on periods that are not decimal points, so a price such as
        # "$150.50" stays inside its sentence.
        for sentence in re.split(r'\.(?!\d)', text):
            lowered = sentence.lower()
            if (any(word in lowered for word in keywords)
                    and any(sym in sentence for sym in top_symbols)):
                recommendations.append(sentence.strip())

        if recommendations:
            parts.append("\U0001F3AF Potential Recommendations:\n")
            parts.extend(f"• {rec}\n" for rec in recommendations[:5])

        if not (companies or symbols or prices or actions):
            parts.append("⚠️ No clear stock recommendations found.\n")

        return "".join(parts)

    except Exception as e:
        # UI boundary: report the problem as text rather than crashing the app.
        return f"Error extracting stock info: {str(e)}"

# Whisper Transcription

# Loaded speech-recognition callables keyed by backend name, so the model is
# loaded once per process instead of on every request.
_TRANSCRIBER_CACHE = {}


def _get_transcriber():
    """Return a cached ``path -> transcript text`` callable for the active backend."""
    cached = _TRANSCRIBER_CACHE.get(WHISPER_TYPE)
    if cached is not None:
        return cached

    if WHISPER_TYPE == "openai-whisper":
        model = whisper.load_model("tiny")

        def run(path):
            return model.transcribe(path)["text"]
    else:
        # Whisper operates on 30-second windows; enabling chunking lets the
        # transformers pipeline transcribe recordings of arbitrary length.
        asr = pipeline(
            "automatic-speech-recognition",
            model="openai/whisper-tiny",
            chunk_length_s=30,
        )

        def run(path):
            return asr(path)["text"]

    _TRANSCRIBER_CACHE[WHISPER_TYPE] = run
    return run


def transcribe_audio(file_path):
    """Transcribe the audio file at *file_path* with the detected Whisper backend.

    Args:
        file_path: Path to the audio file to transcribe.

    Returns:
        A ``(transcript, message)`` tuple. On success *transcript* is the
        recognised text and *message* a confirmation. On failure
        *transcript* is a string starting with "❌" and *message* holds the
        error detail (empty when no backend is installed).
    """
    if not WHISPER_AVAILABLE:
        return "❌ Whisper not available", ""
    try:
        # Model loading happens inside the try so load errors are reported
        # the same way as transcription errors.
        text = _get_transcriber()(file_path)
    except Exception as e:
        return "❌ Transcription failed", str(e)
    return text, "✅ Transcription complete"

# Audio Downloader using yt-dlp

def download_audio_youtube(url, cookies_file=None):
    """Download the audio track of a YouTube video into a fresh temp directory.

    Args:
        url: Video URL handed to yt-dlp.
        cookies_file: Optional path to a cookies.txt file passed to yt-dlp as
            ``cookiefile``.

    Returns:
        A ``(path, message)`` tuple. On success *path* is the downloaded
        audio file, located in its own temporary directory which the caller
        is responsible for deleting. On failure *path* is ``None``, *message*
        describes the problem, and the temporary directory has already been
        removed.
    """
    temp_dir = None
    try:
        temp_dir = tempfile.mkdtemp()
        out_path = os.path.join(temp_dir, "audio")
        ydl_opts = {
            'format': 'bestaudio[ext=m4a]/bestaudio/best',
            'outtmpl': out_path + '.%(ext)s',
            'quiet': True,
            'noplaylist': True,
            'cookiefile': cookies_file if cookies_file else None,
            # NOTE(review): the next three keys look like CLI-style option
            # names; confirm YoutubeDL honours them. The same User-Agent and
            # Referer are also sent through 'http_headers' below.
            'user_agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
            'referer': 'https://www.youtube.com/',
            'force_ipv4': True,
            'http_headers': {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
                'Accept-Language': 'en-US,en;q=0.5',
                'Accept-Encoding': 'gzip, deflate',
                'DNT': '1',
                'Connection': 'keep-alive',
                'Upgrade-Insecure-Requests': '1',
                'Referer': 'https://www.youtube.com/',
            },
        }
        with YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])

        # The container depends on which format was selected, so look for
        # whatever "audio.<ext>" file was written instead of guessing a few
        # extensions.
        for name in sorted(os.listdir(temp_dir)):
            stem, ext = os.path.splitext(name)
            if stem == "audio" and ext and ext != ".part":
                return os.path.join(temp_dir, name), "✅ Audio downloaded"
        failure = (None, "❌ Audio file not found")
    except Exception as e:
        failure = (None, f"❌ Download error: {str(e)}")

    # Only failure paths reach this point: do not leave the temp dir behind.
    if temp_dir is not None:
        shutil.rmtree(temp_dir, ignore_errors=True)
    return failure

# End-to-end pipeline: download audio, transcribe it, extract stock info

def _cookie_file_path(cookies):
    """Return the filesystem path of an uploaded cookies file, or None."""
    if not cookies:
        return None
    if isinstance(cookies, (str, os.PathLike)):
        return os.fspath(cookies)
    # Presumably an upload wrapper exposing its temp path as `.name`
    # (verify against the installed Gradio version).
    return getattr(cookies, "name", None)


def _discard_audio(audio_path):
    """Best-effort removal of a downloaded audio file and its now-empty folder."""
    try:
        os.remove(audio_path)
        # rmdir only succeeds on an empty directory, so unrelated files in
        # the same folder are never deleted.
        os.rmdir(os.path.dirname(audio_path))
    except OSError:
        # Cleanup failure must not mask the pipeline result.
        pass


def full_pipeline(url, cookies):
    """Download a video's audio, transcribe it and extract stock mentions.

    Args:
        url: YouTube URL entered by the user.
        cookies: Optional uploaded cookies file, given as a path or as an
            object whose ``name`` attribute is the path; may be ``None``.

    Returns:
        A ``(status, transcript, stock_info)`` tuple of strings. When any
        step fails, *status* carries the error message and the other two
        fields are empty.
    """
    url = (url or "").strip()
    if not url:
        return "❌ Enter a valid YouTube URL", "", ""

    audio_path, msg = download_audio_youtube(url, _cookie_file_path(cookies))
    if not audio_path:
        return msg, "", ""

    try:
        transcript, tmsg = transcribe_audio(audio_path)
    finally:
        # The audio is only needed for transcription; remove it either way.
        _discard_audio(audio_path)

    if "❌" in transcript:
        # On failure `transcript` holds the error marker and `tmsg` the detail.
        status = f"{transcript}: {tmsg}" if tmsg else transcript
        return status, "", ""

    stock_data = extract_stock_info_simple(transcript)
    return "✅ Complete", transcript, stock_data


# Gradio App
# Builds the UI: a URL box and optional cookies upload in one row, a run
# button, and three text outputs fed by full_pipeline.
with gr.Blocks(title="📈 Stock Info Extractor from YouTube") as demo:
    # Page heading and short usage hint.
    gr.Markdown("""
    # 📈 Extract Stock Mentions from YouTube
    Upload a YouTube link + cookies.txt, and extract trading mentions using Whisper + AI
    """)

    # Inputs, placed inside the same Row container.
    with gr.Row():
        url_input = gr.Textbox(label="YouTube URL")
        cookies_input = gr.File(label="cookies.txt (optional)", file_types=[".txt"])

    run_btn = gr.Button("🚀 Run Extraction")
    # Outputs: one box per element of the tuple returned by full_pipeline.
    status = gr.Textbox(label="Status")
    transcript_box = gr.Textbox(label="Transcript", lines=10)
    stock_box = gr.Textbox(label="Stock Info", lines=10)

    # Clicking the button calls full_pipeline(url, cookies) and writes its
    # three return values to status, transcript_box and stock_box, in order.
    run_btn.click(fn=full_pipeline, inputs=[url_input, cookies_input], outputs=[status, transcript_box, stock_box])

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch(debug=True)