File size: 5,665 Bytes
bb84391
 
 
 
4fde749
a6e9713
c963386
bb84391
 
 
 
0921abd
bb84391
c963386
 
0921abd
 
 
c963386
bb84391
c963386
 
 
 
bb84391
c963386
0921abd
bb84391
c963386
9518e76
0921abd
 
9518e76
 
 
bb84391
 
9518e76
bb84391
3a866c5
bb84391
9518e76
bb84391
9518e76
bb84391
0921abd
bb84391
 
0921abd
9518e76
0921abd
bb84391
 
9518e76
bb84391
0921abd
bb84391
9518e76
 
bb84391
9518e76
bb84391
 
0921abd
bb84391
0921abd
9518e76
16e2e72
bb84391
0921abd
bb84391
 
 
0921abd
bb84391
 
 
 
 
 
 
 
0921abd
bb84391
3a866c5
bb84391
 
 
16e2e72
bb84391
 
7c660a9
bb84391
 
 
 
 
 
7c660a9
bb84391
7c660a9
 
 
 
 
 
 
 
 
 
bb84391
 
 
 
 
 
 
 
16e2e72
bb84391
 
 
 
 
 
 
7c660a9
 
 
bb84391
 
 
7c660a9
bb84391
 
 
7c660a9
bb84391
 
 
7c660a9
bb84391
 
0921abd
bb84391
 
0921abd
bb84391
0921abd
bb84391
 
 
 
 
 
 
 
 
0921abd
16e2e72
bb84391
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
# ✅ Combined YouTube Analyzer with Stock Info Extractor
# ⬇️ Based on your working app + whisper + stock extraction

import gradio as gr
import os
import tempfile
import shutil
import re
import torch
import numpy as np
from yt_dlp import YoutubeDL

# Speech-to-text backend detection.
# The standalone `whisper` package is tried first, then the `transformers`
# pipeline; when neither can be imported both flags keep their
# "unavailable" defaults.
WHISPER_AVAILABLE = False
WHISPER_TYPE = None
try:
    import whisper
except ImportError:
    try:
        from transformers import pipeline
    except ImportError:
        pass  # no backend installed
    else:
        WHISPER_AVAILABLE, WHISPER_TYPE = True, "transformers"
else:
    WHISPER_AVAILABLE, WHISPER_TYPE = True, "openai-whisper"

# Stock Info Extraction

def extract_stock_info_simple(text):
    """Build a plain-text report of stock-related mentions found in *text*.

    The scan is purely regex based: capitalised word runs are reported as
    companies and 2-5 letter upper-case words as symbols, with no validation
    against real tickers, so every entry is only a candidate.

    Args:
        text: Transcript to scan.

    Returns:
        A report string starting with the
        "=== EXTRACTED STOCK INFORMATION ===" header. Each category lists at
        most 10 distinct values in order of first appearance, and up to 5
        sentences that combine buy/sell/target with one of the first 5
        symbols are listed as potential recommendations. If scanning fails
        (for example *text* is not a string) an
        "Error extracting stock info: ..." message is returned instead of
        raising.
    """
    def first_unique(values, limit=10):
        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # report is identical from run to run (set() ordering is not) and
        # repeated mentions cannot crowd out later distinct values.
        return list(dict.fromkeys(values))[:limit]

    try:
        companies = first_unique(re.findall(
            r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*(?:\s+(?:Inc|Corp|Company|Ltd)\.?)?',
            text,
        ))
        symbols = first_unique(re.findall(r'\b[A-Z]{2,5}\b', text))
        prices = first_unique(re.findall(r'\$\d+(?:\.\d{2})?', text))
        # Matching is case-insensitive, so fold case before de-duplicating;
        # otherwise "Buy" and "buy" would be listed as two actions.
        actions = first_unique(
            match.lower()
            for match in re.findall(
                r'\b(?:buy|sell|hold|bullish|bearish|target|stop loss)\b',
                text,
                re.IGNORECASE,
            )
        )

        sections = (
            ("\U0001F4CA Mentioned Companies", companies),
            ("\U0001F524 Potential Stock Symbols", symbols),
            ("\U0001F4B0 Price Mentions", prices),
            ("\U0001F4C8 Trading Actions", actions),
        )
        parts = ["=== EXTRACTED STOCK INFORMATION ===\n\n"]
        parts.extend(
            f"{label}: {', '.join(values)}\n\n"
            for label, values in sections
            if values
        )

        top_symbols = symbols[:5]
        recommendations = []
        # Split on sentence-ending periods only: a period followed by a digit
        # is a decimal point and must stay inside the sentence ("$150.00").
        for sentence in re.split(r'\.(?!\d)', text):
            lowered = sentence.lower()
            if not any(word in lowered for word in ('buy', 'sell', 'target')):
                continue
            if any(sym in sentence for sym in top_symbols):
                recommendations.append(sentence.strip())

        if recommendations:
            parts.append("\U0001F3AF Potential Recommendations:\n")
            parts.extend(f"\u2022 {rec}\n" for rec in recommendations[:5])

        if not any((companies, symbols, prices, actions)):
            parts.append("⚠️ No clear stock recommendations found.\n")

        return "".join(parts)

    except Exception as e:
        # Deliberate catch-all: the result is shown directly in the UI, so a
        # failure is reported as text rather than raised.
        return f"Error extracting stock info: {str(e)}"

# Whisper Transcription

# Loaded speech-to-text callables keyed by backend name, so model weights are
# loaded once per process instead of on every request.
_ASR_BACKENDS = {}


def _get_asr_backend():
    """Return a cached callable mapping an audio path to a dict with "text"."""
    backend = _ASR_BACKENDS.get(WHISPER_TYPE)
    if backend is None:
        if WHISPER_TYPE == "openai-whisper":
            backend = whisper.load_model("tiny").transcribe
        else:
            # chunk_length_s makes the pipeline process long recordings in
            # 30-second windows instead of a single window.
            backend = pipeline(
                "automatic-speech-recognition",
                model="openai/whisper-tiny",
                chunk_length_s=30,
            )
        _ASR_BACKENDS[WHISPER_TYPE] = backend
    return backend


def transcribe_audio(file_path):
    """Transcribe the audio file at *file_path* with the detected backend.

    Args:
        file_path: Path of the audio file to transcribe.

    Returns:
        A ``(text, message)`` tuple. On success *text* is the transcript and
        *message* a completion note. On failure *text* starts with "❌"
        (callers test for that marker) and *message* carries the error
        detail, or is empty when no backend is installed. Never raises for
        backend errors.
    """
    if not WHISPER_AVAILABLE:
        return "❌ Whisper not available", ""
    try:
        result = _get_asr_backend()(file_path)
        return result["text"], "\u2705 Transcription complete"
    except Exception as e:
        # Catch-all on purpose: model loading and decoding can fail in many
        # backend-specific ways and the UI needs a message, not a traceback.
        return "❌ Transcription failed", str(e)

# Audio Downloader using yt-dlp

def download_audio_youtube(url, cookies_file=None):
    """Download the audio track of *url* into a fresh temporary directory.

    Args:
        url: Video URL handed to yt-dlp.
        cookies_file: Optional path of a cookies file, passed to yt-dlp as
            ``cookiefile``; any falsy value means "no cookies".

    Returns:
        A ``(path, message)`` tuple. On success *path* is the downloaded file
        (inside its own temporary directory, which the caller should delete
        when done). On failure *path* is ``None``, *message* explains why and
        the temporary directory has already been removed. Never raises for
        download errors.
    """
    temp_dir = None
    try:
        temp_dir = tempfile.mkdtemp()
        out_path = os.path.join(temp_dir, "audio")
        # NOTE(review): 'user_agent', 'referer' and 'force_ipv4' look like
        # CLI flag names - confirm the YoutubeDL Python API honours them.
        ydl_opts = {
            'format': 'bestaudio[ext=m4a]/bestaudio/best',
            'outtmpl': out_path + '.%(ext)s',
            'quiet': True,
            'noplaylist': True,
            'cookiefile': cookies_file or None,
            'user_agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
            'referer': 'https://www.youtube.com/',
            'force_ipv4': True,
            'http_headers': {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
                'Accept-Language': 'en-US,en;q=0.5',
                'Accept-Encoding': 'gzip, deflate',
                'DNT': '1',
                'Connection': 'keep-alive',
                'Upgrade-Insecure-Requests': '1',
                'Referer': 'https://www.youtube.com/',
            },
        }
        with YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])

        # Preferred containers first, in the original lookup order.
        for ext in ('.m4a', '.mp3', '.webm'):
            full_path = out_path + ext
            if os.path.exists(full_path):
                return full_path, "\u2705 Audio downloaded"

        # The 'bestaudio/best' fallbacks may produce another container, so
        # accept any finished "audio.<ext>" file (skip partial downloads).
        leftovers = sorted(
            name for name in os.listdir(temp_dir)
            if name.startswith("audio.")
            and not name.endswith((".part", ".ytdl"))
        )
        if leftovers:
            return os.path.join(temp_dir, leftovers[0]), "\u2705 Audio downloaded"

        shutil.rmtree(temp_dir, ignore_errors=True)
        return None, "❌ Audio file not found"
    except Exception as e:
        # Nothing usable was produced: do not leave the temp directory behind.
        if temp_dir:
            shutil.rmtree(temp_dir, ignore_errors=True)
        return None, f"❌ Download error: {str(e)}"

# Gradio callback: download -> transcribe -> extract

def _cookie_path(cookies):
    """Return the filesystem path of an uploaded cookies file, or None.

    Accepts what a file-upload component may hand over: nothing, a path, or
    an object exposing the path as ``.name``.
    """
    if not cookies:
        return None
    if isinstance(cookies, (str, os.PathLike)):
        path = os.fspath(cookies)
    else:
        path = getattr(cookies, "name", None)
    return path if path and os.path.isfile(path) else None


def _discard_audio(audio_path):
    """Best-effort removal of a downloaded audio file and its folder."""
    try:
        os.remove(audio_path)
        # rmdir only succeeds on an empty directory, so unrelated files in
        # the same folder are never touched.
        os.rmdir(os.path.dirname(audio_path))
    except OSError:
        pass  # cleanup must never mask the pipeline result


def full_pipeline(url, cookies):
    """Run download -> transcription -> stock extraction for one video.

    Args:
        url: Video URL typed by the user.
        cookies: Optional uploaded cookies file (path, file-like object with
            ``.name``, or ``None``).

    Returns:
        A ``(status, transcript, stock_info)`` tuple of strings. When a step
        fails, *status* carries the failure message and the other two fields
        are empty.
    """
    if not url or not url.strip():
        return "❌ Enter a valid YouTube URL", "", ""

    audio_path, msg = download_audio_youtube(url.strip(), _cookie_path(cookies))
    if not audio_path:
        return msg, "", ""

    try:
        transcript, tmsg = transcribe_audio(audio_path)
    finally:
        # The audio is only needed for transcription; do not let downloads
        # pile up in the temp directory.
        _discard_audio(audio_path)

    if "❌" in transcript:
        # Report the failure in the status field instead of showing the
        # stale download message and leaking error text into the result boxes.
        status = f"{transcript}: {tmsg}" if tmsg else transcript
        return status, "", ""

    stock_data = extract_stock_info_simple(transcript)
    return "\u2705 Complete", transcript, stock_data


# Gradio App
# Declares the page layout and wires the button to full_pipeline. Components
# are created in the order they are written below.
with gr.Blocks(title="πŸ“ˆ Stock Info Extractor from YouTube") as demo:
    # Page heading and short usage note.
    gr.Markdown("""
    # πŸ“ˆ Extract Stock Mentions from YouTube
    Upload a YouTube link + cookies.txt, and extract trading mentions using Whisper + AI
    """)

    # Inputs: the video URL and an optional cookies upload, grouped in one Row.
    with gr.Row():
        url_input = gr.Textbox(label="YouTube URL")
        cookies_input = gr.File(label="cookies.txt (optional)", file_types=[".txt"])

    # Trigger button followed by the three output boxes.
    run_btn = gr.Button("πŸš€ Run Extraction")
    status = gr.Textbox(label="Status")
    transcript_box = gr.Textbox(label="Transcript", lines=10)
    stock_box = gr.Textbox(label="Stock Info", lines=10)

    # inputs map to full_pipeline(url, cookies); the three outputs receive
    # the 3-tuple it returns, in order.
    run_btn.click(fn=full_pipeline, inputs=[url_input, cookies_input], outputs=[status, transcript_box, stock_box])

# Launch the app only when the file is executed as a script, not on import.
if __name__ == "__main__":
    demo.launch(debug=True)