File size: 5,246 Bytes
91c1296
bb84391
4fde749
91c1296
a6e9713
91c1296
 
e79671d
bb84391
0921abd
91c1296
 
 
 
 
 
c963386
91c1296
16e2e72
91c1296
 
 
 
0921abd
f869bf3
91c1296
 
 
bb84391
91c1296
172f5c9
91c1296
 
 
 
 
 
 
 
f869bf3
 
bb84391
91c1296
172f5c9
91c1296
 
 
 
 
 
f869bf3
16e2e72
e79671d
91c1296
8eb7fd0
91c1296
bb84391
91c1296
 
 
 
 
 
 
 
 
 
8eb7fd0
91c1296
8eb7fd0
91c1296
8eb7fd0
91c1296
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8eb7fd0
91c1296
 
 
 
 
 
 
 
 
 
 
f869bf3
91c1296
bb84391
91c1296
 
 
 
 
e79671d
91c1296
 
 
8eb7fd0
f869bf3
91c1296
 
 
 
 
 
 
 
 
0921abd
91c1296
 
 
0921abd
bb84391
0921abd
91c1296
 
bb84391
91c1296
 
 
bb84391
91c1296
0921abd
16e2e72
bb84391
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
# ✅ Stock Recommendation Extractor from YouTube Audio (Working Pipeline)

import os
import gradio as gr
import tempfile
import shutil
import re
import traceback
from yt_dlp import YoutubeDL

# Optional: use OpenAI Whisper if available
try:
    import whisper
    WHISPER_AVAILABLE = True
except Exception:
    # Whisper is optional: any ordinary failure while importing it simply
    # disables transcription. Catching ``Exception`` (instead of a bare
    # ``except:``) keeps that best-effort behaviour while no longer swallowing
    # KeyboardInterrupt / SystemExit raised during the import.
    WHISPER_AVAILABLE = False

# ✅ Download the audio track with yt-dlp

def download_audio(url, cookies_path=None):
    """Download the audio track of ``url`` into a fresh temporary directory.

    Args:
        url: Video URL handed to yt-dlp.
        cookies_path: Optional path to a cookies file, passed to yt-dlp as
            ``cookiefile``. ``None`` (or any falsy value) disables it.

    Returns:
        A ``(path, message)`` tuple. On success ``path`` points at the
        downloaded file, which lives in a newly created temporary directory
        that the caller is responsible for deleting. On failure ``path`` is
        ``None``, the temporary directory has already been removed, and
        ``message`` describes what went wrong.
    """
    success = "βœ… Audio downloaded successfully"
    temp_dir = None
    try:
        temp_dir = tempfile.mkdtemp()
        output_path = os.path.join(temp_dir, "audio")

        # NOTE(review): 'user_agent', 'referer' and 'force_ipv4' mirror CLI
        # flag names; confirm the YoutubeDL Python API honours them. They are
        # kept unchanged so the request behaviour stays as it was.
        ydl_opts = {
            'format': 'bestaudio[ext=m4a]/bestaudio/best',
            'outtmpl': output_path + '.%(ext)s',
            'quiet': True,
            'noplaylist': True,
            'cookiefile': cookies_path if cookies_path else None,
            'user_agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
            'referer': 'https://www.youtube.com/',
            'force_ipv4': True,
            'http_headers': {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
                'Accept-Language': 'en-US,en;q=0.5',
                'Referer': 'https://www.youtube.com/'
            },
        }

        with YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])

        # Preferred containers first, in the same order as before.
        for ext in (".m4a", ".webm", ".mp3"):
            candidate = output_path + ext
            if os.path.exists(candidate):
                return candidate, success

        # The format selector falls back to 'bestaudio/best', whose extension
        # is not limited to the three above, so accept whatever file the
        # output template produced (ignoring partial-download artefacts).
        for name in sorted(os.listdir(temp_dir)):
            if name.startswith("audio.") and not name.endswith((".part", ".ytdl")):
                return os.path.join(temp_dir, name), success

        failure = "❌ Audio file not found"

    except Exception as e:
        traceback.print_exc()
        failure = f"❌ Download error: {str(e)}"

    # Nothing usable was produced: do not leave the temp directory behind.
    if temp_dir is not None:
        shutil.rmtree(temp_dir, ignore_errors=True)
    return None, failure

# ✅ Transcribe audio using Whisper

def transcribe_audio(path):
    """Transcribe the audio file at ``path`` with Whisper's "tiny" model.

    Returns the transcript text on success. When Whisper is unavailable or
    transcription raises, an error message string is returned instead (the
    traceback of a transcription failure is printed first).
    """
    if WHISPER_AVAILABLE:
        try:
            # Load the model, run it on the file, and keep only the text.
            return whisper.load_model("tiny").transcribe(path)["text"]
        except Exception as err:
            traceback.print_exc()
            return f"❌ Transcription failed: {str(err)}"
    return "❌ Whisper not available. Please install openai-whisper."

# ✅ Extract stock-related information from the transcript

def extract_stock_info(text):
    """Build a plain-text summary of stock-related mentions in ``text``.

    The summary lists up to ten distinct capitalised names, ticker-like
    symbols, dollar prices and action words (in order of first appearance),
    followed by up to five sentences that contain a trading keyword.

    Args:
        text: Transcript to analyse.

    Returns:
        The formatted report. If anything goes wrong (for example ``text`` is
        not a string) an error message string is returned instead of raising.
    """
    try:
        companies = _unique_in_order(
            re.findall(r'\b[A-Z][a-z]+(?: [A-Z][a-z]+)*\b', text))
        symbols = _unique_in_order(re.findall(r'\b[A-Z]{2,5}\b', text))
        prices = _unique_in_order(re.findall(r'\$\d+(?:\.\d{1,2})?', text))
        # Lower-case the hits so "Buy" and "buy" count as the same action.
        actions = _unique_in_order(
            hit.lower()
            for hit in re.findall(
                r'\b(buy|sell|hold|target|bullish|bearish|stop loss)\b',
                text, re.IGNORECASE)
        )

        parts = ["=== STOCK RECOMMENDATION ANALYSIS ===\n\n"]
        if companies:
            parts.append(f"🏒 Companies Mentioned: {', '.join(companies)}\n")
        if symbols:
            parts.append(f"πŸ”  Symbols: {', '.join(symbols)}\n")
        if prices:
            parts.append(f"πŸ’² Prices: {', '.join(prices)}\n")
        if actions:
            parts.append(f"πŸ“Š Actions: {', '.join(actions)}\n")

        # Highlight potential recommendations. Sentences are split only at
        # terminators followed by whitespace, so "$150.50" stays intact.
        recommendations = []
        for sentence in _SENTENCE_BREAK.split(text):
            sentence = sentence.strip().rstrip(".").rstrip()
            if _RECOMMENDATION_WORD.search(sentence):
                recommendations.append(sentence)

        if recommendations:
            parts.append("\n🎯 Potential Recommendations:\n")
            parts.extend(f"β€’ {r}\n" for r in recommendations[:5])

        if not any([companies, symbols, prices, actions]):
            parts.append("\n⚠️ No stock-related insights detected.")

        return "".join(parts)

    except Exception as e:
        return f"❌ Stock info extraction failed: {str(e)}"


# Sentence boundary: whitespace that follows '.', '!' or '?'.
_SENTENCE_BREAK = re.compile(r'(?<=[.!?])\s+')

# Whole-word trading keywords (with simple inflections), so that words such
# as "threshold" or "household" are not mistaken for "hold".
_RECOMMENDATION_WORD = re.compile(
    r'\b(?:buy|sell|hold|target)(?:s|ing|ed)?\b', re.IGNORECASE)


def _unique_in_order(items, limit=10):
    """Return the first ``limit`` distinct items, keeping first-seen order."""
    return list(dict.fromkeys(items))[:limit]

# ✅ Save the uploaded cookies.txt to a temporary file

def save_cookies(file):
    """Copy an uploaded cookies file to a private temporary ``.txt`` file.

    Args:
        file: ``None``, a filesystem path (``str`` / ``os.PathLike``), an
            object with a ``read()`` method, or an object exposing the
            upload's location as ``.name``.
            NOTE(review): which of these the Gradio File component delivers
            depends on the installed Gradio version/configuration - confirm.

    Returns:
        Path of the newly written temporary file, or ``None`` when no file
        was supplied. The caller is responsible for deleting it.
    """
    if file is None:
        return None

    if isinstance(file, (str, os.PathLike)):
        with open(file, "rb") as src:
            data = src.read()
    elif hasattr(file, "read"):
        data = file.read()
    else:
        with open(file.name, "rb") as src:
            data = src.read()

    # A handle opened in text mode yields str; the copy is written as bytes.
    if isinstance(data, str):
        data = data.encode("utf-8")

    # mkstemp creates the file atomically, unlike the deprecated, race-prone
    # mktemp, which matters because cookies are credentials.
    fd, temp_path = tempfile.mkstemp(suffix=".txt")
    with os.fdopen(fd, "wb") as f:
        f.write(data)
    return temp_path

# ✅ Full pipeline: download -> transcribe -> extract

def run_pipeline(url, cookies_file):
    """Run download -> transcription -> extraction for one video URL.

    Args:
        url: Video URL entered by the user; surrounding whitespace is ignored.
        cookies_file: Optional uploaded cookies file (or ``None``).

    Returns:
        A ``(status, stock_info)`` tuple of strings. ``stock_info`` is empty
        whenever a step fails, in which case ``status`` carries that step's
        error message.
    """
    if not WHISPER_AVAILABLE:
        return "❌ Whisper is not installed. Run: pip install openai-whisper", ""
    url = (url or "").strip()
    if not url:
        return "❌ YouTube URL required", ""

    cookie_path = None
    audio_path = None
    try:
        cookie_path = save_cookies(cookies_file)
        audio_path, status = download_audio(url, cookie_path)
        if not audio_path:
            return status, ""

        transcript = transcribe_audio(audio_path)
        # transcribe_audio reports failures as a message with this prefix.
        if transcript.startswith("❌"):
            return transcript, ""

        stock_info = extract_stock_info(transcript)
        return "βœ… Complete", stock_info
    finally:
        # Best-effort cleanup so repeated runs do not pile up cookie copies
        # (credentials) and downloaded audio in the temp directory.
        for leftover in (cookie_path, audio_path):
            if leftover:
                try:
                    os.remove(leftover)
                except OSError:
                    pass  # already gone or not removable; nothing to do
        if audio_path:
            try:
                # rmdir only succeeds on an empty directory, so this can
                # never delete unrelated files.
                os.rmdir(os.path.dirname(audio_path))
            except OSError:
                pass

# ✅ Gradio interface
with gr.Blocks(
    title="Stock Insights from YouTube Audio",
) as demo:
    # Page header and short description.
    gr.Markdown(
        """
    # 🎧 Extract Stock Recommendations from YouTube Audio
    This app downloads the audio from a YouTube video, transcribes it with Whisper,
    and extracts stock trading recommendations, sentiments, and symbols.
    """
    )

    # Inputs side by side: the video URL and an optional cookies upload.
    with gr.Row():
        url_input = gr.Textbox(
            label="πŸŽ₯ YouTube Video URL",
        )
        cookie_input = gr.File(
            label="cookies.txt (optional)",
            file_types=[".txt"],
        )

    # Trigger button followed by the two output fields.
    run_btn = gr.Button("πŸš€ Extract Stock Info")
    status_output = gr.Textbox(label="Status")
    result_output = gr.Textbox(
        label="Stock Info",
        lines=12,
    )

    # Clicking the button runs the pipeline and fills (status, stock info).
    run_btn.click(
        fn=run_pipeline,
        inputs=[url_input, cookie_input],
        outputs=[status_output, result_output],
    )

# Launch the Gradio app (with debug=True) only when this file is run as a
# script, not when it is imported.
if __name__ == "__main__":
    demo.launch(debug=True)