File size: 7,881 Bytes
4fde749
91c1296
a6e9713
91c1296
 
e79671d
bb84391
0921abd
91c1296
 
 
 
 
 
c963386
168cef5
91c1296
 
 
 
0921abd
f869bf3
91c1296
 
 
bb84391
91c1296
168cef5
91c1296
 
f869bf3
 
bb84391
91c1296
172f5c9
91c1296
 
 
 
 
 
f869bf3
16e2e72
e79671d
91c1296
8eb7fd0
168cef5
91c1296
 
 
 
5a12060
91c1296
 
 
 
 
8eb7fd0
168cef5
91c1296
8eb7fd0
91c1296
 
 
168cef5
91c1296
 
 
 
 
 
 
 
 
 
 
 
 
168cef5
91c1296
 
 
 
 
 
 
 
 
 
 
 
8eb7fd0
91c1296
 
168cef5
91c1296
 
 
633443e
91c1296
633443e
3d3b497
633443e
 
3d3b497
633443e
 
 
 
 
 
5a12060
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168cef5
 
3a135c5
 
168cef5
3e5d7ea
 
536d676
3e5d7ea
 
 
 
f869bf3
5a12060
 
 
 
 
3a135c5
 
91c1296
3a135c5
168cef5
 
 
 
3a135c5
 
 
 
 
91c1296
5a12060
168cef5
 
3d3b497
 
168cef5
3d3b497
 
 
5a12060
3e5d7ea
 
 
 
 
 
3d3b497
5a12060
 
 
 
 
3d3b497
 
 
 
168cef5
 
 
 
bb84391
3d3b497
 
 
 
bb84391
5a12060
168cef5
3d3b497
 
168cef5
 
3d3b497
bb84391
3d3b497
 
168cef5
3d3b497
168cef5
3d3b497
 
168cef5
 
3d3b497
 
 
168cef5
3d3b497
 
168cef5
 
0921abd
16e2e72
bb84391
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
import os
import gradio as gr
import tempfile
import shutil
import re
import traceback
from yt_dlp import YoutubeDL

# Optional: use OpenAI Whisper if available.
# WHISPER_AVAILABLE records whether the import succeeded so the rest of the
# module can degrade gracefully instead of crashing at import time.
try:
    import whisper
    WHISPER_AVAILABLE = True
except Exception:
    # Deliberately broader than ImportError: any failure while importing the
    # package means transcription cannot be offered. Unlike a bare `except:`
    # this no longer swallows KeyboardInterrupt / SystemExit.
    WHISPER_AVAILABLE = False

# Download audio from YouTube
def download_audio(url, cookies_path=None):
    """Download the audio track of ``url`` into a fresh temporary directory.

    Args:
        url: Video URL handed to yt-dlp.
        cookies_path: Optional path to a cookies.txt file; ignored when falsy.

    Returns:
        A ``(path, status)`` tuple. ``path`` is the downloaded file, or
        ``None`` on failure; ``status`` is a human-readable message. The
        function never raises: errors are printed and reported via ``status``.
        On failure the temporary directory is removed; on success the caller
        owns the returned file and its parent directory.
    """
    temp_dir = None
    try:
        temp_dir = tempfile.mkdtemp()
        output_path = os.path.join(temp_dir, "audio")

        # NOTE(review): 'user_agent', 'referer' and 'force_ipv4' mirror CLI
        # flag names; confirm YoutubeDL honours these keys when embedded
        # (its documented equivalents appear to be 'http_headers' and
        # 'source_address'). Left unchanged to preserve current behaviour.
        ydl_opts = {
            'format': 'bestaudio[ext=m4a]/bestaudio/best',
            'outtmpl': output_path + '.%(ext)s',
            'quiet': True,
            'noplaylist': True,
            'cookiefile': cookies_path if cookies_path else None,
            'user_agent': 'Mozilla/5.0',
            'referer': 'https://www.youtube.com/',
            'force_ipv4': True,
        }

        with YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])

        # Preferred extensions first, in the original priority order.
        for ext in (".m4a", ".webm", ".mp3"):
            final_path = output_path + ext
            if os.path.exists(final_path):
                return final_path, "✅ Audio downloaded successfully"

        # The format selector can fall back to other containers, so accept
        # any finished "audio.<ext>" file rather than reporting a false miss.
        for name in sorted(os.listdir(temp_dir)):
            if name.startswith("audio.") and not name.endswith((".part", ".ytdl")):
                return os.path.join(temp_dir, name), "✅ Audio downloaded successfully"

        shutil.rmtree(temp_dir, ignore_errors=True)
        return None, "❌ Audio file not found"

    except Exception as e:
        traceback.print_exc()
        # Do not leave an orphaned temp directory behind after a failure.
        if temp_dir is not None:
            shutil.rmtree(temp_dir, ignore_errors=True)
        return None, f"❌ Download error: {str(e)}"

# Transcribe using Whisper
_WHISPER_MODEL_CACHE = {}


def _get_whisper_model(name="tiny.en"):
    """Return the Whisper model ``name``, loading it only on first use."""
    if name not in _WHISPER_MODEL_CACHE:
        _WHISPER_MODEL_CACHE[name] = whisper.load_model(name)
    return _WHISPER_MODEL_CACHE[name]


def transcribe_audio(path):
    """Transcribe the audio file at ``path`` with the "tiny.en" Whisper model.

    Args:
        path: Audio input passed straight to ``model.transcribe``.

    Returns:
        The transcript text on success. On failure (Whisper missing, or any
        error while loading/transcribing) a message starting with "❌" is
        returned instead of raising; callers detect errors by that prefix.
    """
    if not WHISPER_AVAILABLE:
        return "❌ Whisper not available. Please install openai-whisper."
    try:
        # Reuse the loaded model: reloading it for every request is wasted
        # work on each button click.
        model = _get_whisper_model("tiny.en")
        result = model.transcribe(path)
        return result["text"]
    except Exception as e:
        traceback.print_exc()
        return f"❌ Transcription failed: {str(e)}"

# Extract stock insights
# Patterns are compiled once at import time instead of on every call.
_COMPANY_RE = re.compile(r'\b[A-Z][a-z]+(?: [A-Z][a-z]+)*\b')
_SYMBOL_RE = re.compile(r'\b[A-Z]{2,5}\b')
_PRICE_RE = re.compile(r'\$\d+(?:\.\d{1,2})?')
_ACTION_RE = re.compile(
    r'\b(buy|sell|hold|target|bullish|bearish|stop loss|accumulate|short|take profit|entry|exit)\b',
    re.IGNORECASE,
)
# Split on a period unless a digit follows, so "$150.50" stays in one piece.
_SENTENCE_SPLIT_RE = re.compile(r'\.(?!\d)')
_RECOMMENDATION_WORDS = ('buy', 'sell', 'target', 'hold', 'accumulate', 'short', 'entry', 'exit')
_MAX_ITEMS = 10
_MAX_RECOMMENDATIONS = 5


def _first_unique(items, limit=_MAX_ITEMS):
    """Return up to ``limit`` distinct items in order of first appearance."""
    return list(dict.fromkeys(items))[:limit]


def extract_stock_info(text):
    """Build a plain-text report of stock-related mentions found in ``text``.

    The report lists capitalised word runs (as company candidates), 2-5
    letter upper-case tokens (as symbols), dollar prices and trading action
    keywords, followed by up to five sentences that contain a recommendation
    keyword. Each list holds at most ten distinct entries in order of first
    appearance; actions are lower-cased so "Buy" and "buy" collapse.

    Args:
        text: Transcript text to scan.

    Returns:
        The formatted report, or a message starting with "❌" if extraction
        fails (for example when ``text`` is not a string). Never raises.
    """
    try:
        companies = _COMPANY_RE.findall(text)
        symbols = _SYMBOL_RE.findall(text)
        prices = _PRICE_RE.findall(text)
        actions = [action.lower() for action in _ACTION_RE.findall(text)]

        parts = ["=== STOCK RECOMMENDATION ANALYSIS ===\n\n"]
        if companies:
            parts.append(f"🏢 Companies Mentioned: {', '.join(_first_unique(companies))}\n")
        if symbols:
            parts.append(f"🔠 Symbols: {', '.join(_first_unique(symbols))}\n")
        if prices:
            parts.append(f"💲 Prices: {', '.join(_first_unique(prices))}\n")
        if actions:
            parts.append(f"📊 Actions: {', '.join(_first_unique(actions))}\n")

        # Keyword test is a substring match, so inflected forms such as
        # "buying" or "targets" still count as recommendations.
        recommendations = [
            sentence.strip()
            for sentence in _SENTENCE_SPLIT_RE.split(text)
            if any(word in sentence.lower() for word in _RECOMMENDATION_WORDS)
        ]

        if recommendations:
            parts.append("\n🎯 Potential Recommendations:\n")
            parts.extend(f"• {item}\n" for item in recommendations[:_MAX_RECOMMENDATIONS])

        if not any([companies, symbols, prices, actions]):
            parts.append("\n⚠️ No stock-related insights detected.")

        return "".join(parts)

    except Exception as e:
        return f"❌ Stock info extraction failed: {str(e)}"

# Save cookies
def save_cookies(file):
    """Copy an uploaded cookies file into a private temporary ``.txt`` file.

    Args:
        file: ``None``, a filesystem path, or a file-like object with a
            ``read()`` method (returning bytes or text).

    Returns:
        Path of the temporary copy, or ``None`` when no file was supplied or
        the copy failed. Failures are printed, not raised, and leave no
        partial file behind. The caller owns the returned file.
    """
    if file is None:
        return None

    temp_path = None
    try:
        # mkstemp creates the file atomically with owner-only permissions,
        # unlike the deprecated, race-prone tempfile.mktemp.
        fd, temp_path = tempfile.mkstemp(suffix=".txt")
        with os.fdopen(fd, "wb") as sink:
            if hasattr(file, "read"):
                payload = file.read()
                if isinstance(payload, str):
                    # Text-mode streams would otherwise fail on a binary write.
                    payload = payload.encode("utf-8")
                sink.write(payload)
            else:
                with open(file, "rb") as source:
                    shutil.copyfileobj(source, sink)
        return temp_path
    except (OSError, TypeError, ValueError) as e:
        print(f"❌ Failed to handle cookies.txt: {e}")
        if temp_path is not None:
            try:
                os.remove(temp_path)
            except OSError:
                pass
        return None


# Trim audio to shorter length (2 minutes) for CPU speed
import subprocess

def trim_audio(input_path, output_path, duration_sec=120):
    """Write the first ``duration_sec`` seconds of ``input_path`` to ``output_path``.

    The audio stream is copied without re-encoding when both paths share the
    same extension. Otherwise ffmpeg re-encodes, because a stream copy into a
    different container (for example AAC into ``.mp3``) is rejected.

    Args:
        input_path: Source audio file.
        output_path: Destination file; overwritten if it exists.
        duration_sec: Maximum length of the output in seconds.

    Returns:
        ``output_path`` when ffmpeg produced a non-empty file, otherwise
        ``input_path`` so callers can fall back to the untrimmed audio.
        Never raises for ffmpeg failures; a diagnostic is printed instead.
    """
    try:
        same_container = (
            os.path.splitext(input_path)[1].lower()
            == os.path.splitext(output_path)[1].lower()
        )
        command = [
            "ffmpeg", "-y", "-i", input_path,
            "-t", str(duration_sec),  # duration in seconds
            "-vn",  # audio only; cover art / video is irrelevant here
        ]
        if same_container:
            command += ["-c", "copy"]
        command.append(output_path)
        completed = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except (OSError, subprocess.SubprocessError, TypeError, ValueError) as e:
        # Typically: ffmpeg is not installed, or a path argument is invalid.
        print("❌ Error trimming audio:", e)
        return input_path

    if (
        completed.returncode == 0
        and os.path.isfile(output_path)
        and os.path.getsize(output_path) > 0
    ):
        return output_path

    # ffmpeg ran but failed: report its last stderr line and fall back.
    stderr_lines = completed.stderr.decode("utf-8", errors="replace").strip().splitlines()
    reason = stderr_lines[-1] if stderr_lines else f"ffmpeg exited with code {completed.returncode}"
    print("❌ Error trimming audio:", reason)
    if os.path.abspath(output_path) != os.path.abspath(input_path):
        try:
            os.remove(output_path)  # discard empty / partial output
        except OSError:
            pass
    return input_path


# Shared helpers for both flows
def _remove_quietly(*paths):
    """Best-effort deletion of temporary files; falsy or missing paths are skipped."""
    for path in paths:
        if not path:
            continue
        try:
            os.remove(path)
        except OSError:
            pass


def _analyze_audio(audio_path, show_transcript):
    """Trim, transcribe and analyse ``audio_path``; return ``(status, result)``."""
    # Keep the source extension for the trimmed clip so ffmpeg can copy the
    # stream as-is; fall back to ".mp3" when the source has no extension.
    suffix = os.path.splitext(audio_path)[1] or ".mp3"
    fd, trimmed_target = tempfile.mkstemp(suffix=suffix)
    os.close(fd)
    try:
        # ⏱ Trim audio to 2 minutes before transcription. Use the returned
        # path: trim_audio hands back the original file when trimming fails.
        clip_path = trim_audio(audio_path, trimmed_target)

        transcript = transcribe_audio(clip_path)
        if transcript.startswith("❌"):
            return transcript, ""

        stock_info = extract_stock_info(transcript)
        if show_transcript:
            return "✅ Complete", f"📜 Transcript:\n\n{transcript}\n\n\n{stock_info}"
        return "✅ Complete", stock_info
    finally:
        _remove_quietly(trimmed_target)


# YouTube flow
def run_pipeline(url, cookies_file, show_transcript):
    """Download a video's audio, transcribe it and extract stock insights.

    Args:
        url: Video URL entered by the user.
        cookies_file: Optional uploaded cookies.txt (path or file-like), or None.
        show_transcript: When true, the transcript is prepended to the result.

    Returns:
        ``(status, result)`` strings for the two output textboxes. Errors are
        reported through ``status`` (prefixed with "❌") with an empty result;
        the function never raises. Temporary files are removed before returning.
    """
    cookie_path = None
    audio_path = None
    try:
        if not WHISPER_AVAILABLE:
            return "❌ Whisper not installed", ""
        if not url or not url.strip():
            return "❌ YouTube URL required", ""

        cookie_path = save_cookies(cookies_file)
        audio_path, status = download_audio(url.strip(), cookie_path)
        if not audio_path:
            return status, ""

        return _analyze_audio(audio_path, show_transcript)

    except Exception:
        tb = traceback.format_exc()
        print(tb)
        return f"❌ Unhandled Error:\n{tb}", ""
    finally:
        _remove_quietly(cookie_path, audio_path)
        if audio_path:
            try:
                # download_audio places the file in its own temp directory;
                # rmdir only succeeds once that directory is empty.
                os.rmdir(os.path.dirname(audio_path))
            except OSError:
                pass


# Audio upload flow
def run_pipeline_audio(audio_file, show_transcript):
    """Transcribe an uploaded audio file and extract stock insights.

    Args:
        audio_file: Uploaded file as a path or a file-like object, or None.
        show_transcript: When true, the transcript is prepended to the result.

    Returns:
        ``(status, result)`` strings for the two output textboxes. Errors are
        reported through ``status`` (prefixed with "❌") with an empty result;
        the function never raises. Temporary files are removed before returning.
    """
    upload_copy = None
    try:
        if not WHISPER_AVAILABLE:
            return "❌ Whisper not installed", ""
        if audio_file is None:
            return "❌ No audio file uploaded", ""

        # Work on a private copy of the upload. For file-like objects the
        # extension comes from their ``name`` attribute, not from str(obj).
        is_stream = hasattr(audio_file, "read")
        source_name = getattr(audio_file, "name", "") if is_stream else audio_file
        suffix = os.path.splitext(str(source_name))[-1]
        fd, upload_copy = tempfile.mkstemp(suffix=suffix)
        with os.fdopen(fd, "wb") as sink:
            if is_stream:
                shutil.copyfileobj(audio_file, sink)
            else:
                with open(str(audio_file), "rb") as source:
                    shutil.copyfileobj(source, sink)

        return _analyze_audio(upload_copy, show_transcript)

    except Exception:
        tb = traceback.format_exc()
        print(tb)
        return f"❌ Unhandled Error:\n{tb}", ""
    finally:
        _remove_quietly(upload_copy)


# Gradio UI
# Two tabs share the same output layout: a status line plus a multi-line
# result box. Labels previously contained mis-encoded emoji; they are
# restored to the intended characters here.
with gr.Blocks(title="Stock Insights from YouTube or Audio") as demo:
    gr.Markdown("""
    # 📈 Extract Stock Recommendations from YouTube or Uploaded Audio
    Upload a YouTube video or audio file. We'll transcribe it using Whisper and extract stock insights.
    """)

    # Tab 1: download audio from a URL (optionally with cookies) and analyse it.
    with gr.Tab("📺 From YouTube Video"):
        with gr.Row():
            url_input = gr.Textbox(label="🎥 YouTube URL")
            cookie_input = gr.File(label="cookies.txt (optional)", file_types=[".txt"])
        show_transcript_yt = gr.Checkbox(label="Show Transcript", value=False)
        yt_run_btn = gr.Button("🚀 Extract from YouTube")
        yt_status = gr.Textbox(label="Status")
        yt_result = gr.Textbox(label="Transcript & Stock Info", lines=15)
        yt_run_btn.click(fn=run_pipeline, inputs=[url_input, cookie_input, show_transcript_yt], outputs=[yt_status, yt_result])

    # Tab 2: analyse an audio file uploaded directly by the user.
    with gr.Tab("🎵 From Uploaded Audio"):
        audio_input = gr.File(label="Upload Audio File", file_types=[".mp3", ".wav", ".m4a", ".webm"])
        show_transcript_audio = gr.Checkbox(label="Show Transcript", value=False)
        audio_run_btn = gr.Button("🚀 Extract from Audio")
        audio_status = gr.Textbox(label="Status")
        audio_result = gr.Textbox(label="Transcript & Stock Info", lines=15)
        audio_run_btn.click(fn=run_pipeline_audio, inputs=[audio_input, show_transcript_audio], outputs=[audio_status, audio_result])

if __name__ == "__main__":
    demo.launch(debug=True)