developer28 committed on
Commit
8eb7fd0
·
verified ·
1 Parent(s): f869bf3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +77 -141
app.py CHANGED
@@ -1,181 +1,117 @@
1
- # ✅ Combined YouTube Analyzer with Stock Info Extractor (fixed download using working app logic)
 
2
 
3
  import gradio as gr
4
  import os
5
  import tempfile
6
- import shutil
7
- import re
8
- import torch
9
- import numpy as np
10
  from yt_dlp import YoutubeDL
11
 
12
- # Whisper setup
13
- WHISPER_AVAILABLE = False
14
- WHISPER_TYPE = None
15
- try:
16
- import whisper
17
- WHISPER_AVAILABLE = True
18
- WHISPER_TYPE = "openai-whisper"
19
- except ImportError:
20
- try:
21
- from transformers import pipeline
22
- WHISPER_AVAILABLE = True
23
- WHISPER_TYPE = "transformers"
24
- except ImportError:
25
- pass
26
-
27
- # Stock Info Extraction
28
 
29
- def extract_stock_info_simple(text):
30
  try:
31
- companies = re.findall(r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*(?:\s+(?:Inc|Corp|Company|Ltd)\.?)?', text)
32
- symbols = re.findall(r'\b[A-Z]{2,5}\b', text)
33
- prices = re.findall(r'\$\d+(?:\.\d{2})?', text)
34
- actions = re.findall(r'\b(?:buy|sell|hold|bullish|bearish|target|stop loss)\b', text, re.IGNORECASE)
35
-
36
- result = "=== EXTRACTED STOCK INFORMATION ===\n\n"
37
-
38
- if companies:
39
- result += f"\U0001F4CA Mentioned Companies: {', '.join(set(companies[:10]))}\n\n"
40
- if symbols:
41
- result += f"\U0001F524 Potential Stock Symbols: {', '.join(set(symbols[:10]))}\n\n"
42
- if prices:
43
- result += f"\U0001F4B0 Price Mentions: {', '.join(set(prices[:10]))}\n\n"
44
- if actions:
45
- result += f"\U0001F4C8 Trading Actions: {', '.join(set(actions[:10]))}\n\n"
46
-
47
- recommendations = []
48
- sentences = text.split('.')
49
- for sentence in sentences:
50
- if any(word in sentence.lower() for word in ['buy', 'sell', 'target']):
51
- if any(sym in sentence for sym in symbols[:5]):
52
- recommendations.append(sentence.strip())
53
-
54
- if recommendations:
55
- result += "\U0001F3AF Potential Recommendations:\n"
56
- for rec in recommendations[:5]:
57
- result += f"β€’ {rec}\n"
58
-
59
- if not any([companies, symbols, prices, actions]):
60
- result += "⚠️ No clear stock recommendations found.\n"
61
-
62
- return result
63
-
64
  except Exception as e:
65
- return f"Error extracting stock info: {str(e)}"
66
 
67
- # Whisper Transcription
68
 
69
- def transcribe_audio(file_path):
70
- if not WHISPER_AVAILABLE:
71
- return "❌ Whisper not available", ""
72
  try:
73
- if WHISPER_TYPE == "openai-whisper":
74
- model = whisper.load_model("tiny")
75
- result = model.transcribe(file_path)
76
- return result["text"], "βœ… Transcription complete"
77
- else:
78
- pipe = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")
79
- result = pipe(file_path)
80
- return result["text"], "βœ… Transcription complete"
81
- except Exception as e:
82
- return "❌ Transcription failed", str(e)
83
-
84
- # ✅ Reused working download logic from other app
85
-
86
- def download_audio_youtube(url, cookies_file=None):
87
- try:
88
- temp_dir = tempfile.mkdtemp()
89
- out_path = os.path.join(temp_dir, "audio")
90
-
91
  ydl_opts = {
92
- 'format': 'bestaudio[ext=m4a]/bestaudio/best',
93
- 'outtmpl': out_path + '.%(ext)s',
94
  'quiet': True,
 
95
  'noplaylist': True,
96
- 'user_agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
97
- 'referer': 'https://www.youtube.com/',
98
- 'force_ipv4': True,
99
- 'extractor_retries': 3,
100
- 'fragment_retries': 3,
101
- 'retry_sleep_functions': {'http': lambda n: 2 ** n},
102
  }
103
-
104
  if cookies_file and os.path.exists(cookies_file):
105
  ydl_opts['cookiefile'] = cookies_file
106
- else:
107
- print("⚠️ No cookies file provided")
108
-
109
- ydl_opts['http_headers'] = {
110
- 'User-Agent': ydl_opts['user_agent'],
111
- 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
112
- 'Accept-Language': 'en-US,en;q=0.5',
113
- 'Accept-Encoding': 'gzip, deflate',
114
- 'DNT': '1',
115
- 'Connection': 'keep-alive',
116
- 'Upgrade-Insecure-Requests': '1',
117
- 'Referer': 'https://www.youtube.com/',
118
- }
119
 
120
  with YoutubeDL(ydl_opts) as ydl:
121
- ydl.download([url])
122
-
123
- for ext in ['.m4a', '.mp3', '.webm']:
124
- full_path = out_path + ext
125
- if os.path.exists(full_path):
126
- return full_path, "βœ… Audio downloaded"
127
 
128
- return None, "❌ Audio file not found"
 
 
 
 
 
 
 
129
 
130
  except Exception as e:
131
- import traceback
132
- traceback.print_exc()
133
- return None, f"❌ Download error: {str(e)}"
 
 
 
 
134
 
135
- # Copy cookie to tmp
 
136
 
137
- def save_uploaded_cookie(cookies):
138
- if cookies is None:
139
- return None
140
- temp_cookie_path = tempfile.mktemp(suffix=".txt")
141
- shutil.copy2(cookies.name, temp_cookie_path)
142
- return temp_cookie_path
 
 
 
 
 
 
 
 
 
 
143
 
144
- # Gradio app logic
145
 
146
- def full_pipeline(url, cookies):
147
- if not url:
148
- return "❌ Enter a valid YouTube URL", "", ""
 
149
 
150
- temp_cookie = save_uploaded_cookie(cookies)
151
- audio_path, msg = download_audio_youtube(url, temp_cookie)
152
- if not audio_path:
153
- return msg, "", ""
 
 
154
 
155
- transcript, tmsg = transcribe_audio(audio_path)
156
- if "❌" in transcript:
157
- return msg, transcript, tmsg
158
 
159
- stock_data = extract_stock_info_simple(transcript)
160
- return "βœ… Complete", transcript, stock_data
161
 
162
- # Gradio App
163
- with gr.Blocks(title="πŸ“ˆ Stock Info Extractor from YouTube") as demo:
164
  gr.Markdown("""
165
- # πŸ“ˆ Extract Stock Mentions from YouTube
166
- Upload a YouTube link + cookies.txt, and extract trading mentions using Whisper + AI
 
167
  """)
168
 
169
  with gr.Row():
170
- url_input = gr.Textbox(label="YouTube URL")
171
- cookies_input = gr.File(label="cookies.txt (exported from YouTube tab)", file_types=[".txt"])
 
172
 
173
- run_btn = gr.Button("πŸš€ Run Extraction")
174
- status = gr.Textbox(label="Status")
175
- transcript_box = gr.Textbox(label="Transcript", lines=10)
176
- stock_box = gr.Textbox(label="Stock Info", lines=10)
177
 
178
- run_btn.click(fn=full_pipeline, inputs=[url_input, cookies_input], outputs=[status, transcript_box, stock_box])
179
 
180
  if __name__ == "__main__":
181
  demo.launch(debug=True)
 
1
+ # ✅ Gemini-Based Stock Recommendation Extractor (No Audio, No Whisper)
2
+ # Uses video metadata (title + description) + Gemini Flash to extract stock info
3
 
4
  import gradio as gr
5
  import os
6
  import tempfile
7
+ import json
8
+ import google.generativeai as genai
 
 
9
  from yt_dlp import YoutubeDL
10
 
11
+ # ✅ Gemini Configuration
12
# Shared Gemini model handle; stays None until configure_gemini() succeeds.
GEMINI_MODEL = None


def configure_gemini(api_key, model_name="gemini-1.5-flash-latest"):
    """Configure the Gemini SDK and (re)build the shared model handle.

    Args:
        api_key: Gemini API key as entered by the user.
        model_name: Model identifier passed to ``genai.GenerativeModel``.
            Defaults to the model this app has always used.

    Returns:
        A human-readable status string. The success message text is kept
        exactly as-is because ``run_pipeline`` detects success with
        ``startswith`` on its leading marker; every failure message starts
        with the cross-mark prefix instead.
    """
    global GEMINI_MODEL

    # Reject a blank key up front with a specific message instead of
    # handing it to the SDK.
    if not api_key or not str(api_key).strip():
        return "❌ Gemini configuration failed: API key is empty."

    try:
        # Pasted keys often carry stray whitespace or a trailing newline.
        genai.configure(api_key=str(api_key).strip())
        GEMINI_MODEL = genai.GenerativeModel(model_name)
    except Exception as e:
        # UI boundary: report the problem as status text rather than raising.
        return f"❌ Gemini configuration failed: {str(e)}"

    return "βœ… Gemini API key configured successfully."
22
 
23
+ # ✅ Extract video metadata only (no download)
24
 
25
def extract_metadata(url, cookies_file=None):
    """Fetch a video's metadata with yt-dlp without downloading any media.

    Args:
        url: Video URL to inspect.
        cookies_file: Optional path to a cookies file. It is only passed to
            yt-dlp when the path exists on disk.

    Returns:
        A ``(metadata, status)`` tuple. On success ``metadata`` is a dict
        with the keys ``title``, ``description``, ``duration``,
        ``uploader``, ``view_count`` and ``upload_date``, and ``status`` is
        the success message. On failure ``metadata`` is ``None`` and
        ``status`` carries the error text.
    """
    # Fail fast on a missing or blank URL instead of handing it to yt-dlp.
    if not url or not url.strip():
        return None, "❌ Metadata extraction failed: no URL provided"

    try:
        ydl_opts = {
            'quiet': True,
            'skip_download': True,
            'noplaylist': True,
        }
        if cookies_file and os.path.exists(cookies_file):
            ydl_opts['cookiefile'] = cookies_file

        with YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url.strip(), download=False)

        # `or` rather than a .get() default: a key that is present but set
        # to None would otherwise leak None to the caller.
        return {
            'title': info.get("title") or "",
            'description': info.get("description") or "",
            'duration': info.get("duration") or 0,
            'uploader': info.get("uploader") or "",
            'view_count': info.get("view_count") or 0,
            'upload_date': info.get("upload_date") or "",
        }, "βœ… Video metadata extracted"

    except Exception as e:
        # UI boundary: surface any extractor failure as status text.
        return None, f"❌ Metadata extraction failed: {str(e)}"
49
+
50
+ # ✅ Gemini Prompt for Stock Extraction
51
+
52
def query_gemini_stock_analysis(meta):
    """Ask Gemini to pull stock recommendations out of video metadata.

    Args:
        meta: Mapping with ``title`` and ``description`` entries. Missing
            or ``None`` entries are treated as empty text.

    Returns:
        The model's text answer, a warning string when the model produced
        no text, or an error string when the model is not initialized or
        the request fails.
    """
    if GEMINI_MODEL is None:
        return "❌ Gemini model is not initialized."

    # Normalise first so a missing/None field cannot raise KeyError or show
    # up in the prompt as the literal text "None".
    meta = meta or {}
    title = meta.get('title') or ""
    description = meta.get('description') or ""

    # Prompt lines start at column 0 so no source indentation ends up in
    # the text sent to the model.
    prompt = f"""
Analyze the following YouTube video metadata and extract any stock trading recommendations:

Title: {title}
Description: {description}

Please extract:
- Mentioned companies or stock symbols
- Any price targets, buy/sell/hold recommendations
- Bullish/bearish sentiments if expressed
- If no stock info is present, clearly say "No financial or trading recommendations found."
- Keep the output short and to the point
"""

    try:
        response = GEMINI_MODEL.generate_content(prompt)
        text = response.text if response else ""
        # An empty answer is reported explicitly rather than shown as a
        # blank result box.
        return text or "⚠️ No response from Gemini."
    except Exception as e:
        # Covers request failures and errors raised while reading the
        # response text.
        return f"❌ Gemini query failed: {str(e)}"
75
 
76
+ # ✅ Main Pipeline
77
 
78
def _resolve_cookie_path(cookies):
    """Turn an uploaded cookies value into ``(path, is_temporary)``.

    The upload may arrive as a path string, as raw bytes, or as a file
    wrapper exposing ``.name`` (assumption: which one depends on the Gradio
    version and the File component's ``type`` setting -- confirm against
    the installed Gradio). Only the bytes form needs a file of our own.
    """
    if not cookies:
        return None, False
    if isinstance(cookies, (str, os.PathLike)):
        return os.fspath(cookies), False
    if isinstance(cookies, (bytes, bytearray)):
        # NamedTemporaryFile creates the file atomically, unlike the
        # deprecated, race-prone tempfile.mktemp().
        with tempfile.NamedTemporaryFile(suffix=".txt", delete=False) as tmp:
            tmp.write(cookies)
        return tmp.name, True
    return getattr(cookies, "name", None), False


def run_pipeline(api_key, url, cookies):
    """Run the full flow: configure Gemini, fetch metadata, analyse it.

    Args:
        api_key: Gemini API key from the UI.
        url: YouTube video URL from the UI.
        cookies: Optional uploaded cookies file (or ``None``).

    Returns:
        A ``(status, result)`` tuple of strings for the status box and the
        insights box. ``result`` is empty whenever a step fails.
    """
    # Cheapest check first: nothing else is useful without a URL.
    if not url or not url.strip():
        return "❌ Enter a valid YouTube URL", ""

    status = configure_gemini(api_key)
    if not status.startswith("βœ…"):
        return status, ""

    cookie_path, is_temp = _resolve_cookie_path(cookies)
    try:
        metadata, meta_status = extract_metadata(url, cookie_path)
    finally:
        if is_temp:
            # Best-effort cleanup of the copy we created; a failure to
            # delete must not mask the pipeline result.
            try:
                os.remove(cookie_path)
            except OSError:
                pass

    if not metadata:
        return meta_status, ""

    return meta_status, query_gemini_stock_analysis(metadata)
96
 
97
+ # ✅ Gradio UI
98
# Intro text rendered at the top of the page.
_HEADER_MD = """
# πŸ“ˆ Gemini-Based Stock Recommendation Extractor
Paste a YouTube link and get stock-related insights using only the title + description.
No audio, no transcription required. Fast and simple.
"""

# Page layout: intro text, one row of inputs, a trigger button and two
# read-out boxes. NOTE(review): the source's indentation was lost; this
# assumes the Row holds exactly the three input widgets -- confirm.
with gr.Blocks(title="Gemini Stock Extractor") as demo:
    gr.Markdown(_HEADER_MD)

    with gr.Row():
        api_input = gr.Textbox(label="πŸ”‘ Gemini API Key", type="password")
        url_input = gr.Textbox(label="YouTube Video URL")
        cookies_input = gr.File(label="cookies.txt (optional)", file_types=[".txt"])

    go_btn = gr.Button("πŸš€ Analyze")
    status_box = gr.Textbox(label="Status", lines=1)
    output_box = gr.Textbox(label="Extracted Stock Insights", lines=12)

    # Input order matches run_pipeline(api_key, url, cookies); the two
    # returned strings fill the status and insights boxes in that order.
    go_btn.click(
        fn=run_pipeline,
        inputs=[api_input, url_input, cookies_input],
        outputs=[status_box, output_box],
    )

if __name__ == "__main__":
    demo.launch(debug=True)