Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,155 +1,156 @@
|
|
1 |
-
# β
|
2 |
|
3 |
-
import gradio as gr
|
4 |
import os
|
|
|
5 |
import tempfile
|
6 |
-
import
|
|
|
7 |
import traceback
|
8 |
-
import google.generativeai as genai
|
9 |
from yt_dlp import YoutubeDL
|
10 |
|
11 |
-
#
|
12 |
-
|
|
|
|
|
|
|
|
|
13 |
|
14 |
-
|
15 |
-
try:
|
16 |
-
genai.configure(api_key=api_key)
|
17 |
-
global GEMINI_MODEL
|
18 |
-
GEMINI_MODEL = genai.GenerativeModel("gemini-1.5-flash-latest")
|
19 |
-
return "β
Gemini API key configured successfully."
|
20 |
-
except Exception as e:
|
21 |
-
return f"β Gemini configuration failed: {str(e)}"
|
22 |
|
23 |
-
|
|
|
|
|
|
|
24 |
|
25 |
-
def extract_metadata(url, cookies_file=None):
|
26 |
-
def run_yt_dlp(with_cookies):
|
27 |
ydl_opts = {
|
28 |
-
'
|
29 |
-
'
|
|
|
30 |
'noplaylist': True,
|
31 |
-
'
|
32 |
-
'force_ipv4': True,
|
33 |
'user_agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
|
34 |
-
'referer': 'https://www.youtube.com/'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
}
|
36 |
-
if with_cookies and cookies_file and os.path.exists(cookies_file):
|
37 |
-
with open(cookies_file, "r", encoding="utf-8", errors="ignore") as f:
|
38 |
-
header = f.readline().strip()
|
39 |
-
if "# Netscape HTTP Cookie File" in header:
|
40 |
-
ydl_opts['cookiefile'] = cookies_file
|
41 |
-
print("β
Using valid cookies file")
|
42 |
-
else:
|
43 |
-
print("β οΈ Invalid cookies format. Skipping cookies.")
|
44 |
-
else:
|
45 |
-
print("π Proceeding without cookies")
|
46 |
|
47 |
with YoutubeDL(ydl_opts) as ydl:
|
48 |
-
|
49 |
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
info = run_yt_dlp(with_cookies=False)
|
57 |
-
|
58 |
-
print("β
Metadata fetched successfully")
|
59 |
-
|
60 |
-
return {
|
61 |
-
'title': info.get("title", ""),
|
62 |
-
'description': info.get("description", ""),
|
63 |
-
'duration': info.get("duration", 0),
|
64 |
-
'uploader': info.get("uploader", ""),
|
65 |
-
'view_count': info.get("view_count", 0),
|
66 |
-
'upload_date': info.get("upload_date", "")
|
67 |
-
}, "β
Video metadata extracted"
|
68 |
|
69 |
except Exception as e:
|
70 |
traceback.print_exc()
|
71 |
-
return None, f"β
|
72 |
-
|
73 |
-
# β
Gemini Prompt for Stock Extraction
|
74 |
|
75 |
-
|
76 |
-
if GEMINI_MODEL is None:
|
77 |
-
return "β Gemini model is not initialized."
|
78 |
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
|
|
|
|
|
|
|
|
|
|
84 |
|
85 |
-
|
86 |
-
- Mentioned companies or stock symbols
|
87 |
-
- Any price targets, buy/sell/hold recommendations
|
88 |
-
- Bullish/bearish sentiments if expressed
|
89 |
-
- If no stock info is present, clearly say "No financial or trading recommendations found."
|
90 |
-
- Keep the output short and to the point
|
91 |
-
"""
|
92 |
|
|
|
93 |
try:
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
99 |
except Exception as e:
|
100 |
-
|
101 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
102 |
|
103 |
-
# β
|
104 |
|
105 |
-
def run_pipeline(
|
106 |
-
|
107 |
-
|
108 |
-
|
|
|
109 |
|
110 |
-
|
111 |
-
|
|
|
112 |
return status, ""
|
113 |
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
print(f"β Failed to save cookies: {e}")
|
124 |
-
|
125 |
-
metadata, meta_status = extract_metadata(url, cookie_path)
|
126 |
-
if not metadata:
|
127 |
-
return meta_status, ""
|
128 |
-
|
129 |
-
print(f"π Title: {metadata['title']}")
|
130 |
-
print(f"π Description length: {len(metadata['description'])} characters")
|
131 |
-
|
132 |
-
result = query_gemini_stock_analysis(metadata)
|
133 |
-
return meta_status, result
|
134 |
-
|
135 |
-
# β
Gradio UI
|
136 |
-
with gr.Blocks(title="Gemini Stock Extractor (Debug Mode)") as demo:
|
137 |
gr.Markdown("""
|
138 |
-
#
|
139 |
-
|
140 |
-
|
141 |
""")
|
142 |
|
143 |
with gr.Row():
|
144 |
-
|
145 |
-
|
146 |
-
cookies_input = gr.File(label="cookies.txt (optional)", file_types=[".txt"])
|
147 |
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
|
152 |
-
|
153 |
|
154 |
if __name__ == "__main__":
|
155 |
demo.launch(debug=True)
|
|
|
1 |
+
# β
Stock Recommendation Extractor from YouTube Audio (Working Pipeline)
|
2 |
|
|
|
3 |
import os
|
4 |
+
import gradio as gr
|
5 |
import tempfile
|
6 |
+
import shutil
|
7 |
+
import re
|
8 |
import traceback
|
|
|
9 |
from yt_dlp import YoutubeDL
|
10 |
|
11 |
+
# Optional: use OpenAI Whisper if available.
# Narrow `except Exception` (not a bare `except:`) so Ctrl-C / SystemExit
# during import are not swallowed; `whisper` is always bound so later
# references never hit a NameError.
try:
    import whisper
    WHISPER_AVAILABLE = True
except Exception:
    whisper = None
    WHISPER_AVAILABLE = False
|
17 |
|
18 |
+
# β
Download audio using working logic
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
|
20 |
+
def download_audio(url, cookies_path=None):
    """Download the best available audio stream of a YouTube video.

    Args:
        url: Video URL understood by yt-dlp.
        cookies_path: Optional path to a Netscape-format cookies.txt file.

    Returns:
        (audio_file_path, status_message) on success, or (None, error_message)
        on failure. The audio file lives in a freshly created temp dir;
        cleanup of that dir on success is left to the caller — TODO confirm
        the pipeline eventually removes it, it currently leaks.
    """
    try:
        temp_dir = tempfile.mkdtemp()
        output_path = os.path.join(temp_dir, "audio")

        ydl_opts = {
            'format': 'bestaudio[ext=m4a]/bestaudio/best',
            'outtmpl': output_path + '.%(ext)s',
            'quiet': True,
            'noplaylist': True,
            'user_agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
            'referer': 'https://www.youtube.com/',
            'force_ipv4': True,
            'http_headers': {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
                'Accept-Language': 'en-US,en;q=0.5',
                'Referer': 'https://www.youtube.com/'
            },
        }
        # Only pass a cookie file that actually exists on disk; handing
        # yt-dlp a bogus/None path is at best useless and at worst an error.
        if cookies_path and os.path.exists(cookies_path):
            ydl_opts['cookiefile'] = cookies_path

        with YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])

        # yt-dlp chooses the container, so look for whatever extension it
        # produced rather than hard-coding a few (.m4a/.webm/.mp3 missed
        # e.g. .opus). The temp dir contains only our "audio.*" output.
        for name in sorted(os.listdir(temp_dir)):
            if name.startswith("audio."):
                return os.path.join(temp_dir, name), "✅ Audio downloaded successfully"

        # Nothing useful was produced — don't leak the empty temp dir.
        shutil.rmtree(temp_dir, ignore_errors=True)
        return None, "❌ Audio file not found"

    except Exception as e:
        traceback.print_exc()
        return None, f"❌ Download error: {str(e)}"
|
|
|
|
|
55 |
|
56 |
+
# β
Transcribe audio using Whisper
|
|
|
|
|
57 |
|
58 |
+
def transcribe_audio(path):
    """Transcribe the audio file at *path* with Whisper's "tiny" model.

    Returns the transcript text on success, or an "❌"-prefixed error
    message when Whisper is missing or transcription blows up.
    """
    if not WHISPER_AVAILABLE:
        return "❌ Whisper not available. Please install openai-whisper."
    try:
        # "tiny" keeps memory/latency low at the cost of accuracy.
        tiny_model = whisper.load_model("tiny")
        transcription = tiny_model.transcribe(path)
        return transcription["text"]
    except Exception as err:
        traceback.print_exc()
        return f"❌ Transcription failed: {str(err)}"
|
68 |
|
69 |
+
# β
Extract stock-related information from transcript
|
|
|
|
|
|
|
|
|
|
|
|
|
70 |
|
71 |
+
def extract_stock_info(text):
    """Heuristically pull stock-related facts out of a transcript.

    Uses simple regexes (capitalised names, ALL-CAPS tickers, $-prices,
    trading verbs) rather than NLP, so false positives are expected.

    Args:
        text: Transcript text to scan.

    Returns:
        A formatted multi-line report string; on any internal failure an
        "❌"-prefixed message is returned instead of raising.
    """
    try:
        companies = re.findall(r'\b[A-Z][a-z]+(?: [A-Z][a-z]+)*\b', text)
        symbols = re.findall(r'\b[A-Z]{2,5}\b', text)
        prices = re.findall(r'\$\d+(?:\.\d{1,2})?', text)
        actions = re.findall(r'\b(buy|sell|hold|target|bullish|bearish|stop loss)\b', text, re.IGNORECASE)

        def _uniq(items, limit=10):
            # Deduplicate while preserving first-seen order. The previous
            # ', '.join(set(items[:10])) was nondeterministic across runs
            # (set iteration order) and deduped only AFTER truncating.
            return list(dict.fromkeys(items))[:limit]

        result = "=== STOCK RECOMMENDATION ANALYSIS ===\n\n"
        if companies:
            result += f"🏢 Companies Mentioned: {', '.join(_uniq(companies))}\n"
        if symbols:
            result += f"📈 Symbols: {', '.join(_uniq(symbols))}\n"
        if prices:
            result += f"💲 Prices: {', '.join(_uniq(prices))}\n"
        if actions:
            result += f"📊 Actions: {', '.join(_uniq(actions))}\n"

        # Surface sentences that look like explicit recommendations.
        recommendations = []
        for sentence in text.split("."):
            if any(word in sentence.lower() for word in ['buy', 'sell', 'target', 'hold']):
                recommendations.append(sentence.strip())

        if recommendations:
            result += "\n🎯 Potential Recommendations:\n"
            for rec in recommendations[:5]:
                result += f"• {rec}\n"

        if not any([companies, symbols, prices, actions]):
            result += "\n⚠️ No stock-related insights detected."

        return result

    except Exception as e:
        return f"❌ Stock info extraction failed: {str(e)}"
|
106 |
+
|
107 |
+
# β
Save uploaded cookies.txt
|
108 |
+
|
109 |
+
def save_cookies(file):
    """Persist an uploaded cookies.txt to a private temp file.

    Args:
        file: The Gradio upload — either a file-like object exposing
              ``.read()`` (older Gradio) or a filesystem path / object with
              a ``.name`` path (newer Gradio hands uploads over as paths).
              ``None`` means nothing was uploaded.

    Returns:
        The path of the saved temp file, or None when *file* is None.
    """
    if file is None:
        return None
    # mkstemp creates the file atomically with a private mode, unlike the
    # race-prone and deprecated tempfile.mktemp used previously.
    fd, temp_path = tempfile.mkstemp(suffix=".txt")
    with os.fdopen(fd, "wb") as out:
        if hasattr(file, "read"):
            data = file.read()
            if isinstance(data, str):
                data = data.encode("utf-8")
            out.write(data)
        else:
            src_path = file if isinstance(file, (str, os.PathLike)) else file.name
            with open(src_path, "rb") as src:
                shutil.copyfileobj(src, out)
    return temp_path
|
116 |
|
117 |
+
# β
Full pipeline
|
118 |
|
119 |
+
def run_pipeline(url, cookies_file):
    """End-to-end pipeline: download audio → transcribe → extract stock info.

    Returns a (status_message, stock_info_text) pair matching the two
    Gradio output textboxes; the second element is "" on any failure.
    """
    # Guard clauses: bail out early on missing prerequisites.
    if not WHISPER_AVAILABLE:
        return "❌ Whisper is not installed. Run: pip install openai-whisper", ""
    if not url:
        return "❌ YouTube URL required", ""

    saved_cookie_path = save_cookies(cookies_file)
    downloaded_path, dl_status = download_audio(url, saved_cookie_path)
    if not downloaded_path:
        return dl_status, ""

    text = transcribe_audio(downloaded_path)
    # transcribe_audio signals failure via an "❌"-prefixed message.
    if text.startswith("❌"):
        return text, ""

    return "✅ Complete", extract_stock_info(text)
|
136 |
+
|
137 |
+
# β
Gradio Interface
|
138 |
+
# Gradio UI: one page with a URL box, optional cookies upload, and two outputs.
with gr.Blocks(title="Stock Insights from YouTube Audio") as demo:
    # Header / description shown at the top of the app.
    gr.Markdown("""
    # 🎧 Extract Stock Recommendations from YouTube Audio
    This app downloads the audio from a YouTube video, transcribes it with Whisper,
    and extracts stock trading recommendations, sentiments, and symbols.
    """)

    # Inputs: the video URL plus an optional cookies.txt (for gated videos).
    with gr.Row():
        url_input = gr.Textbox(label="📥 YouTube Video URL")
        cookie_input = gr.File(label="cookies.txt (optional)", file_types=[".txt"])

    run_btn = gr.Button("🚀 Extract Stock Info")
    status_output = gr.Textbox(label="Status")
    result_output = gr.Textbox(label="Stock Info", lines=12)

    # Wire the button to the pipeline; run_pipeline returns (status, stock_info).
    run_btn.click(fn=run_pipeline, inputs=[url_input, cookie_input], outputs=[status_output, result_output])
|
154 |
|
155 |
# Script entry point: launch the Gradio server (debug=True surfaces
# tracebacks in the browser while developing).
if __name__ == "__main__":
    demo.launch(debug=True)
|