Spaces:
Sleeping
Sleeping
# ✅ Combined YouTube Analyzer with Stock Info Extractor
# ⬇️ Based on your working app + whisper + stock extraction
import gradio as gr | |
import os | |
import tempfile | |
import shutil | |
import re | |
import torch | |
import numpy as np | |
from yt_dlp import YoutubeDL | |
# Whisper setup | |
# Detect which speech-to-text backend can be imported.
# Preference order: the `whisper` package first, then the Hugging Face
# `transformers` pipeline. If neither imports, transcription is disabled and
# the flags keep their defaults.
WHISPER_AVAILABLE = False
WHISPER_TYPE = None

try:
    import whisper
except ImportError:
    # Primary backend missing: fall back to transformers.
    try:
        from transformers import pipeline
    except ImportError:
        # Deliberate best-effort: the app still starts without a backend.
        pass
    else:
        WHISPER_AVAILABLE = True
        WHISPER_TYPE = "transformers"
else:
    WHISPER_AVAILABLE = True
    WHISPER_TYPE = "openai-whisper"
# Stock Info Extraction | |
def extract_stock_info_simple(text):
    """Build a plain-text report of stock-related mentions found in *text*.

    The extraction is purely regex based:

    * companies  - runs of Capitalised words, optionally ending in
      Inc/Corp/Company/Ltd
    * symbols    - standalone tokens of 2-5 upper-case letters
    * prices     - ``$`` followed by digits, optionally with two decimals
    * actions    - buy/sell/hold/bullish/bearish/target/stop loss
      (case-insensitive)

    Each category lists at most ten distinct values in the order they first
    appear in *text*, so the report is reproducible between runs. Actions are
    lower-cased so "Buy" and "buy" are reported once.

    Args:
        text: Transcript (or any text) to scan.

    Returns:
        The formatted report as a string. If anything raises (for example
        *text* is not a string), a string starting with
        ``"Error extracting stock info: "`` is returned instead of raising.
    """

    def _unique(items, limit=10):
        """Return up to *limit* distinct items, keeping first-seen order."""
        return list(dict.fromkeys(items))[:limit]

    try:
        companies = re.findall(
            r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*(?:\s+(?:Inc|Corp|Company|Ltd)\.?)?',
            text,
        )
        symbols = re.findall(r'\b[A-Z]{2,5}\b', text)
        prices = re.findall(r'\$\d+(?:\.\d{2})?', text)
        actions = re.findall(
            r'\b(?:buy|sell|hold|bullish|bearish|target|stop loss)\b',
            text,
            re.IGNORECASE,
        )

        parts = ["=== EXTRACTED STOCK INFORMATION ===\n\n"]
        if companies:
            parts.append(
                f"\U0001F4CA Mentioned Companies: {', '.join(_unique(companies))}\n\n"
            )
        if symbols:
            parts.append(
                f"\U0001F524 Potential Stock Symbols: {', '.join(_unique(symbols))}\n\n"
            )
        if prices:
            parts.append(
                f"\U0001F4B0 Price Mentions: {', '.join(_unique(prices))}\n\n"
            )
        if actions:
            distinct_actions = _unique(a.lower() for a in actions)
            parts.append(
                f"\U0001F4C8 Trading Actions: {', '.join(distinct_actions)}\n\n"
            )

        # A sentence counts as a recommendation when it contains a trading
        # keyword and one of the first five distinct symbols.
        keywords = ('buy', 'sell', 'target')
        lead_symbols = _unique(symbols, 5)
        recommendations = []
        for sentence in text.split('.'):
            lowered = sentence.lower()
            if any(word in lowered for word in keywords) and any(
                sym in sentence for sym in lead_symbols
            ):
                recommendations.append(sentence.strip())

        if recommendations:
            parts.append("\U0001F3AF Potential Recommendations:\n")
            # \u2022 is the bullet character, written as an escape so it
            # survives re-encoding like the other symbols in this report.
            parts.extend(f"\u2022 {rec}\n" for rec in recommendations[:5])

        if not any([companies, symbols, prices, actions]):
            # \u26A0\uFE0F is the warning-sign emoji.
            parts.append("\u26A0\uFE0F No clear stock recommendations found.\n")

        return "".join(parts)
    except Exception as e:
        # UI boundary: report the problem as text rather than crash the app.
        return f"Error extracting stock info: {str(e)}"
# Whisper Transcription | |
# Loaded speech-to-text models, keyed by backend name, so that the weights
# are loaded once per process rather than on every request.
_WHISPER_MODEL_CACHE = {}


def _get_whisper_model():
    """Return the model for the active backend, loading it on first use."""
    if WHISPER_TYPE not in _WHISPER_MODEL_CACHE:
        if WHISPER_TYPE == "openai-whisper":
            model = whisper.load_model("tiny")
        else:
            # chunk_length_s lets the pipeline process audio longer than
            # Whisper's 30-second window by splitting it into chunks.
            model = pipeline(
                "automatic-speech-recognition",
                model="openai/whisper-tiny",
                chunk_length_s=30,
            )
        _WHISPER_MODEL_CACHE[WHISPER_TYPE] = model
    return _WHISPER_MODEL_CACHE[WHISPER_TYPE]


def transcribe_audio(file_path):
    """Transcribe the audio file at *file_path* with the available backend.

    Args:
        file_path: Path of the audio file to transcribe.

    Returns:
        A 2-tuple of strings. On success it is ``(transcript, status)``.
        On failure the FIRST element is the failure message and the second
        is the error detail (empty when no backend is installed). Callers in
        this file detect failure from the marker at the start of the first
        element, so these strings must not change.
    """
    if not WHISPER_AVAILABLE:
        return "β Whisper not available", ""
    try:
        model = _get_whisper_model()
        if WHISPER_TYPE == "openai-whisper":
            result = model.transcribe(file_path)
        else:
            result = model(file_path)
        return result["text"], "β Transcription complete"
    except Exception as e:
        # UI boundary: surface the error text instead of raising.
        return "β Transcription failed", str(e)
# Audio Downloader using yt-dlp | |
def download_audio_youtube(url, cookies_file=None):
    """Download the best available audio stream of *url* with yt-dlp.

    The file is written as ``audio.<ext>`` inside a fresh temporary
    directory. On success that directory is left in place, because the
    caller still needs the file, and the caller is responsible for removing
    it. On every failure path the directory is deleted here.

    Args:
        url: Video URL to download.
        cookies_file: Optional path to a cookies.txt file passed to yt-dlp.

    Returns:
        ``(path, message)``. ``path`` is the downloaded file, or ``None``
        when the download failed or produced no file.
    """
    temp_dir = None
    try:
        temp_dir = tempfile.mkdtemp()
        out_path = os.path.join(temp_dir, "audio")
        ydl_opts = {
            'format': 'bestaudio[ext=m4a]/bestaudio/best',
            'outtmpl': out_path + '.%(ext)s',
            'quiet': True,
            'noplaylist': True,
            'cookiefile': cookies_file or None,
            # NOTE(review): 'user_agent', 'referer' and 'force_ipv4' look like
            # CLI flag names, not YoutubeDL API options. Confirm they have any
            # effect; the headers below are the API way to set these.
            'user_agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
            'referer': 'https://www.youtube.com/',
            'force_ipv4': True,
            'http_headers': {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
                'Accept-Language': 'en-US,en;q=0.5',
                'Accept-Encoding': 'gzip, deflate',
                'DNT': '1',
                'Connection': 'keep-alive',
                'Upgrade-Insecure-Requests': '1',
                'Referer': 'https://www.youtube.com/',
            },
        }
        with YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])

        # Prefer the historically expected extensions, in their old order.
        for ext in ['.m4a', '.mp3', '.webm']:
            full_path = out_path + ext
            if os.path.exists(full_path):
                return full_path, "β Audio downloaded"

        # 'bestaudio/best' can produce other containers (e.g. .opus, .mp4),
        # so accept any finished "audio.*" file, skipping yt-dlp's
        # partial-download bookkeeping files.
        for name in sorted(os.listdir(temp_dir)):
            if name.startswith("audio.") and not name.endswith((".part", ".ytdl")):
                return os.path.join(temp_dir, name), "β Audio downloaded"

        shutil.rmtree(temp_dir, ignore_errors=True)
        return None, "β Audio file not found"
    except Exception as e:
        # Do not leak the temp directory when the download fails.
        if temp_dir:
            shutil.rmtree(temp_dir, ignore_errors=True)
        return None, f"β Download error: {str(e)}"
# Gradio UI | |
def save_uploaded_cookie(cookies):
    """Resolve the value of the cookies upload widget to a file path.

    This helper is called by ``full_pipeline`` but is not defined or imported
    anywhere in the visible file, so it is defined here.

    Args:
        cookies: ``None`` when nothing was uploaded, a path string, or an
            object exposing the path as ``.name``.
            NOTE(review): which of these Gradio passes depends on the Gradio
            version - confirm against the installed release.

    Returns:
        The path of the uploaded file if it exists on disk, otherwise ``None``.
    """
    if cookies is None:
        return None
    path = cookies if isinstance(cookies, str) else getattr(cookies, "name", None)
    if path and os.path.isfile(path):
        return path
    return None


def _remove_download_dir(audio_path):
    """Delete the per-download temp directory that holds *audio_path*.

    Only a directory sitting directly inside the system temp directory is
    removed (which is where ``tempfile.mkdtemp()`` creates one), so an
    unexpected path can never cause an unrelated directory to be deleted.
    """
    folder = os.path.dirname(os.path.abspath(audio_path))
    tmp_root = os.path.abspath(tempfile.gettempdir())
    if folder != tmp_root and os.path.dirname(folder) == tmp_root:
        shutil.rmtree(folder, ignore_errors=True)


def full_pipeline(url, cookies):
    """Run download -> transcription -> stock extraction for one video.

    Args:
        url: YouTube URL typed by the user.
        cookies: Value of the optional cookies.txt upload widget.

    Returns:
        ``(status, transcript, stock_info)`` - three strings for the three
        output text boxes. When transcription fails, the transcript slot
        holds the failure message and the stock slot holds the error detail.
    """
    if not url or not url.strip():
        return "β Enter a valid YouTube URL", "", ""
    url = url.strip()

    temp_cookie = save_uploaded_cookie(cookies)
    audio_path, msg = download_audio_youtube(url, temp_cookie)
    if not audio_path:
        return msg, "", ""

    try:
        transcript, tmsg = transcribe_audio(audio_path)
    finally:
        # The audio file is no longer needed once transcription has run.
        _remove_download_dir(audio_path)

    # transcribe_audio reports failure by putting a message that STARTS with
    # the marker in the transcript slot. Testing only the start avoids
    # misreading a real transcript that merely contains that character.
    if transcript.startswith("β"):
        return msg, transcript, tmsg

    stock_data = extract_stock_info_simple(transcript)
    return "β Complete", transcript, stock_data
# Gradio App | |
# Page layout: a row with the two inputs, then the trigger button and the
# three result boxes that full_pipeline fills in.
with gr.Blocks(title="π Stock Info Extractor from YouTube") as demo:
    gr.Markdown("""
# π Extract Stock Mentions from YouTube
Upload a YouTube link + cookies.txt, and extract trading mentions using Whisper + AI
""")

    with gr.Row():
        url_input = gr.Textbox(label="YouTube URL")
        cookies_input = gr.File(
            label="cookies.txt (optional)",
            file_types=[".txt"],
        )

    run_btn = gr.Button("π Run Extraction")

    status = gr.Textbox(label="Status")
    transcript_box = gr.Textbox(label="Transcript", lines=10)
    stock_box = gr.Textbox(label="Stock Info", lines=10)

    # One click runs the whole pipeline; its 3-tuple maps onto the outputs
    # in the order listed here.
    run_btn.click(
        fn=full_pipeline,
        inputs=[url_input, cookies_input],
        outputs=[status, transcript_box, stock_box],
    )

# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch(debug=True)