developer28's picture
Update app.py
7c660a9 verified
raw
history blame
5.67 kB
# ✅ Combined YouTube Analyzer with Stock Info Extractor
# ⬇️ Based on your working app + whisper + stock extraction
import gradio as gr
import os
import tempfile
import shutil
import re
import torch
import numpy as np
from yt_dlp import YoutubeDL
# Whisper setup
# Probe for a speech-to-text backend. The openai-whisper package is tried
# first; the transformers pipeline is the fallback. When neither imports,
# the flags report that no backend is available.
try:
    import whisper
except ImportError:
    try:
        from transformers import pipeline
    except ImportError:
        WHISPER_AVAILABLE, WHISPER_TYPE = False, None
    else:
        WHISPER_AVAILABLE, WHISPER_TYPE = True, "transformers"
else:
    WHISPER_AVAILABLE, WHISPER_TYPE = True, "openai-whisper"
# Stock Info Extraction
def _unique_in_order(items, limit=10):
    """Return the first *limit* distinct items, keeping first-seen order."""
    return list(dict.fromkeys(items))[:limit]


def extract_stock_info_simple(text):
    """Build a plain-text report of stock-related mentions found in *text*.

    The extraction is purely regex-based: capitalised word runs are treated
    as company names, 2-5 letter upper-case words as ticker symbols,
    ``$<digits>`` as prices, and a fixed keyword list as trading actions.
    Sentences that contain buy/sell/target together with one of the first
    five distinct symbols are listed as potential recommendations.

    Args:
        text: Transcript text. ``None`` or an empty string is treated as
            text with no matches.

    Returns:
        The formatted report. If extraction raises, a string starting with
        ``"Error extracting stock info: "`` is returned instead.
    """
    try:
        text = text or ""

        companies = re.findall(
            r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*(?:\s+(?:Inc|Corp|Company|Ltd)\.?)?',
            text,
        )
        symbols = re.findall(r'\b[A-Z]{2,5}\b', text)
        prices = re.findall(r'\$\d+(?:\.\d{2})?', text)
        # Case-fold so "Buy" and "buy" collapse into a single entry.
        actions = [
            hit.lower()
            for hit in re.findall(
                r'\b(?:buy|sell|hold|bullish|bearish|target|stop loss)\b',
                text,
                re.IGNORECASE,
            )
        ]

        parts = ["=== EXTRACTED STOCK INFORMATION ===\n\n"]
        sections = (
            ("\U0001F4CA Mentioned Companies", companies),
            ("\U0001F524 Potential Stock Symbols", symbols),
            ("\U0001F4B0 Price Mentions", prices),
            ("\U0001F4C8 Trading Actions", actions),
        )
        for label, found in sections:
            if found:
                # De-duplicate before applying the cap, and keep first-seen
                # order so the report is stable from run to run.
                parts.append(f"{label}: {', '.join(_unique_in_order(found))}\n\n")

        top_symbols = _unique_in_order(symbols, limit=5)
        recommendations = []
        # Split only on sentence-ending punctuation followed by whitespace or
        # end of text, so a decimal price such as "$150.00" stays intact.
        for sentence in re.split(r'[.!?]+(?:\s+|$)', text):
            lowered = sentence.lower()
            if not any(word in lowered for word in ('buy', 'sell', 'target')):
                continue
            if any(sym in sentence for sym in top_symbols):
                recommendations.append(sentence.strip())

        if recommendations:
            parts.append("\U0001F3AF Potential Recommendations:\n")
            # Escape sequence for the bullet, matching the escapes used for
            # the other glyphs in this report.
            parts.extend(f"\u2022 {rec}\n" for rec in recommendations[:5])

        if not any([companies, symbols, prices, actions]):
            parts.append("\u26A0\uFE0F No clear stock recommendations found.\n")

        return "".join(parts)
    except Exception as e:
        # The result is shown directly in the UI, so report instead of raising.
        return f"Error extracting stock info: {str(e)}"
# Whisper Transcription
def transcribe_audio(file_path):
    """Transcribe the audio file at *file_path* with the detected backend.

    Uses the ``tiny`` Whisper model through whichever backend the module-level
    ``WHISPER_TYPE`` flag selected (openai-whisper, otherwise the
    transformers pipeline).

    Args:
        file_path: Path of the audio file to transcribe.

    Returns:
        A ``(text, detail)`` tuple. On success ``text`` is the transcript and
        ``detail`` a completion message. On failure ``text`` is an error
        marker string and ``detail`` is the exception text (empty when no
        backend is available).
    """
    if not WHISPER_AVAILABLE:
        return "❌ Whisper not available", ""
    try:
        if WHISPER_TYPE == "openai-whisper":
            model = whisper.load_model("tiny")
            result = model.transcribe(file_path)
        else:
            # Whisper works on 30-second windows; chunking lets the pipeline
            # handle recordings longer than one window.
            pipe = pipeline(
                "automatic-speech-recognition",
                model="openai/whisper-tiny",
                chunk_length_s=30,
            )
            result = pipe(file_path)
        return result["text"], "βœ… Transcription complete"
    except Exception as e:
        # Reported through the return value so the UI can display it.
        return "❌ Transcription failed", str(e)
# Audio Downloader using yt-dlp
def download_audio_youtube(url, cookies_file=None):
    """Download the audio track of *url* into a fresh temporary directory.

    Args:
        url: Video URL handed to yt-dlp.
        cookies_file: Optional path to a cookies file; falsy means none.

    Returns:
        ``(path, message)``. ``path`` is the downloaded file, or ``None`` when
        the download failed or produced no file; ``message`` is a status
        string for display. On every failure path the temporary directory is
        removed again; on success the caller owns the returned file.
    """
    temp_dir = None
    try:
        temp_dir = tempfile.mkdtemp()
        out_path = os.path.join(temp_dir, "audio")
        # NOTE(review): 'user_agent', 'referer' and 'force_ipv4' look like CLI
        # flag names -- confirm YoutubeDL honours them as API parameters.
        ydl_opts = {
            'format': 'bestaudio[ext=m4a]/bestaudio/best',
            'outtmpl': out_path + '.%(ext)s',
            'quiet': True,
            'noplaylist': True,
            'cookiefile': cookies_file if cookies_file else None,
            'user_agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
            'referer': 'https://www.youtube.com/',
            'force_ipv4': True,
            'http_headers': {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
                'Accept-Language': 'en-US,en;q=0.5',
                'Accept-Encoding': 'gzip, deflate',
                'DNT': '1',
                'Connection': 'keep-alive',
                'Upgrade-Insecure-Requests': '1',
                'Referer': 'https://www.youtube.com/',
            },
        }
        with YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])

        # Preferred containers first, in the original priority order.
        for ext in ['.m4a', '.mp3', '.webm']:
            full_path = out_path + ext
            if os.path.exists(full_path):
                return full_path, "βœ… Audio downloaded"

        # The format selector falls back to 'bestaudio/best', so the file may
        # carry another extension; accept whatever matches the output
        # template. '.part'/'.ytdl' are presumably yt-dlp's in-progress files.
        for name in sorted(os.listdir(temp_dir)):
            if name.startswith("audio.") and not name.endswith((".part", ".ytdl")):
                return os.path.join(temp_dir, name), "βœ… Audio downloaded"

        shutil.rmtree(temp_dir, ignore_errors=True)
        return None, "❌ Audio file not found"
    except Exception as e:
        # Do not leave the scratch directory behind when the download fails.
        if temp_dir is not None:
            shutil.rmtree(temp_dir, ignore_errors=True)
        return None, f"❌ Download error: {str(e)}"
# End-to-end pipeline: download -> transcribe -> extract (invoked by the Gradio button)
def _resolve_cookie_path(cookies):
    """Return a filesystem path for the uploaded cookies file, or None."""
    if not cookies:
        return None
    if isinstance(cookies, (str, os.PathLike)):
        return os.fspath(cookies)
    # Presumably a tempfile-like upload object exposing its path as `.name`
    # (what older Gradio File components pass) -- TODO confirm.
    return getattr(cookies, "name", None)


def _discard_download(audio_path):
    """Best-effort removal of a downloaded audio file and its empty folder."""
    try:
        os.remove(audio_path)
        # rmdir only succeeds on an empty directory, so nothing else is lost.
        os.rmdir(os.path.dirname(audio_path))
    except OSError:
        # Cleanup must never mask the pipeline result.
        pass


def full_pipeline(url, cookies):
    """Run download -> transcription -> stock extraction for one video.

    Args:
        url: Video URL entered by the user.
        cookies: Value of the optional cookies upload (path, file-like
            object, or ``None``).

    Returns:
        A ``(status, transcript, stock_info)`` tuple of strings for the three
        output boxes. When a stage fails, the remaining fields carry that
        stage's error text or are empty.
    """
    if not url or not url.strip():
        return "❌ Enter a valid YouTube URL", "", ""

    # The original call targeted `save_uploaded_cookie`, which this file never
    # defines; the upload is resolved to a path here instead.
    temp_cookie = _resolve_cookie_path(cookies)

    audio_path, msg = download_audio_youtube(url.strip(), temp_cookie)
    if not audio_path:
        return msg, "", ""

    try:
        transcript, tmsg = transcribe_audio(audio_path)
    finally:
        # The audio is only needed for transcription; drop it afterwards so
        # repeated runs do not fill the temp directory.
        _discard_download(audio_path)

    # Failure is signalled in-band. Also require the success status so that
    # detection does not rest solely on a marker character inside the text.
    if "❌" in transcript or not tmsg.endswith("Transcription complete"):
        return msg, transcript, tmsg

    stock_data = extract_stock_info_simple(transcript)
    return "βœ… Complete", transcript, stock_data
# Gradio App
# Declarative UI. Components are created in display order, and the click
# handler is registered inside the Blocks context.
with gr.Blocks(title="πŸ“ˆ Stock Info Extractor from YouTube") as demo:
    # Heading plus a one-line usage hint.
    gr.Markdown(
        "\n"
        "# πŸ“ˆ Extract Stock Mentions from YouTube\n"
        "Upload a YouTube link + cookies.txt, and extract trading mentions using Whisper + AI\n"
    )

    # The two inputs share one row.
    with gr.Row():
        url_input = gr.Textbox(label="YouTube URL")
        cookies_input = gr.File(label="cookies.txt (optional)", file_types=[".txt"])

    run_btn = gr.Button("πŸš€ Run Extraction")

    # Output boxes, in the order full_pipeline returns its values.
    status = gr.Textbox(label="Status")
    transcript_box = gr.Textbox(label="Transcript", lines=10)
    stock_box = gr.Textbox(label="Stock Info", lines=10)

    run_btn.click(
        fn=full_pipeline,
        inputs=[url_input, cookies_input],
        outputs=[status, transcript_box, stock_box],
    )

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(debug=True)