Spaces:
Sleeping
Sleeping
File size: 5,665 Bytes
bb84391 4fde749 a6e9713 c963386 bb84391 0921abd bb84391 c963386 0921abd c963386 bb84391 c963386 bb84391 c963386 0921abd bb84391 c963386 9518e76 0921abd 9518e76 bb84391 9518e76 bb84391 3a866c5 bb84391 9518e76 bb84391 9518e76 bb84391 0921abd bb84391 0921abd 9518e76 0921abd bb84391 9518e76 bb84391 0921abd bb84391 9518e76 bb84391 9518e76 bb84391 0921abd bb84391 0921abd 9518e76 16e2e72 bb84391 0921abd bb84391 0921abd bb84391 0921abd bb84391 3a866c5 bb84391 16e2e72 bb84391 7c660a9 bb84391 7c660a9 bb84391 7c660a9 bb84391 16e2e72 bb84391 7c660a9 bb84391 7c660a9 bb84391 7c660a9 bb84391 7c660a9 bb84391 0921abd bb84391 0921abd bb84391 0921abd bb84391 0921abd 16e2e72 bb84391 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 |
# ✅ Combined YouTube Analyzer with Stock Info Extractor
# ⬇️ Based on your working app + whisper + stock extraction
import gradio as gr
import os
import tempfile
import shutil
import re
import torch
import numpy as np
from yt_dlp import YoutubeDL
# Whisper setup
# Probe once, at import time, for a speech-to-text backend: the `whisper`
# package is preferred, with the `transformers` pipeline as the fallback.
# WHISPER_TYPE records which one was found (None when neither imports).
WHISPER_AVAILABLE = False
WHISPER_TYPE = None
try:
    import whisper
except ImportError:
    try:
        from transformers import pipeline
    except ImportError:
        # Neither backend is installed; both flags keep their defaults.
        pass
    else:
        WHISPER_AVAILABLE = True
        WHISPER_TYPE = "transformers"
else:
    WHISPER_AVAILABLE = True
    WHISPER_TYPE = "openai-whisper"
# Stock Info Extraction
def extract_stock_info_simple(text):
    """Build a plain-text report of stock-related mentions found in *text*.

    The extraction is regex-based and heuristic: runs of capitalised words
    are reported as companies, 2-5 letter all-caps tokens as ticker symbols,
    dollar amounts as prices, and a fixed keyword list as trading actions.
    Sentences containing both a buy/sell/target keyword and one of the first
    five distinct symbols are listed as potential recommendations.

    Args:
        text: Transcript (or any other text) to scan.

    Returns:
        A multi-line report string. When *text* is not a string, a one-line
        "Error extracting stock info: ..." message is returned instead of
        raising, so callers can always display the result.
    """
    if not isinstance(text, str):
        return (
            "Error extracting stock info: "
            f"expected str, got {type(text).__name__}"
        )

    def _distinct(items, limit=10):
        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # report is deterministic (joining a set is not) and the limit counts
        # distinct values rather than raw matches.
        return list(dict.fromkeys(items))[:limit]

    companies = _distinct(re.findall(
        r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*(?:\s+(?:Inc|Corp|Company|Ltd)\.?)?',
        text,
    ))
    symbols = _distinct(re.findall(r'\b[A-Z]{2,5}\b', text))
    # Accept thousands separators and any number of decimals so "$1,234.50"
    # and "$3.5" are reported whole rather than truncated to "$1" / "$3".
    prices = _distinct(re.findall(r'\$\d+(?:,\d{3})*(?:\.\d+)?', text))
    # Matching is case-insensitive; lower-casing collapses "Buy" and "BUY".
    actions = _distinct(
        match.lower()
        for match in re.findall(
            r'\b(?:buy|sell|hold|bullish|bearish|target|stop loss)\b',
            text,
            re.IGNORECASE,
        )
    )

    parts = ["=== EXTRACTED STOCK INFORMATION ===\n\n"]
    if companies:
        parts.append(f"\U0001F4CA Mentioned Companies: {', '.join(companies)}\n\n")
    if symbols:
        parts.append(f"\U0001F524 Potential Stock Symbols: {', '.join(symbols)}\n\n")
    if prices:
        parts.append(f"\U0001F4B0 Price Mentions: {', '.join(prices)}\n\n")
    if actions:
        parts.append(f"\U0001F4C8 Trading Actions: {', '.join(actions)}\n\n")

    keywords = ('buy', 'sell', 'target')
    top_symbols = symbols[:5]
    recommendations = []
    # Split only on terminators followed by whitespace or end of text, so the
    # decimal point inside a price does not cut a sentence in half.
    for sentence in re.split(r'[.!?]+(?:\s+|$)', text):
        lowered = sentence.lower()
        has_keyword = any(word in lowered for word in keywords)
        if has_keyword and any(sym in sentence for sym in top_symbols):
            recommendations.append(sentence.strip())

    if recommendations:
        parts.append("\U0001F3AF Potential Recommendations:\n")
        parts.extend(f"\u2022 {rec}\n" for rec in recommendations[:5])

    if not (companies or symbols or prices or actions):
        parts.append("\u26a0\ufe0f No clear stock recommendations found.\n")
    return "".join(parts)
# Whisper Transcription
# Loaded models, keyed by backend name, so repeated requests reuse the
# weights instead of reloading them on every call.
_ASR_BACKENDS = {}


def _get_asr_backend():
    """Return the (cached) model or pipeline for the detected backend."""
    backend = _ASR_BACKENDS.get(WHISPER_TYPE)
    if backend is None:
        if WHISPER_TYPE == "openai-whisper":
            backend = whisper.load_model("tiny")
        else:
            # chunk_length_s turns on chunked long-form decoding in the
            # transformers ASR pipeline, so recordings longer than a single
            # 30-second window are still transcribed.
            backend = pipeline(
                "automatic-speech-recognition",
                model="openai/whisper-tiny",
                chunk_length_s=30,
            )
        _ASR_BACKENDS[WHISPER_TYPE] = backend
    return backend


def transcribe_audio(file_path):
    """Transcribe the audio file at *file_path* with the available backend.

    Args:
        file_path: Path of the audio file to transcribe.

    Returns:
        A ``(text, status)`` tuple of strings. On success ``text`` is the
        transcript and ``status`` ends with "Transcription complete". On
        failure ``text`` is a short failure message and ``status`` carries
        the error detail (empty when no backend is installed). The function
        never raises.
    """
    # The leading marker of the failure messages is kept exactly as found:
    # full_pipeline recognises a failed transcription by searching the
    # returned text for it.
    if not WHISPER_AVAILABLE:
        return "β Whisper not available", ""
    if not file_path or not os.path.isfile(file_path):
        return "β Transcription failed", f"Audio file not found: {file_path}"
    try:
        backend = _get_asr_backend()
        if WHISPER_TYPE == "openai-whisper":
            result = backend.transcribe(file_path)
        else:
            result = backend(file_path)
        return result["text"], "\u2705 Transcription complete"
    except Exception as e:
        # UI boundary: report any backend error to the user instead of
        # crashing the request.
        return "β Transcription failed", str(e)
# Audio Downloader using yt-dlp
def download_audio_youtube(url, cookies_file=None):
    """Download the audio track of *url* into a fresh temporary directory.

    Args:
        url: Video URL handed to yt-dlp.
        cookies_file: Optional path of a cookies file passed to yt-dlp as
            ``cookiefile``.

    Returns:
        ``(path, status)``. On success ``path`` is the downloaded file,
        which lives alone in its own temporary directory that the caller
        should remove when done. On failure ``path`` is None, ``status``
        describes the problem, and the temporary directory has already been
        removed. The function never raises.
    """
    temp_dir = None
    try:
        temp_dir = tempfile.mkdtemp()
        out_path = os.path.join(temp_dir, "audio")
        # NOTE(review): 'user_agent', 'referer' and 'force_ipv4' mirror CLI
        # flag names; confirm YoutubeDL honours them as API options (the API
        # equivalents appear to be 'http_headers' and 'source_address').
        ydl_opts = {
            'format': 'bestaudio[ext=m4a]/bestaudio/best',
            'outtmpl': out_path + '.%(ext)s',
            'quiet': True,
            'noplaylist': True,
            'cookiefile': cookies_file if cookies_file else None,
            'user_agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
            'referer': 'https://www.youtube.com/',
            'force_ipv4': True,
            'http_headers': {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
                'Accept-Language': 'en-US,en;q=0.5',
                'Accept-Encoding': 'gzip, deflate',
                'DNT': '1',
                'Connection': 'keep-alive',
                'Upgrade-Insecure-Requests': '1',
                'Referer': 'https://www.youtube.com/',
            },
        }
        with YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])

        # The format selector may fall back to a container other than m4a,
        # so accept whatever "audio.<ext>" was written. The extensions that
        # were probed before are still preferred, in the same order.
        preferred = ('.m4a', '.mp3', '.webm')

        def _rank(name):
            ext = os.path.splitext(name)[1]
            position = preferred.index(ext) if ext in preferred else len(preferred)
            return (position, name)

        candidates = sorted(
            (
                name for name in os.listdir(temp_dir)
                if name.startswith("audio.") and not name.endswith(".part")
            ),
            key=_rank,
        )
        if candidates:
            return os.path.join(temp_dir, candidates[0]), "\u2705 Audio downloaded"
        failure = "\u274c Audio file not found"
    except Exception as e:
        # UI boundary: surface any download problem as a status message.
        failure = f"\u274c Download error: {str(e)}"

    # Every failure path ends here: do not leave the temp directory behind.
    if temp_dir is not None:
        shutil.rmtree(temp_dir, ignore_errors=True)
    return None, failure
# End-to-end pipeline invoked by the Gradio UI
def _cookie_path_from_upload(upload):
    """Return a filesystem path for an uploaded cookies file, or None."""
    if upload is None:
        return None
    # NOTE(review): assumes the upload arrives either as a path string or as
    # a file-like wrapper exposing its path via `.name` - confirm against
    # the installed Gradio version.
    if isinstance(upload, (str, os.PathLike)):
        path = os.fspath(upload)
    else:
        path = getattr(upload, "name", None)
    if isinstance(path, str) and os.path.isfile(path):
        return path
    return None


def full_pipeline(url, cookies):
    """Download, transcribe and analyse one video.

    Args:
        url: Video URL typed by the user.
        cookies: Optional uploaded cookies file from the UI, or None.

    Returns:
        ``(status, transcript, stock_info)`` - three strings, one per output
        box. When a step fails, the remaining values carry the failure
        message(s) reported by that step, or are empty.
    """
    url = (url or "").strip()
    if not url:
        return "\u274c Enter a valid YouTube URL", "", ""

    # Resolve the upload to a path here; the file defines no
    # save_uploaded_cookie helper, so calling one raised NameError.
    cookie_path = _cookie_path_from_upload(cookies)
    audio_path, msg = download_audio_youtube(url, cookie_path)
    if not audio_path:
        return msg, "", ""

    try:
        transcript, tmsg = transcribe_audio(audio_path)
    finally:
        # Best-effort cleanup of the downloaded audio. rmdir only succeeds
        # on an empty directory, so nothing else can be deleted by mistake.
        try:
            os.remove(audio_path)
            os.rmdir(os.path.dirname(audio_path))
        except OSError:
            pass

    # Decide success from the status message rather than by searching the
    # transcript for a marker character, which spoken content may contain.
    if not tmsg.endswith("Transcription complete"):
        return msg, transcript, tmsg

    stock_data = extract_stock_info_simple(transcript)
    return "\u2705 Complete", transcript, stock_data
# Gradio App
# Declares the UI: a URL textbox and an optional cookies upload are passed to
# full_pipeline, whose three return values fill the status, transcript and
# stock-info textboxes.
# NOTE(review): nesting is reconstructed; the Row is assumed to contain only
# the two input components - confirm against the intended layout.
with gr.Blocks(title="π Stock Info Extractor from YouTube") as demo:
    # Page heading and short usage hint.
    gr.Markdown("""
# π Extract Stock Mentions from YouTube
Upload a YouTube link + cookies.txt, and extract trading mentions using Whisper + AI
""")
    with gr.Row():
        url_input = gr.Textbox(label="YouTube URL")
        # Upload is optional and restricted to .txt files.
        cookies_input = gr.File(label="cookies.txt (optional)", file_types=[".txt"])
    run_btn = gr.Button("π Run Extraction")
    status = gr.Textbox(label="Status")
    transcript_box = gr.Textbox(label="Transcript", lines=10)
    stock_box = gr.Textbox(label="Stock Info", lines=10)
    # Output order matches full_pipeline's (status, transcript, stock info).
    run_btn.click(fn=full_pipeline, inputs=[url_input, cookies_input], outputs=[status, transcript_box, stock_box])
# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch(debug=True)
|