Spaces:

developer28
/

Youtubedownloader

Sleeping

App Files Files Community

Youtubedownloader / app.py

developer28

Update app.py

7c660a9 verified 2 months ago

raw

history blame

5.67 kB

	# ✅ Combined YouTube Analyzer with Stock Info Extractor
	# ⬇️ Downloads a video's audio with yt-dlp, transcribes it with Whisper,
	#    and extracts stock mentions from the transcript.

	import gradio as gr
	import os
	import tempfile
	import shutil
	import re
	import torch
	import numpy as np
	from yt_dlp import YoutubeDL

	# Whisper setup: detect which speech-to-text backend can be imported.
	# The openai-whisper package is tried first; the transformers pipeline is
	# the fallback. The flags are only flipped once an import has succeeded.
	WHISPER_AVAILABLE = False
	WHISPER_TYPE = None
	try:
	    import whisper
	except ImportError:
	    try:
	        from transformers import pipeline
	    except ImportError:
	        # Neither backend is installed; the flags keep their defaults.
	        pass
	    else:
	        WHISPER_AVAILABLE, WHISPER_TYPE = True, "transformers"
	else:
	    WHISPER_AVAILABLE, WHISPER_TYPE = True, "openai-whisper"

	# Stock Info Extraction

	# Patterns are compiled once at import time rather than on every call.
	# Alternations use a plain `|`; an escaped `\|` would match a literal pipe.
	_COMPANY_RE = re.compile(
	    r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*(?:\s+(?:Inc|Corp|Company|Ltd)\.?)?'
	)
	_SYMBOL_RE = re.compile(r'\b[A-Z]{2,5}\b')
	_PRICE_RE = re.compile(r'\$\d+(?:\.\d{2})?')
	_ACTION_RE = re.compile(
	    r'\b(?:buy|sell|hold|bullish|bearish|target|stop loss)\b',
	    re.IGNORECASE,
	)
	# A sentence ends at punctuation followed by whitespace or end of text, so
	# the decimal point inside a price such as "$150.50" does not split it.
	_SENTENCE_END_RE = re.compile(r'[.!?]+(?=\s|$)')
	_RECOMMENDATION_WORDS = ('buy', 'sell', 'target')


	def _unique_in_order(items, limit=10):
	    """Return the first `limit` distinct items, keeping first-seen order."""
	    return list(dict.fromkeys(items))[:limit]


	def extract_stock_info_simple(text):
	    """Build a plain-text report of stock-related mentions found in `text`.

	    The report lists capitalised company-like names, 2-5 letter upper-case
	    tokens (possible ticker symbols), dollar amounts, trading keywords and
	    up to five sentences that contain both a buy/sell/target keyword and one
	    of the first five distinct symbols.

	    Args:
	        text: Transcript text to scan.

	    Returns:
	        The formatted report, or a string starting with
	        "Error extracting stock info:" when `text` is not a string.
	    """
	    try:
	        companies = _COMPANY_RE.findall(text)
	        symbols = _SYMBOL_RE.findall(text)
	        prices = _PRICE_RE.findall(text)
	        # Lower-case so "Buy" and "buy" are reported once.
	        actions = [action.lower() for action in _ACTION_RE.findall(text)]
	        sentences = _SENTENCE_END_RE.split(text)
	    except TypeError as e:
	        # Raised by `re` when `text` is None or another non-string value.
	        return f"Error extracting stock info: {str(e)}"

	    sections = (
	        ("\U0001F4CA Mentioned Companies", companies),
	        ("\U0001F524 Potential Stock Symbols", symbols),
	        ("\U0001F4B0 Price Mentions", prices),
	        ("\U0001F4C8 Trading Actions", actions),
	    )

	    parts = ["=== EXTRACTED STOCK INFORMATION ===\n\n"]
	    for label, found in sections:
	        if found:
	            # Ordered de-duplication keeps the report stable between runs.
	            parts.append(f"{label}: {', '.join(_unique_in_order(found))}\n\n")

	    watched_symbols = _unique_in_order(symbols, limit=5)
	    recommendations = []
	    for sentence in sentences:
	        lowered = sentence.lower()
	        has_keyword = any(word in lowered for word in _RECOMMENDATION_WORDS)
	        if has_keyword and any(sym in sentence for sym in watched_symbols):
	            recommendations.append(sentence.strip())

	    if recommendations:
	        parts.append("\U0001F3AF Potential Recommendations:\n")
	        parts.extend(f"• {rec}\n" for rec in recommendations[:5])

	    if not any((companies, symbols, prices, actions)):
	        parts.append("⚠️ No clear stock recommendations found.\n")

	    return "".join(parts)

	# Whisper Transcription

	# Loaded speech-recognition backends, keyed by WHISPER_TYPE, so the model
	# is loaded once per process instead of on every request.
	_ASR_BACKENDS = {}


	def _get_asr_backend():
	    """Return the cached model/pipeline for WHISPER_TYPE, loading it once."""
	    if WHISPER_TYPE not in _ASR_BACKENDS:
	        if WHISPER_TYPE == "openai-whisper":
	            _ASR_BACKENDS[WHISPER_TYPE] = whisper.load_model("tiny")
	        else:
	            # chunk_length_s lets the pipeline split audio longer than a
	            # single 30-second Whisper window into chunks.
	            _ASR_BACKENDS[WHISPER_TYPE] = pipeline(
	                "automatic-speech-recognition",
	                model="openai/whisper-tiny",
	                chunk_length_s=30,
	            )
	    return _ASR_BACKENDS[WHISPER_TYPE]


	def transcribe_audio(file_path):
	    """Transcribe the audio file at `file_path` with the detected backend.

	    Args:
	        file_path: Path of the audio file to transcribe.

	    Returns:
	        A `(text, status)` tuple. On success `text` is the transcript and
	        `status` is a confirmation message. On failure `text` is a message
	        starting with "❌" and `status` carries the error detail (empty
	        when no backend is installed).
	    """
	    if not WHISPER_AVAILABLE:
	        return "❌ Whisper not available", ""
	    try:
	        backend = _get_asr_backend()
	        if WHISPER_TYPE == "openai-whisper":
	            result = backend.transcribe(file_path)
	        else:
	            result = backend(file_path)
	        return result["text"], "✅ Transcription complete"
	    except Exception as e:
	        # Deliberately broad: model loading and decoding can fail in many
	        # backend-specific ways, and callers expect a message, not a raise.
	        return "❌ Transcription failed", str(e)

	# Audio Downloader using yt-dlp

	def download_audio_youtube(url, cookies_file=None):
	    """Download the audio track of `url` into a fresh temporary directory.

	    Args:
	        url: Video URL handed to yt-dlp.
	        cookies_file: Optional path to a cookies.txt file; falsy values
	            disable cookie use.

	    Returns:
	        `(path, message)`. On success `path` points at the downloaded file
	        inside a temporary directory that the caller is responsible for
	        removing. On failure `path` is None, `message` starts with "❌",
	        and the temporary directory has already been deleted.
	    """
	    browser_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'
	    youtube_home = 'https://www.youtube.com/'
	    temp_dir = None
	    try:
	        temp_dir = tempfile.mkdtemp()
	        out_path = os.path.join(temp_dir, "audio")
	        ydl_opts = {
	            'format': 'bestaudio[ext=m4a]/bestaudio/best',
	            'outtmpl': out_path + '.%(ext)s',
	            'quiet': True,
	            'noplaylist': True,
	            'cookiefile': cookies_file if cookies_file else None,
	            # NOTE(review): the next three keys look like CLI flag names;
	            # confirm YoutubeDL honours them as API options.
	            'user_agent': browser_agent,
	            'referer': youtube_home,
	            'force_ipv4': True,
	            'http_headers': {
	                'User-Agent': browser_agent,
	                # The catch-all media range is "*/*"; a bare "/" is not valid.
	                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
	                'Accept-Language': 'en-US,en;q=0.5',
	                'Accept-Encoding': 'gzip, deflate',
	                'DNT': '1',
	                'Connection': 'keep-alive',
	                'Upgrade-Insecure-Requests': '1',
	                'Referer': youtube_home,
	            },
	        }
	        with YoutubeDL(ydl_opts) as ydl:
	            ydl.download([url])
	        # The format selector may yield any container, so accept every
	        # "audio.<ext>" file. Names with more than one suffix (for example
	        # "audio.m4a.part") have a different stem and are skipped.
	        for name in sorted(os.listdir(temp_dir)):
	            stem, ext = os.path.splitext(name)
	            if stem == "audio" and ext:
	                return os.path.join(temp_dir, name), "✅ Audio downloaded"
	    except Exception as e:
	        # Deliberately broad: any download problem is reported to the UI as
	        # a message instead of propagating.
	        failure = f"❌ Download error: {str(e)}"
	    else:
	        failure = "❌ Audio file not found"

	    # Only failure paths reach this point; do not leave the directory behind.
	    if temp_dir is not None:
	        shutil.rmtree(temp_dir, ignore_errors=True)
	    return None, failure

	# Gradio UI

	def _cookie_path_from_upload(cookies):
	    """Return a filesystem path for the uploaded cookies file, or None.

	    NOTE(review): assumes the gr.File input delivers either a path string or
	    an object exposing the path as `.name` - confirm for the installed
	    Gradio version.
	    """
	    if not cookies:
	        return None
	    if isinstance(cookies, (str, os.PathLike)):
	        return os.fspath(cookies)
	    return getattr(cookies, "name", None)


	def full_pipeline(url, cookies):
	    """Run download -> transcription -> stock extraction for one video.

	    Args:
	        url: YouTube URL typed by the user.
	        cookies: Value of the optional cookies.txt upload, or None.

	    Returns:
	        `(status, transcript, stock_info)` strings for the three output
	        boxes. When a step fails, `status` carries the error message and
	        the other two values are empty.
	    """
	    if not url or not url.strip():
	        return "❌ Enter a valid YouTube URL", "", ""

	    # Resolved locally: no `save_uploaded_cookie` helper is defined in this
	    # file, so calling it raised NameError.
	    cookie_path = _cookie_path_from_upload(cookies)

	    audio_path, msg = download_audio_youtube(url.strip(), cookie_path)
	    if not audio_path:
	        return msg, "", ""

	    try:
	        transcript, tmsg = transcribe_audio(audio_path)
	    finally:
	        # Best-effort cleanup of the downloaded audio. rmdir only succeeds
	        # on an empty directory, so an unrelated folder is never wiped.
	        try:
	            os.remove(audio_path)
	            os.rmdir(os.path.dirname(audio_path))
	        except OSError:
	            pass

	    if "❌" in transcript:
	        # Report the transcription failure itself in the status box rather
	        # than the earlier download success message.
	        status = f"{transcript}: {tmsg}" if tmsg else transcript
	        return status, "", ""

	    stock_data = extract_stock_info_simple(transcript)
	    return "✅ Complete", transcript, stock_data


	# Gradio App: page layout and event wiring.
	with gr.Blocks(title="📈 Stock Info Extractor from YouTube") as demo:
	    # Header text rendered above the controls.
	    gr.Markdown("""
	# 📈 Extract Stock Mentions from YouTube
	Upload a YouTube link + cookies.txt, and extract trading mentions using Whisper + AI
	""")

	    # Inputs sit side by side: the video URL and an optional cookies upload.
	    with gr.Row():
	        url_input = gr.Textbox(label="YouTube URL")
	        cookies_input = gr.File(
	            label="cookies.txt (optional)",
	            file_types=[".txt"],
	        )

	    run_btn = gr.Button("🚀 Run Extraction")

	    # Outputs, in the same order as the tuple returned by full_pipeline.
	    status = gr.Textbox(label="Status")
	    transcript_box = gr.Textbox(label="Transcript", lines=10)
	    stock_box = gr.Textbox(label="Stock Info", lines=10)

	    run_btn.click(
	        fn=full_pipeline,
	        inputs=[url_input, cookies_input],
	        outputs=[status, transcript_box, stock_box],
	    )

	# Start the web server only when executed as a script.
	if __name__ == "__main__":
	    demo.launch(debug=True)