# Source: Hugging Face Space "developer28/Youtubedownloader" (status at capture: Sleeping)
# File: Youtubedownloader / app.py (11.8 kB)
# Last commit: "Update app.py" by developer28, 0921abd (verified), shown as "16 days ago" at capture time

	import os
	import tempfile
	import gradio as gr
	import re
	import sys
	import shutil

	# Optional third-party dependencies. A failed import is recorded in a flag
	# (and printed) instead of aborting, so later code can test the flag and
	# report the missing package rather than crash at import time.
	try:
	    from yt_dlp import YoutubeDL
	except ImportError as e:
	    YT_DLP_AVAILABLE = False
	    print(f"yt-dlp import error: {e}")
	else:
	    YT_DLP_AVAILABLE = True

	try:
	    import whisper
	except ImportError as e:
	    WHISPER_AVAILABLE = False
	    print(f"whisper import error: {e}")
	else:
	    WHISPER_AVAILABLE = True

	# Startup diagnostics written to stdout.
	print(f"Python version: {sys.version}")
	print(f"yt-dlp available: {YT_DLP_AVAILABLE}")
	print(f"whisper available: {WHISPER_AVAILABLE}")

	def download_audio(url, cookies_file_path=None):
	    """Download the audio track of a video with yt-dlp and return its path.

	    The file is written as ``audio.<ext>`` inside a freshly created
	    temporary directory. On success the caller owns that file (and its
	    directory) and is responsible for deleting it.

	    Args:
	        url: Video URL handed to ``YoutubeDL.extract_info``.
	        cookies_file_path: Optional path to a cookies file. It is passed to
	            yt-dlp as ``cookiefile`` only when the path exists.

	    Returns:
	        Path of the downloaded audio file.

	    Raises:
	        Exception: If yt-dlp is unavailable, or if the download fails or no
	            output file can be located. The original error is chained as
	            ``__cause__``.
	    """
	    if not YT_DLP_AVAILABLE:
	        raise Exception("yt-dlp is not available. Please check the installation.")

	    temp_dir = None
	    try:
	        # Create a temporary directory for downloads
	        temp_dir = tempfile.mkdtemp()
	        output_path = os.path.join(temp_dir, "audio")

	        ydl_opts = {
	            'format': 'bestaudio[ext=m4a]/bestaudio/best',
	            'outtmpl': output_path + '.%(ext)s',
	            'quiet': True,
	            'no_warnings': True,
	        }

	        # Add cookies file if provided
	        if cookies_file_path and os.path.exists(cookies_file_path):
	            ydl_opts['cookiefile'] = cookies_file_path
	            print(f"Using cookies file: {cookies_file_path}")

	        with YoutubeDL(ydl_opts) as ydl:
	            info_dict = ydl.extract_info(url, download=True)
	            expected_file = ydl.prepare_filename(info_dict)

	        # Prefer the name yt-dlp itself reports for the download.
	        if expected_file and os.path.exists(expected_file):
	            return expected_file

	        # Then the commonly expected extensions, in the original order.
	        for ext in ['.m4a', '.webm', '.mp4', '.mp3']:
	            potential_file = output_path + ext
	            if os.path.exists(potential_file):
	                return potential_file

	        # Finally accept any other finished "audio.<ext>" file, so formats
	        # outside the list above (e.g. .opus) are not reported as missing.
	        for name in sorted(os.listdir(temp_dir)):
	            candidate = os.path.join(temp_dir, name)
	            if (
	                name.startswith("audio.")
	                and not name.endswith(('.part', '.ytdl'))
	                and os.path.isfile(candidate)
	            ):
	                return candidate

	        raise FileNotFoundError("Downloaded audio file not found")

	    except Exception as e:
	        # Nothing usable was produced: remove the temp directory so failed
	        # downloads do not accumulate on disk.
	        if temp_dir is not None:
	            shutil.rmtree(temp_dir, ignore_errors=True)
	        raise Exception(f"Failed to download audio: {str(e)}") from e

	def transcribe_audio(file_path, model_name="tiny"):
	    """Transcribe an audio file with Whisper and return the text.

	    Args:
	        file_path: Path of the audio file passed to ``model.transcribe``.
	        model_name: Name passed to ``whisper.load_model``. Defaults to
	            ``"tiny"``, the smallest model, to reduce memory usage.

	    Returns:
	        The ``"text"`` entry of the Whisper transcription result.

	    Raises:
	        Exception: If Whisper is unavailable, or if loading the model or
	            transcribing fails. The original error is chained as
	            ``__cause__``.
	    """
	    if not WHISPER_AVAILABLE:
	        raise Exception("OpenAI Whisper is not available. Please check the installation.")

	    try:
	        model = whisper.load_model(model_name)
	        result = model.transcribe(file_path)
	        return result["text"]
	    except Exception as e:
	        raise Exception(f"Failed to transcribe audio: {str(e)}") from e

	def extract_stock_info_simple(text):
	    """Extract stock-related mentions from a transcript with regex matching.

	    Looks for capitalised names (possible companies), 2-5 letter upper-case
	    tokens (possible ticker symbols), dollar amounts and trading keywords,
	    then lists sentences that contain both a trading keyword and one of the
	    first five symbols found.

	    Args:
	        text: Transcript text to scan.

	    Returns:
	        A formatted multi-line report. If extraction raises (for example
	        when ``text`` is not a string), a string starting with
	        ``"Error extracting stock info:"`` is returned instead.
	    """
	    try:
	        # NOTE: alternation must be a bare "|"; an escaped "\|" would only
	        # match a literal pipe character and never find these words.
	        companies = re.findall(
	            r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*(?:\s+(?:Inc|Corp|Company|Ltd)\.?)?',
	            text,
	        )
	        symbols = re.findall(r'\b[A-Z]{2,5}\b', text)
	        prices = re.findall(r'\$\d+(?:\.\d{2})?', text)
	        actions = re.findall(
	            r'\b(?:buy|sell|hold|bullish|bearish|target|stop\s+loss)\b',
	            text,
	            re.IGNORECASE,
	        )

	        def _first_unique(items):
	            # De-duplicate the first ten matches while keeping first-seen
	            # order, so the report is stable between runs.
	            return ', '.join(dict.fromkeys(items[:10]))

	        # Format the extracted information
	        parts = ["=== EXTRACTED STOCK INFORMATION ===\n\n"]

	        if companies:
	            parts.append(f"📊 Mentioned Companies: {_first_unique(companies)}\n\n")

	        if symbols:
	            parts.append(f"🔤 Potential Stock Symbols: {_first_unique(symbols)}\n\n")

	        if prices:
	            parts.append(f"💰 Price Mentions: {_first_unique(prices)}\n\n")

	        if actions:
	            parts.append(f"📈 Trading Actions: {_first_unique(actions)}\n\n")

	        # Look for specific recommendation patterns
	        keywords = ('buy', 'sell', 'target', 'recommendation')
	        top_symbols = symbols[:5]
	        recommendations = []
	        # Split on periods that end a sentence, not on the decimal point
	        # inside a number such as "$150.50".
	        for sentence in re.split(r'\.(?!\d)', text):
	            lowered = sentence.lower()
	            has_keyword = any(word in lowered for word in keywords)
	            if has_keyword and any(symbol in sentence for symbol in top_symbols):
	                recommendations.append(sentence.strip())

	        if recommendations:
	            parts.append("🎯 Potential Recommendations:\n")
	            parts.extend(f"• {rec}\n" for rec in recommendations[:5])

	        if not any([companies, symbols, prices, actions]):
	            parts.append("⚠️ No clear stock recommendations found in the transcript.\n")
	            parts.append("This might be because:\n")
	            parts.append("• The video doesn't contain stock recommendations\n")
	            parts.append("• The audio quality was poor\n")
	            parts.append("• The content is not in English\n")

	        return "".join(parts)

	    except Exception as e:
	        return f"Error extracting stock info: {str(e)}"

	def cleanup_file(file_path):
	    """Best-effort removal of a temporary file and its now-empty directory.

	    Never raises for filesystem errors: a missing file is ignored and any
	    other removal failure is printed. After the file is removed, its parent
	    directory is removed too if it is empty, unless that parent is the
	    system temp directory itself.

	    Args:
	        file_path: Path of the file to delete; ``None`` or empty is a no-op.
	    """
	    if not file_path:
	        return

	    try:
	        os.remove(file_path)
	    except FileNotFoundError:
	        # Already gone: nothing to clean up.
	        return
	    except (OSError, ValueError) as e:
	        print(f"Could not remove temporary file {file_path}: {e}")
	        return

	    parent = os.path.dirname(file_path)
	    # Files created directly in the shared temp directory must not trigger
	    # an attempt to delete that directory.
	    if not parent or os.path.realpath(parent) == os.path.realpath(tempfile.gettempdir()):
	        return

	    try:
	        os.rmdir(parent)
	    except OSError:
	        # Directory still has other files (or is not removable); leave it.
	        pass

	def process_cookies_file(cookies_file):
	    """Copy an uploaded cookies file to a private temp file and return its path.

	    Args:
	        cookies_file: The upload, either an object with a ``name`` attribute
	            holding the file path or a plain path string; ``None`` means no
	            file was uploaded.

	    Returns:
	        Path of the temporary copy, or ``None`` if nothing was uploaded or
	        the copy failed (the failure is printed).
	    """
	    if cookies_file is None:
	        return None

	    # Accept both file-like upload objects (``.name``) and plain paths.
	    source_path = getattr(cookies_file, "name", cookies_file)

	    temp_cookies_path = None
	    try:
	        # mkstemp creates the file atomically with owner-only permissions;
	        # mktemp only returns a name and is open to a race before use.
	        fd, temp_cookies_path = tempfile.mkstemp(suffix='.txt')
	        os.close(fd)

	        # Copy contents only, so the owner-only mode of the temp file is
	        # kept for what is effectively a credentials file.
	        shutil.copyfile(source_path, temp_cookies_path)

	        return temp_cookies_path
	    except (OSError, TypeError, ValueError) as e:
	        print(f"Error processing cookies file: {e}")
	        # Do not leave an empty or partial temp file behind.
	        if temp_cookies_path is not None:
	            try:
	                os.remove(temp_cookies_path)
	            except OSError:
	                pass
	        return None

	def process_video(url, cookies_file, progress=gr.Progress()):
	    """Download, transcribe and analyse one YouTube video.

	    Args:
	        url: YouTube video URL (``youtube.com`` or ``youtu.be`` host,
	            including subdomains; a missing scheme is tolerated).
	        cookies_file: Optional uploaded cookies file, forwarded to
	            ``process_cookies_file``.
	        progress: Gradio progress callback, called with a fraction and a
	            description at each stage.

	    Returns:
	        A 3-tuple ``(transcript, stock_details, status)``. On failure the
	        first element carries the error message, the second is empty and
	        the third is a short status line starting with "❌".
	    """
	    from urllib.parse import urlparse

	    # Check if required packages are available
	    if not YT_DLP_AVAILABLE:
	        return "Error: yt-dlp is not installed properly. Please check the requirements.", "", "❌ Error: Missing yt-dlp"

	    if not WHISPER_AVAILABLE:
	        return "Error: OpenAI Whisper is not installed properly. Please check the requirements.", "", "❌ Error: Missing Whisper"

	    invalid_url = ("Please provide a valid YouTube URL", "", "❌ Error: Invalid URL")

	    if not url or not url.strip():
	        return invalid_url

	    # The textbox is multi-line, so drop stray whitespace/newlines.
	    url = url.strip()

	    # Validate the host rather than searching the whole string, so that
	    # e.g. "https://example.com/?q=youtube.com" is rejected.
	    candidate = url if "://" in url else f"https://{url}"
	    try:
	        host = (urlparse(candidate).hostname or "").lower()
	    except ValueError:
	        host = ""
	    if not any(host == domain or host.endswith("." + domain) for domain in ('youtube.com', 'youtu.be')):
	        return invalid_url

	    audio_path = None
	    cookies_temp_path = None

	    try:
	        # Process cookies file if provided
	        progress(0.05, desc="Processing cookies...")
	        cookies_temp_path = process_cookies_file(cookies_file)

	        # Download audio
	        progress(0.2, desc="Downloading audio...")
	        audio_path = download_audio(url, cookies_temp_path)

	        # Transcribe audio
	        progress(0.6, desc="Transcribing audio...")
	        transcript = transcribe_audio(audio_path)

	        if not transcript.strip():
	            return "No speech detected in the video", "", "❌ No speech detected"

	        # Extract stock information
	        progress(0.9, desc="Extracting stock information...")
	        stock_details = extract_stock_info_simple(transcript)

	        progress(1.0, desc="Complete!")
	        return transcript, stock_details, "✅ Processing completed successfully"

	    except Exception as e:
	        # Top-level boundary for the UI: report the failure in the outputs
	        # instead of letting the handler raise.
	        error_msg = f"Error processing video: {str(e)}"
	        return error_msg, "", f"❌ Error: {str(e)}"

	    finally:
	        # Clean up temporary files (runs for early returns as well)
	        cleanup_file(audio_path)
	        cleanup_file(cookies_temp_path)

	# Create Gradio interface
	# NOTE(review): the nesting of the `with` blocks below was reconstructed
	# from statement order (inputs in one column, the two result boxes side by
	# side in a second row) -- confirm against the intended layout.
	with gr.Blocks(
	    title="Stock Recommendation Extractor",
	    theme=gr.themes.Soft(),
	    css="""
	    .gradio-container {
	    max-width: 1400px;
	    margin: auto;
	    }
	    .status-box {
	    padding: 10px;
	    border-radius: 5px;
	    margin: 10px 0;
	    }
	    """
	) as demo:

	    gr.Markdown("""
	    # 📈 Stock Recommendation Extractor from YouTube

	    Extract stock recommendations and trading information from YouTube videos using AI transcription.

	    How it works:
	    1. Upload your cookies.txt file (optional but recommended to avoid bot detection)
	    2. Paste YouTube video URL
	    3. Downloads audio from YouTube video
	    4. Transcribes using OpenAI Whisper
	    5. Extracts stock-related information

	    ⚠️ Disclaimer: This is for educational purposes only. Always do your own research!
	    """)

	    # Input row: cookies upload, URL, action button and status line.
	    with gr.Row():
	        with gr.Column(scale=1):
	            # Cookies file upload
	            cookies_input = gr.File(
	                label="🍪 Upload Cookies File (cookies.txt)",
	                file_types=[".txt"],
	                file_count="single"
	            )

	            gr.Markdown("""
	            How to get cookies.txt:
	            1. Install browser extension like "Get cookies.txt LOCALLY"
	            2. Visit YouTube in your browser (logged in)
	            3. Export cookies for youtube.com
	            4. Upload the downloaded cookies.txt file here
	            """)

	            url_input = gr.Textbox(
	                label="📺 YouTube URL",
	                placeholder="https://www.youtube.com/watch?v=...",
	                lines=2
	            )

	            process_btn = gr.Button(
	                "🚀 Extract Stock Information",
	                variant="primary",
	                size="lg"
	            )

	            # Status display
	            status_output = gr.Textbox(
	                label="📊 Status",
	                lines=1,
	                interactive=False
	            )

	            gr.Markdown("""
	            ### 💡 Tips:
	            - Upload cookies.txt to avoid bot detection
	            - Works best with financial YouTube channels
	            - Ensure video has clear audio
	            - English content works best
	            """)

	    # Output row: transcript and extracted information.
	    with gr.Row():
	        with gr.Column():
	            transcript_output = gr.Textbox(
	                label="📝 Full Transcript",
	                lines=15,
	                max_lines=20,
	                show_copy_button=True
	            )

	        with gr.Column():
	            stock_info_output = gr.Textbox(
	                label="📊 Extracted Stock Information",
	                lines=15,
	                max_lines=20,
	                show_copy_button=True
	            )

	    # Event handlers: the three outputs match process_video's 3-tuple
	    # (transcript, stock details, status).
	    process_btn.click(
	        fn=process_video,
	        inputs=[url_input, cookies_input],
	        outputs=[transcript_output, stock_info_output, status_output],
	        show_progress=True
	    )

	    # Example section
	    gr.Markdown("### 📋 Example URLs (Replace with actual financial videos)")
	    gr.Examples(
	        examples=[
	            ["https://www.youtube.com/watch?v=dQw4w9WgXcQ"],
	        ],
	        inputs=[url_input],
	        label="Click to try example"
	    )

	    gr.Markdown("""
	    ### 🔧 Troubleshooting:
	    - Bot Detection Error: Upload your cookies.txt file
	    - No Audio Found: Check if video has audio track
	    - Transcription Failed: Video might be too long or audio quality poor
	    - No Stock Info: Video might not contain financial content
	    """)

	# Launch the app only when this file is run as a script.
	if __name__ == "__main__":
	    demo.launch()