Spaces:
Sleeping
Sleeping
# Stock Recommendation Extractor from YouTube Audio (working pipeline)
import os | |
import gradio as gr | |
import tempfile | |
import shutil | |
import re | |
import traceback | |
from yt_dlp import YoutubeDL | |
# Optional: use OpenAI Whisper if available | |
# Whisper is an optional dependency: the app still starts without it and
# reports the missing package to the user at run time instead of crashing.
try:
    import whisper

    WHISPER_AVAILABLE = True
except Exception:
    # Deliberately broad (a broken install can fail with more than
    # ImportError), but unlike a bare ``except:`` this no longer swallows
    # KeyboardInterrupt / SystemExit raised while the import is running.
    WHISPER_AVAILABLE = False
# Download the audio track of a video with yt-dlp
def download_audio(url, cookies_path=None):
    """Download the audio of *url* into a freshly created temporary directory.

    Args:
        url: Video URL handed to yt-dlp.
        cookies_path: Optional path to a cookies file; forwarded as yt-dlp's
            ``cookiefile`` option when truthy.

    Returns:
        A ``(path, message)`` tuple. On success ``path`` points at the
        downloaded file inside its own temporary directory (the caller is
        responsible for deleting it). On failure ``path`` is ``None``,
        ``message`` describes the problem, and the temporary directory has
        already been removed.
    """
    temp_dir = None
    try:
        temp_dir = tempfile.mkdtemp()
        output_path = os.path.join(temp_dir, "audio")
        # NOTE(review): 'user_agent', 'referer' and 'force_ipv4' are kept
        # exactly as they were; confirm yt-dlp honours them as top-level
        # options (the same values are also sent through 'http_headers').
        ydl_opts = {
            'format': 'bestaudio[ext=m4a]/bestaudio/best',
            'outtmpl': output_path + '.%(ext)s',
            'quiet': True,
            'noplaylist': True,
            'cookiefile': cookies_path or None,
            'user_agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
            'referer': 'https://www.youtube.com/',
            'force_ipv4': True,
            'http_headers': {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
                'Accept-Language': 'en-US,en;q=0.5',
                'Referer': 'https://www.youtube.com/',
            },
        }
        with YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])

        # Preferred containers first, in the original priority order.
        for ext in (".m4a", ".webm", ".mp3"):
            final_path = output_path + ext
            if os.path.exists(final_path):
                return final_path, "β Audio downloaded successfully"

        # The 'bestaudio/best' fallbacks may produce another container
        # (.opus, .ogg, .mp4, ...), so accept any finished "audio.*" file
        # while skipping yt-dlp's partial-download bookkeeping files.
        for name in sorted(os.listdir(temp_dir)):
            if name.startswith("audio.") and not name.endswith((".part", ".ytdl")):
                return os.path.join(temp_dir, name), "β Audio downloaded successfully"

        message = "β Audio file not found"
    except Exception as e:
        traceback.print_exc()
        message = f"β Download error: {str(e)}"

    # Every failure path ends here: do not leave the temp directory behind.
    if temp_dir is not None:
        shutil.rmtree(temp_dir, ignore_errors=True)
    return None, message
# Transcribe the downloaded audio with Whisper
def transcribe_audio(path):
    """Return the Whisper transcript of the audio file at *path*.

    The "tiny" Whisper model is loaded on every call. Problems are reported
    through the return value rather than raised: when Whisper is not
    installed, or when loading/transcribing fails, a message string is
    returned instead of a transcript (the traceback of a failure is printed).
    """
    if WHISPER_AVAILABLE:
        try:
            transcription = whisper.load_model("tiny").transcribe(path)
            return transcription["text"]
        except Exception as err:
            traceback.print_exc()
            return f"β Transcription failed: {str(err)}"
    return "β Whisper not available. Please install openai-whisper."
# Extract stock-related information from the transcript
def _first_unique(items, limit=10):
    """Return up to *limit* distinct items, keeping first-seen order."""
    return list(dict.fromkeys(items))[:limit]


def extract_stock_info(text):
    """Build a plain-text report of stock-related mentions found in *text*.

    The report lists capitalised word runs ("companies"), 2-5 letter
    upper-case tokens ("symbols"), dollar amounts and trading keywords, then
    up to five sentences containing buy/sell/target/hold.

    Each list holds at most ten distinct entries in order of first
    appearance, so the output is deterministic and repeated mentions do not
    crowd out later ones. Trading keywords are lower-cased before
    de-duplication so "Buy" and "buy" are reported once.

    Args:
        text: Transcript to scan.

    Returns:
        The formatted report, or an error message string if extraction
        raises (for example when *text* is not a string).
    """
    try:
        companies = _first_unique(re.findall(r'\b[A-Z][a-z]+(?: [A-Z][a-z]+)*\b', text))
        symbols = _first_unique(re.findall(r'\b[A-Z]{2,5}\b', text))
        prices = _first_unique(re.findall(r'\$\d+(?:\.\d{1,2})?', text))
        actions = _first_unique(
            match.lower()
            for match in re.findall(
                r'\b(buy|sell|hold|target|bullish|bearish|stop loss)\b', text, re.IGNORECASE
            )
        )

        parts = ["=== STOCK RECOMMENDATION ANALYSIS ===\n\n"]
        sections = (
            ("π’ Companies Mentioned", companies),
            ("π Symbols", symbols),
            ("π² Prices", prices),
            ("π Actions", actions),
        )
        for label, found in sections:
            if found:
                parts.append(f"{label}: {', '.join(found)}\n")

        # Highlight potential recommendations. This is a plain substring
        # test on "."-separated chunks, so e.g. "holding" also counts.
        keywords = ('buy', 'sell', 'target', 'hold')
        recommendations = []
        for sentence in text.split("."):
            lowered = sentence.lower()
            if any(word in lowered for word in keywords):
                recommendations.append(sentence.strip())
        if recommendations:
            parts.append("\nπ― Potential Recommendations:\n")
            parts.extend(f"β’ {r}\n" for r in recommendations[:5])

        if not any([companies, symbols, prices, actions]):
            parts.append("\nβ οΈ No stock-related insights detected.")
        return "".join(parts)
    except Exception as e:
        return f"β Stock info extraction failed: {str(e)}"
# Save the uploaded cookies.txt to a temporary file
def save_cookies(file):
    """Copy an uploaded cookies file to a private temporary ``.txt`` file.

    Args:
        file: ``None``, a filesystem path (``str`` or ``os.PathLike``), or an
            object with a ``read()`` method returning ``bytes`` or ``str``.
            Accepting a path as well as a file object keeps this working
            whether the upload widget hands over an open file or a file path.

    Returns:
        Path of the newly written temporary file, or ``None`` when *file* is
        ``None``. The caller is responsible for deleting the file.
    """
    if file is None:
        return None

    if isinstance(file, (str, os.PathLike)):
        with open(file, "rb") as src:
            data = src.read()
    else:
        data = file.read()
        if isinstance(data, str):
            # File object opened in text mode.
            data = data.encode("utf-8")

    # mkstemp creates the file atomically with owner-only permissions;
    # tempfile.mktemp only returned a name and is deprecated as race-prone.
    fd, temp_path = tempfile.mkstemp(suffix=".txt")
    with os.fdopen(fd, "wb") as f:
        f.write(data)
    return temp_path
# Full pipeline: download, transcribe, extract
def run_pipeline(url, cookies_file):
    """Run download -> transcription -> extraction for one video URL.

    Args:
        url: Video URL typed by the user; surrounding whitespace is ignored.
        cookies_file: Optional uploaded cookies file, passed to
            ``save_cookies``.

    Returns:
        A ``(status, stock_info)`` tuple of strings. ``stock_info`` is empty
        whenever a step fails; ``status`` then carries that step's message.
    """
    if not WHISPER_AVAILABLE:
        return "β Whisper is not installed. Run: pip install openai-whisper", ""
    # Treat None and whitespace-only input the same as an empty box.
    url = (url or "").strip()
    if not url:
        return "β YouTube URL required", ""

    cookie_path = None
    audio_path = None
    try:
        cookie_path = save_cookies(cookies_file)
        audio_path, status = download_audio(url, cookie_path)
        if not audio_path:
            return status, ""
        transcript = transcribe_audio(audio_path)
        # transcribe_audio reports failures as a message with this prefix.
        if transcript.startswith("β"):
            return transcript, ""
        stock_info = extract_stock_info(transcript)
        return "β Complete", stock_info
    finally:
        # Neither the copied cookies nor the downloaded audio is needed once
        # the run is over, so do not let them accumulate in the temp folder.
        for leftover in (cookie_path, audio_path):
            if leftover:
                try:
                    os.remove(leftover)
                except OSError:
                    pass
        if audio_path:
            try:
                # rmdir only succeeds on an empty directory, so this can
                # never delete anything besides the now-empty download dir.
                os.rmdir(os.path.dirname(audio_path))
            except OSError:
                pass
# Gradio interface
# Build the web UI: a URL box and an optional cookies upload, a button that
# runs the pipeline, and two text boxes showing the status and the report.
# NOTE(review): the nesting below (only the two inputs inside the Row) is
# assumed from the statement order - confirm against the deployed layout.
with gr.Blocks(title="Stock Insights from YouTube Audio") as demo:
    gr.Markdown("""
    # π§ Extract Stock Recommendations from YouTube Audio
    This app downloads the audio from a YouTube video, transcribes it with Whisper,
    and extracts stock trading recommendations, sentiments, and symbols.
    """)
    with gr.Row():
        url_input = gr.Textbox(label="π₯ YouTube Video URL")
        cookie_input = gr.File(label="cookies.txt (optional)", file_types=[".txt"])
    run_btn = gr.Button("π Extract Stock Info")
    status_output = gr.Textbox(label="Status")
    result_output = gr.Textbox(label="Stock Info", lines=12)
    # run_pipeline returns (status, stock_info), matching the two outputs.
    run_btn.click(fn=run_pipeline, inputs=[url_input, cookie_input], outputs=[status_output, result_output])
# Start the server only when executed as a script, not when imported.
if __name__ == "__main__":
    demo.launch(debug=True)