Spaces:

developer28
/

Youtubedownloader

Sleeping

App Files Files Community

developer28 committed 12 days ago

Commit

f869bf3

verified ·

1 Parent(s): 7c660a9

Update app.py

Browse files

Files changed (1) hide show

app.py +43 -23

app.py CHANGED Viewed

@@ -1,5 +1,4 @@
-# ✅ Combined YouTube Analyzer with Stock Info Extractor
-# ⬇️ Based on your working app + whisper + stock extraction
 import gradio as gr
 import os
@@ -29,7 +28,6 @@ except ImportError:
 def extract_stock_info_simple(text):
     try:
-        stock_info = []
         companies = re.findall(r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*(?:\s+(?:Inc|Corp|Company|Ltd)\.?)?', text)
         symbols = re.findall(r'\b[A-Z]{2,5}\b', text)
         prices = re.findall(r'\$\d+(?:\.\d{2})?', text)
@@ -83,62 +81,84 @@ def transcribe_audio(file_path):
     except Exception as e:
         return "❌ Transcription failed", str(e)
-# Audio Downloader using yt-dlp
 def download_audio_youtube(url, cookies_file=None):
     """Download the audio track of a YouTube video into a fresh temp directory.

     Args:
         url: Video URL passed to ``YoutubeDL.download``.
         cookies_file: Optional path to a cookies.txt file; passed to yt-dlp
             as ``cookiefile`` (``None`` when falsy).

     Returns:
         ``(path, message)``. On success ``path`` is the downloaded audio file
         and ``message`` is ``"✅ Audio downloaded"``. On failure ``path`` is
         ``None`` and ``message`` describes the problem. Exceptions are never
         propagated; they are folded into the message.
     """
     import shutil  # local import: only needed for temp-dir cleanup

     temp_dir = None
     try:
         temp_dir = tempfile.mkdtemp()
         out_path = os.path.join(temp_dir, "audio")

         ydl_opts = {
             'format': 'bestaudio[ext=m4a]/bestaudio/best',
             'outtmpl': out_path + '.%(ext)s',
             'quiet': True,
             'noplaylist': True,
             'cookiefile': cookies_file if cookies_file else None,
             # NOTE(review): 'user_agent', 'referer' and 'force_ipv4' look like
             # CLI flag names; confirm YoutubeDL honours them as API options.
             'user_agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
             'referer': 'https://www.youtube.com/',
             'force_ipv4': True,
             'http_headers': {
                 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
                 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
                 'Accept-Language': 'en-US,en;q=0.5',
                 'Accept-Encoding': 'gzip, deflate',
                 'DNT': '1',
                 'Connection': 'keep-alive',
                 'Upgrade-Insecure-Requests': '1',
                 'Referer': 'https://www.youtube.com/',
             },
         }

         with YoutubeDL(ydl_opts) as ydl:
             ydl.download([url])

         # Probe the historically expected extensions first, in the same order.
         for ext in ['.m4a', '.mp3', '.webm']:
             full_path = out_path + ext
             if os.path.exists(full_path):
                 return full_path, "✅ Audio downloaded"

         # The format selector falls back to 'bestaudio/best', so the file may
         # use another container; accept any finished "audio.<ext>" file.
         for entry in sorted(os.listdir(temp_dir)):
             if entry.startswith("audio.") and not entry.endswith((".part", ".ytdl")):
                 return os.path.join(temp_dir, entry), "✅ Audio downloaded"

         # Nothing usable was produced: do not leave the temp directory behind.
         shutil.rmtree(temp_dir, ignore_errors=True)
         return None, "❌ Audio file not found"
     except Exception as e:
         if temp_dir:
             shutil.rmtree(temp_dir, ignore_errors=True)
         return None, f"❌ Download error: {str(e)}"
-# Gradio UI
 def full_pipeline(url, cookies):
     """Run download -> transcription -> stock extraction for one YouTube URL.

     Args:
         url: YouTube URL entered by the user; blank or whitespace-only input
             is rejected.
         cookies: Uploaded cookies file from the UI, or ``None``.

     Returns:
         A 3-tuple ``(status, transcript, stock_data)``. When the download
         fails the last two items are empty strings. When the transcript
         contains "❌" the tuple is ``(download message, transcript,
         transcription detail)``, exactly as before.
     """
     url = (url or "").strip()
     if not url:
         return "❌ Enter a valid YouTube URL", "", ""

     temp_cookie = save_uploaded_cookie(cookies)
     audio_path = None
     try:
         audio_path, msg = download_audio_youtube(url, temp_cookie)
         if not audio_path:
             return msg, "", ""

         transcript, tmsg = transcribe_audio(audio_path)
         if "❌" in transcript:
             # NOTE(review): status still carries the download message here;
             # kept unchanged so the UI output stays the same.
             return msg, transcript, tmsg

         stock_data = extract_stock_info_simple(transcript)
         return "✅ Complete", transcript, stock_data
     finally:
         # Best-effort cleanup: the cookie copy holds session credentials and
         # the audio is no longer needed once the text results are built.
         for leftover in (temp_cookie, audio_path):
             if leftover:
                 try:
                     os.remove(leftover)
                 except OSError:
                     pass
         if audio_path:
             try:
                 # rmdir only succeeds on an empty directory, so this can
                 # never delete unrelated files.
                 os.rmdir(os.path.dirname(audio_path))
             except OSError:
                 pass
 # Gradio App
 with gr.Blocks(title="📈 Stock Info Extractor from YouTube") as demo:
     gr.Markdown("""
@@ -148,7 +168,7 @@ with gr.Blocks(title="📈 Stock Info Extractor from YouTube") as demo:
     with gr.Row():
         url_input = gr.Textbox(label="YouTube URL")
-        cookies_input = gr.File(label="cookies.txt (optional)", file_types=[".txt"])
     run_btn = gr.Button("🚀 Run Extraction")
     status = gr.Textbox(label="Status")

+# ✅ Combined YouTube Analyzer with Stock Info Extractor (fixed download using working app logic)
 import gradio as gr
 import os
 def extract_stock_info_simple(text):
     try:
         companies = re.findall(r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*(?:\s+(?:Inc|Corp|Company|Ltd)\.?)?', text)
         symbols = re.findall(r'\b[A-Z]{2,5}\b', text)
         prices = re.findall(r'\$\d+(?:\.\d{2})?', text)
     except Exception as e:
         return "❌ Transcription failed", str(e)
+# ✅ Reused working download logic from other app
 def download_audio_youtube(url, cookies_file=None):
     """Download the audio track of a YouTube video into a fresh temp directory.

     Args:
         url: Video URL passed to ``YoutubeDL.download``.
         cookies_file: Optional path to a cookies.txt file. It is passed to
             yt-dlp as ``cookiefile`` only when the path exists; otherwise a
             warning is printed and the download proceeds without cookies.

     Returns:
         ``(path, message)``. On success ``path`` is the downloaded audio file
         and ``message`` is ``"✅ Audio downloaded"``. On failure ``path`` is
         ``None`` and ``message`` describes the problem. Exceptions are never
         propagated; the traceback is printed and the error is folded into
         the message.
     """
     import shutil  # local import: only needed for temp-dir cleanup
     import traceback

     temp_dir = None
     try:
         temp_dir = tempfile.mkdtemp()
         out_path = os.path.join(temp_dir, "audio")

         ydl_opts = {
             'format': 'bestaudio[ext=m4a]/bestaudio/best',
             'outtmpl': out_path + '.%(ext)s',
             'quiet': True,
             'noplaylist': True,
             # NOTE(review): 'user_agent', 'referer' and 'force_ipv4' look like
             # CLI flag names; confirm YoutubeDL honours them as API options.
             'user_agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
             'referer': 'https://www.youtube.com/',
             'force_ipv4': True,
             'extractor_retries': 3,
             'fragment_retries': 3,
             # Exponential back-off between HTTP retries: 2, 4, 8, ... seconds.
             'retry_sleep_functions': {'http': lambda n: 2 ** n},
         }

         if cookies_file and os.path.exists(cookies_file):
             ydl_opts['cookiefile'] = cookies_file
         else:
             print("⚠️ No cookies file provided")

         ydl_opts['http_headers'] = {
             'User-Agent': ydl_opts['user_agent'],
             'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
             'Accept-Language': 'en-US,en;q=0.5',
             'Accept-Encoding': 'gzip, deflate',
             'DNT': '1',
             'Connection': 'keep-alive',
             'Upgrade-Insecure-Requests': '1',
             'Referer': 'https://www.youtube.com/',
         }

         with YoutubeDL(ydl_opts) as ydl:
             ydl.download([url])

         # Probe the historically expected extensions first, in the same order.
         for ext in ['.m4a', '.mp3', '.webm']:
             full_path = out_path + ext
             if os.path.exists(full_path):
                 return full_path, "✅ Audio downloaded"

         # The format selector falls back to 'bestaudio/best', so the file may
         # use another container; accept any finished "audio.<ext>" file.
         for entry in sorted(os.listdir(temp_dir)):
             if entry.startswith("audio.") and not entry.endswith((".part", ".ytdl")):
                 return os.path.join(temp_dir, entry), "✅ Audio downloaded"

         # Nothing usable was produced: do not leave the temp directory behind.
         shutil.rmtree(temp_dir, ignore_errors=True)
         return None, "❌ Audio file not found"
     except Exception as e:
         traceback.print_exc()
         if temp_dir:
             shutil.rmtree(temp_dir, ignore_errors=True)
         return None, f"❌ Download error: {str(e)}"
+# Copy cookie to tmp
def save_uploaded_cookie(cookies):
    """Copy an uploaded cookies file to a private temporary ``.txt`` file.

    Args:
        cookies: The uploaded file, either an object exposing the source path
            as ``.name`` or a plain path string, or ``None`` when nothing
            was uploaded.

    Returns:
        Path of the temporary copy, or ``None`` when ``cookies`` is ``None``.

    Raises:
        OSError: If the source file cannot be copied; the partially created
            temporary file is removed first.
    """
    if cookies is None:
        return None

    source_path = getattr(cookies, "name", cookies)

    # mkstemp creates the file atomically; the deprecated tempfile.mktemp only
    # returns a name, which another process could claim before the copy.
    fd, temp_cookie_path = tempfile.mkstemp(suffix=".txt")
    os.close(fd)
    try:
        shutil.copy2(source_path, temp_cookie_path)
    except OSError:
        os.remove(temp_cookie_path)
        raise
    return temp_cookie_path
+# Gradio app logic
 def full_pipeline(url, cookies):
     """Run download -> transcription -> stock extraction for one YouTube URL.

     Args:
         url: YouTube URL entered by the user; blank or whitespace-only input
             is rejected.
         cookies: Uploaded cookies file from the UI, or ``None``.

     Returns:
         A 3-tuple ``(status, transcript, stock_data)``. When the download
         fails the last two items are empty strings. When the transcript
         contains "❌" the tuple is ``(download message, transcript,
         transcription detail)``, exactly as before.
     """
     url = (url or "").strip()
     if not url:
         return "❌ Enter a valid YouTube URL", "", ""

     temp_cookie = save_uploaded_cookie(cookies)
     audio_path = None
     try:
         audio_path, msg = download_audio_youtube(url, temp_cookie)
         if not audio_path:
             return msg, "", ""

         transcript, tmsg = transcribe_audio(audio_path)
         if "❌" in transcript:
             # NOTE(review): status still carries the download message here;
             # kept unchanged so the UI output stays the same.
             return msg, transcript, tmsg

         stock_data = extract_stock_info_simple(transcript)
         return "✅ Complete", transcript, stock_data
     finally:
         # Best-effort cleanup: the cookie copy holds session credentials and
         # the audio is no longer needed once the text results are built.
         for leftover in (temp_cookie, audio_path):
             if leftover:
                 try:
                     os.remove(leftover)
                 except OSError:
                     pass
         if audio_path:
             try:
                 # rmdir only succeeds on an empty directory, so this can
                 # never delete unrelated files.
                 os.rmdir(os.path.dirname(audio_path))
             except OSError:
                 pass
 # Gradio App
 with gr.Blocks(title="📈 Stock Info Extractor from YouTube") as demo:
     gr.Markdown("""
     with gr.Row():
         url_input = gr.Textbox(label="YouTube URL")
+        cookies_input = gr.File(label="cookies.txt (exported from YouTube tab)", file_types=[".txt"])
     run_btn = gr.Button("🚀 Run Extraction")
     status = gr.Textbox(label="Status")