developer28 committed on
Commit
f869bf3
·
verified ·
1 Parent(s): 7c660a9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -23
app.py CHANGED
@@ -1,5 +1,4 @@
1
- # ✅ Combined YouTube Analyzer with Stock Info Extractor
2
- # ⬇️ Based on your working app + whisper + stock extraction
3
 
4
  import gradio as gr
5
  import os
@@ -29,7 +28,6 @@ except ImportError:
29
 
30
  def extract_stock_info_simple(text):
31
  try:
32
- stock_info = []
33
  companies = re.findall(r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*(?:\s+(?:Inc|Corp|Company|Ltd)\.?)?', text)
34
  symbols = re.findall(r'\b[A-Z]{2,5}\b', text)
35
  prices = re.findall(r'\$\d+(?:\.\d{2})?', text)
@@ -83,62 +81,84 @@ def transcribe_audio(file_path):
83
  except Exception as e:
84
  return "❌ Transcription failed", str(e)
85
 
86
- # Audio Downloader using yt-dlp
87
 
88
  def download_audio_youtube(url, cookies_file=None):
89
  try:
90
  temp_dir = tempfile.mkdtemp()
91
  out_path = os.path.join(temp_dir, "audio")
92
- ydl_opts = {
 
93
  'format': 'bestaudio[ext=m4a]/bestaudio/best',
94
  'outtmpl': out_path + '.%(ext)s',
95
  'quiet': True,
96
  'noplaylist': True,
97
- 'cookiefile': cookies_file if cookies_file else None,
98
  'user_agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
99
  'referer': 'https://www.youtube.com/',
100
  'force_ipv4': True,
101
- 'http_headers': {
102
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
103
- 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
104
- 'Accept-Language': 'en-US,en;q=0.5',
105
- 'Accept-Encoding': 'gzip, deflate',
106
- 'DNT': '1',
107
- 'Connection': 'keep-alive',
108
- 'Upgrade-Insecure-Requests': '1',
109
- 'Referer': 'https://www.youtube.com/',
110
- },
 
 
 
 
 
 
 
 
 
111
  }
 
112
  with YoutubeDL(ydl_opts) as ydl:
113
  ydl.download([url])
 
114
  for ext in ['.m4a', '.mp3', '.webm']:
115
  full_path = out_path + ext
116
  if os.path.exists(full_path):
117
  return full_path, "✅ Audio downloaded"
 
118
  return None, "❌ Audio file not found"
 
119
  except Exception as e:
 
 
120
  return None, f"❌ Download error: {str(e)}"
121
 
122
- # Gradio UI
 
 
 
 
 
 
 
 
 
123
 
124
  def full_pipeline(url, cookies):
125
  if not url:
126
  return "❌ Enter a valid YouTube URL", "", ""
127
-
128
  temp_cookie = save_uploaded_cookie(cookies)
129
-
130
  audio_path, msg = download_audio_youtube(url, temp_cookie)
131
  if not audio_path:
132
  return msg, "", ""
133
-
134
  transcript, tmsg = transcribe_audio(audio_path)
135
  if "❌" in transcript:
136
  return msg, transcript, tmsg
137
-
138
  stock_data = extract_stock_info_simple(transcript)
139
  return "✅ Complete", transcript, stock_data
140
 
141
-
142
  # Gradio App
143
  with gr.Blocks(title="📈 Stock Info Extractor from YouTube") as demo:
144
  gr.Markdown("""
@@ -148,7 +168,7 @@ with gr.Blocks(title="📈 Stock Info Extractor from YouTube") as demo:
148
 
149
  with gr.Row():
150
  url_input = gr.Textbox(label="YouTube URL")
151
- cookies_input = gr.File(label="cookies.txt (optional)", file_types=[".txt"])
152
 
153
  run_btn = gr.Button("🚀 Run Extraction")
154
  status = gr.Textbox(label="Status")
 
1
+ # ✅ Combined YouTube Analyzer with Stock Info Extractor (fixed download using working app logic)
 
2
 
3
  import gradio as gr
4
  import os
 
28
 
29
  def extract_stock_info_simple(text):
30
  try:
 
31
  companies = re.findall(r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*(?:\s+(?:Inc|Corp|Company|Ltd)\.?)?', text)
32
  symbols = re.findall(r'\b[A-Z]{2,5}\b', text)
33
  prices = re.findall(r'\$\d+(?:\.\d{2})?', text)
 
81
  except Exception as e:
82
  return "❌ Transcription failed", str(e)
83
 
84
+ # ✅ Reused working download logic from other app
85
 
86
  def download_audio_youtube(url, cookies_file=None):
87
  try:
88
  temp_dir = tempfile.mkdtemp()
89
  out_path = os.path.join(temp_dir, "audio")
90
+
91
+ ydl_opts = {
92
  'format': 'bestaudio[ext=m4a]/bestaudio/best',
93
  'outtmpl': out_path + '.%(ext)s',
94
  'quiet': True,
95
  'noplaylist': True,
 
96
  'user_agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
97
  'referer': 'https://www.youtube.com/',
98
  'force_ipv4': True,
99
+ 'extractor_retries': 3,
100
+ 'fragment_retries': 3,
101
+ 'retry_sleep_functions': {'http': lambda n: 2 ** n},
102
+ }
103
+
104
+ if cookies_file and os.path.exists(cookies_file):
105
+ ydl_opts['cookiefile'] = cookies_file
106
+ else:
107
+ print("⚠️ No cookies file provided")
108
+
109
+ ydl_opts['http_headers'] = {
110
+ 'User-Agent': ydl_opts['user_agent'],
111
+ 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
112
+ 'Accept-Language': 'en-US,en;q=0.5',
113
+ 'Accept-Encoding': 'gzip, deflate',
114
+ 'DNT': '1',
115
+ 'Connection': 'keep-alive',
116
+ 'Upgrade-Insecure-Requests': '1',
117
+ 'Referer': 'https://www.youtube.com/',
118
  }
119
+
120
  with YoutubeDL(ydl_opts) as ydl:
121
  ydl.download([url])
122
+
123
  for ext in ['.m4a', '.mp3', '.webm']:
124
  full_path = out_path + ext
125
  if os.path.exists(full_path):
126
  return full_path, "✅ Audio downloaded"
127
+
128
  return None, "❌ Audio file not found"
129
+
130
  except Exception as e:
131
+ import traceback
132
+ traceback.print_exc()
133
  return None, f"❌ Download error: {str(e)}"
134
 
135
+ # Copy cookie to tmp
136
+
137
+ def save_uploaded_cookie(cookies):
138
+ if cookies is None:
139
+ return None
140
+ temp_cookie_path = tempfile.mktemp(suffix=".txt")
141
+ shutil.copy2(cookies.name, temp_cookie_path)
142
+ return temp_cookie_path
143
+
144
+ # Gradio app logic
145
 
146
  def full_pipeline(url, cookies):
147
  if not url:
148
  return "❌ Enter a valid YouTube URL", "", ""
149
+
150
  temp_cookie = save_uploaded_cookie(cookies)
 
151
  audio_path, msg = download_audio_youtube(url, temp_cookie)
152
  if not audio_path:
153
  return msg, "", ""
154
+
155
  transcript, tmsg = transcribe_audio(audio_path)
156
  if "❌" in transcript:
157
  return msg, transcript, tmsg
158
+
159
  stock_data = extract_stock_info_simple(transcript)
160
  return "✅ Complete", transcript, stock_data
161
 
 
162
  # Gradio App
163
  with gr.Blocks(title="📈 Stock Info Extractor from YouTube") as demo:
164
  gr.Markdown("""
 
168
 
169
  with gr.Row():
170
  url_input = gr.Textbox(label="YouTube URL")
171
+ cookies_input = gr.File(label="cookies.txt (exported from YouTube tab)", file_types=[".txt"])
172
 
173
  run_btn = gr.Button("🚀 Run Extraction")
174
  status = gr.Textbox(label="Status")