Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,5 +1,4 @@
|
|
1 |
-
# ✅ Combined YouTube Analyzer with Stock Info Extractor
|
2 |
-
# ⬇️ Based on your working app + whisper + stock extraction
|
3 |
|
4 |
import gradio as gr
|
5 |
import os
|
@@ -29,7 +28,6 @@ except ImportError:
|
|
29 |
|
30 |
def extract_stock_info_simple(text):
|
31 |
try:
|
32 |
-
stock_info = []
|
33 |
companies = re.findall(r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*(?:\s+(?:Inc|Corp|Company|Ltd)\.?)?', text)
|
34 |
symbols = re.findall(r'\b[A-Z]{2,5}\b', text)
|
35 |
prices = re.findall(r'\$\d+(?:\.\d{2})?', text)
|
@@ -83,62 +81,84 @@ def transcribe_audio(file_path):
|
|
83 |
except Exception as e:
|
84 |
return "β Transcription failed", str(e)
|
85 |
|
86 |
-
#
|
87 |
|
88 |
def download_audio_youtube(url, cookies_file=None):
|
89 |
try:
|
90 |
temp_dir = tempfile.mkdtemp()
|
91 |
out_path = os.path.join(temp_dir, "audio")
|
92 |
-
|
|
|
93 |
'format': 'bestaudio[ext=m4a]/bestaudio/best',
|
94 |
'outtmpl': out_path + '.%(ext)s',
|
95 |
'quiet': True,
|
96 |
'noplaylist': True,
|
97 |
-
'cookiefile': cookies_file if cookies_file else None,
|
98 |
'user_agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
|
99 |
'referer': 'https://www.youtube.com/',
|
100 |
'force_ipv4': True,
|
101 |
-
'
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
111 |
}
|
|
|
112 |
with YoutubeDL(ydl_opts) as ydl:
|
113 |
ydl.download([url])
|
|
|
114 |
for ext in ['.m4a', '.mp3', '.webm']:
|
115 |
full_path = out_path + ext
|
116 |
if os.path.exists(full_path):
|
117 |
return full_path, "β
Audio downloaded"
|
|
|
118 |
return None, "β Audio file not found"
|
|
|
119 |
except Exception as e:
|
|
|
|
|
120 |
return None, f"β Download error: {str(e)}"
|
121 |
|
122 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
123 |
|
124 |
def full_pipeline(url, cookies):
|
125 |
if not url:
|
126 |
return "β Enter a valid YouTube URL", "", ""
|
127 |
-
|
128 |
temp_cookie = save_uploaded_cookie(cookies)
|
129 |
-
|
130 |
audio_path, msg = download_audio_youtube(url, temp_cookie)
|
131 |
if not audio_path:
|
132 |
return msg, "", ""
|
133 |
-
|
134 |
transcript, tmsg = transcribe_audio(audio_path)
|
135 |
if "β" in transcript:
|
136 |
return msg, transcript, tmsg
|
137 |
-
|
138 |
stock_data = extract_stock_info_simple(transcript)
|
139 |
return "β
Complete", transcript, stock_data
|
140 |
|
141 |
-
|
142 |
# Gradio App
|
143 |
with gr.Blocks(title="π Stock Info Extractor from YouTube") as demo:
|
144 |
gr.Markdown("""
|
@@ -148,7 +168,7 @@ with gr.Blocks(title="π Stock Info Extractor from YouTube") as demo:
|
|
148 |
|
149 |
with gr.Row():
|
150 |
url_input = gr.Textbox(label="YouTube URL")
|
151 |
-
cookies_input = gr.File(label="cookies.txt (
|
152 |
|
153 |
run_btn = gr.Button("π Run Extraction")
|
154 |
status = gr.Textbox(label="Status")
|
|
|
1 |
+
# ✅ Combined YouTube Analyzer with Stock Info Extractor (fixed download using working app logic)
|
|
|
2 |
|
3 |
import gradio as gr
|
4 |
import os
|
|
|
28 |
|
29 |
def extract_stock_info_simple(text):
|
30 |
try:
|
|
|
31 |
companies = re.findall(r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*(?:\s+(?:Inc|Corp|Company|Ltd)\.?)?', text)
|
32 |
symbols = re.findall(r'\b[A-Z]{2,5}\b', text)
|
33 |
prices = re.findall(r'\$\d+(?:\.\d{2})?', text)
|
|
|
81 |
except Exception as e:
|
82 |
return "β Transcription failed", str(e)
|
83 |
|
84 |
+
# ✅ Reused working download logic from other app
|
85 |
|
86 |
def download_audio_youtube(url, cookies_file=None):
    """Download the audio track of a YouTube video into a fresh temp directory.

    Args:
        url: Video URL handed to ``YoutubeDL.download``.
        cookies_file: Optional path to a cookies file. It is passed to the
            downloader only when the path exists; otherwise a warning is
            printed and the download proceeds without cookies.

    Returns:
        A ``(path, message)`` tuple. On success ``path`` is the downloaded
        audio file and ``message`` a confirmation. On any failure ``path`` is
        ``None``, ``message`` describes the problem, and the temporary
        directory created for the download is removed.
    """
    import shutil

    temp_dir = None
    try:
        temp_dir = tempfile.mkdtemp()
        out_path = os.path.join(temp_dir, "audio")

        ydl_opts = {
            'format': 'bestaudio[ext=m4a]/bestaudio/best',
            'outtmpl': out_path + '.%(ext)s',
            'quiet': True,
            'noplaylist': True,
            'user_agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
            'referer': 'https://www.youtube.com/',
            'force_ipv4': True,
            'extractor_retries': 3,
            'fragment_retries': 3,
            # Exponential back-off between HTTP retries: 2, 4, 8 ... seconds.
            'retry_sleep_functions': {'http': lambda n: 2 ** n},
        }

        if cookies_file and os.path.exists(cookies_file):
            ydl_opts['cookiefile'] = cookies_file
        else:
            print("⚠️ No cookies file provided")

        ydl_opts['http_headers'] = {
            'User-Agent': ydl_opts['user_agent'],
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.5',
            'Accept-Encoding': 'gzip, deflate',
            'DNT': '1',
            'Connection': 'keep-alive',
            'Upgrade-Insecure-Requests': '1',
            'Referer': 'https://www.youtube.com/',
        }

        with YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])

        # Preferred containers first, in the original priority order.
        for ext in ['.m4a', '.mp3', '.webm']:
            full_path = out_path + ext
            if os.path.exists(full_path):
                return full_path, "✅ Audio downloaded"

        # The 'bestaudio/best' fallbacks may yield another container, so
        # accept any finished "audio.<ext>" file written by the template.
        # Names ending in .part/.ytdl are skipped as unfinished downloads.
        others = sorted(
            name for name in os.listdir(temp_dir)
            if name.startswith("audio.")
            and not name.endswith(('.part', '.ytdl'))
        )
        if others:
            return os.path.join(temp_dir, others[0]), "✅ Audio downloaded"

        shutil.rmtree(temp_dir, ignore_errors=True)
        return None, "❌ Audio file not found"

    except Exception as e:
        import traceback
        traceback.print_exc()
        # Do not leave a half-written download directory behind.
        if temp_dir is not None:
            shutil.rmtree(temp_dir, ignore_errors=True)
        return None, f"❌ Download error: {str(e)}"
|
134 |
|
135 |
+
# Copy cookie to tmp
|
136 |
+
|
137 |
+
def save_uploaded_cookie(cookies):
    """Copy an uploaded cookies file to a private temporary ``.txt`` file.

    Args:
        cookies: The upload value, or ``None`` when nothing was uploaded.
            Either an object exposing the file path as ``.name`` or a plain
            path string is accepted (NOTE(review): which one Gradio passes
            depends on its version/configuration; confirm against the UI).

    Returns:
        Path of the temporary copy, or ``None`` when ``cookies`` is ``None``.

    Raises:
        OSError: If the uploaded file cannot be read or the copy fails.
    """
    if cookies is None:
        return None

    source_path = getattr(cookies, "name", cookies)

    # mkstemp creates the file atomically with owner-only permissions;
    # mktemp only returns a name, which another process could claim first.
    fd, temp_cookie_path = tempfile.mkstemp(suffix=".txt")
    os.close(fd)
    try:
        # copyfile copies content only, so the copy keeps the restrictive
        # mode set by mkstemp; cookies are credentials.
        shutil.copyfile(source_path, temp_cookie_path)
    except Exception:
        os.remove(temp_cookie_path)
        raise
    return temp_cookie_path
|
143 |
+
|
144 |
+
# Gradio app logic
|
145 |
|
146 |
def full_pipeline(url, cookies):
    """Run download, transcription and stock extraction for one video.

    Args:
        url: YouTube URL; a falsy value short-circuits with an error status.
        cookies: Uploaded cookies file (or ``None``), forwarded to
            ``save_uploaded_cookie``.

    Returns:
        A 3-tuple ``(status, transcript, stock_data)``. When the download
        fails the last two items are empty strings. When transcription fails
        the tuple is ``(download message, transcript, transcription message)``.
    """
    if not url:
        return "❌ Enter a valid YouTube URL", "", ""

    temp_cookie = save_uploaded_cookie(cookies)
    audio_path = None
    try:
        audio_path, msg = download_audio_youtube(url, temp_cookie)
        if not audio_path:
            return msg, "", ""

        transcript, tmsg = transcribe_audio(audio_path)
        if "❌" in transcript:
            # NOTE(review): the status slot still carries the download
            # message here, as in the original; confirm this is intended.
            return msg, transcript, tmsg

        stock_data = extract_stock_info_simple(transcript)
        return "✅ Complete", transcript, stock_data
    finally:
        # Best-effort cleanup: the cookie copy and the audio file are only
        # needed while this request runs. Failures are deliberately ignored.
        for leftover in (temp_cookie, audio_path):
            if leftover:
                try:
                    os.remove(leftover)
                except OSError:
                    pass
        if audio_path:
            try:
                # rmdir only succeeds on an empty directory, so nothing
                # else that may live next to the audio file is touched.
                os.rmdir(os.path.dirname(audio_path))
            except OSError:
                pass
|
161 |
|
|
|
162 |
# Gradio App
|
163 |
with gr.Blocks(title="π Stock Info Extractor from YouTube") as demo:
|
164 |
gr.Markdown("""
|
|
|
168 |
|
169 |
with gr.Row():
|
170 |
url_input = gr.Textbox(label="YouTube URL")
|
171 |
+
cookies_input = gr.File(label="cookies.txt (exported from YouTube tab)", file_types=[".txt"])
|
172 |
|
173 |
run_btn = gr.Button("π Run Extraction")
|
174 |
status = gr.Textbox(label="Status")
|