developer28 committed on
Commit
e364389
·
verified ·
1 Parent(s): 5a12060

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +962 -195
app.py CHANGED
@@ -1,225 +1,992 @@
1
- import os
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  import gradio as gr
 
 
3
  import tempfile
4
  import shutil
 
5
  import re
6
- import traceback
7
- from yt_dlp import YoutubeDL
8
-
9
- # Optional: use OpenAI Whisper if available
10
- try:
11
- import whisper
12
- WHISPER_AVAILABLE = True
13
- except:
14
- WHISPER_AVAILABLE = False
15
-
16
- # Download audio from YouTube
17
- def download_audio(url, cookies_path=None):
18
- try:
19
- temp_dir = tempfile.mkdtemp()
20
- output_path = os.path.join(temp_dir, "audio")
21
-
22
- ydl_opts = {
23
- 'format': 'bestaudio[ext=m4a]/bestaudio/best',
24
- 'outtmpl': output_path + '.%(ext)s',
25
- 'quiet': True,
26
- 'noplaylist': True,
27
- 'cookiefile': cookies_path if cookies_path else None,
28
- 'user_agent': 'Mozilla/5.0',
29
- 'referer': 'https://www.youtube.com/',
30
- 'force_ipv4': True,
31
- }
32
-
33
- with YoutubeDL(ydl_opts) as ydl:
34
- ydl.download([url])
35
 
36
- for ext in [".m4a", ".webm", ".mp3"]:
37
- final_path = output_path + ext
38
- if os.path.exists(final_path):
39
- return final_path, "βœ… Audio downloaded successfully"
40
 
41
- return None, "❌ Audio file not found"
42
 
43
- except Exception as e:
44
- traceback.print_exc()
45
- return None, f"❌ Download error: {str(e)}"
46
-
47
- # Transcribe using Whisper
48
- def transcribe_audio(path):
49
- if not WHISPER_AVAILABLE:
50
- return "❌ Whisper not available. Please install openai-whisper."
51
  try:
52
- model = whisper.load_model("tiny.en")
53
- result = model.transcribe(path)
54
- return result["text"]
55
- except Exception as e:
56
- traceback.print_exc()
57
- return f"❌ Transcription failed: {str(e)}"
58
-
59
- # Extract stock insights
60
- def extract_stock_info(text):
61
- try:
62
- companies = re.findall(r'\b[A-Z][a-z]+(?: [A-Z][a-z]+)*\b', text)
63
- symbols = re.findall(r'\b[A-Z]{2,5}\b', text)
64
- prices = re.findall(r'\$\d+(?:\.\d{1,2})?', text)
65
- actions = re.findall(r'\b(buy|sell|hold|target|bullish|bearish|stop loss|accumulate|short|take profit|entry|exit)\b', text, re.IGNORECASE)
66
-
67
- result = "=== STOCK RECOMMENDATION ANALYSIS ===\n\n"
68
- if companies:
69
- result += f"🏒 Companies Mentioned: {', '.join(set(companies[:10]))}\n"
70
- if symbols:
71
- result += f"πŸ”  Symbols: {', '.join(set(symbols[:10]))}\n"
72
- if prices:
73
- result += f"πŸ’² Prices: {', '.join(set(prices[:10]))}\n"
74
- if actions:
75
- result += f"πŸ“Š Actions: {', '.join(set(actions[:10]))}\n"
76
-
77
- recommendations = []
78
- for line in text.split("."):
79
- if any(word in line.lower() for word in ['buy', 'sell', 'target', 'hold', 'accumulate', 'short', 'entry', 'exit']):
80
- recommendations.append(line.strip())
81
-
82
- if recommendations:
83
- result += "\n🎯 Potential Recommendations:\n"
84
- for r in recommendations[:5]:
85
- result += f"β€’ {r}\n"
86
-
87
- if not any([companies, symbols, prices, actions]):
88
- result += "\n⚠️ No stock-related insights detected."
89
-
 
 
 
 
 
 
 
 
 
 
 
 
90
  return result
91
 
92
  except Exception as e:
93
- return f"❌ Stock info extraction failed: {str(e)}"
94
-
95
- # Save cookies
96
- def save_cookies(file):
97
- if file is None:
98
  return None
99
 
100
- temp_path = tempfile.mktemp(suffix=".txt")
 
101
  try:
102
- if hasattr(file, "read"):
103
- with open(temp_path, "wb") as f:
104
- f.write(file.read())
105
- else:
106
- shutil.copy(file, temp_path)
107
- return temp_path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
  except Exception as e:
109
- print(f"❌ Failed to handle cookies.txt: {e}")
 
 
110
  return None
111
-
112
-
113
- # βœ… Trim audio to shorter length (2 minutes) for CPU speed
114
- import subprocess
115
-
116
- def trim_audio(input_path, output_path, duration_sec=120):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  try:
118
- command = [
119
- "ffmpeg", "-y", "-i", input_path,
120
- "-t", str(duration_sec), # duration in seconds
121
- "-c", "copy", output_path
122
- ]
123
- subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
124
- return output_path
 
 
 
 
 
 
 
125
  except Exception as e:
126
- print("❌ Error trimming audio:", e)
127
- return input_path
128
 
129
 
130
- # YouTube flow
131
- def run_pipeline(url, cookies_file, show_transcript):
132
  try:
133
- if not WHISPER_AVAILABLE:
134
- return "❌ Whisper not installed", ""
135
- if not url:
136
- return "❌ YouTube URL required", ""
137
-
138
- cookie_path = save_cookies(cookies_file)
139
- audio_path, status = download_audio(url, cookie_path)
140
- if not audio_path:
141
- return status, ""
142
-
143
- # ⏱ Trim audio to 2 minutes before transcription
144
- trimmed_path = tempfile.mktemp(suffix=".mp3")
145
- trim_audio(audio_path, trimmed_path)
146
-
147
- transcript = transcribe_audio(trimmed_path)
148
- if transcript.startswith("❌"):
149
- return transcript, ""
150
-
151
- stock_info = extract_stock_info(transcript)
152
- if show_transcript:
153
- return "βœ… Complete", f"πŸ“œ Transcript:\n\n{transcript}\n\n\n{stock_info}"
154
- else:
155
- return "βœ… Complete", stock_info
156
-
157
- except Exception as e:
158
- tb = traceback.format_exc()
159
- print(tb)
160
- return f"❌ Unhandled Error:\n{tb}", ""
161
 
 
 
 
162
 
163
- # Audio upload flow
164
- def run_pipeline_audio(audio_file, show_transcript):
165
- try:
166
- if not WHISPER_AVAILABLE:
167
- return "❌ Whisper not installed", ""
168
- if audio_file is None:
169
- return "❌ No audio file uploaded", ""
170
-
171
- # Save uploaded file
172
- temp_audio_path = tempfile.mktemp(suffix=os.path.splitext(str(audio_file))[-1])
173
- if hasattr(audio_file, "read"):
174
- with open(temp_audio_path, "wb") as f:
175
- f.write(audio_file.read())
176
- else:
177
- shutil.copy(str(audio_file), temp_audio_path)
178
 
179
- # ⏱ Trim audio to 2 minutes
180
- trimmed_path = tempfile.mktemp(suffix=".mp3")
181
- trim_audio(temp_audio_path, trimmed_path)
 
182
 
183
- transcript = transcribe_audio(trimmed_path)
184
- if transcript.startswith("❌"):
185
- return transcript, ""
186
 
187
- stock_info = extract_stock_info(transcript)
188
- if show_transcript:
189
- return "βœ… Complete", f"πŸ“œ Transcript:\n\n{transcript}\n\n\n{stock_info}"
190
- else:
191
- return "βœ… Complete", stock_info
192
 
193
  except Exception as e:
194
- tb = traceback.format_exc()
195
- print(tb)
196
- return f"❌ Unhandled Error:\n{tb}", ""
197
-
198
-
199
- # Gradio UI
200
- with gr.Blocks(title="Stock Insights from YouTube or Audio") as demo:
201
- gr.Markdown("""
202
- # πŸ“ˆ Extract Stock Recommendations from YouTube or Uploaded Audio
203
- Upload a YouTube video or audio file. We'll transcribe it using Whisper and extract stock insights.
204
- """)
205
-
206
- with gr.Tab("πŸ“Ί From YouTube Video"):
207
- with gr.Row():
208
- url_input = gr.Textbox(label="πŸŽ₯ YouTube URL")
209
- cookie_input = gr.File(label="cookies.txt (optional)", file_types=[".txt"])
210
- show_transcript_yt = gr.Checkbox(label="Show Transcript", value=False)
211
- yt_run_btn = gr.Button("πŸš€ Extract from YouTube")
212
- yt_status = gr.Textbox(label="Status")
213
- yt_result = gr.Textbox(label="Transcript & Stock Info", lines=15)
214
- yt_run_btn.click(fn=run_pipeline, inputs=[url_input, cookie_input, show_transcript_yt], outputs=[yt_status, yt_result])
215
-
216
- with gr.Tab("🎡 From Uploaded Audio"):
217
- audio_input = gr.File(label="Upload Audio File", file_types=[".mp3", ".wav", ".m4a", ".webm"])
218
- show_transcript_audio = gr.Checkbox(label="Show Transcript", value=False)
219
- audio_run_btn = gr.Button("πŸš€ Extract from Audio")
220
- audio_status = gr.Textbox(label="Status")
221
- audio_result = gr.Textbox(label="Transcript & Stock Info", lines=15)
222
- audio_run_btn.click(fn=run_pipeline_audio, inputs=[audio_input, show_transcript_audio], outputs=[audio_status, audio_result])
223
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
224
  if __name__ == "__main__":
225
- demo.launch(debug=True)
 
 
 
 
1
def format_scene_breakdown(scenes):
    """Render scene strings as an HTML table of timestamps and descriptions.

    Args:
        scenes: Iterable of strings shaped like
            ``**[<timestamp>]**: <description>``. Entries that are not
            strings or do not match that shape are skipped. ``None`` is
            treated as an empty iterable.

    Returns:
        str: A complete ``<table>...</table>`` fragment containing a header
        row plus one row per matching scene.
    """
    # Function-scope import so this block stays self-contained.
    import html

    # The timestamp must sit on the first line; the description may span
    # several lines, so only the second group lets "." match newlines.
    pattern = re.compile(r"\*\*\[(.*?)\]\*\*:\s*((?s:.*))")

    parts = ["""
    <table style='width:100%; border-collapse: collapse; background-color:#1a1a1a; color: #FFFFFF; border: 2px solid #FF8C00; font-size: 14px;box-shadow: 0 4px 8px rgba(0,0,0,0.3);'>
    <tr style='background-color:#FF8C00; color: #000000;'>
    <th style='padding: 8px; border: 1px solid #FF8C00;color: #000000;'>Timestamp</th>
    <th style='padding: 8px; border: 1px solid #FF8C00;color: #000000;'> Description</th>
    </tr>
    """]

    for scene in scenes or ():
        if not isinstance(scene, str):
            continue
        match = pattern.match(scene)
        if not match:
            continue
        # Scene text is not authored by this module, so escape it before
        # embedding it in markup.
        timestamp = html.escape(match.group(1).strip())
        description = html.escape(match.group(2).strip()).replace("\n", "<br>")
        parts.append(f"""
        <tr style='background-color:#1a1a1a;'>
        <td style='padding: 8px; border: 1px solid #444; color: #87CEEB; font-weight: bold;font-size: 12px;vertical-align: top;'>{timestamp}</td>
        <td style='padding: 8px; border: 1px solid #444; color: #87CEEB; font-weight: bold;font-size: 12px;line-height: 1.4;'>{description}</td>
        </tr>
        """)

    parts.append("</table>")
    return "".join(parts)
26
+
27
+
28
  import gradio as gr
29
+ import yt_dlp
30
+ import os
31
  import tempfile
32
  import shutil
33
+ from pathlib import Path
34
  import re
35
+ import uuid
36
+ import json
37
+ from datetime import datetime
38
+ import google.generativeai as genai
39
+ from xhtml2pdf import pisa
40
+ from io import BytesIO
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
 
 
 
 
 
42
 
43
+ cached_reports = {}
44
 
45
def generate_pdf_from_html(html_content):
    """Wrap an HTML fragment in a compact A4 page template and render it to PDF.

    Args:
        html_content: HTML markup placed inside the template's ``<body>``.

    Returns:
        A ``BytesIO`` rewound to position 0 holding the PDF bytes, or ``None``
        when pisa reports an error or any exception is raised.
    """
    try:
        document = f"""
<!DOCTYPE html>
<html>
<head>
<meta charset='UTF-8'>
<style>
@page {{
size: A4;
margin: 0.7cm;
}}
body {{
font-family: 'Segoe UI', sans-serif;
font-size: 9px;
line-height: 1.3;
color: #000;
background-color: #fff;
}}
table {{
width: 100%;
border-collapse: collapse;
font-size: 9px;
margin-bottom: 10px;
}}
th, td {{
border: 1px solid #ccc;
padding: 2px;
vertical-align: top;
line-height: 1.1;
}}
h1, h2, h3 {{
font-size: 11px;
background-color: #D3D3D3;
color: #000;
padding: 4px;
margin: 4px 0;
}}
</style>
</head>
<body>
{html_content}
</body>
</html>
"""

        buffer = BytesIO()
        outcome = pisa.CreatePDF(document, dest=buffer)
        if not outcome.err:
            # Rewind so the caller reads the PDF from its first byte.
            buffer.seek(0)
            return buffer

        print("❌ Pisa PDF creation error")
        return None

    except Exception as exc:
        print(f"❌ PDF generation exception: {exc}")
        return None
103
 
104
def generate_pdf_from_html_debug(html_content):
    """Render HTML to PDF while printing a trace of every step.

    Prints the input size and a 200-character preview, checks that the
    required modules import, swaps two dark-theme CSS declarations for
    light ones, wraps the markup in a minimal page template and hands it
    to pisa.

    Args:
        html_content: HTML markup to place inside the template's ``<body>``.

    Returns:
        A ``BytesIO`` rewound to position 0 holding the PDF bytes, or ``None``
        if an import fails, pisa reports an error, the output is empty, or
        any exception is raised.
    """
    try:
        print("πŸ› Starting PDF generation...")
        print(f"πŸ› HTML content length: {len(html_content)} characters")
        print(f"πŸ› HTML preview: {html_content[:200]}...")

        # Verify the imports this function relies on before doing any work.
        try:
            from xhtml2pdf import pisa
        except ImportError as import_error:
            print(f"❌ pisa import failed: {import_error}")
            return None
        else:
            print("βœ… pisa import successful")

        try:
            import re
        except ImportError as import_error:
            print(f"❌ re import failed: {import_error}")
            return None
        else:
            print("βœ… re import successful")

        # Minimal cleanup: replace the dark gradient and white text.
        without_gradient = html_content.replace(
            "background: linear-gradient(135deg, #2d3748, #1a202c);",
            "background-color: #ffffff;",
        )
        simplified_html = without_gradient.replace(
            "color: #FFFFFF;",
            "color: #000000;",
        )

        print("πŸ› HTML cleanup completed")

        document = f"""
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<style>
body {{
font-family: Arial, sans-serif;
font-size: 12px;
color: #000000;
background-color: #ffffff;
}}
table {{ border-collapse: collapse; width: 100%; }}
th, td {{ border: 1px solid #ccc; padding: 4px; }}
th {{ background-color: #FF8C00; color: #000000; }}

</style>
</head>
<body>
{simplified_html}
</body>
</html>
"""

        print("πŸ› PDF HTML template created")

        from io import BytesIO
        buffer = BytesIO()

        print("πŸ› Creating PDF with pisa...")
        outcome = pisa.CreatePDF(document, dest=buffer)
        byte_count = len(buffer.getvalue())

        print(f"πŸ› Pisa status - err: {outcome.err}")
        print(f"πŸ› PDF buffer size: {byte_count} bytes")

        if outcome.err:
            print(f"❌ PDF generation error: {outcome.err}")
            return None

        if byte_count == 0:
            print("❌ PDF buffer is empty")
            return None

        # Rewind so the caller reads the PDF from its first byte.
        buffer.seek(0)
        print("βœ… PDF generation successful!")
        return buffer

    except Exception as exc:
        print(f"❌ PDF generation exception: {exc}")
        import traceback
        traceback.print_exc()
        return None
190
+
191
+ class YouTubeDownloader:
192
def __init__(self):
    """Create the working directories and start with no Gemini model.

    Two temporary directories are created with ``tempfile.mkdtemp`` and a
    ``Downloads/YouTube_Downloads`` folder is ensured under the user's home
    directory.
    """
    # Scratch directory, removed again by cleanup().
    self.download_dir = tempfile.mkdtemp()
    # Second temp directory, kept separate for Gradio compatibility.
    self.temp_downloads = tempfile.mkdtemp(prefix="youtube_downloads_")

    # Folder in the user's home directory that files can be copied to.
    home_dir = os.path.expanduser("~")
    self.downloads_folder = os.path.join(home_dir, "Downloads", "YouTube_Downloads")
    os.makedirs(self.downloads_folder, exist_ok=True)

    # Stays None until configure_gemini() succeeds.
    self.gemini_model = None
200
+
201
def configure_gemini(self, api_key):
    """Set up the Gemini client with ``api_key`` and store the model.

    Args:
        api_key: Key passed to ``genai.configure``.

    Returns:
        tuple: ``(True, message)`` when the model object was created and
        stored on ``self.gemini_model``; ``(False, message)`` carrying the
        exception text otherwise.
    """
    try:
        genai.configure(api_key=api_key)
        model = genai.GenerativeModel(model_name="gemini-1.5-flash-latest")
        self.gemini_model = model
    except Exception as exc:
        return False, f"❌ Failed to configure Gemini API: {exc}"
    return True, "βœ… Gemini API configured successfully!"
209
+
210
def cleanup(self):
    """Remove the temporary directories created for this instance.

    Each directory is removed independently, so a failure on one (which is
    reported as a warning) does not prevent the other from being cleaned up.
    Missing attributes and paths that no longer exist are skipped silently.
    """
    targets = (
        ('download_dir', "βœ… Cleaned up temporary directory: "),
        ('temp_downloads', "βœ… Cleaned up temp downloads directory: "),
    )
    for attr_name, success_prefix in targets:
        path = getattr(self, attr_name, None)
        if not path or not os.path.exists(path):
            continue
        try:
            shutil.rmtree(path)
            print(f"{success_prefix}{path}")
        except Exception as e:
            # Best-effort cleanup: report the problem and keep going.
            print(f"⚠️ Warning: Could not clean up temporary directory: {e}")
221
+
222
def is_valid_youtube_url(self, url):
    """Return True when ``url`` starts with a recognised YouTube URL form.

    The pattern is matched from the start of the string only: an optional
    scheme and ``www.``, one of the listed YouTube host names, then 11
    characters that are none of ``& = % ?``.

    Args:
        url: Candidate URL. Non-string values (including ``None``) are
            rejected instead of raising.

    Returns:
        bool: Whether the pattern matched.
    """
    if not isinstance(url, str):
        return False

    youtube_regex = re.compile(
        r'(https?://)?(www\.)?(youtube|youtu|youtube-nocookie)\.(com|be)/'
        r'(watch\?v=|embed/|v/|.+\?v=)?([^&=%\?]{11})'
    )
    return youtube_regex.match(url) is not None
228
+
229
def generate_scene_breakdown_gemini(self, video_info):
    """Ask Gemini for a timestamped scene breakdown of a video.

    The prompt is built from the video's title, duration and the first 1500
    characters of its description. The reply is split into scenes: a line
    starting with ``**[`` and containing ``]**:`` opens a new scene, and
    following lines are appended to it.

    Args:
        video_info: Mapping with optional ``duration``, ``title`` and
            ``description`` entries. Missing or ``None`` values are treated
            as empty.

    Returns:
        list[str]: Scene strings. Delegates to
        ``generate_scene_breakdown_fallback`` when no model is configured,
        the reply is empty or contains no scene lines, or the request raises.
        Returns a single "Duration Unknown" entry when the duration is
        missing or zero.
    """
    if not self.gemini_model:
        return self.generate_scene_breakdown_fallback(video_info)

    try:
        # "or" rather than a .get() default: a key may be present with a
        # None value, which would otherwise break the slice below.
        duration = video_info.get('duration') or 0
        title = video_info.get('title') or ''
        description = (video_info.get('description') or '')[:1500]

        if not duration:
            return ["**[Duration Unknown]**: Unable to generate timestamped breakdown - video duration not available"]

        prompt = f"""
Analyze this YouTube video and create a detailed, scene-by-scene breakdown with precise timestamps and specific descriptions:

Title: {title}
Duration: {duration} seconds
Description: {description}

IMPORTANT INSTRUCTIONS:
1. Create detailed scene descriptions that include:
- Physical appearance of people (age, gender, clothing, hair, etc.)
- Exact actions being performed
- Dialogue or speech (include actual lines if audible, or infer probable spoken lines based on actions and setting; format them as "Character: line...")
- Setting and environment details
- Props, objects, or products being shown
- Visual effects, text overlays, or graphics
- Mood, tone, and atmosphere
- Camera movements or angles (if apparent)
2. Dialogue Emphasis:
- Include short dialogue lines in **every scene** wherever plausible.
- Write lines like: Character: "Actual or inferred line..."
- If dialogue is not available, intelligently infer probable phrases (e.g., "Welcome!", "Try this now!", "It feels amazing!").
- Do NOT skip dialogue unless it's clearly impossible.

3. Timestamp Guidelines:
- For videos under 1 minute: 2-3 second segments
- For videos 1-5 minutes: 3-5 second segments
- For videos 5-15 minutes: 5-10 second segments
- For videos over 15 minutes: 10-15 second segments
- Maximum 20 scenes total for longer videos

4. Format each scene EXACTLY like this:
**[MM:SS-MM:SS]**: Detailed description including who is visible, what they're wearing, what they're doing, what they're saying (if applicable), setting details, objects shown, and any visual elements.


5. Write descriptions as if you're watching the video in real-time, noting everything visible and audible.

Based on the title and description, intelligently infer what would likely happen in each time segment. Consider the video type and create contextually appropriate, detailed descriptions.
"""

        response = self.gemini_model.generate_content(prompt)
        if not (response and response.text):
            return self.generate_scene_breakdown_fallback(video_info)

        scenes = []
        current_scene = ""
        for raw_line in response.text.split('\n'):
            line = raw_line.strip()
            if line.startswith("**[") and "]**:" in line:
                # A timestamp line closes the previous scene and opens a new one.
                if current_scene:
                    scenes.append(current_scene.strip())
                current_scene = line
            elif current_scene:
                # Continuation of the current scene's description. Text
                # before the first timestamp line is ignored.
                current_scene += "\n" + line

        if current_scene:
            scenes.append(current_scene.strip())

        return scenes if scenes else self.generate_scene_breakdown_fallback(video_info)

    except Exception as e:
        print(f"Gemini API error: {e}")
        return self.generate_scene_breakdown_fallback(video_info)
312
+
313
def generate_scene_breakdown_fallback(self, video_info):
    """Build a template-based scene breakdown without calling Gemini.

    The video is cut into at most 20 equal segments. The base segment
    length depends on the duration (3, 5, 10 or 15 seconds); when 20
    segments of that length would not reach the end of the video, the
    segment length is widened so the breakdown spans the whole duration.

    Args:
        video_info: Mapping with optional ``duration``, ``title``,
            ``description`` and ``uploader`` entries. Missing or ``None``
            values are treated as empty.

    Returns:
        list[str]: One ``**[M:SS-M:SS]**: description`` string per segment,
        or a single "Duration Unknown" entry when no positive duration is
        available.
    """
    max_scenes = 20

    # "or" rather than .get() defaults: a key may be present with a None value.
    duration = int(video_info.get('duration') or 0)
    title = (video_info.get('title') or '').lower()
    description = (video_info.get('description') or '').lower()
    uploader = video_info.get('uploader') or 'Content creator'

    if duration <= 0:
        return ["**[Duration Unknown]**: Unable to generate timestamped breakdown"]

    # Base segment length by video duration.
    if duration <= 60:
        segment_length = 3
    elif duration <= 300:
        segment_length = 5
    elif duration <= 900:
        segment_length = 10
    else:
        segment_length = 15

    # Without this the scene cap would stop the breakdown part-way through
    # the video and put the "closing" description in the middle of it.
    if segment_length * max_scenes < duration:
        segment_length = -(-duration // max_scenes)  # ceiling division

    num_segments = min(duration // segment_length + 1, max_scenes)
    video_type = self.detect_video_type_detailed(title, description)

    scenes = []
    for i in range(num_segments):
        start_time = i * segment_length
        end_time = min(start_time + segment_length - 1, duration)

        start_formatted = f"{start_time//60}:{start_time%60:02d}"
        end_formatted = f"{end_time//60}:{end_time%60:02d}"

        desc = self.generate_contextual_description(i, num_segments, video_type, uploader, title)
        scenes.append(f"**[{start_formatted}-{end_formatted}]**: {desc}")

    return scenes
352
+
353
def detect_video_type_detailed(self, title, description):
    """Classify a video into a coarse category from its title and description.

    Categories are checked in a fixed order and the first one with a
    keyword hit wins. Keywords are matched as substrings of the lowercased
    text, except for two short, ambiguous ones: ``test`` must start a word
    (so "latest" does not count) and ``vs`` must be a whole word.

    Args:
        title: Video title; ``None`` is treated as empty.
        description: Video description; ``None`` is treated as empty.

    Returns:
        str: One of ``'tutorial'``, ``'review'``, ``'vlog'``, ``'music'``,
        ``'entertainment'``, ``'news'``, ``'cooking'``, ``'fitness'`` or
        ``'general'``.
    """
    text = f"{title or ''} {description or ''}".lower()

    # (label, regex alternation) in priority order.
    categories = (
        ('tutorial', r'tutorial|how to|guide|learn|diy|step by step'),
        ('review', r'review|unboxing|\btest|comparison|\bvs\b'),
        ('vlog', r'vlog|daily|routine|day in|morning|skincare'),
        ('music', r'music|song|cover|lyrics|dance'),
        ('entertainment', r'comedy|funny|prank|challenge|reaction'),
        ('news', r'news|breaking|update|report'),
        ('cooking', r'cooking|recipe|food|kitchen'),
        ('fitness', r'workout|fitness|exercise|yoga'),
    )
    for label, keywords in categories:
        if re.search(keywords, text):
            return label
    return 'general'
375
+
376
def generate_contextual_description(self, scene_index, total_scenes, video_type, uploader, title):
    """Produce a generic scene description from position and video type.

    Args:
        scene_index: Zero-based index of the scene. Index 0 is treated as
            the opening scene; ``total_scenes - 1`` as the closing scene.
        total_scenes: Number of scenes in the breakdown.
        video_type: Category label such as ``'tutorial'``, ``'review'``,
            ``'vlog'``, ``'cooking'`` or ``'fitness'``; anything else gets
            a generic description.
        uploader: Accepted for interface compatibility; not used here.
        title: Video title, used only to choose how the presenter is
            referred to. ``None`` is treated as empty.

    Returns:
        str: A one-paragraph description of the scene.
    """
    title_text = (title or '').lower()

    presenter_desc = "The content creator"
    if 'woman' in title_text or 'girl' in title_text:
        presenter_desc = "A woman"
    elif re.search(r'\b(?:man|guy|guys)\b', title_text):
        # Whole words only: a plain substring test would also fire on
        # titles containing e.g. "manage", "human" or "many".
        presenter_desc = "A man"

    if scene_index == 0:
        # Opening scene
        if video_type == 'tutorial':
            return f"{presenter_desc} appears on screen, likely introducing themselves and the topic. They may be in a well-lit indoor setting, wearing casual clothing, and addressing the camera directly with a welcoming gesture."
        if video_type == 'vlog':
            return f"{presenter_desc} greets the camera with a smile, possibly waving. They appear to be in their usual filming location, wearing their typical style, and beginning their introduction to today's content."
        if video_type == 'review':
            return f"{presenter_desc} introduces the product or topic they'll be reviewing, likely holding or displaying the item. The setting appears organized, possibly with the product prominently featured."
        return f"{presenter_desc} appears on screen to begin the video, introducing the topic with engaging body language and clear speech directed at the audience."

    if scene_index == total_scenes - 1:
        # Closing scene
        if video_type == 'tutorial':
            return f"{presenter_desc} concludes the tutorial, possibly showing the final result. They may be thanking viewers, asking for engagement (likes/comments), and suggesting related content."
        if video_type == 'vlog':
            return f"{presenter_desc} wraps up their vlog, possibly reflecting on the day's events. They appear relaxed and are likely saying goodbye to viewers with a friendly gesture."
        return f"{presenter_desc} concludes the video with final thoughts, thanking viewers for watching, and encouraging engagement through likes, comments, and subscriptions."

    # Middle scenes - content-specific
    if video_type == 'tutorial':
        # The scene index doubles as the step number shown to the reader.
        step_num = scene_index
        return f"{presenter_desc} demonstrates step {step_num} of the process, showing specific techniques and explaining the procedure. They may be using tools or materials, with close-up shots of their hands working."
    if video_type == 'review':
        return f"{presenter_desc} examines different aspects of the product, pointing out features and sharing their opinions. They may be holding, using, or demonstrating the item while speaking to the camera."
    if video_type == 'vlog':
        return f"{presenter_desc} continues sharing their experience, possibly showing different locations or activities. The scene captures candid moments with natural lighting and casual interactions."
    if video_type == 'cooking':
        return f"{presenter_desc} works in the kitchen, preparing ingredients or cooking. They demonstrate techniques while explaining each step, with kitchen tools and ingredients visible on the counter."
    if video_type == 'fitness':
        return f"{presenter_desc} demonstrates exercise movements, likely in workout attire in a gym or home setting. They show proper form while providing instruction and motivation."
    return f"{presenter_desc} continues with the main content, engaging with the audience through clear explanations and demonstrations. The setting remains consistent with good lighting and clear audio."
427
+
428
def detect_video_type(self, title, description):
    """Return a display label for the video's type.

    Categories are checked in a fixed order and the first one with a
    keyword hit wins. Keywords are matched as substrings of the lowercased
    title and description, except for two short, ambiguous ones: ``ad``
    must be a whole word (optionally plural), so it no longer fires on
    "road", "read" or "download", and ``test`` must start a word, so it no
    longer fires on "latest".

    Args:
        title: Video title; ``None`` is treated as empty.
        description: Video description; ``None`` is treated as empty.

    Returns:
        str: A label such as ``"Music Video"`` or ``"General Content"``.
    """
    text = f"{title or ''} {description or ''}".lower()

    # (label, regex alternation) in priority order.
    categories = (
        ("Music Video", r'music|song|album|artist|band|lyrics'),
        ("Tutorial/Educational", r'tutorial|how to|guide|learn|teaching'),
        ("Entertainment/Comedy", r'funny|comedy|entertainment|vlog|challenge'),
        ("News/Information", r'news|breaking|report|update'),
        ("Review/Unboxing/Promotional", r'review|unboxing|\btest|comparison'),
        ("Commercial/Advertisement", r'commercial|\bads?\b|brand|product'),
    )
    for label, keywords in categories:
        if re.search(keywords, text):
            return label
    return "General Content"
446
+
447
def detect_background_music(self, video_info):
    """Guess a background-music style label from the video title.

    Only the title is inspected. Keyword groups are checked in a fixed
    order and the first hit wins. Keywords are matched as substrings of
    the lowercased title, except ``ad``, which must be a whole word
    (optionally plural) so that titles like "Road trip" are not labelled
    as commercials.

    Args:
        video_info: Mapping with an optional ``title`` entry; a missing or
            ``None`` title is treated as empty.

    Returns:
        str: A descriptive label; a generic one when nothing matches.
    """
    title = (video_info.get('title') or '').lower()

    # (label, regex alternation) in priority order.
    styles = (
        ("Original Music/Soundtrack - Primary audio content", r'music|song|soundtrack'),
        ("Upbeat Commercial Music - Designed to enhance brand appeal", r'commercial|\bads?\b|brand'),
        ("Minimal/No Background Music - Focus on instruction", r'tutorial|how to|guide'),
        ("Ambient Background Music - Complementary to narration", r'vlog|daily|life'),
    )
    for label, keywords in styles:
        if re.search(keywords, title):
            return label
    return "Background Music - Complementing video mood and pacing"
462
+
463
def detect_influencer_status(self, video_info):
    """Label the channel's reach from subscriber and view counts.

    The subscriber count is read from ``channel_follower_count`` (the
    field name yt-dlp documents), falling back to the legacy
    ``channel_followers`` key. Missing or ``None`` counts are treated as 0.

    Args:
        video_info: Mapping that may contain ``channel_follower_count``,
            ``channel_followers`` and ``view_count``.

    Returns:
        str: A tier label chosen by subscriber thresholds (10M, 1M, 100K,
        10K); ``"Viral Content Creator"`` when none apply but views exceed
        100,000; otherwise ``"Regular Content Creator"``.
    """
    subscriber_count = (
        video_info.get('channel_follower_count')
        or video_info.get('channel_followers')
        or 0
    )
    view_count = video_info.get('view_count') or 0

    if subscriber_count > 10000000:
        return "Mega Influencer (10M+ subscribers)"
    elif subscriber_count > 1000000:
        return "Major Influencer (1M+ subscribers)"
    elif subscriber_count > 100000:
        return "Mid-tier Influencer (100K+ subscribers)"
    elif subscriber_count > 10000:
        return "Micro Influencer (10K+ subscribers)"
    elif view_count > 100000:
        return "Viral Content Creator"
    else:
        return "Regular Content Creator"
480
+
481
def format_number(self, num):
    """Format a count compactly with a K, M or B suffix.

    Args:
        num: The number to format; ``None`` and 0 both yield ``"0"``.

    Returns:
        str: ``num`` scaled to one decimal place with a suffix when it is
        at least 1,000, otherwise ``str(num)``. A value that would round up
        to ``1000.0`` of a unit is shown in the next larger unit instead
        (999,999 gives ``"1.0M"``, not ``"1000.0K"``).
    """
    if num is None or num == 0:
        return "0"

    units = ((1_000_000_000, "B"), (1_000_000, "M"), (1_000, "K"))
    for index, (threshold, suffix) in enumerate(units):
        if num < threshold:
            continue
        scaled = num / threshold
        if index > 0 and round(scaled, 1) >= 1000:
            # Rounding would print "1000.0<suffix>"; promote to the larger unit.
            threshold, suffix = units[index - 1]
            scaled = num / threshold
        return f"{scaled:.1f}{suffix}"
    return str(num)
491
+
492
+ def format_video_info(self, video_info):
493
+ """Compact video information formatting with tabular layout"""
494
+ if not video_info:
495
+ return "❌ No video information available."
496
+
497
+ # Basic information
498
+ title = video_info.get("title", "Unknown")
499
+ uploader = video_info.get("uploader", "Unknown")
500
+ duration = video_info.get("duration", 0)
501
+ duration_str = f"{duration//60}:{duration%60:02d}" if duration else "Unknown"
502
+ view_count = video_info.get("view_count", 0)
503
+ like_count = video_info.get("like_count", 0)
504
+ comment_count = video_info.get("comment_count", 0)
505
+ upload_date = video_info.get("upload_date", "Unknown")
506
+
507
+ # Format upload date
508
+ if len(upload_date) == 8:
509
+ formatted_date = f"{upload_date[:4]}-{upload_date[4:6]}-{upload_date[6:8]}"
510
+ else:
511
+ formatted_date = upload_date
512
+
513
+ # Generate enhanced analysis
514
+ scene_descriptions = self.generate_scene_breakdown_gemini(video_info)
515
+ scene_table_html = format_scene_breakdown(scene_descriptions)
516
+ video_type = self.detect_video_type(title, video_info.get('description', ''))
517
+ background_music = self.detect_background_music(video_info)
518
+ influencer_status = self.detect_influencer_status(video_info)
519
+
520
+ # Calculate engagement metrics
521
+ engagement_rate = (like_count / view_count) * 100 if view_count > 0 else 0
522
+
523
+ # Generate compact report with contrasting background
524
+ report = f"""
525
+ <div style='font-family: "Roboto", "Segoe UI", "Open Sans", sans-serif; background-color: rgb(250, 250, 250); padding: 12px; border-radius: 6px; border: 1px solid #ddd;'>
526
+ <!-- TITLE -->
527
+ <div style='text-align: center; margin-bottom: 8px;'>
528
+ <h1 style='font-size: 16px; color: #FF6F00; margin-bottom: 3px; line-height: 1.2;'>{title}</h1>
529
+ <div style='height: 2px; background-color:rgb(211, 211, 211); width: 80px; margin: 0 auto;'></div>
530
+ </div>
531
+ <!-- INFO GRID (2 Columns) -->
532
+ <div style='display: grid; grid-template-columns: 1fr 1fr; gap: 8px; margin-bottom: 12px;'>
533
+ <!-- LEFT: BASIC INFO -->
534
+ <div style='background-color:rgb(211, 211, 211); border: 1px solid #CCC; border-left: 3px solid #FF6F00; border-radius: 4px; padding: 6px; max-width: 100%;'>
535
+ <h3 style='margin: 0 0 4px 0; font-size: 14px; background-color:#D3D3D3; color: #000000 !important; line-height: 1.2;'>Basic Information</h3>
536
+ <table style='width: 100%; font-size: 11px; color: #212121; border-spacing: 0;'>
537
+ <tr><td style='padding: 0px 1px;'><strong>Creator</strong></td><td style='padding: 1px 2px;'>{uploader[:20]}{'...' if len(uploader) > 20 else ''}</td></tr>
538
+ <tr><td style='padding: 0px 1px;'><strong>Date</strong></td><td style='padding: 1px 2px;'>{formatted_date}</td></tr>
539
+ <tr><td style='padding: 0px 1px;'><strong>Duration</strong></td><td style='padding: 1px 2px;'>{duration_str}</td></tr>
540
+ </table>
541
+ </div>
542
+ <!-- RIGHT: METRICS + ANALYSIS in a NESTED GRID -->
543
+ <div style='display: grid; grid-template-columns: 1fr 1fr; gap: 6px;'>
544
+ <!-- METRICS -->
545
+ <div style='background-color:rgb(211, 211, 211); border: 1px solid #CCC; border-left: 3px solid #FF6F00; border-radius: 4px; padding: 6px;'>
546
+ <h3 style='margin: 0 0 4px 0; font-size: 14px; color: #000000 !important; line-height: 1.2;'>Metrics</h3>
547
+ <table style='width: 100%; font-size: 11px; color: #212121;'>
548
+ <tr><td style='padding: 0px 1px;'><strong>Views</strong></td><td style='padding: 1px 2px;'>{self.format_number(view_count)}</td></tr>
549
+ <tr><td style='padding: 0px 1px;'><strong>Likes</strong></td><td style='padding: 1px 2px;'>{self.format_number(like_count)}</td></tr>
550
+ <tr><td style='padding: 0px 1px;'><strong>Comments</strong></td><td style='padding: 1px 2px;'>{self.format_number(comment_count)}</td></tr>
551
+ <tr><td style='padding: 0px 1px;'><strong>Engagement</strong></td><td style='padding: 1px 2px;'>{engagement_rate:.2f}%</td></tr>
552
+ </table>
553
+ </div>
554
+ <!-- ANALYSIS -->
555
+ <div style='background-color:rgb(211, 211, 211); border: 1px solid #CCC; border-left: 3px solid #FF6F00; border-radius: 4px; padding: 6px;'>
556
+ <h3 style='margin: 0 0 4px 0; font-size: 14px; color: #000000 !important; line-height: 1.2;'>Analysis</h3>
557
+ <table style='width: 100%; font-size: 11px; color: #212121;'>
558
+ <tr><td style='padding: 0px 1px;'><strong>Type</strong></td><td style='padding: 1px 2px;'>{video_type[:15]}{'...' if len(video_type) > 15 else ''}</td></tr>
559
+ <tr><td style='padding: 0px 1px;'><strong>Music</strong></td><td style='padding: 1px 2px;'>{background_music[:25]}{'...' if len(background_music) > 25 else ''}</td></tr>
560
+ <tr><td style='padding: 0px 1px;'><strong>Status</strong></td><td style='padding: 1px 2px;'>{influencer_status[:25]}{'...' if len(influencer_status) > 25 else ''}</td></tr>
561
+ </table>
562
+ </div>
563
+ </div>
564
+ </div>
565
+ <!-- SCENE-BY-SCENE SECTION (FULL WIDTH) -->
566
+ <div style='background-color:rgb(211, 211, 211); border: 1px solid #CCC; border-radius: 4px; padding: 8px;'>
567
+ <h3 style='text-align: center; font-size: 16px; color: #000000 !important; margin-bottom: 6px; line-height: 1.2;'> Scene-by-Scene Breakdown</h3>
568
+ {scene_table_html}
569
+ </div>
570
+ </div>
571
+ """
572
+ return report.strip()
573
+
574
def get_video_info(self, url, progress=gr.Progress(), cookiefile=None):
    """Extract metadata for a single YouTube video without downloading it.

    Args:
        url: Video URL. Must be non-blank and accepted by
            ``self.is_valid_youtube_url``.
        progress: Progress callback invoked as ``progress(fraction, desc=...)``.
        cookiefile: Optional path to a cookies file. It is handed to yt_dlp
            only when the path exists on disk.

    Returns:
        A ``(info, message)`` tuple. ``info`` is whatever
        ``YoutubeDL.extract_info(url, download=False)`` returned on success
        and ``None`` on every failure path; ``message`` is a user-facing
        status string. A tuple is returned in all cases so callers can
        always unpack the result.
    """
    if not url or not url.strip():
        return None, "❌ Please enter a YouTube URL"

    if not self.is_valid_youtube_url(url):
        return None, "❌ Invalid YouTube URL format"

    try:
        progress(0.1, desc="Initializing YouTube extractor...")

        ydl_opts = {
            'noplaylist': True,
            'extract_flat': False,
        }

        if cookiefile and os.path.exists(cookiefile):
            ydl_opts['cookiefile'] = cookiefile
        else:
            print(f"⚠️ Cookie file not provided or not found: {cookiefile}")

        # Log the effective yt_dlp options to help diagnose extraction issues.
        print("πŸ” yt_dlp options:")
        print(json.dumps(ydl_opts, indent=2))

        progress(0.5, desc="Extracting video metadata...")

        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=False)

        progress(1.0, desc="βœ… Analysis complete!")

        return info, "βœ… Video information extracted successfully"

    except Exception as e:
        error_msg = str(e)
        print(f"❌ yt_dlp extraction error: {error_msg}")

        # Match on the prefix "This content isn" so the check works no matter
        # how the apostrophe in "isn't available" is encoded in the message.
        unavailable_markers = ("Video unavailable", "This content isn")
        if any(marker in error_msg for marker in unavailable_markers):
            return None, (
                "❌ This video is unavailable or restricted. "
                "Please check if it's private, deleted, age-restricted, or try again with a valid cookies.txt file."
            )

        # Previously only cookie-related errors produced a return value; any
        # other exception fell off the end and returned a bare None, which
        # callers cannot unpack. Report every remaining error the same way.
        return None, f"❌ Error: {error_msg}"
619
+
620
+
621
def download_video(self, url, quality="best", audio_only=False, progress=gr.Progress(), cookiefile=None):
    """Download a video (or only its audio) into the temp download directory.

    The file is written under ``self.temp_downloads`` and then copied, on a
    best-effort basis, into ``self.downloads_folder``.

    Args:
        url: Video URL. Must be non-blank and accepted by
            ``self.is_valid_youtube_url``.
        quality: ``"best"``, ``"720p"`` or ``"480p"``; any other value selects
            yt_dlp's plain ``best`` format. Ignored when ``audio_only`` is set.
        audio_only: When true, request the best audio stream and convert it
            to MP3 through the ``FFmpegExtractAudio`` post-processor.
        progress: Progress callback invoked as ``progress(fraction, desc=...)``.
        cookiefile: Optional path to a cookies file, used only if it exists.

    Returns:
        ``(path, message)``: the path of the file in the temp directory and a
        status message on success, or ``(None, error_message)`` on failure.
    """
    if not url or not url.strip():
        return None, "❌ Please enter a YouTube URL"

    if not self.is_valid_youtube_url(url):
        return None, "❌ Invalid YouTube URL format"

    try:
        progress(0.1, desc="Preparing download...")

        # The timestamp is embedded in the output file name so the finished
        # file can be located in the temp directory afterwards.
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

        ydl_opts = {
            'outtmpl': os.path.join(self.temp_downloads, f'%(title)s_{timestamp}.%(ext)s'),
            'noplaylist': True,
        }

        if audio_only:
            ydl_opts['format'] = 'bestaudio/best'
            ydl_opts['postprocessors'] = [{
                'key': 'FFmpegExtractAudio',
                'preferredcodec': 'mp3',
                'preferredquality': '192',
            }]
        else:
            format_by_quality = {
                "best": 'best[height<=1080]',
                "720p": 'best[height<=720]',
                "480p": 'best[height<=480]',
            }
            ydl_opts['format'] = format_by_quality.get(quality, 'best')

        if cookiefile and os.path.exists(cookiefile):
            ydl_opts['cookiefile'] = cookiefile

        def progress_hook(d):
            status = d.get('status')
            if status == 'downloading':
                # yt_dlp may supply total_bytes as None (size unknown), so a
                # key-presence check is not enough; fall back to the estimate
                # and only divide when a positive size is actually known.
                total = d.get('total_bytes') or d.get('total_bytes_estimate')
                if total:
                    percent = min((d.get('downloaded_bytes') or 0) / total * 100, 100.0)
                    progress(0.1 + (percent / 100) * 0.7, desc=f"Downloading... {percent:.1f}%")
                else:
                    progress(0.5, desc="Downloading...")
            elif status == 'finished':
                progress(0.8, desc="Processing download...")

        ydl_opts['progress_hooks'] = [progress_hook]

        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.extract_info(url, download=True)

        progress(0.9, desc="Copying to Downloads folder...")

        # Locate the downloaded file by the timestamp embedded in its name.
        downloaded_file_temp = next(
            (
                os.path.join(self.temp_downloads, name)
                for name in os.listdir(self.temp_downloads)
                if timestamp in name
            ),
            None,
        )

        if not downloaded_file_temp:
            return None, "❌ Downloaded file not found in temp directory"

        final_filename = os.path.basename(downloaded_file_temp)
        final_path = os.path.join(self.downloads_folder, final_filename)

        # Copying is best-effort: a failure is reported in the status message
        # but the temp file is still returned.
        try:
            shutil.copy2(downloaded_file_temp, final_path)
            copy_success = True
        except Exception as e:
            print(f"Warning: Could not copy to Downloads folder: {e}")
            copy_success = False

        progress(1.0, desc="βœ… Download complete!")

        success_msg = "\n".join([
            "βœ… Download successful!",
            f"πŸ“ Temp file (for download): {os.path.basename(downloaded_file_temp)}",
            f"πŸ“ Permanent location: {final_path if copy_success else 'Copy failed'}",
            f"🎯 File size: {os.path.getsize(downloaded_file_temp) / (1024*1024):.1f} MB",
        ])

        return downloaded_file_temp, success_msg

    except Exception as e:
        return None, f"❌ Download failed: {str(e)}"
713
+
714
# Module-level downloader instance shared by the Gradio callback functions below.
downloader = YouTubeDownloader()
716
+
717
def configure_api_key(api_key):
    """Apply a Gemini API key and report whether the main UI should be shown.

    Returns a ``(status_message, visibility_update)`` pair: the message from
    ``downloader.configure_gemini`` (or a prompt when the key is blank) and a
    ``gr.update`` that is visible only when configuration succeeded.
    """
    cleaned_key = api_key.strip() if api_key else ""
    if not cleaned_key:
        return "❌ Please enter a valid Google API key", gr.update(visible=False)

    ok, status_text = downloader.configure_gemini(cleaned_key)

    # Reveal the main interface only after a successful configuration.
    return status_text, gr.update(visible=bool(ok))
728
+
729
def analyze_with_cookies(url, cookies_file, progress=gr.Progress()):
    """Analyze a video and return the report HTML, or an error string.

    On success the generated report is also stored in ``cached_reports``
    under ``url``.
    """
    try:
        progress(0.05, desc="Starting analysis...")

        # Forward the cookies path only when the file is actually present.
        cookiefile = None
        if cookies_file and os.path.exists(cookies_file):
            cookiefile = cookies_file

        info, msg = downloader.get_video_info(url, progress=progress, cookiefile=cookiefile)

        if not info:
            return f"❌ Analysis Failed: {msg}"

        progress(0.95, desc="Generating comprehensive report...")
        report_html = downloader.format_video_info(info)
        cached_reports[url] = report_html  # Cache the result
        progress(1.0, desc="βœ… Complete!")
        return report_html

    except Exception as exc:
        return f"❌ System Error: {exc}"
 
748
 
749
 
750
def analyze_and_generate_pdf(url, cookies_file, progress=None):
    """Write the cached report for ``url`` to a PDF file in the temp directory.

    Only a report already present in ``cached_reports`` is used; no new
    analysis is run. ``cookies_file`` is accepted but not used here.

    Returns the path of the written PDF, or ``None`` when there is no cached
    report, the PDF buffer is ``None``, or any exception occurs.
    """

    def notify(fraction, text):
        # The progress callback is optional for this function.
        if progress:
            progress(fraction, desc=text)

    try:
        notify(0.1, "Checking cached analysis...")

        if url not in cached_reports:
            print("❌ No cached report found.")
            return None

        report_html = cached_reports[url]
        notify(0.8, "Generating PDF...")

        pdf_buffer = generate_pdf_from_html(report_html)
        if pdf_buffer is None:
            print("❌ PDF buffer is empty.")
            return None

        # A random hex suffix gives each generated report its own file name.
        pdf_name = f"analysis_report_{uuid.uuid4().hex}.pdf"
        pdf_path = os.path.join(tempfile.gettempdir(), pdf_name)
        with open(pdf_path, "wb") as pdf_out:
            pdf_out.write(pdf_buffer.read())

        notify(1.0, "βœ… PDF ready!")
        return pdf_path

    except Exception as exc:
        print(f"❌ Error during PDF generation: {exc}")
        return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
777
 
778
def download_with_cookies(url, quality, audio_only, cookies_file, progress=gr.Progress()):
    """Run a download through the shared downloader.

    Returns ``(file_path, message)``; ``file_path`` is ``None`` whenever the
    downloader reports no file or an exception is raised.
    """
    try:
        progress(0.05, desc="Preparing download...")

        # Forward the cookies path only when the file is actually present.
        cookies_present = bool(cookies_file) and os.path.exists(cookies_file)
        cookiefile = cookies_file if cookies_present else None

        file_path, msg = downloader.download_video(url, quality, audio_only, progress=progress, cookiefile=cookiefile)

        if not file_path:
            return None, msg
        return file_path, msg

    except Exception as exc:
        return None, f"❌ System Error: {exc}"
796
+
797
def create_interface():
    """Build the Gradio Blocks application and return it.

    The layout has an API-key section, a main group (hidden until a key is
    configured or fallback mode is chosen) holding the URL/cookies inputs and
    the analysis and download tabs, a fallback-mode button, and a static help
    panel.
    """
    # NOTE(review): the nesting of the ``with`` blocks below was reconstructed
    # from a source whose indentation was lost — confirm against the real file.
    custom_css = """
    /* Main dark theme background and text */
    .gradio-container, .app, body {
    background-color: #1a1a1a !important;
    color: #87CEEB !important;
    font-weight: bold !important;
    }
    /* πŸ”΅ Dark blue overrides for key labels */
    h3, .gr-group h3, .gradio-container h3 {
    color: #87CEEB !important;
    }
    label, .gr-textbox label, .gr-file label, .gr-dropdown label, .gr-checkbox label {
    color: #00008B !important;
    font-weight: bold !important;
    }
    .gr-file .file-name {
    color: #00008B !important;
    font-weight: bold !important;
    }
    /* Make tab labels dark blue too */
    .gr-tab-nav button {
    color: #00008B !important;
    }
    .gr-tab-nav button.selected {
    background-color: #FF8C00 !important;
    color: #000000 !important;
    }
    /* Light blue text for API status */
    .light-blue-text textarea {
    color: #87CEEB !important;
    background-color: #2a2a2a !important;
    }
    .gr-file {
    background-color: #2a2a2a !important;
    border: 2px dashed #444 !important;
    }
    .gr-group, .gr-form, .gr-row {
    background-color: #1a1a1a !important;
    border: 1px solid #444 !important;
    border-radius: 10px;
    padding: 15px;
    }
    """

    help_panel_html = """
    <div style="margin-top: 20px; padding: 15px; background-color: #2a2a2a; border-radius: 10px; border-left: 5px solid #FF8C00; color: #87CEEB !important;">
    <h3 style="color: #87CEEB !important; font-weight: bold;">πŸ”‘ How to Get Google API Key:</h3>
    <ol style="color: #87CEEB !important; font-weight: bold;">
    <li style="color: #87CEEB !important;">Go to <a href="https://console.cloud.google.com/" target="_blank" style="color: #87CEEB !important;">Google Cloud Console</a></li>
    <li style="color: #87CEEB !important;">Create a new project or select an existing one</li>
    <li style="color: #87CEEB !important;">Enable the "Generative Language API"</li>
    <li style="color: #87CEEB !important;">Go to "Credentials" and create an API key</li>
    <li style="color: #87CEEB !important;">Copy the API key and paste it above</li>
    </ol>
    <h3 style="color: #87CEEB !important; font-weight: bold;">✨ Benefits of using Gemini API:</h3>
    <ul style="color: #87CEEB !important; font-weight: bold;">
    <li style="color: #87CEEB !important;">πŸ€– AI-powered scene descriptions with contextual understanding</li>
    <li style="color: #87CEEB !important;">🎯 More accurate content type detection</li>
    <li style="color: #87CEEB !important;">πŸ“Š Enhanced analysis based on video content</li>
    <li style="color: #87CEEB !important;">⏰ Intelligent timestamp segmentation</li>
    </ul>
    </div>
    """

    with gr.Blocks(
        css=custom_css,
        theme=gr.themes.Soft(),
        title="πŸ“Š YouTube Video Analyzer & Downloader"
    ) as demo:

        # --- API key configuration -------------------------------------
        with gr.Group():
            gr.HTML("<h3>πŸ”‘ Google Gemini API Configuration</h3>")
            with gr.Row():
                key_box = gr.Textbox(
                    label="πŸ”‘ Google API Key",
                    placeholder="Enter your Google API Key for enhanced AI analysis...",
                    type="password",
                    value=""
                )
                configure_button = gr.Button("πŸ”§ Configure API", variant="secondary")

            status_box = gr.Textbox(
                label="API Status",
                value="❌ Gemini API not configured - Using fallback analysis",
                interactive=False,
                lines=1,
                elem_classes="light-blue-text"
            )

        # --- Main interface: hidden until a key is configured or fallback
        # mode is selected -----------------------------------------------
        with gr.Group(visible=False) as main_panel:
            with gr.Row():
                url_box = gr.Textbox(
                    label="πŸ”— YouTube URL",
                    placeholder="Paste your YouTube video URL here...",
                    value=""
                )

                cookies_upload = gr.File(
                    label="πŸͺ Upload cookies.txt (Mandatory)",
                    file_types=[".txt"],
                    type="filepath"
                )

            with gr.Tabs():
                with gr.TabItem("πŸ“Š Video Analysis"):
                    analyze_button = gr.Button("πŸ” Analyze Video", variant="primary")

                    report_view = gr.HTML(
                        label="πŸ“Š Analysis Report",
                    )
                    pdf_button = gr.Button("πŸ“„ Download Report as PDF", variant="secondary")
                    pdf_output = gr.File(label="πŸ“₯ PDF Report", visible=True, interactive=False)

                    analyze_button.click(
                        fn=analyze_with_cookies,
                        inputs=[url_box, cookies_upload],
                        outputs=report_view,
                        show_progress=True
                    )
                    pdf_button.click(
                        fn=analyze_and_generate_pdf,
                        inputs=[url_box, cookies_upload],
                        outputs=pdf_output,
                        show_progress=True
                    )

                with gr.TabItem("⬇️ Video Download"):
                    with gr.Row():
                        quality_choice = gr.Dropdown(
                            choices=["best", "720p", "480p"],
                            value="best",
                            label="πŸ“Ί Video Quality"
                        )

                        audio_only_toggle = gr.Checkbox(
                            label="🎡 Audio Only (MP3)",
                            value=False
                        )

                    download_button = gr.Button("⬇️ Download Video", variant="primary")

                    status_output = gr.Textbox(
                        label="πŸ“₯ Download Status",
                        lines=5,
                        show_copy_button=True
                    )

                    file_output = gr.File(
                        label="πŸ“ Downloaded File",
                        visible=False
                    )

                    def download_and_update(url, quality, audio_only, cookies_file, progress=gr.Progress()):
                        """Run the download and show the file widget only when a file exists."""
                        file_path, status = download_with_cookies(url, quality, audio_only, cookies_file, progress)
                        file_ready = bool(file_path) and os.path.exists(file_path)
                        if not file_ready:
                            return status, gr.update(visible=False)
                        return status, gr.update(value=file_path, visible=True)

                    download_button.click(
                        fn=download_and_update,
                        inputs=[url_box, quality_choice, audio_only_toggle, cookies_upload],
                        outputs=[status_output, file_output],
                        show_progress=True
                    )

        # Configuring the key updates the status text and reveals the main panel.
        configure_button.click(
            fn=configure_api_key,
            inputs=[key_box],
            outputs=[status_box, main_panel]
        )

        # Fallback mode: reveal the main panel without a configured API key.
        with gr.Row():
            fallback_button = gr.Button("πŸš€ Use Without Gemini API (Fallback Mode)", variant="secondary")

        def show_fallback_interface():
            """Return the fallback status text and make the main panel visible."""
            return "⚠️ Using fallback analysis mode", gr.update(visible=True)

        fallback_button.click(
            fn=show_fallback_interface,
            outputs=[status_box, main_panel]
        )

        gr.HTML(help_panel_html)

    return demo
988
if __name__ == "__main__":
    import atexit

    demo = create_interface()
    # Run the downloader's cleanup hook when the interpreter exits.
    atexit.register(downloader.cleanup)
    demo.launch(debug=True, show_error=True)