developer28 committed on
Commit
0921abd
·
verified ·
1 Parent(s): 16e2e72

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +297 -1124
app.py CHANGED
@@ -1,1167 +1,340 @@
1
def format_scene_breakdown(scenes):
    """Render scene strings as a dark-themed, two-column HTML table.

    Args:
        scenes: Iterable of strings shaped like
            ``**[MM:SS-MM:SS]**: description``. A description may continue
            over several lines. Entries that do not start with the
            ``**[...]**:`` marker are skipped.

    Returns:
        An HTML ``<table>`` string with a header row plus one row per
        recognised scene (header only when nothing matches).
    """
    # Local import keeps this block self-contained.
    from html import escape

    # DOTALL lets the description group keep continuation lines; without it
    # everything after the first newline of a scene was silently dropped.
    scene_pattern = re.compile(r"\*\*\[(.*?)\]\*\*:\s*(.*)", re.DOTALL)

    parts = [
        """
    <table style='width:100%; border-collapse: collapse; background-color:#1a1a1a; color: #FFFFFF; border: 2px solid #FF8C00; font-size: 16px;box-shadow: 0 4px 8px rgba(0,0,0,0.3);'>
        <tr style='background-color:#FF8C00; color: #000000;'>
            <th style='padding: 8px; border: 1px solid #FF8C00; color: #000000; font-weight: bold;'>⏱️ Timestamp</th>
            <th style='padding: 8px; border: 1px solid #FF8C00; color: #000000; font-weight: bold;'>📝 Description</th>
        </tr>
    """
    ]

    for scene in scenes:
        match = scene_pattern.match(scene)
        if not match:
            continue
        # The text comes from a language model / video metadata, so escape it
        # before embedding it in markup; line breaks become <br>.
        timestamp = escape(match.group(1).strip())
        description = escape(match.group(2).strip()).replace("\n", "<br>")
        parts.append(
            f"""
        <tr style='background-color:#1a1a1a;'>
            <td style='padding: 8px; border: 1px solid #444; color: #87CEEB; font-weight: bold;font-size: 16px;vertical-align: top;'>{timestamp}</td>
            <td style='padding: 8px; border: 1px solid #444; color: #87CEEB; font-weight: bold;font-size: 16px;line-height: 1.4;'>{description}</td>
        </tr>
        """
        )

    parts.append("</table>")
    return "".join(parts)
26
-
27
-
28
- import gradio as gr
29
- import yt_dlp
30
  import os
31
  import tempfile
32
- import shutil
33
- from pathlib import Path
34
  import re
35
- import uuid
36
- import json
37
- from datetime import datetime
38
- import google.generativeai as genai
39
- from xhtml2pdf import pisa
40
- from io import BytesIO
41
-
42
 
43
def generate_pdf_from_html(html_content):
    """Convert the dark-themed HTML report into a print-friendly PDF.

    The report's inline styles use gradients, rgba colours, shadows and CSS
    grid; these are swapped for plain light-theme equivalents before the
    markup is wrapped in a full HTML document and passed to xhtml2pdf.

    Args:
        html_content: HTML fragment produced for the on-screen report.

    Returns:
        A ``BytesIO`` positioned at offset 0 that holds the PDF bytes, or
        ``None`` when xhtml2pdf reports an error or any exception is raised.
    """
    # Ordered (old, new) pairs, applied one after another exactly like a
    # chain of str.replace calls.
    substitutions = (
        ("background: linear-gradient(135deg, #2d3748, #1a202c);", "background-color: #f5f5f5;"),
        ("background: linear-gradient(90deg, #FF8C00, #87CEEB);", "background-color: #FF8C00;"),
        ("rgba(135, 206, 235, 0.1)", "#f9f9f9"),
        ("rgba(0, 0, 0, 0.3)", "#ffffff"),
        ("text-shadow: 2px 2px 4px rgba(0,0,0,0.5);", ""),
        ("box-shadow: 0 8px 32px rgba(255, 140, 0, 0.3);", ""),
        ("box-shadow: 0 4px 8px rgba(0,0,0,0.3);", ""),
        ("display: grid; grid-template-columns: 1fr 1fr 1fr; gap: 15px;", "display: block;"),
        ("background-color:#1a1a1a;", "background-color:#ffffff;"),
        ("color: #FFFFFF;", "color: #000000;"),
        ("color: #87CEEB;", "color: #000080;"),
        # Same colour written without the space; otherwise that text stays
        # sky blue on the white page.
        ("color:#87CEEB;", "color:#000080;"),
        ("border: 2px solid #FF8C00;", "border: 1px solid #FF8C00;"),
    )

    try:
        simplified_html = html_content
        for old, new in substitutions:
            simplified_html = simplified_html.replace(old, new)

        # Catch-all for any style attribute that still carries the dark theme
        # (e.g. a declaration without its trailing semicolon).
        simplified_html = re.sub(r"style='[^']*background-color:#1a1a1a[^']*'", "style='background-color:#ffffff;'", simplified_html)
        simplified_html = re.sub(r"style='[^']*color: #87CEEB[^']*'", "style='color: #000080; padding: 8px;'", simplified_html)

        # Wrap the fragment in a complete document with PDF-friendly CSS.
        pdf_html = f"""
        <!DOCTYPE html>
        <html>
        <head>
        <meta charset="UTF-8">
        <style>
        @page {{
            size: A4;
            margin: 1cm;
        }}
        body {{
            font-family: Arial, sans-serif;
            font-size: 12px;
            line-height: 1.4;
            color: #000000;
            background-color: #ffffff;
        }}
        .report-container {{
            background-color: #ffffff;
            padding: 15px;
            border: 2px solid #FF8C00;
            border-radius: 8px;
        }}
        .header {{
            text-align: center;
            color: #FF8C00;
            font-size: 20px;
            font-weight: bold;
            margin-bottom: 15px;
            border-bottom: 2px solid #FF8C00;
            padding-bottom: 8px;
        }}
        .info-card {{
            background-color: #f9f9f9;
            padding: 12px;
            margin: 8px 0;
            border-left: 3px solid #87CEEB;
            border-radius: 4px;
            page-break-inside: avoid;
        }}
        .info-title {{
            color: #000080;
            font-size: 14px;
            font-weight: bold;
            margin-bottom: 8px;
        }}
        table {{
            width: 100%;
            border-collapse: collapse;
            margin: 8px 0;
            page-break-inside: avoid;
        }}
        th, td {{
            padding: 6px 8px;
            border: 1px solid #cccccc;
            text-align: left;
            vertical-align: top;
            font-size: 11px;
        }}
        th {{
            background-color: #FF8C00;
            color: #000000;
            font-weight: bold;
        }}
        tr:nth-child(even) {{
            background-color: #f9f9f9;
        }}
        .scene-table {{
            margin-top: 15px;
        }}
        .scene-header {{
            color: #000080;
            font-size: 16px;
            font-weight: bold;
            text-align: center;
            margin-bottom: 10px;
        }}
        div[style*="display: grid"] {{
            display: block !important;
        }}
        div[style*="grid-template-columns"] > div {{
            display: block !important;
            margin-bottom: 10px !important;
            width: 100% !important;
        }}
        </style>
        </head>
        <body>
        <div class="report-container">
        {simplified_html}
        </div>
        </body>
        </html>
        """

        result = BytesIO()
        pisa_status = pisa.CreatePDF(pdf_html, dest=result)

        if pisa_status.err:
            print(f"PDF generation error: {pisa_status.err}")
            return None

        # Rewind so the caller can read the PDF from the start.
        result.seek(0)
        return result

    except Exception as e:
        # Best-effort: any failure is reported and signalled with None.
        print(f"PDF generation exception: {e}")
        return None
203
-
204
class YouTubeDownloader:
    """Extract YouTube metadata, build an HTML analysis report and download media.

    Metadata extraction and downloads go through ``yt_dlp``; the scene
    breakdown uses a Gemini model when one has been configured and a
    template-based fallback otherwise.
    """

    def __init__(self):
        """Create the working directories and start without a Gemini model."""
        self.download_dir = tempfile.mkdtemp()
        # Downloads land in a temp directory first.
        self.temp_downloads = tempfile.mkdtemp(prefix="youtube_downloads_")
        # Finished files are also copied to a folder under the user's home.
        self.downloads_folder = os.path.join(os.path.expanduser("~"), "Downloads", "YouTube_Downloads")
        os.makedirs(self.downloads_folder, exist_ok=True)
        self.gemini_model = None

    def configure_gemini(self, api_key):
        """Configure the Gemini client; return ``(success, message)``."""
        try:
            genai.configure(api_key=api_key)
            self.gemini_model = genai.GenerativeModel(model_name="gemini-1.5-flash-latest")
            return True, "✅ Gemini API configured successfully!"
        except Exception as e:
            return False, f"❌ Failed to configure Gemini API: {str(e)}"

    def cleanup(self):
        """Remove the temporary directories created in ``__init__``.

        Each directory is handled separately so a failure on one does not
        leave the other behind.
        """
        targets = (
            ("download_dir", "✅ Cleaned up temporary directory: "),
            ("temp_downloads", "✅ Cleaned up temp downloads directory: "),
        )
        for attr, message in targets:
            path = getattr(self, attr, None)
            try:
                if path and os.path.exists(path):
                    shutil.rmtree(path)
                    print(f"{message}{path}")
            except Exception as e:
                print(f"⚠️ Warning: Could not clean up temporary directory: {e}")

    def is_valid_youtube_url(self, url):
        """Return True when *url* (ignoring surrounding whitespace) looks like a YouTube link."""
        if not url:
            return False
        youtube_regex = re.compile(
            r'(https?://)?(www\.)?(youtube|youtu|youtube-nocookie)\.(com|be)/'
            r'(watch\?v=|embed/|v/|.+\?v=)?([^&=%\?]{11})'
        )
        return youtube_regex.match(url.strip()) is not None

    def generate_scene_breakdown_gemini(self, video_info):
        """Ask Gemini for a timestamped scene breakdown.

        Falls back to :meth:`generate_scene_breakdown_fallback` when no model
        is configured, the response is empty or unparsable, or the API call
        raises.

        Returns:
            A list of strings shaped like ``**[MM:SS-MM:SS]**: description``.
        """
        if not self.gemini_model:
            return self.generate_scene_breakdown_fallback(video_info)

        try:
            duration = video_info.get('duration', 0)
            title = video_info.get('title') or ''
            # The description may be missing (None); cap it to keep the prompt bounded.
            description = (video_info.get('description') or '')[:1500]

            if not duration:
                return ["**[Duration Unknown]**: Unable to generate timestamped breakdown - video duration not available"]

            prompt = f"""
            Analyze this YouTube video and create a highly detailed, scene-by-scene breakdown with precise timestamps and specific descriptions:

            Title: {title}
            Duration: {duration} seconds
            Description: {description}

            IMPORTANT INSTRUCTIONS:
            1. Create detailed scene descriptions that include:
            - Physical appearance of people (age, gender, clothing, hair, etc.)
            - Exact actions being performed
            - Dialogue or speech (include actual lines if audible, or infer probable spoken lines based on actions and setting; format them as "Character: line...")
            - Setting and environment details
            - Props, objects, or products being shown
            - Visual effects, text overlays, or graphics
            - Mood, tone, and atmosphere
            - Camera movements or angles (if apparent)
            2. Dialogue Emphasis:
            - Include short dialogue lines in **every scene** wherever plausible.
            - Write lines like: Character: "Actual or inferred line..."
            - If dialogue is not available, intelligently infer probable phrases (e.g., "Welcome!", "Try this now!", "It feels amazing!").
            - Do NOT skip dialogue unless it's clearly impossible.

            3. Timestamp Guidelines:
            - For videos under 1 minute: 2-3 second segments
            - For videos 1-5 minutes: 3-5 second segments
            - For videos 5-15 minutes: 5-10 second segments
            - For videos over 15 minutes: 10-15 second segments
            - Maximum 20 scenes total for longer videos

            4. Format each scene EXACTLY like this:
            **[MM:SS-MM:SS]**: Detailed description including who is visible, what they're wearing, what they're doing, what they're saying (if applicable), setting details, objects shown, and any visual elements.


            5. Write descriptions as if you're watching the video in real-time, noting everything visible and audible.

            Based on the title and description, intelligently infer what would likely happen in each time segment. Consider the video type and create contextually appropriate, detailed descriptions.
            """

            response = self.gemini_model.generate_content(prompt)

            if not (response and response.text):
                return self.generate_scene_breakdown_fallback(video_info)

            # A line starting with the "**[...]**:" marker opens a new scene;
            # any other line continues the scene currently being collected.
            scenes = []
            current_scene = ""
            for raw_line in response.text.split('\n'):
                line = raw_line.strip()
                if line.startswith("**[") and "]**:" in line:
                    if current_scene:
                        scenes.append(current_scene.strip())
                    current_scene = line
                elif current_scene:
                    current_scene += "\n" + line

            if current_scene:
                scenes.append(current_scene.strip())

            return scenes if scenes else self.generate_scene_breakdown_fallback(video_info)

        except Exception as e:
            print(f"Gemini API error: {e}")
            return self.generate_scene_breakdown_fallback(video_info)

    def generate_scene_breakdown_fallback(self, video_info):
        """Build a template-based scene list when Gemini is unavailable.

        Segment length depends on the duration (3/5/10/15 seconds) and at
        most 20 segments are produced.
        """
        # Duration may be missing or a float; integer seconds keep the
        # ``:02d`` timestamp formatting valid.
        duration = int(video_info.get('duration') or 0)
        title = (video_info.get('title') or '').lower()
        description = (video_info.get('description') or '').lower()
        uploader = video_info.get('uploader') or 'Content creator'

        if not duration:
            return ["**[Duration Unknown]**: Unable to generate timestamped breakdown"]

        if duration <= 60:
            segment_length = 3
        elif duration <= 300:
            segment_length = 5
        elif duration <= 900:
            segment_length = 10
        else:
            segment_length = 15

        num_segments = min(duration // segment_length + 1, 20)
        video_type = self.detect_video_type_detailed(title, description)

        scenes = []
        for i in range(num_segments):
            start_time = i * segment_length
            end_time = min(start_time + segment_length - 1, duration)

            start_formatted = f"{start_time//60}:{start_time%60:02d}"
            end_formatted = f"{end_time//60}:{end_time%60:02d}"

            desc = self.generate_contextual_description(i, num_segments, video_type, uploader, title)
            scenes.append(f"**[{start_formatted}-{end_formatted}]**: {desc}")

        return scenes

    def detect_video_type_detailed(self, title, description):
        """Classify the video by keyword (substring) match for fallback descriptions.

        Returns one of: 'tutorial', 'review', 'vlog', 'music',
        'entertainment', 'news', 'cooking', 'fitness', 'general'.
        The first category with a matching keyword wins.
        """
        text = ((title or '') + " " + (description or '')).lower()

        categories = (
            ('tutorial', ['tutorial', 'how to', 'guide', 'learn', 'diy', 'step by step']),
            ('review', ['review', 'unboxing', 'test', 'comparison', 'vs']),
            ('vlog', ['vlog', 'daily', 'routine', 'day in', 'morning', 'skincare']),
            ('music', ['music', 'song', 'cover', 'lyrics', 'dance']),
            ('entertainment', ['comedy', 'funny', 'prank', 'challenge', 'reaction']),
            ('news', ['news', 'breaking', 'update', 'report']),
            ('cooking', ['cooking', 'recipe', 'food', 'kitchen']),
            ('fitness', ['workout', 'fitness', 'exercise', 'yoga']),
        )
        for label, keywords in categories:
            if any(word in text for word in keywords):
                return label
        return 'general'

    def generate_contextual_description(self, scene_index, total_scenes, video_type, uploader, title):
        """Return a generic description for one scene.

        The text depends on whether the scene is the opening, the closing or
        a middle scene, and on *video_type*. *uploader* is accepted but not
        used in the wording.
        """
        lowered_title = (title or '').lower()
        presenter_desc = "The content creator"
        if 'woman' in lowered_title or 'girl' in lowered_title:
            presenter_desc = "A woman"
        elif 'man' in lowered_title or 'guy' in lowered_title:
            presenter_desc = "A man"

        if scene_index == 0:
            # Opening scene
            if video_type == 'tutorial':
                return f"{presenter_desc} appears on screen, likely introducing themselves and the topic. They may be in a well-lit indoor setting, wearing casual clothing, and addressing the camera directly with a welcoming gesture."
            elif video_type == 'vlog':
                return f"{presenter_desc} greets the camera with a smile, possibly waving. They appear to be in their usual filming location, wearing their typical style, and beginning their introduction to today's content."
            elif video_type == 'review':
                return f"{presenter_desc} introduces the product or topic they'll be reviewing, likely holding or displaying the item. The setting appears organized, possibly with the product prominently featured."
            else:
                return f"{presenter_desc} appears on screen to begin the video, introducing the topic with engaging body language and clear speech directed at the audience."

        elif scene_index == total_scenes - 1:
            # Closing scene
            if video_type == 'tutorial':
                return f"{presenter_desc} concludes the tutorial, possibly showing the final result. They may be thanking viewers, asking for engagement (likes/comments), and suggesting related content."
            elif video_type == 'vlog':
                return f"{presenter_desc} wraps up their vlog, possibly reflecting on the day's events. They appear relaxed and are likely saying goodbye to viewers with a friendly gesture."
            else:
                return f"{presenter_desc} concludes the video with final thoughts, thanking viewers for watching, and encouraging engagement through likes, comments, and subscriptions."

        else:
            # Middle scenes - content-specific
            if video_type == 'tutorial':
                step_num = scene_index
                return f"{presenter_desc} demonstrates step {step_num} of the process, showing specific techniques and explaining the procedure. They may be using tools or materials, with close-up shots of their hands working."
            elif video_type == 'review':
                return f"{presenter_desc} examines different aspects of the product, pointing out features and sharing their opinions. They may be holding, using, or demonstrating the item while speaking to the camera."
            elif video_type == 'vlog':
                return f"{presenter_desc} continues sharing their experience, possibly showing different locations or activities. The scene captures candid moments with natural lighting and casual interactions."
            elif video_type == 'cooking':
                return f"{presenter_desc} works in the kitchen, preparing ingredients or cooking. They demonstrate techniques while explaining each step, with kitchen tools and ingredients visible on the counter."
            elif video_type == 'fitness':
                return f"{presenter_desc} demonstrates exercise movements, likely in workout attire in a gym or home setting. They show proper form while providing instruction and motivation."
            else:
                return f"{presenter_desc} continues with the main content, engaging with the audience through clear explanations and demonstrations. The setting remains consistent with good lighting and clear audio."

    def detect_video_type(self, title, description):
        """Return a display label for the video type, chosen by keyword (substring) match."""
        # Either field may be None in the extracted metadata.
        text = ((title or '') + " " + (description or '')).lower()

        labels = (
            ("🎵 Music Video", ['music', 'song', 'album', 'artist', 'band', 'lyrics']),
            ("📚 Tutorial/Educational", ['tutorial', 'how to', 'guide', 'learn', 'teaching']),
            ("🎭 Entertainment/Comedy", ['funny', 'comedy', 'entertainment', 'vlog', 'challenge']),
            ("📰 News/Information", ['news', 'breaking', 'report', 'update']),
            ("⭐ Review/Unboxing", ['review', 'unboxing', 'test', 'comparison']),
            ("📺 Commercial/Advertisement", ['commercial', 'ad', 'brand', 'product']),
        )
        for label, keywords in labels:
            if any(word in text for word in keywords):
                return label
        return "🎬 General Content"

    def detect_background_music(self, video_info):
        """Guess the background-music style from keywords in the title."""
        title = (video_info.get('title') or '').lower()

        if any(word in title for word in ['music', 'song', 'soundtrack']):
            return "🎵 Original Music/Soundtrack - Primary audio content"
        elif any(word in title for word in ['commercial', 'ad', 'brand']):
            return "🎶 Upbeat Commercial Music - Designed to enhance brand appeal"
        elif any(word in title for word in ['tutorial', 'how to', 'guide']):
            return "🔇 Minimal/No Background Music - Focus on instruction"
        elif any(word in title for word in ['vlog', 'daily', 'life']):
            return "🎼 Ambient Background Music - Complementary to narration"
        else:
            return "🎵 Background Music - Complementing video mood and pacing"

    def detect_influencer_status(self, video_info):
        """Classify the creator by subscriber count, then by view count."""
        # yt-dlp reports subscribers as ``channel_follower_count``; the old
        # key is still consulted for backward compatibility. Missing values
        # (None) count as 0.
        subscriber_count = (
            video_info.get('channel_follower_count')
            or video_info.get('channel_followers')
            or 0
        )
        view_count = video_info.get('view_count') or 0

        if subscriber_count > 10000000:
            return "🌟 Mega Influencer (10M+ subscribers)"
        elif subscriber_count > 1000000:
            return "⭐ Major Influencer (1M+ subscribers)"
        elif subscriber_count > 100000:
            return "🎯 Mid-tier Influencer (100K+ subscribers)"
        elif subscriber_count > 10000:
            return "📈 Micro Influencer (10K+ subscribers)"
        elif view_count > 100000:
            return "🔥 Viral Content Creator"
        else:
            return "👤 Regular Content Creator"

    def format_number(self, num):
        """Abbreviate a count with K/M/B suffixes; None or 0 gives "0"."""
        if num is None or num == 0:
            return "0"
        if num >= 1_000_000_000:
            return f"{num/1_000_000_000:.1f}B"
        elif num >= 1_000_000:
            return f"{num/1_000_000:.1f}M"
        elif num >= 1_000:
            return f"{num/1_000:.1f}K"
        return str(num)

    def format_video_info(self, video_info):
        """Build the HTML analysis report for one video.

        Args:
            video_info: Metadata dict as returned by ``get_video_info``.

        Returns:
            An HTML string, or a plain error message when *video_info* is
            empty.
        """
        # Local import keeps the class self-contained.
        from html import escape

        if not video_info:
            return "❌ No video information available."

        # Metadata values may be present but None, so default with ``or``.
        title = video_info.get("title") or "Unknown"
        uploader = video_info.get("uploader") or "Unknown"
        duration = int(video_info.get("duration") or 0)
        duration_str = f"{duration//60}:{duration%60:02d}" if duration else "Unknown"
        view_count = video_info.get("view_count") or 0
        like_count = video_info.get("like_count") or 0
        comment_count = video_info.get("comment_count") or 0
        upload_date = video_info.get("upload_date") or "Unknown"

        # An 8-character date (YYYYMMDD) is shown as YYYY-MM-DD.
        if len(upload_date) == 8:
            formatted_date = f"{upload_date[:4]}-{upload_date[4:6]}-{upload_date[6:8]}"
        else:
            formatted_date = upload_date

        scene_descriptions = self.generate_scene_breakdown_gemini(video_info)
        scene_table_html = format_scene_breakdown(scene_descriptions)
        video_type = self.detect_video_type(title, video_info.get('description', ''))
        background_music = self.detect_background_music(video_info)
        influencer_status = self.detect_influencer_status(video_info)

        engagement_rate = (like_count / view_count) * 100 if view_count > 0 else 0

        # Title, uploader and date come from remote metadata: escape them
        # before they go into markup. Truncate first so entities stay whole.
        safe_title = escape(title)
        creator_display = escape(uploader[:20]) + ('...' if len(uploader) > 20 else '')
        safe_date = escape(formatted_date)
        music_display = background_music[:30] + ('...' if len(background_music) > 30 else '')
        status_display = influencer_status[:25] + ('...' if len(influencer_status) > 25 else '')

        # In the card styles the ``border`` shorthand precedes ``border-left``
        # so the coloured accent is not overridden.
        report = f"""
        <div style='font-family: Arial, sans-serif; background: linear-gradient(135deg, #2d3748, #1a202c); padding: 20px; border-radius: 15px; border: 2px solid #FF8C00; box-shadow: 0 8px 32px rgba(255, 140, 0, 0.3);'>

        <div style='text-align: center; margin-bottom: 20px;'>
        <h2 style='color: #87CEEB; font-size: 24px; margin: 0; text-shadow: 2px 2px 4px rgba(0,0,0,0.5);'>🎬 YouTube Video Analysis Report</h2>
        <div style='height: 3px; background: linear-gradient(90deg, #FF8C00, #87CEEB); margin: 10px 0; border-radius: 5px;'></div>
        </div>

        <!-- Compact Information Grid -->
        <div style='display: grid; grid-template-columns: 1fr 1fr 1fr; gap: 15px; margin-bottom: 20px;'>

        <!-- Basic Information Card -->
        <div style='background: rgba(135, 206, 235, 0.1); padding: 15px; border-radius: 10px; border-left: 4px solid #87CEEB;'>
        <h3 style='color: #87CEEB; margin: 0 0 10px 0; font-size: 16px;'>📋 Basic Info</h3>
        <table style='width: 100%; font-size: 14px;'>
        <tr><td style='color: #87CEEB; font-weight: bold; padding: 4px 0;'>📹 Title:</td></tr>
        <tr><td style='color: #FFFFFF; padding: 4px 0 8px 0; word-wrap: break-word; white-space: normal; max-width: 200px;'>{safe_title}</td></tr>
        <tr><td style='color: #87CEEB; font-weight: bold; padding: 4px 0;'>👤 Creator:</td><td style='color: #FFFFFF; padding: 2px 0;'>{creator_display}</td></tr>
        <tr><td style='color: #87CEEB; font-weight: bold; padding: 4px 0;'>📅 Date:</td><td style='color: #FFFFFF; padding: 2px 0;'>{safe_date}</td></tr>
        <tr><td style='color: #87CEEB; font-weight: bold; padding: 4px 0;'>⏱️ Duration:</td><td style='color: #FFFFFF; padding: 2px 0;'>{duration_str}</td></tr>
        </table>
        </div>

        <!-- Performance Metrics Card -->
        <div style='background: rgba(135, 206, 235, 0.1); padding: 15px; border-radius: 10px; border: 1px solid #444; border-left: 4px solid #FF8C00;'>
        <h3 style='color: #87CEEB; margin: 0 0 10px 0; font-size: 16px;'>📊 Metrics</h3>
        <table style='width: 100%; font-size: 12px;'>
        <tr><td style='color: #87CEEB; font-weight: bold; padding: 4px 0;'>👀 Views:</td><td style='color: #FFFFFF; padding: 4px 0;'>{self.format_number(view_count)}</td></tr>
        <tr><td style='color: #87CEEB; font-weight: bold; padding: 4px 0;'>👍 Likes:</td><td style='color: #FFFFFF; padding: 4px 0;'>{self.format_number(like_count)}</td></tr>
        <tr><td style='color: #87CEEB; font-weight: bold; padding: 4px 0;'>💬 Comments:</td><td style='color: #FFFFFF; padding: 4px 0;'>{self.format_number(comment_count)}</td></tr>
        <tr><td style='color: #87CEEB; font-weight: bold; padding: 4px 0;'>📈 Engagement:</td><td style='color: #FFFFFF; padding: 4px 0;'>{engagement_rate:.2f}%</td></tr>
        </table>
        </div>

        <!-- Content Analysis Card -->
        <div style='background:rgba(135, 206, 235, 0.1); padding: 15px; border-radius: 10px; border: 1px solid #444; border-left: 4px solid #87CEEB;'>
        <h3 style='color:#87CEEB; margin: 0 0 10px 0; font-size: 16px;'>🎯 Analysis</h3>
        <table style='width: 100%; font-size: 12px;'>
        <tr><td style='color: #87CEEB; font-weight: bold; padding: 4px 0;'>📂 Type:</td></tr>
        <tr><td style='color: #FFFFFF; padding: 4px 0 8px 0; word-break: break-word;'>{video_type}</td></tr>
        <tr><td style='color: #87CEEB; font-weight: bold; padding: 4px 0;'>🎵 Music:</td></tr>
        <tr><td style='color: #FFFFFF; padding: 4px 0 8px 0; word-break: break-word;'>{music_display}</td></tr>
        <tr><td style='color: #87CEEB; font-weight: bold; padding: 4px 0;'>👑 Status:</td></tr>
        <tr><td style='color: #FFFFFF; padding: 4px 0; word-break: break-word;'>{status_display}</td></tr>
        </table>
        </div>
        </div>

        <!-- Scene Breakdown Section -->
        <div style='background: rgba(0, 0, 0, 0.3); padding: 15px; border-radius: 10px; border: 1px solid #444;'>
        <h3 style='color: #87CEEB; margin: 0 0 15px 0; font-size: 18px; text-align: center;'>🎬 Scene-by-Scene Breakdown</h3>
        {scene_table_html}
        </div>

        </div>
        """

        return report.strip()

    def get_video_info(self, url, progress=gr.Progress(), cookiefile=None):
        """Extract metadata for *url* without downloading the media.

        Returns:
            ``(info, message)``; ``info`` is None on failure.
        """
        if not url or not url.strip():
            return None, "❌ Please enter a YouTube URL"

        url = url.strip()
        if not self.is_valid_youtube_url(url):
            return None, "❌ Invalid YouTube URL format"

        try:
            progress(0.1, desc="Initializing YouTube extractor...")

            ydl_opts = {
                'noplaylist': True,
                'extract_flat': False,
            }

            if cookiefile and os.path.exists(cookiefile):
                ydl_opts['cookiefile'] = cookiefile

            progress(0.5, desc="Extracting video metadata...")

            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                info = ydl.extract_info(url, download=False)

            progress(1.0, desc="✅ Analysis complete!")

            return info, "✅ Video information extracted successfully"

        except Exception as e:
            return None, f"❌ Error: {str(e)}"

    def download_video(self, url, quality="best", audio_only=False, progress=gr.Progress(), cookiefile=None):
        """Download a video (or its audio as MP3) with progress reporting.

        The file is written to the temp downloads directory and then copied,
        best-effort, to the user's downloads folder.

        Returns:
            ``(temp_file_path, message)``; the path is None on failure.
        """
        if not url or not url.strip():
            return None, "❌ Please enter a YouTube URL"

        url = url.strip()
        if not self.is_valid_youtube_url(url):
            return None, "❌ Invalid YouTube URL format"

        try:
            progress(0.1, desc="Preparing download...")

            # The timestamp makes the file name unique and is used below to
            # locate the finished file.
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

            ydl_opts = {
                'outtmpl': os.path.join(self.temp_downloads, f'%(title)s_{timestamp}.%(ext)s'),
                'noplaylist': True,
            }

            if audio_only:
                ydl_opts['format'] = 'bestaudio/best'
                ydl_opts['postprocessors'] = [{
                    'key': 'FFmpegExtractAudio',
                    'preferredcodec': 'mp3',
                    'preferredquality': '192',
                }]
            else:
                format_by_quality = {
                    "best": 'best[height<=1080]',
                    "720p": 'best[height<=720]',
                    "480p": 'best[height<=480]',
                }
                ydl_opts['format'] = format_by_quality.get(quality, 'best')

            if cookiefile and os.path.exists(cookiefile):
                ydl_opts['cookiefile'] = cookiefile

            def progress_hook(d):
                if d['status'] == 'downloading':
                    # The exact size may be absent or None; fall back to the
                    # estimate and never divide by a missing/zero total.
                    total = d.get('total_bytes') or d.get('total_bytes_estimate')
                    if total:
                        percent = (d.get('downloaded_bytes', 0) / total) * 100
                        progress(0.1 + (percent / 100) * 0.7, desc=f"Downloading... {percent:.1f}%")
                    else:
                        progress(0.5, desc="Downloading...")
                elif d['status'] == 'finished':
                    progress(0.8, desc="Processing download...")

            ydl_opts['progress_hooks'] = [progress_hook]

            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                ydl.extract_info(url, download=True)

            progress(0.9, desc="Copying to Downloads folder...")

            # Locate the finished file by the timestamp in its name.
            downloaded_file_temp = next(
                (
                    os.path.join(self.temp_downloads, name)
                    for name in os.listdir(self.temp_downloads)
                    if timestamp in name
                ),
                None,
            )

            if not downloaded_file_temp:
                return None, "❌ Downloaded file not found in temp directory"

            final_filename = os.path.basename(downloaded_file_temp)
            final_path = os.path.join(self.downloads_folder, final_filename)

            # Copying is best-effort; the temp file remains the return value.
            try:
                shutil.copy2(downloaded_file_temp, final_path)
                copy_success = True
            except Exception as e:
                print(f"Warning: Could not copy to Downloads folder: {e}")
                copy_success = False

            progress(1.0, desc="✅ Download complete!")

            success_msg = f"""✅ Download successful!
📁 Temp file (for download): {os.path.basename(downloaded_file_temp)}
📁 Permanent location: {final_path if copy_success else 'Copy failed'}
🎯 File size: {os.path.getsize(downloaded_file_temp) / (1024*1024):.1f} MB"""

            return downloaded_file_temp, success_msg

        except Exception as e:
            return None, f"❌ Download failed: {str(e)}"
719
-
720
- # Initialize global downloader
721
- downloader = YouTubeDownloader()
722
-
723
def configure_api_key(api_key):
    """Configure the Gemini API key on the shared downloader.

    Args:
        api_key: Key text entered by the user; surrounding whitespace is
            ignored.

    Returns:
        ``(status_message, gr.update(visible=...))`` where the update is
        visible only when configuration succeeded.
    """
    if not api_key or not api_key.strip():
        return "❌ Please enter a valid Google API key", gr.update(visible=False)

    success, message = downloader.configure_gemini(api_key.strip())
    # Both outcomes return the downloader's message; only visibility differs.
    return message, gr.update(visible=bool(success))
734
-
735
def analyze_with_cookies(url, cookies_file, progress=gr.Progress()):
    """Extract the video's metadata and return the HTML analysis report.

    Args:
        url: YouTube URL to analyse.
        cookies_file: Optional cookies file, either a path or an object
            exposing the path as ``.name``.
        progress: Progress callback.

    Returns:
        The HTML report on success, otherwise an error message string.
    """
    try:
        progress(0.05, desc="Starting analysis...")

        # Accept a plain path or an upload object carrying its path in
        # ``.name``; os.path.exists would raise on the object itself.
        cookie_path = getattr(cookies_file, "name", cookies_file)
        cookiefile = cookie_path if cookie_path and os.path.exists(cookie_path) else None

        info, msg = downloader.get_video_info(url, progress=progress, cookiefile=cookiefile)

        if not info:
            return f"❌ Analysis Failed: {msg}"

        progress(0.95, desc="Generating comprehensive report...")
        formatted_info = downloader.format_video_info(info)
        progress(1.0, desc="✅ Complete!")
        return formatted_info

    except Exception as e:
        return f"❌ System Error: {str(e)}"
756
-
757
-
758
- def analyze_and_generate_pdf(url, cookies_file, progress=gr.Progress()):
759
- try:
760
- progress(0.1, desc="Extracting video info...")
761
- cookiefile = cookies_file if cookies_file and os.path.exists(cookies_file) else None
762
- info, _ = downloader.get_video_info(url, progress=progress, cookiefile=cookiefile)
763
-
764
- if not info:
765
- print("⚠️ No video info returned.")
766
- return None # This is the problem - returns None instead of Gradio update
767
-
768
- progress(0.6, desc="Generating HTML report...")
769
- report_html = downloader.format_video_info(info)
770
-
771
- progress(0.8, desc="Creating PDF...")
772
- pdf_buffer = generate_pdf_from_html(report_html)
773
-
774
- if pdf_buffer:
775
- pdf_path = os.path.join(downloader.downloads_folder, f"analysis_report_{uuid.uuid4().hex}.pdf")
776
- with open(pdf_path, "wb") as f:
777
- f.write(pdf_buffer.read())
778
- print("βœ… PDF generated at:", pdf_path)
779
- print("File exists:", os.path.exists(pdf_path))
780
-
781
- progress(1.0, desc="βœ… PDF ready!")
782
- return pdf_path # This should return a Gradio update
783
- else:
784
- print("❌ PDF buffer is empty.")
785
- return None # This should return a Gradio update
786
-
787
- except Exception as e:
788
- print(f"PDF generation error: {e}")
789
- return None # This should return a Gradio update
790
-
791
- def generate_pdf_from_html(html_content):
792
- """Generate PDF with simplified HTML that works better with xhtml2pdf"""
793
  try:
794
- # Create a simplified version of the HTML for PDF generation
795
- # Remove complex CSS that xhtml2pdf can't handle
796
- simplified_html = html_content.replace(
797
- "background: linear-gradient(135deg, #2d3748, #1a202c);",
798
- "background-color: #f5f5f5;"
799
- ).replace(
800
- "background: linear-gradient(90deg, #FF8C00, #87CEEB);",
801
- "background-color: #FF8C00;"
802
- ).replace(
803
- "rgba(135, 206, 235, 0.1)",
804
- "#f9f9f9"
805
- ).replace(
806
- "rgba(0, 0, 0, 0.3)",
807
- "#ffffff"
808
- ).replace(
809
- "text-shadow: 2px 2px 4px rgba(0,0,0,0.5);",
810
- ""
811
- ).replace(
812
- "box-shadow: 0 8px 32px rgba(255, 140, 0, 0.3);",
813
- ""
814
- ).replace(
815
- "box-shadow: 0 4px 8px rgba(0,0,0,0.3);",
816
- ""
817
- ).replace(
818
- "display: grid; grid-template-columns: 1fr 1fr 1fr; gap: 15px;",
819
- "display: block;"
820
- ).replace(
821
- "background-color:#1a1a1a;",
822
- "background-color:#ffffff;"
823
- ).replace(
824
- "color: #FFFFFF;",
825
- "color: #000000;"
826
- ).replace(
827
- "background-color:#FF8C00; color: #000000;",
828
- "background-color:#FF8C00; color: #000000;"
829
- ).replace(
830
- "color: #87CEEB;",
831
- "color: #000080;"
832
- ).replace(
833
- "border: 2px solid #FF8C00;",
834
- "border: 1px solid #FF8C00;"
835
- )
836
 
837
- # Remove table styling that causes issues
838
- simplified_html = re.sub(r"style='[^']*background-color:#1a1a1a[^']*'", "style='background-color:#ffffff;'", simplified_html)
839
- simplified_html = re.sub(r"style='[^']*color: #87CEEB[^']*'", "style='color: #000080; padding: 8px;'", simplified_html)
840
 
841
- # Wrap in a complete HTML document with PDF-friendly CSS
842
- pdf_html = f"""
843
- <!DOCTYPE html>
844
- <html>
845
- <head>
846
- <meta charset="UTF-8">
847
- <style>
848
- @page {{
849
- size: A4;
850
- margin: 1cm;
851
- }}
852
- body {{
853
- font-family: Arial, sans-serif;
854
- font-size: 12px;
855
- line-height: 1.4;
856
- color: #000000;
857
- background-color: #ffffff;
858
- }}
859
- .report-container {{
860
- background-color: #ffffff;
861
- padding: 15px;
862
- border: 2px solid #FF8C00;
863
- border-radius: 8px;
864
- }}
865
- .header {{
866
- text-align: center;
867
- color: #FF8C00;
868
- font-size: 20px;
869
- font-weight: bold;
870
- margin-bottom: 15px;
871
- border-bottom: 2px solid #FF8C00;
872
- padding-bottom: 8px;
873
- }}
874
- .info-card {{
875
- background-color: #f9f9f9;
876
- padding: 12px;
877
- margin: 8px 0;
878
- border-left: 3px solid #87CEEB;
879
- border-radius: 4px;
880
- page-break-inside: avoid;
881
- }}
882
- .info-title {{
883
- color: #000080;
884
- font-size: 14px;
885
- font-weight: bold;
886
- margin-bottom: 8px;
887
- }}
888
- table {{
889
- width: 100%;
890
- border-collapse: collapse;
891
- margin: 8px 0;
892
- page-break-inside: avoid;
893
- }}
894
- th, td {{
895
- padding: 6px 8px;
896
- border: 1px solid #cccccc;
897
- text-align: left;
898
- vertical-align: top;
899
- font-size: 11px;
900
- }}
901
- th {{
902
- background-color: #FF8C00;
903
- color: #000000;
904
- font-weight: bold;
905
- }}
906
- tr:nth-child(even) {{
907
- background-color: #f9f9f9;
908
- }}
909
- .scene-table {{
910
- margin-top: 15px;
911
- }}
912
- .scene-header {{
913
- color: #000080;
914
- font-size: 16px;
915
- font-weight: bold;
916
- text-align: center;
917
- margin-bottom: 10px;
918
- }}
919
- div[style*="display: grid"] {{
920
- display: block !important;
921
- }}
922
- div[style*="grid-template-columns"] > div {{
923
- display: block !important;
924
- margin-bottom: 10px !important;
925
- width: 100% !important;
926
- }}
927
- </style>
928
- </head>
929
- <body>
930
- <div class="report-container">
931
- {simplified_html}
932
- </div>
933
- </body>
934
- </html>
935
- """
936
 
937
- result = BytesIO()
938
- pisa_status = pisa.CreatePDF(pdf_html, dest=result)
 
939
 
940
- if pisa_status.err:
941
- print(f"PDF generation error: {pisa_status.err}")
942
- return None
943
-
944
- result.seek(0)
945
- return result
946
 
947
- except Exception as e:
948
- print(f"PDF generation exception: {e}")
949
- return None
950
-
951
-
952
-
953
- def download_with_cookies(url, quality, audio_only, cookies_file, progress=gr.Progress()):
954
- """Main download function"""
955
- try:
956
- progress(0.05, desc="Preparing download...")
957
 
958
- cookiefile = None
959
- if cookies_file and os.path.exists(cookies_file):
960
- cookiefile = cookies_file
961
 
962
- file_path, msg = downloader.download_video(url, quality, audio_only, progress=progress, cookiefile=cookiefile)
 
963
 
964
- if file_path:
965
- return file_path, msg
966
- else:
967
- return None, msg
968
-
969
  except Exception as e:
970
- return None, f"❌ System Error: {str(e)}"
971
-
972
- def create_interface():
973
- """Create and configure the Gradio interface"""
974
- with gr.Blocks(
975
- css="""
976
- /* Main dark theme background and text */
977
- .gradio-container, .app, body {
978
- background-color: #1a1a1a !important;
979
- color: #87CEEB !important;
980
- font-weight: bold !important;
981
- }
982
- /* πŸ”΅ Dark blue overrides for key labels */
983
- h3, .gr-group h3, .gradio-container h3 {
984
- color: #87CEEB !important;
985
- }
986
- label, .gr-textbox label, .gr-file label, .gr-dropdown label, .gr-checkbox label {
987
- color: #00008B !important;
988
- font-weight: bold !important;
989
- }
990
- .gr-file .file-name {
991
- color: #00008B !important;
992
- font-weight: bold !important;
993
- }
994
- /* Make tab labels dark blue too */
995
- .gr-tab-nav button {
996
- color: #00008B !important;
997
- }
998
- .gr-tab-nav button.selected {
999
- background-color: #FF8C00 !important;
1000
- color: #000000 !important;
1001
- }
1002
- /* Light blue text for API status */
1003
- .light-blue-text textarea {
1004
- color: #87CEEB !important;
1005
- background-color: #2a2a2a !important;
1006
- }
1007
- .gr-file {
1008
- background-color: #2a2a2a !important;
1009
- border: 2px dashed #444 !important;
1010
- }
1011
- .gr-group, .gr-form, .gr-row {
1012
- background-color: #1a1a1a !important;
1013
- border: 1px solid #444 !important;
1014
- border-radius: 10px;
1015
- padding: 15px;
1016
- }
1017
- """,
1018
- theme=gr.themes.Soft(),
1019
- title="πŸ“Š YouTube Video Analyzer & Downloader"
1020
- ) as demo:
1021
-
1022
- # API Key Configuration Section
1023
- with gr.Group():
1024
- gr.HTML("<h3>πŸ”‘ Google Gemini API Configuration</h3>")
1025
- with gr.Row():
1026
- api_key_input = gr.Textbox(
1027
- label="πŸ”‘ Google API Key",
1028
- placeholder="Enter your Google API Key for enhanced AI analysis...",
1029
- type="password",
1030
- value=""
1031
- )
1032
- configure_btn = gr.Button("πŸ”§ Configure API", variant="secondary")
1033
-
1034
- api_status = gr.Textbox(
1035
- label="API Status",
1036
- value="❌ Gemini API not configured - Using fallback analysis",
1037
- interactive=False,
1038
- lines=1,
1039
- elem_classes="light-blue-text"
1040
  )
1041
 
1042
- # Main Interface (initially hidden until API is configured)
1043
- main_interface = gr.Group(visible=False)
 
 
 
 
 
1044
 
1045
- with main_interface:
1046
- with gr.Row():
1047
- url_input = gr.Textbox(
1048
- label="πŸ”— YouTube URL",
1049
- placeholder="Paste your YouTube video URL here...",
1050
- value=""
1051
- )
1052
-
1053
- cookies_input = gr.File(
1054
- label="πŸͺ Upload cookies.txt (Mandatory)",
1055
- file_types=[".txt"],
1056
- type="filepath"
1057
- )
1058
-
1059
- with gr.Tabs():
1060
- with gr.TabItem("πŸ“Š Video Analysis"):
1061
- analyze_btn = gr.Button("πŸ” Analyze Video", variant="primary")
1062
-
1063
- analysis_output = gr.HTML(
1064
- label="πŸ“Š Analysis Report",
1065
- )
1066
- download_pdf_btn = gr.Button("πŸ“„ Download Report as PDF", variant="secondary")
1067
- pdf_file_output = gr.File(label="πŸ“₯ PDF Report", visible=False,interactive=False)
1068
-
1069
- analyze_btn.click(
1070
- fn=analyze_with_cookies,
1071
- inputs=[url_input, cookies_input],
1072
- outputs=analysis_output,
1073
- show_progress=True
1074
- )
1075
- download_pdf_btn.click(
1076
- fn=analyze_and_generate_pdf,
1077
- inputs=[url_input, cookies_input],
1078
- outputs=pdf_file_output,
1079
- show_progress=True
1080
- )
1081
-
1082
-
1083
- with gr.TabItem("⬇️ Video Download"):
1084
- with gr.Row():
1085
- quality_dropdown = gr.Dropdown(
1086
- choices=["best", "720p", "480p"],
1087
- value="best",
1088
- label="πŸ“Ί Video Quality"
1089
- )
1090
-
1091
- audio_only_checkbox = gr.Checkbox(
1092
- label="🎡 Audio Only (MP3)",
1093
- value=False
1094
- )
1095
-
1096
- download_btn = gr.Button("⬇️ Download Video", variant="primary")
1097
-
1098
- download_status = gr.Textbox(
1099
- label="πŸ“₯ Download Status",
1100
- lines=5,
1101
- show_copy_button=True
1102
- )
1103
-
1104
- download_file = gr.File(
1105
- label="πŸ“ Downloaded File",
1106
- visible=False
1107
- )
1108
-
1109
- def download_and_update(url, quality, audio_only, cookies_file, progress=gr.Progress()):
1110
- file_path, status = download_with_cookies(url, quality, audio_only, cookies_file, progress)
1111
- if file_path and os.path.exists(file_path):
1112
- return status, gr.update(value=file_path, visible=True)
1113
- else:
1114
- return status, gr.update(visible=False)
1115
-
1116
- download_btn.click(
1117
- fn=download_and_update,
1118
- inputs=[url_input, quality_dropdown, audio_only_checkbox, cookies_input],
1119
- outputs=[download_status, download_file],
1120
- show_progress=True
1121
- )
1122
 
1123
- # Configure API key button action
1124
- configure_btn.click(
1125
- fn=configure_api_key,
1126
- inputs=[api_key_input],
1127
- outputs=[api_status, main_interface]
1128
  )
1129
 
1130
- # Always show interface option (for fallback mode)
1131
- with gr.Row():
1132
- show_interface_btn = gr.Button("πŸš€ Use Without Gemini API (Fallback Mode)", variant="secondary")
1133
-
1134
- def show_fallback_interface():
1135
- return "⚠️ Using fallback analysis mode", gr.update(visible=True)
1136
-
1137
- show_interface_btn.click(
1138
- fn=show_fallback_interface,
1139
- outputs=[api_status, main_interface]
1140
- )
1141
 
1142
- gr.HTML("""
1143
- <div style="margin-top: 20px; padding: 15px; background-color: #2a2a2a; border-radius: 10px; border-left: 5px solid #FF8C00; color: #87CEEB !important;">
1144
- <h3 style="color: #87CEEB !important; font-weight: bold;">πŸ”‘ How to Get Google API Key:</h3>
1145
- <ol style="color: #87CEEB !important; font-weight: bold;">
1146
- <li style="color: #87CEEB !important;">Go to <a href="https://console.cloud.google.com/" target="_blank" style="color: #87CEEB !important;">Google Cloud Console</a></li>
1147
- <li style="color: #87CEEB !important;">Create a new project or select an existing one</li>
1148
- <li style="color: #87CEEB !important;">Enable the "Generative Language API"</li>
1149
- <li style="color: #87CEEB !important;">Go to "Credentials" and create an API key</li>
1150
- <li style="color: #87CEEB !important;">Copy the API key and paste it above</li>
1151
- </ol>
1152
- <h3 style="color: #87CEEB !important; font-weight: bold;">✨ Benefits of using Gemini API:</h3>
1153
- <ul style="color: #87CEEB !important; font-weight: bold;">
1154
- <li style="color: #87CEEB !important;">πŸ€– AI-powered scene descriptions with contextual understanding</li>
1155
- <li style="color: #87CEEB !important;">🎯 More accurate content type detection</li>
1156
- <li style="color: #87CEEB !important;">πŸ“Š Enhanced analysis based on video content</li>
1157
- <li style="color: #87CEEB !important;">⏰ Intelligent timestamp segmentation</li>
1158
- </ul>
1159
- </div>
1160
  """)
1161
 
1162
- return demo
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1163
  if __name__ == "__main__":
1164
- demo = create_interface()
1165
- import atexit
1166
- atexit.register(downloader.cleanup)
1167
- demo.launch(debug=True, show_error=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  import tempfile
3
+ import gradio as gr
 
4
  import re
5
+ import sys
6
+ import shutil
 
 
 
 
 
7
 
8
+ # Try to import required packages with error handling
9
+ try:
10
+ from yt_dlp import YoutubeDL
11
+ YT_DLP_AVAILABLE = True
12
+ except ImportError as e:
13
+ YT_DLP_AVAILABLE = False
14
+ print(f"yt-dlp import error: {e}")
15
+
16
+ try:
17
+ import whisper
18
+ WHISPER_AVAILABLE = True
19
+ except ImportError as e:
20
+ WHISPER_AVAILABLE = False
21
+ print(f"whisper import error: {e}")
22
+
23
+ print(f"Python version: {sys.version}")
24
+ print(f"yt-dlp available: {YT_DLP_AVAILABLE}")
25
+ print(f"whisper available: {WHISPER_AVAILABLE}")
26
+
27
+ def download_audio(url, cookies_file_path=None):
28
+ """Download audio from YouTube URL and return the file path"""
29
+ if not YT_DLP_AVAILABLE:
30
+ raise Exception("yt-dlp is not available. Please check the installation.")
31
+
32
  try:
33
+ # Create a temporary directory for downloads
34
+ temp_dir = tempfile.mkdtemp()
35
+ output_path = os.path.join(temp_dir, "audio")
36
+
37
+ ydl_opts = {
38
+ 'format': 'bestaudio[ext=m4a]/bestaudio/best',
39
+ 'outtmpl': output_path + '.%(ext)s',
40
+ 'quiet': True,
41
+ 'no_warnings': True,
42
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
+ # Add cookies file if provided
45
+ if cookies_file_path and os.path.exists(cookies_file_path):
46
+ ydl_opts['cookiefile'] = cookies_file_path
47
+ print(f"Using cookies file: {cookies_file_path}")
48
 
49
+ with YoutubeDL(ydl_opts) as ydl:
50
+ info_dict = ydl.extract_info(url, download=True)
51
+ filename = ydl.prepare_filename(info_dict)
52
 
53
+ # Find the downloaded file
54
+ for ext in ['.m4a', '.webm', '.mp4', '.mp3']:
55
+ potential_file = output_path + ext
56
+ if os.path.exists(potential_file):
57
+ return potential_file
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
 
59
+ raise FileNotFoundError(f"Downloaded audio file not found")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
 
61
+ except Exception as e:
62
+ raise Exception(f"Failed to download audio: {str(e)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
 
64
+ def transcribe_audio(file_path):
65
+ """Transcribe audio file using Whisper"""
66
+ if not WHISPER_AVAILABLE:
67
+ raise Exception("OpenAI Whisper is not available. Please check the installation.")
68
+
69
+ try:
70
+ # Use the smallest model to reduce memory usage
71
+ model = whisper.load_model("tiny")
72
+ result = model.transcribe(file_path)
73
+ return result["text"]
74
+ except Exception as e:
75
+ raise Exception(f"Failed to transcribe audio: {str(e)}")
 
 
 
 
 
 
76
 
77
+ def extract_stock_info_simple(text):
78
+ """Extract stock information using simple pattern matching"""
79
+ try:
80
+ stock_info = []
81
+
82
+ # Simple patterns to look for stock-related information
83
+ stock_patterns = [
84
+ r'\b[A-Z]{1,5}\b(?:\s+stock|\s+shares|\s+symbol)', # Stock symbols
85
+ r'(?:buy|sell|target|price)\s+[A-Z]{1,5}',
86
+ r'\$\d+(?:\.\d{2})?', # Dollar amounts
87
+ r'\b(?:bullish|bearish|buy|sell|hold)\b',
88
+ ]
89
+
90
+ # Look for company names and stock mentions
91
+ companies = re.findall(r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*(?:\s+(?:Inc|Corp|Company|Ltd)\.?)?', text)
92
+ symbols = re.findall(r'\b[A-Z]{2,5}\b', text)
93
+ prices = re.findall(r'\$\d+(?:\.\d{2})?', text)
94
+ actions = re.findall(r'\b(?:buy|sell|hold|bullish|bearish|target|stop\s+loss)\b', text, re.IGNORECASE)
95
+
96
+ # Format the extracted information
97
+ result = "=== EXTRACTED STOCK INFORMATION ===\n\n"
98
+
99
+ if companies:
100
+ result += f"πŸ“Š Mentioned Companies: {', '.join(set(companies[:10]))}\n\n"
101
+
102
+ if symbols:
103
+ result += f"πŸ”€ Potential Stock Symbols: {', '.join(set(symbols[:10]))}\n\n"
104
+
105
+ if prices:
106
+ result += f"πŸ’° Price Mentions: {', '.join(set(prices[:10]))}\n\n"
107
+
108
+ if actions:
109
+ result += f"πŸ“ˆ Trading Actions: {', '.join(set(actions[:10]))}\n\n"
110
+
111
+ # Look for specific recommendation patterns
112
+ recommendations = []
113
+ sentences = text.split('.')
114
+ for sentence in sentences:
115
+ if any(word in sentence.lower() for word in ['buy', 'sell', 'target', 'recommendation']):
116
+ if any(symbol in sentence for symbol in symbols[:5]):
117
+ recommendations.append(sentence.strip())
118
+
119
+ if recommendations:
120
+ result += "🎯 Potential Recommendations:\n"
121
+ for rec in recommendations[:5]:
122
+ result += f"β€’ {rec}\n"
123
+
124
+ if not any([companies, symbols, prices, actions]):
125
+ result += "⚠️ No clear stock recommendations found in the transcript.\n"
126
+ result += "This might be because:\n"
127
+ result += "β€’ The video doesn't contain stock recommendations\n"
128
+ result += "β€’ The audio quality was poor\n"
129
+ result += "β€’ The content is not in English\n"
130
 
131
+ return result
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
 
133
+ except Exception as e:
134
+ return f"Error extracting stock info: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
 
136
+ def cleanup_file(file_path):
137
+ """Clean up temporary files"""
138
+ try:
139
+ if file_path and os.path.exists(file_path):
140
+ os.remove(file_path)
141
+ # Also try to remove the directory if it's empty
142
+ try:
143
+ os.rmdir(os.path.dirname(file_path))
144
+ except:
145
+ pass
146
+ except:
147
+ pass
148
+
149
+ def process_cookies_file(cookies_file):
150
+ """Process uploaded cookies file and return the path"""
151
+ if cookies_file is None:
152
+ return None
 
 
 
153
 
154
+ try:
155
+ # Create a temporary file for cookies
156
+ temp_cookies_path = tempfile.mktemp(suffix='.txt')
 
 
 
157
 
158
+ # Copy the uploaded file to temp location
159
+ shutil.copy2(cookies_file.name, temp_cookies_path)
160
 
161
+ return temp_cookies_path
162
+ except Exception as e:
163
+ print(f"Error processing cookies file: {e}")
164
+ return None
165
+
166
+ def process_video(url, cookies_file, progress=gr.Progress()):
167
+ """Main function to process YouTube video"""
 
168
 
169
+ # Check if required packages are available
170
+ if not YT_DLP_AVAILABLE:
171
+ return "Error: yt-dlp is not installed properly. Please check the requirements.", "", "❌ Error: Missing yt-dlp"
 
 
 
 
 
 
 
 
 
 
 
172
 
173
+ if not WHISPER_AVAILABLE:
174
+ return "Error: OpenAI Whisper is not installed properly. Please check the requirements.", "", "❌ Error: Missing Whisper"
 
 
 
 
 
 
 
 
175
 
176
+ if not url or not url.strip():
177
+ return "Please provide a valid YouTube URL", "", "❌ Error: Invalid URL"
 
 
 
 
 
 
 
 
 
 
 
178
 
179
+ audio_path = None
180
+ cookies_temp_path = None
 
 
 
181
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
182
  try:
183
+ # Validate URL
184
+ if not any(domain in url.lower() for domain in ['youtube.com', 'youtu.be']):
185
+ return "Please provide a valid YouTube URL", "", "❌ Error: Invalid URL"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
186
 
187
+ # Process cookies file if provided
188
+ progress(0.05, desc="Processing cookies...")
189
+ cookies_temp_path = process_cookies_file(cookies_file)
190
 
191
+ status_msg = "βœ… Cookies loaded" if cookies_temp_path else "⚠️ No cookies (may encounter bot detection)"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
192
 
193
+ # Download audio
194
+ progress(0.2, desc="Downloading audio...")
195
+ audio_path = download_audio(url, cookies_temp_path)
196
 
197
+ # Transcribe audio
198
+ progress(0.6, desc="Transcribing audio...")
199
+ transcript = transcribe_audio(audio_path)
 
 
 
200
 
201
+ if not transcript.strip():
202
+ return "No speech detected in the video", "", "❌ No speech detected"
 
 
 
 
 
 
 
 
203
 
204
+ # Extract stock information
205
+ progress(0.9, desc="Extracting stock information...")
206
+ stock_details = extract_stock_info_simple(transcript)
207
 
208
+ progress(1.0, desc="Complete!")
209
+ return transcript, stock_details, "βœ… Processing completed successfully"
210
 
 
 
 
 
 
211
  except Exception as e:
212
+ error_msg = f"Error processing video: {str(e)}"
213
+ return error_msg, "", f"❌ Error: {str(e)}"
214
+
215
+ finally:
216
+ # Clean up temporary files
217
+ cleanup_file(audio_path)
218
+ cleanup_file(cookies_temp_path)
219
+
220
+ # Create Gradio interface
221
+ with gr.Blocks(
222
+ title="Stock Recommendation Extractor",
223
+ theme=gr.themes.Soft(),
224
+ css="""
225
+ .gradio-container {
226
+ max-width: 1400px;
227
+ margin: auto;
228
+ }
229
+ .status-box {
230
+ padding: 10px;
231
+ border-radius: 5px;
232
+ margin: 10px 0;
233
+ }
234
+ """
235
+ ) as demo:
236
+
237
+ gr.Markdown("""
238
+ # πŸ“ˆ Stock Recommendation Extractor from YouTube
239
+
240
+ Extract stock recommendations and trading information from YouTube videos using AI transcription.
241
+
242
+ **How it works:**
243
+ 1. Upload your cookies.txt file (optional but recommended to avoid bot detection)
244
+ 2. Paste YouTube video URL
245
+ 3. Downloads audio from YouTube video
246
+ 4. Transcribes using OpenAI Whisper
247
+ 5. Extracts stock-related information
248
+
249
+ **⚠️ Disclaimer:** This is for educational purposes only. Always do your own research!
250
+ """)
251
+
252
+ with gr.Row():
253
+ with gr.Column(scale=1):
254
+ # Cookies file upload
255
+ cookies_input = gr.File(
256
+ label="πŸͺ Upload Cookies File (cookies.txt)",
257
+ file_types=[".txt"],
258
+ file_count="single"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
259
  )
260
 
261
+ gr.Markdown("""
262
+ **How to get cookies.txt:**
263
+ 1. Install browser extension like "Get cookies.txt LOCALLY"
264
+ 2. Visit YouTube in your browser (logged in)
265
+ 3. Export cookies for youtube.com
266
+ 4. Upload the downloaded cookies.txt file here
267
+ """)
268
 
269
+ url_input = gr.Textbox(
270
+ label="πŸ“Ί YouTube URL",
271
+ placeholder="https://www.youtube.com/watch?v=...",
272
+ lines=2
273
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
274
 
275
+ process_btn = gr.Button(
276
+ "πŸš€ Extract Stock Information",
277
+ variant="primary",
278
+ size="lg"
 
279
  )
280
 
281
+ # Status display
282
+ status_output = gr.Textbox(
283
+ label="πŸ“Š Status",
284
+ lines=1,
285
+ interactive=False
286
+ )
 
 
 
 
 
287
 
288
+ gr.Markdown("""
289
+ ### πŸ’‘ Tips:
290
+ - Upload cookies.txt to avoid bot detection
291
+ - Works best with financial YouTube channels
292
+ - Ensure video has clear audio
293
+ - English content works best
 
 
 
 
 
 
 
 
 
 
 
 
294
  """)
295
 
296
+ with gr.Row():
297
+ with gr.Column():
298
+ transcript_output = gr.Textbox(
299
+ label="πŸ“ Full Transcript",
300
+ lines=15,
301
+ max_lines=20,
302
+ show_copy_button=True
303
+ )
304
+
305
+ with gr.Column():
306
+ stock_info_output = gr.Textbox(
307
+ label="πŸ“Š Extracted Stock Information",
308
+ lines=15,
309
+ max_lines=20,
310
+ show_copy_button=True
311
+ )
312
+
313
+ # Event handlers
314
+ process_btn.click(
315
+ fn=process_video,
316
+ inputs=[url_input, cookies_input],
317
+ outputs=[transcript_output, stock_info_output, status_output],
318
+ show_progress=True
319
+ )
320
+
321
+ # Example section
322
+ gr.Markdown("### πŸ“‹ Example URLs (Replace with actual financial videos)")
323
+ gr.Examples(
324
+ examples=[
325
+ ["https://www.youtube.com/watch?v=dQw4w9WgXcQ"],
326
+ ],
327
+ inputs=[url_input],
328
+ label="Click to try example"
329
+ )
330
+
331
+ gr.Markdown("""
332
+ ### πŸ”§ Troubleshooting:
333
+ - **Bot Detection Error**: Upload your cookies.txt file
334
+ - **No Audio Found**: Check if video has audio track
335
+ - **Transcription Failed**: Video might be too long or audio quality poor
336
+ - **No Stock Info**: Video might not contain financial content
337
+ """)
338
+
339
  if __name__ == "__main__":
340
+ demo.launch()