acecalisto3 committed on
Commit c8ff505 · verified · 1 Parent(s): 874379f

Update app2.py

Files changed (1):
  1. app2.py +660 -1190
app2.py CHANGED
@@ -1,1245 +1,715 @@
- import base64
  import gradio as gr
- import hashlib
- import io
- import json
  import logging
- import mimetypes
  import os
- from PIL import Image
- import qrcode  # Setup logging
- import random
- import re
- import requests
- import tempfile
  import time
- import validators
- import zipfile
- import zxing
- from bs4 import BeautifulSoup
- from cleantext import clean
  from datetime import datetime
- from fake_useragent import UserAgent
- from selenium import webdriver
  from pathlib import Path
- from typing import List, Dict, Optional, Union, Any
- from urllib.parse import urlparse
-
- # Configure logging
- import logging
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
- logger = logging.getLogger('App')
-
- # URLProcessor class
- # ===================
- class URLProcessor:
-     """Class to handle URL processing with advanced features"""
-     def __init__(self, request_delay: float = 1.0, timeout: int = 30, max_retries: int = 3, respect_robots: bool = True):
-         self.request_delay = request_delay
-         self.timeout = timeout
-         self.max_retries = max_retries
-         self.respect_robots = respect_robots
-         self.rate_limits = {}  # Domain -> (last_access_time, count)
-
-         # Initialize session with rotating user agents
-         self.session = requests.Session()
-         self.update_user_agent()

-         # Selenium driver (lazy initialization)
-         self._driver = None
-
-     def update_user_agent(self):
-         """Rotate user agent to avoid detection"""
-         user_agents = [
-             'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36',
-             'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Safari/605.1.15',
-             'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
-             'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0'
-         ]
-         self.session.headers.update({
-             'User-Agent': random.choice(user_agents),
-             'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
-             'Accept-Language': 'en-US,en;q=0.5',
-             'Connection': 'keep-alive',
-             'Upgrade-Insecure-Requests': '1',
-             'Pragma': 'no-cache',
-             'Cache-Control': 'no-cache',
-         })

-     def get_selenium_driver(self):
-         """Get or create Selenium WebDriver with proper settings"""
-         if self._driver is not None:
-             return self._driver
-
-         try:
-             from selenium.webdriver.chrome.options import Options
-             from selenium.webdriver.chrome.service import Service
-             from webdriver_manager.chrome import ChromeDriverManager
-
-             options = Options()
-             options.add_argument('--headless')
-             options.add_argument('--no-sandbox')
-             options.add_argument('--disable-dev-shm-usage')
-             options.add_argument('--disable-gpu')
-             options.add_argument('--window-size=1920,1080')
-             options.add_argument(f'user-agent={self.session.headers["User-Agent"]}')
-
-             service = Service(ChromeDriverManager().install())
-             self._driver = webdriver.Chrome(service=service, options=options)
-             return self._driver
-         except Exception as e:
-             logger.error(f"Failed to initialize Selenium: {e}")
-             return None
-
-     def close(self):
-         """Close resources"""
-         if self._driver is not None:
-             self._driver.quit()
-             self._driver = None
-
-     def handle_rate_limits(self, url: str):
-         """Implement rate limiting per domain"""
-         parsed_url = urlparse(url)
-         parsed_domain = parsed_url.netloc

-         current_time = time.time()
-         if parsed_domain in self.rate_limits:
-             last_access, count = self.rate_limits[parsed_domain]
-
-             # Determine appropriate delay based on domain
-             min_delay = self.request_delay
-             if "linkedin.com" in parsed_domain:
-                 min_delay = 5.0  # LinkedIn is sensitive to scraping
-             elif "gov" in parsed_domain:
-                 min_delay = 2.0  # Be respectful with government sites
-             else:
-                 min_delay = self.request_delay
-
-             # Exponential backoff if we're making many requests
-             if count > 10:
-                 min_delay *= 2
-
-             # Wait if needed
-             elapsed = current_time - last_access
-             if elapsed < min_delay:
-                 time.sleep(min_delay - elapsed)
-
-             # Update count
-             self.rate_limits[parsed_domain] = (time.time(), count + 1)
-         else:
-             # First time accessing this domain
-             self.rate_limits[parsed_domain] = (current_time, 1)

-     def handle_interactive_site(self, url):
-         """Handle sites that require interaction to bypass blocks"""
-         driver = self.get_selenium_driver()
-         if not driver:
-             return None
-
          try:
-             driver.get(url)
-
-             # Wait for page to load
-             import time
-             time.sleep(3)
-
-             # Handle different types of sites
-             if "facebook.com" in url or "instagram.com" in url:
-                 self._handle_social_media_site(driver)
-             elif "google.com" in url:
-                 self._handle_google_site(driver)
-
-             # Get the page source after interaction
-             page_source = driver.page_source
-
-             return {
-                 'content': page_source,
-                 'content_type': 'text/html',
-                 'url': url,
-                 'title': driver.title
-             }
-         except Exception as e:
-             logger.error(f"Error handling interactive site {url}: {e}")
-             return None
-
-     def _handle_social_media_site(self, driver):
-         """Handle Facebook/Instagram login walls"""
-         from selenium.webdriver.common.by import By
-         from selenium.webdriver.common.keys import Keys
-         from selenium.webdriver.support.ui import WebDriverWait
-         from selenium.webdriver.support import expected_conditions as EC
-
-         try:
-             # Try to find and close login popups
-             close_buttons = driver.find_elements(By.XPATH, "//button[contains(@aria-label, 'Close')]")
-             if close_buttons:
-                 close_buttons[0].click()
-                 time.sleep(1)
-
-             # Press ESC key to dismiss popups
-             webdriver.ActionChains(driver).send_keys(Keys.ESCAPE).perform()
-             time.sleep(1)
-
-             # Scroll down to load more content
-             driver.execute_script("window.scrollTo(0, document.body.scrollHeight/2);")
-             time.sleep(2)
          except Exception as e:
-             logger.warning(f"Error handling social media site: {e}")

-     def _handle_google_site(self, driver):
-         """Handle Google authentication and consent pages"""
-         from selenium.webdriver.common.by import By
-
          try:
-             # Look for consent buttons
-             consent_buttons = driver.find_elements(By.XPATH, "//button[contains(text(), 'Accept all')]")
-             if consent_buttons:
-                 consent_buttons[0].click()
-                 time.sleep(1)

-             # Look for "I agree" buttons
-             agree_buttons = driver.find_elements(By.XPATH, "//button[contains(text(), 'I agree')]")
-             if agree_buttons:
-                 agree_buttons[0].click()
-                 time.sleep(1)
          except Exception as e:
-             logger.warning(f"Error handling Google site: {e}")
-
-     def check_robots_txt(self, url: str) -> bool:
-         """Check if URL is allowed by robots.txt"""
-         if not self.respect_robots:
-             return True
-
-         try:
-             from urllib.parse import urlparse
-             from urllib.robotparser import RobotFileParser
-
-             parsed_url = urlparse(url)
-             robots_url = f"{parsed_url.scheme}://{parsed_url.netloc}/robots.txt"
-
-             rp = RobotFileParser()
-             rp.set_url(robots_url)
-             rp.read()
-
-             return rp.can_fetch(self.session.headers['User-Agent'], url)
-         except Exception as e:
-             logger.warning(f"Error checking robots.txt: {e}")
-             return True
-
-     def fetch_content(self, url: str) -> Optional[Dict]:
-         """Universal content fetcher with special case handling"""
-         try:
-             if 'drive.google.com' in url:
-                 return self._handle_google_drive(url)
-             if 'calendar.google.com' in url and 'ical' in url:
-                 return self._handle_google_calendar(url)
-             return self._fetch_html_content(url)
-         except Exception as e:
-             logger.error(f"Content fetch failed: {e}")
-             return None
-
-     def _handle_google_drive(self, url: str) -> Optional[Dict]:
-         """Process Google Drive file links"""
-         try:
-             file_id = re.search(r'/file/d/([a-zA-Z0-9_-]+)', url)
-             if not file_id:
-                 logger.error(f"Invalid Google Drive URL: {url}")
-                 return None
-
-             direct_url = f"https://drive.google.com/uc?export=download&id={file_id.group(1)}"
-             response = self.session.get(direct_url, timeout=self.timeout)
-             response.raise_for_status()
-
-             return {
-                 'content': response.text,
-                 'content_type': response.headers.get('Content-Type', ''),
-                 'timestamp': datetime.now().isoformat()
-             }
-         except Exception as e:
-             logger.error(f"Google Drive processing failed: {e}")
-             return None
-
-     def _handle_google_calendar(self, url: str) -> Optional[Dict]:
-         """Process Google Calendar ICS feeds"""
          try:
-             response = self.session.get(url, timeout=self.timeout)
-             response.raise_for_status()
-             return {
-                 'content': response.text,
-                 'content_type': 'text/calendar',
-                 'timestamp': datetime.now().isoformat()
-             }
-         except Exception as e:
-             logger.error(f"Calendar fetch failed: {e}")
-             return None
-
-     def _fetch_html_content(self, url: str) -> Optional[Dict]:
-         """Enhanced HTML content processing to extract everything"""
-         try:
-             response = self.session.get(url, timeout=self.timeout)
-             response.raise_for_status()

-             # Store the original HTML
-             original_html = response.text

-             # Parse with BeautifulSoup
-             soup = BeautifulSoup(response.text, 'html.parser')

-             # Extract all text content
-             text_content = soup.get_text(separator='\n', strip=True)

-             # Extract all links
-             links = []
-             for link in soup.find_all('a', href=True):
-                 href = link['href']
-                 # Convert relative URLs to absolute
-                 if href.startswith('/'):
-                     from urllib.parse import urlparse, urljoin
-                     parsed_url = urlparse(url)
-                     base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
-                     href = urljoin(base_url, href)
-
-                 link_text = link.get_text(strip=True)
-                 links.append({
-                     'url': href,
-                     'text': link_text if link_text else '[No text]'
-                 })

-             # Extract all images
-             images = []
-             for img in soup.find_all('img', src=True):
-                 src = img['src']
-                 # Convert relative URLs to absolute
-                 if src.startswith('/'):
-                     from urllib.parse import urlparse, urljoin
-                     parsed_url = urlparse(url)
-                     base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
-                     src = urljoin(base_url, src)
-
-                 alt_text = img.get('alt', '')
-                 images.append({
-                     'src': src,
-                     'alt': alt_text if alt_text else '[No alt text]'
-                 })

-             # Extract all scripts
-             scripts = []
-             for script in soup.find_all('script'):
-                 script_content = script.string
-                 if script_content:
-                     scripts.append(script_content)

-             # Extract all styles
-             styles = []
-             for style in soup.find_all('style'):
-                 style_content = style.string
-                 if style_content:
-                     styles.append(style_content)

-             # Extract metadata
-             metadata = {}
-             for meta in soup.find_all('meta'):
-                 if meta.get('name') and meta.get('content'):
-                     metadata[meta['name']] = meta['content']
-                 elif meta.get('property') and meta.get('content'):
-                     metadata[meta['property']] = meta['content']

-             # Extract title
-             title = soup.title.string if soup.title else ''

-             # Return comprehensive data
-             return {
-                 'url': url,
-                 'title': title,
-                 'metadata': metadata,
-                 'content': text_content,
-                 'html': original_html,
-                 'links': links,
-                 'images': images,
-                 'scripts': scripts,
-                 'styles': styles,
-                 'content_type': response.headers.get('Content-Type', ''),
-                 'timestamp': datetime.now().isoformat()
-             }
          except Exception as e:
-             logger.error(f"HTML processing failed: {e}")
-             return None
-
-     def advanced_text_cleaning(self, text: str) -> str:
-         """Robust text cleaning with version compatibility"""
-         try:
-             # Try to use cleantext if available
-             import importlib.util
-             if importlib.util.find_spec("cleantext") is not None:
-                 from cleantext import clean
-                 cleaned_text = clean(
-                     text,
-                     fix_unicode=True,
-                     to_ascii=True,
-                     lower=True,
-                     no_line_breaks=True,
-                     no_urls=True,
-                     no_emails=True,
-                     no_phone_numbers=True,
-                     no_numbers=False,
-                     no_digits=False,
-                     no_currency_symbols=True,
-                     no_punct=False
-                 ).strip()
-                 return cleaned_text
-             else:
-                 # Fallback cleaning
-                 text = re.sub(r'[\x00-\x1F\x7F-\x9F]', '', text)
-                 text = text.encode('ascii', 'ignore').decode('ascii')
-                 text = re.sub(r'\s+', ' ', text)
-                 return text.strip()
-         except Exception as e:
-             logger.warning(f"Text cleaning error: {e}")
-             return text.strip() if text else ""

-     def process_urls(self, urls: List[str], mode: str = 'basic') -> List[Dict]:
-         """Process a list of URLs with different modes"""
-         results = []

-         for url in urls:
-             # Validate URL
-             if not validators.url(url):
-                 results.append({
-                     'url': url,
-                     'error': 'Invalid URL format',
-                     'timestamp': datetime.now().isoformat()
-                 })
-                 continue
-
-             # Check robots.txt
-             if not self.check_robots_txt(url):
-                 results.append({
-                     'url': url,
-                     'error': 'Access disallowed by robots.txt',
-                     'timestamp': datetime.now().isoformat()
-                 })
-                 continue
-
-             # Apply rate limiting
-             self.handle_rate_limits(url)
-
-             # Process based on mode
-             try:
-                 if mode == 'basic':
-                     content = self.fetch_content(url)
-                     if content:
-                         results.append(content)
-                     else:
-                         results.append({
-                             'url': url,
-                             'error': 'Failed to fetch content',
-                             'timestamp': datetime.now().isoformat()
-                         })
-
-                 elif mode == 'interactive':
-                     content = self.handle_interactive_site(url)
-                     if content:
-                         results.append(content)
-                     else:
-                         # Fallback to basic mode
-                         content = self.fetch_content(url)
-                         if content:
-                             results.append(content)
-                         else:
-                             results.append({
-                                 'url': url,
-                                 'error': 'Failed to fetch content in interactive mode',
-                                 'timestamp': datetime.now().isoformat()
-                             })
-
-                 elif mode == 'deep':
-                     # Deep mode: get main content and follow some links
-                     main_content = self.fetch_content(url)
-                     if not main_content:
-                         results.append({
-                             'url': url,
-                             'error': 'Failed to fetch main content',
-                             'timestamp': datetime.now().isoformat()
-                         })
-                         continue
-
-                     results.append(main_content)
-
-                     # Follow up to 5 links from the main page
-                     if 'links' in main_content and main_content['links']:
-                         followed_count = 0
-                         for link_data in main_content['links'][:10]:  # Consider first 10 links
-                             link_url = link_data['url']
-
-                             # Skip external links and non-http(s) links
-                             if not link_url.startswith(('http://', 'https://')):
-                                 continue
-
-                             # Skip if not same domain
-                             main_domain = urlparse(url).netloc
-                             link_domain = urlparse(link_url).netloc
-                             if main_domain != link_domain:
-                                 continue
-
-                             # Apply rate limiting
-                             self.handle_rate_limits(link_url)
-
-                             # Fetch the linked content
-                             link_content = self.fetch_content(link_url)
-                             if link_content:
-                                 results.append(link_content)
-                                 followed_count += 1
-
-                             # Limit to 5 followed links
-                             if followed_count >= 5:
-                                 break
-
-             except Exception as e:
-                 logger.error(f"Error processing URL {url}: {e}")
-                 results.append({
-                     'url': url,
-                     'error': f"Processing error: {str(e)}",
-                     'timestamp': datetime.now().isoformat()
-                 })
-
507
- # FileProcessor class
508
- # ===================
509
- class FileProcessor:
510
- """Class to handle file processing with enhanced capabilities"""
511
 
512
- def __init__(self, max_file_size: int = 2 * 1024 * 1024 * 1024): # 2GB default
513
- self.max_file_size = max_file_size
514
- self.supported_text_extensions = {'.txt', '.md', '.csv', '.json', '.xml', '.html', '.htm', '.js', '.css', '.py', '.java', '.c', '.cpp', '.h', '.rb', '.php', '.sql', '.yaml', '.yml', '.ini', '.cfg', '.conf', '.log', '.sh', '.bat', '.ps1'}
515
- self.supported_binary_extensions = {'.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx', '.zip', '.tar', '.gz', '.rar', '.7z', '.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.mp3', '.mp4', '.avi', '.mov', '.wmv', '.flv', '.wav', '.ogg'}
516
 
517
- def is_text_file(self, filepath: str) -> bool:
518
- """Check if file is a text file"""
519
- try:
520
- mime_type, _ = mimetypes.guess_type(filepath)
521
- ext = os.path.splitext(filepath)[1].lower()
 
 
 
 
 
522
 
523
- # Check by extension first
524
- if ext in self.supported_text_extensions:
525
- return True
526
-
527
- # Then check by mime type
528
- if mime_type and mime_type.startswith('text/'):
529
- return True
530
-
531
- # Try to read the file as text
532
- if os.path.exists(filepath) and os.path.getsize(filepath) < 1024 * 1024: # Only try for files < 1MB
533
- try:
534
- with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
535
- sample = f.read(1024) # Read first 1KB
536
- # Check if it's mostly printable ASCII
537
- printable_ratio = sum(c.isprintable() for c in sample) / len(sample) if sample else 0
538
- return printable_ratio > 0.8
539
- except Exception:
540
- pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
541
 
542
- return False
543
- except Exception as e:
544
- logger.error(f"Error checking if file is text: {e}")
545
- return False
546
-
547
- def process_file(self, file) -> List[Dict]:
548
- """Process uploaded file with enhanced error handling and binary support"""
549
- if not file:
550
- return [{"error": "No file provided"}]
551
-
552
- dataset = []
553
- try:
554
- file_size = os.path.getsize(file.name)
555
- if file_size > self.max_file_size:
556
- logger.warning(f"File size ({file_size} bytes) exceeds maximum allowed size")
557
- return [{"error": f"File size ({file_size} bytes) exceeds maximum allowed size of {self.max_file_size} bytes"}]
558
-
559
- with tempfile.TemporaryDirectory() as temp_dir:
560
- # Check if it's an archive file
561
- if zipfile.is_zipfile(file.name):
562
- dataset.extend(self._process_zip_file(file.name, temp_dir))
563
- elif file.name.endswith('.tar.gz') or file.name.endswith('.tgz'):
564
- dataset.extend(self._process_tar_file(file.name, temp_dir))
565
- elif file.name.endswith('.rar'):
566
- dataset.extend(self._process_rar_file(file.name, temp_dir))
567
- elif file.name.endswith('.7z'):
568
- dataset.extend(self._process_7z_file(file.name, temp_dir))
569
- # Check if it's a document file
570
- elif file.name.endswith(('.doc', '.docx')):
571
- dataset.extend(self._process_word_file(file.name))
572
- elif file.name.endswith(('.xls', '.xlsx')):
573
- dataset.extend(self._process_excel_file(file.name))
574
- elif file.name.endswith(('.ppt', '.pptx')):
575
- dataset.extend(self._process_powerpoint_file(file.name))
576
- elif file.name.endswith('.pdf'):
577
- dataset.extend(self._process_pdf_file(file.name))
578
- # Check if it's an image file
579
- elif file.name.endswith(('.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff')):
580
- dataset.extend(self._process_image_file(file.name))
581
- # Check if it's an audio/video file
582
- elif file.name.endswith(('.mp3', '.wav', '.ogg', '.mp4', '.avi', '.mov', '.wmv', '.flv')):
583
- dataset.extend(self._process_media_file(file.name))
584
- # Default to text file processing
585
- else:
586
- dataset.extend(self._process_single_file(file))
587
-
588
- if not dataset:
589
- return [{"warning": "No extractable content found in the file"}]
590
-
591
- except Exception as e:
592
- logger.error(f"Error processing file: {str(e)}")
593
- return [{"error": f"Error processing file: {str(e)}"}]
594
-
595
- return dataset
596
-
597
- def _process_zip_file(self, zip_path: str, temp_dir: str) -> List[Dict]:
598
- """Process ZIP file contents with enhanced extraction"""
599
- results = []
600
- try:
601
- with zipfile.ZipFile(zip_path, 'r') as zip_ref:
602
- # Get file list first
603
- file_list = zip_ref.namelist()
604
- total_files = len(file_list)
605
-
606
- # Extract all files
607
- zip_ref.extractall(temp_dir)
608
-
609
- # Process each file
610
- processed_count = 0
611
- for root, dirs, files in os.walk(temp_dir):
612
- for filename in files:
613
- filepath = os.path.join(root, filename)
614
- rel_path = os.path.relpath(filepath, temp_dir)
615
 
616
- # Get file info from zip
617
- try:
618
- zip_info = zip_ref.getinfo(rel_path.replace('\\', '/'))
619
- file_size = zip_info.file_size
620
- compressed_size = zip_info.compress_size
621
- compression_ratio = (1 - compressed_size / file_size) * 100 if file_size > 0 else 0
622
- except Exception:
623
- file_size = os.path.getsize(filepath)
624
- compressed_size = None
625
- compression_ratio = None
626
 
627
- # Process based on file type
628
- if self.is_text_file(filepath):
629
- try:
630
- with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
631
- content = f.read()
632
-
633
- results.append({
634
- "source": "zip",
635
- "archive": os.path.basename(zip_path),
636
- "filename": filename,
637
- "path": rel_path,
638
- "size": file_size,
639
- "compressed_size": compressed_size,
640
- "compression_ratio": f"{compression_ratio:.2f}%" if compression_ratio is not None else None,
641
- "content": content,
642
- "timestamp": datetime.now().isoformat()
643
- })
644
- processed_count += 1
645
- except Exception as e:
646
- logger.error(f"Error reading file {filename}: {str(e)}")
647
- else:
648
- # For binary files, just record metadata
649
- mime_type, _ = mimetypes.guess_type(filepath)
650
- results.append({
651
- "source": "zip",
652
- "archive": os.path.basename(zip_path),
653
- "filename": filename,
654
- "path": rel_path,
655
- "size": file_size,
656
- "compressed_size": compressed_size,
657
- "compression_ratio": f"{compression_ratio:.2f}%" if compression_ratio is not None else None,
658
- "mime_type": mime_type,
659
- "content": f"[Binary file: {mime_type or 'unknown type'}]",
660
- "timestamp": datetime.now().isoformat()
661
- })
662
- processed_count += 1
663
-
664
- # Add summary
665
- results.append({
666
- "source": "zip_summary",
667
- "archive": os.path.basename(zip_path),
668
- "total_files": total_files,
669
- "processed_files": processed_count,
670
- "timestamp": datetime.now().isoformat()
671
- })
672
-
673
- except Exception as e:
674
- logger.error(f"Error processing ZIP file: {str(e)}")
675
- results.append({"error": f"Error processing ZIP file: {str(e)}"})
676
-
677
- return results
678
-
679
- def _process_tar_file(self, tar_path: str, temp_dir: str) -> List[Dict]:
680
- """Process TAR/GZ file contents"""
681
- results = []
682
- try:
683
- import tarfile
684
- with tarfile.open(tar_path, 'r:*') as tar:
685
- # Get file list
686
- file_list = tar.getnames()
687
- total_files = len(file_list)
688
-
689
- # Extract all files
690
- tar.extractall(temp_dir)
691
-
692
- # Process each file
693
- processed_count = 0
694
- for root, dirs, files in os.walk(temp_dir):
695
- for filename in files:
696
- filepath = os.path.join(root, filename)
697
- rel_path = os.path.relpath(filepath, temp_dir)
698
 
699
- # Process based on file type
700
- if self.is_text_file(filepath):
701
- try:
702
- with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
703
- content = f.read()
704
 
705
- results.append({
706
- "source": "tar",
707
- "archive": os.path.basename(tar_path),
708
- "filename": filename,
709
- "path": rel_path,
710
- "size": os.path.getsize(filepath),
711
- "content": content,
712
- "timestamp": datetime.now().isoformat()
713
- })
714
- processed_count += 1
715
- except Exception as e:
716
- logger.error(f"Error reading file {filename}: {str(e)}")
717
- else:
718
- # For binary files, just record metadata
719
- mime_type, _ = mimetypes.guess_type(filepath)
720
- results.append({
721
- "source": "tar",
722
- "archive": os.path.basename(tar_path),
723
- "filename": filename,
724
- "path": rel_path,
725
- "size": os.path.getsize(filepath),
726
- "mime_type": mime_type,
727
- "content": f"[Binary file: {mime_type or 'unknown type'}]",
728
- "timestamp": datetime.now().isoformat()
729
- })
730
- processed_count += 1
731
-
732
- # Add summary
733
- results.append({
734
- "source": "tar_summary",
735
- "archive": os.path.basename(tar_path),
736
- "total_files": total_files,
737
- "processed_files": processed_count,
738
- "timestamp": datetime.now().isoformat()
739
- })
740
-
741
- except Exception as e:
742
- logger.error(f"Error processing TAR file: {str(e)}")
743
- results.append({"error": f"Error processing TAR file: {str(e)}"})
744
-
745
- return results
746
-
747
- def _process_single_file(self, file) -> List[Dict]:
748
- """Process a single file with enhanced metadata extraction"""
749
- try:
750
- file_stat = os.stat(file.name)
751
- file_path = file.name
752
- filename = os.path.basename(file_path)
753
- mime_type, _ = mimetypes.guess_type(file_path)
754
-
755
- # For text files
756
- if self.is_text_file(file_path):
757
- if file_stat.st_size > 100 * 1024 * 1024: # 100MB
758
- logger.info(f"Processing large file: {file_path} ({file_stat.st_size} bytes)")
759
 
760
- content = ""
761
- with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
762
- content = f.read(1 * 1024 * 1024) # First 1MB
763
- content += "\n...[Content truncated due to large file size]...\n"
 
764
 
765
- f.seek(max(0, file_stat.st_size - 1 * 1024 * 1024))
766
- content += f.read() # Last 1MB
767
- else:
768
- with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
769
- content = f.read()
770
-
771
- return [{
772
- 'source': 'file',
773
- 'filename': filename,
774
- 'file_size': file_stat.st_size,
775
- 'mime_type': mime_type,
776
- 'created': datetime.fromtimestamp(file_stat.st_ctime).isoformat(),
777
- 'modified': datetime.fromtimestamp(file_stat.st_mtime).isoformat(),
778
- 'content': content,
779
- 'timestamp': datetime.now().isoformat()
780
- }]
781
- else:
782
- # For binary files, extract metadata and try specialized extraction
783
- if file_path.endswith(('.pdf', '.doc', '.docx')):
784
- return self._process_document_file(file_path)
785
- elif file_path.endswith(('.jpg', '.jpeg', '.png', '.gif', '.bmp')):
786
- return self._process_image_file(file_path)
787
- elif file_path.endswith(('.mp3', '.wav', '.ogg', '.mp4', '.avi', '.mov')):
788
- return self._process_media_file(file_path)
789
- else:
790
- # Generic binary file handling
791
- return [{
792
- 'source': 'binary_file',
793
- 'filename': filename,
794
- 'file_size': file_stat.st_size,
795
- 'mime_type': mime_type,
796
- 'created': datetime.fromtimestamp(file_stat.st_ctime).isoformat(),
797
- 'modified': datetime.fromtimestamp(file_stat.st_mtime).isoformat(),
798
- 'content': f"[Binary file: {mime_type or 'unknown type'}]",
799
- 'timestamp': datetime.now().isoformat()
800
- }]
801
- except Exception as e:
802
- logger.error(f"File processing error: {e}")
803
- return [{
804
- 'source': 'error',
805
- 'filename': os.path.basename(file.name) if file else 'unknown',
806
- 'error': str(e),
807
- 'timestamp': datetime.now().isoformat()
808
- }]
809
-
810
- def _process_pdf_file(self, file_path: str) -> List[Dict]:
811
- """Extract text from PDF files"""
812
- try:
813
- # Try to import PyPDF2 module
814
- import importlib.util
815
- if importlib.util.find_spec("PyPDF2") is None:
816
- return [{
817
- "error": "PDF processing requires the 'PyPDF2' module. Install with 'pip install PyPDF2'."
818
- }]
819
-
820
- import PyPDF2
821
-
822
- with open(file_path, 'rb') as file:
823
- reader = PyPDF2.PdfReader(file)
824
- num_pages = len(reader.pages)
825
-
826
- # Extract text from each page
827
- all_text = ""
828
- page_texts = []
829
-
830
- for i in range(num_pages):
831
- page = reader.pages[i]
832
- text = page.extract_text()
833
- all_text += text + "\n\n"
834
- page_texts.append({
835
- "page_number": i + 1,
836
- "content": text
837
- })
838
-
839
- # Get file metadata
840
- file_stat = os.stat(file_path)
841
-
842
- return [{
843
- "source": "pdf",
844
- "filename": os.path.basename(file_path),
845
- "file_size": file_stat.st_size,
846
- "mime_type": "application/pdf",
847
- "created": datetime.fromtimestamp(file_stat.st_ctime).isoformat(),
848
- "modified": datetime.fromtimestamp(file_stat.st_mtime).isoformat(),
849
- "num_pages": num_pages,
850
- "content": all_text,
851
- "pages": page_texts,
852
- "timestamp": datetime.now().isoformat()
853
- }]
854
- except Exception as e:
855
- logger.error(f"Error processing PDF file: {str(e)}")
856
- return [{
857
- "source": "error",
858
- "filename": os.path.basename(file_path),
859
- "error": f"Error processing PDF file: {str(e)}",
860
- "timestamp": datetime.now().isoformat()
861
- }]
862
-
863
- def _process_image_file(self, file_path: str) -> List[Dict]:
864
- """Extract metadata and attempt OCR on image files"""
865
- try:
866
- # Try to import PIL module
867
- import importlib.util
868
- if importlib.util.find_spec("PIL") is None:
869
- return [{
870
- "error": "Image processing requires the 'Pillow' module. Install with 'pip install Pillow'."
871
- }]
872
-
873
- from PIL import Image
874
-
875
- # Open image and get basic metadata
876
- with Image.open(file_path) as img:
877
- width, height = img.size
878
- format_name = img.format
879
- mode = img.mode
880
-
881
- # Extract EXIF data if available
882
- exif_data = {}
883
- if hasattr(img, '_getexif') and img._getexif():
884
- exif = img._getexif()
885
- if exif:
886
- for tag_id, value in exif.items():
887
- tag_name = f"tag_{tag_id}"
888
- exif_data[tag_name] = str(value)
889
-
890
- # Try OCR if pytesseract is available
891
- ocr_text = None
892
- if importlib.util.find_spec("pytesseract") is not None:
893
- try:
894
- import pytesseract
895
- ocr_text = pytesseract.image_to_string(img)
896
- except Exception as e:
897
- logger.warning(f"OCR failed: {e}")
898
-
899
- # Get file metadata
900
- file_stat = os.stat(file_path)
901
-
902
- return [{
903
- "source": "image",
904
- "filename": os.path.basename(file_path),
905
- "file_size": file_stat.st_size,
906
- "mime_type": f"image/{format_name.lower()}" if format_name else "image/unknown",
907
- "created": datetime.fromtimestamp(file_stat.st_ctime).isoformat(),
908
- "modified": datetime.fromtimestamp(file_stat.st_mtime).isoformat(),
909
- "width": width,
910
- "height": height,
911
- "format": format_name,
912
- "mode": mode,
913
- "exif": exif_data,
914
- "ocr_text": ocr_text,
915
- "content": ocr_text if ocr_text else f"[Image: {width}x{height} {format_name}]",
916
- "timestamp": datetime.now().isoformat()
917
- }]
918
- except Exception as e:
919
- logger.error(f"Error processing image file: {str(e)}")
920
- return [{
921
- "source": "error",
922
- "filename": os.path.basename(file_path),
923
- "error": f"Error processing image file: {str(e)}",
924
- "timestamp": datetime.now().isoformat()
925
- }]
926
-
927
- def _process_media_file(self, file_path: str) -> List[Dict]:
928
- """Extract metadata from audio/video files"""
929
- try:
930
- # Try to import mutagen module
931
- import importlib.util
932
- if importlib.util.find_spec("mutagen") is None:
933
- return [{
934
- "error": "Media processing requires the 'mutagen' module. Install with 'pip install mutagen'."
935
- }]
936
-
937
- import mutagen
938
-
939
- # Get file metadata
940
- file_stat = os.stat(file_path)
941
- mime_type, _ = mimetypes.guess_type(file_path)
942
-
943
- # Extract media metadata
944
- media_info = mutagen.File(file_path)
945
-
946
- metadata = {}
947
- if media_info:
948
- # Extract common metadata
949
- if hasattr(media_info, 'info') and hasattr(media_info.info, 'length'):
950
- metadata['duration'] = media_info.info.length
951
-
952
- # Extract tags
953
- for key, value in media_info.items():
954
- if isinstance(value, list) and len(value) == 1:
955
- metadata[key] = str(value[0])
956
- else:
957
- metadata[key] = str(value)
958
-
959
- return [{
960
- "source": "media",
961
- "filename": os.path.basename(file_path),
962
- "file_size": file_stat.st_size,
963
- "mime_type": mime_type,
964
- "created": datetime.fromtimestamp(file_stat.st_ctime).isoformat(),
965
- "modified": datetime.fromtimestamp(file_stat.st_mtime).isoformat(),
966
- "metadata": metadata,
967
- "content": f"[Media file: {mime_type or 'unknown type'}]",
968
- "timestamp": datetime.now().isoformat()
969
- }]
970
- except Exception as e:
971
- logger.error(f"Error processing media file: {str(e)}")
972
- return [{
973
- "source": "error",
974
- "filename": os.path.basename(file_path),
975
- "error": f"Error processing media file: {str(e)}",
976
- "timestamp": datetime.now().isoformat()
977
- }]
-
- # QRProcessor class
- # =================
- class QRProcessor:
-     """Class to handle QR code processing"""
-
-     def __init__(self):
-         # Check for required libraries
-         self._check_dependencies()
-
-     def _check_dependencies(self):
-         """Check if required libraries are installed"""
-         try:
-             import importlib.util
-
-             # Check for pyzbar
-             if importlib.util.find_spec("pyzbar") is None:
-                 logger.warning("pyzbar library not found. QR code detection will not work. Install with 'pip install pyzbar'")
-
-             # Check for qrcode
-             if importlib.util.find_spec("qrcode") is None:
-                 logger.warning("qrcode library not found. QR code generation will not work. Install with 'pip install qrcode'")
-
-         except ImportError as e:
-             logger.error(f"Error checking dependencies: {e}")
-
-     def detect_qr_codes(self, image_path: str) -> List[Dict]:
-         """Detect QR codes in an image"""
-         try:
-             import importlib.util
-             if importlib.util.find_spec("pyzbar") is None:
-                 return [{"error": "pyzbar library not found. Install with 'pip install pyzbar'"}]
-
-             from pyzbar.pyzbar import decode
-             from PIL import Image
-
-             # Open the image
-             image = Image.open(image_path)
-
-             # Decode QR codes
-             decoded_objects = decode(image)
-
-             results = []
-             for obj in decoded_objects:
-                 # Get the bounding box
-                 rect = obj.rect
-                 bbox = {
-                     'left': rect.left,
-                     'top': rect.top,
-                     'width': rect.width,
-                     'height': rect.height
-                 }
-
-                 # Get the data
-                 data = obj.data.decode('utf-8', errors='replace')
-
-                 # Get the type
-                 qr_type = obj.type
-
-                 results.append({
-                     'type': qr_type,
-                     'data': data,
-                     'bbox': bbox,
-                     'timestamp': datetime.now().isoformat()
-                 })
-
-             if not results:
-                 results.append({
-                     'warning': 'No QR codes detected in the image',
-                     'timestamp': datetime.now().isoformat()
-                 })
-
-             return results
-
-         except Exception as e:
-             logger.error(f"Error detecting QR codes: {e}")
-             return [{"error": f"Error detecting QR codes: {str(e)}"}]
-
-     def generate_qr_code(self, data: str, output_path: Optional[str] = None, size: int = 10) -> Dict:
-         """Generate a QR code from data"""
-         try:
-             import importlib.util
-             if importlib.util.find_spec("qrcode") is None:
-                 return {"error": "qrcode library not found. Install with 'pip install qrcode'"}
-
-             import qrcode
-
-             # Create QR code instance
-             qr = qrcode.QRCode(
-                 version=1,
-                 error_correction=qrcode.constants.ERROR_CORRECT_L,
-                 box_size=size,
-                 border=4,
              )

-             # Add data
-             qr.add_data(data)
-             qr.make(fit=True)
-
-             # Create an image from the QR Code instance
-             img = qr.make_image(fill_color="black", back_color="white")
-
-             # Save the image if output path is provided
-             if output_path:
-                 img.save(output_path)
-                 return {
-                     'success': True,
-                     'data': data,
-                     'output_path': output_path,
-                     'timestamp': datetime.now().isoformat()
-                 }
-             else:
-                 # Save to a temporary file
-                 with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp:
-                     temp_path = tmp.name
-                     img.save(temp_path)
-                 return {
-                     'success': True,
-                     'data': data,
-                     'output_path': temp_path,
-                     'timestamp': datetime.now().isoformat()
-                 }
-
-         except Exception as e:
-             logger.error(f"Error generating QR code: {e}")
-             return {"error": f"Error generating QR code: {str(e)}"}
-
-     def extract_qr_from_url(self, url_processor, url: str) -> List[Dict]:
-         """Extract QR codes from an image URL"""
-         try:
-             # Fetch the image from the URL
-             response = url_processor.session.get(url, stream=True)
-             response.raise_for_status()
-
-             # Save to a temporary file
-             with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp:
-                 temp_path = tmp.name
-                 for chunk in response.iter_content(chunk_size=128):
-                     tmp.write(chunk)
-
-             # Process the image
-             results = self.detect_qr_codes(temp_path)

-             # Add source information
-             for result in results:
-                 result['source_url'] = url

-             # Clean up
-             os.unlink(temp_path)

-             return results

-         except Exception as e:
-             logger.error(f"Error extracting QR from URL: {e}")
-             return [{"error": f"Error extracting QR from URL: {str(e)}"}]
-
-     def batch_process_images(self, image_paths: List[str]) -> Dict[str, List[Dict]]:
-         """Process multiple images for QR codes"""
-         results = {}
-
-         for image_path in image_paths:
-             try:
-                 if os.path.exists(image_path):
-                     image_results = self.detect_qr_codes(image_path)
-                     results[image_path] = image_results
-                 else:
-                     results[image_path] = [{"error": f"Image file not found: {image_path}"}]
-             except Exception as e:
-                 logger.error(f"Error processing image {image_path}: {e}")
-                 results[image_path] = [{"error": f"Processing error: {str(e)}"}]

- def create_interface():
-     """Create a comprehensive Gradio interface with advanced features"""
-     css = """
-     .container { max-width: 1200px; margin: auto; }
-     .warning { background-color: #fff3cd; color: #856404; }
-     .error { background-color: #f8d7da; color: #721c24; }
-     """
-
-     with gr.Blocks(css=css, title="Advanced Text & URL Processor") as interface:
-         gr.Markdown("# 🌐 Advanced URL & Text Processing Toolkit")
-
-         with gr.Tab("URL Processing"):
-             url_input = gr.Textbox(
-                 label="Enter URLs (comma or newline separated)",
-                 lines=5,
-                 placeholder="https://example1.com\nhttps://example2.com"
-             )
-
-         with gr.Tab("File Input"):
-             file_input = gr.File(
-                 label="Upload text file or ZIP archive",
-                 file_types=[".txt", ".zip", ".md", ".csv", ".json", ".xml"]
-             )
-
-         with gr.Tab("Text Input"):
-             text_input = gr.Textbox(
-                 label="Raw Text Input",
-                 lines=5,
-                 placeholder="Paste your text here..."
-             )
-
-         with gr.Tab("JSON Editor"):
-             json_editor = gr.Textbox(
-                 label="JSON Editor",
-                 lines=20,
-                 placeholder="View and edit your JSON data here...",
-                 interactive=True,
-                 elem_id="json-editor"  # Optional: for custom styling
-             )
-
-         with gr.Tab("Scratchpad"):
-             scratchpad = gr.Textbox(
-                 label="Scratchpad",
-                 lines=10,
-                 placeholder="Quick notes or text collections...",
-                 interactive=True
-             )
-
-         process_btn = gr.Button("Process Input", variant="primary")
-         qr_btn = gr.Button("Generate QR Code", variant="secondary")
-
-         output_text = gr.Textbox(label="Processing Results", interactive=False)
-         output_file = gr.File(label="Processed Output")
-         qr_output = gr.Image(label="QR Code", type="filepath")  # To display the generated QR code
-
-         process_btn.click(
-             process_all_inputs,
-             inputs=[url_input, file_input, text_input, scratchpad],
-             outputs=[output_file, output_text, json_editor]  # Update outputs to include JSON editor
-         )
-
-         qr_btn.click(
-             generate_qr_code,
-             inputs=json_editor,
-             outputs=qr_output
-         )
-
-         gr.Markdown("""
-         ### Usage Guidelines
-         - **URL Processing**: Enter valid HTTP/HTTPS URLs
-         - **File Input**: Upload text files or ZIP archives
-         - **Text Input**: Direct text processing
-         - **JSON Editor**: View and edit your JSON data
-         - **Scratchpad**: Quick notes or text collections
-         - Advanced cleaning and validation included
-         """)
-     return interface

  def main():
-     # Configure system settings
-     mimetypes.init()
-
-     # Create and launch interface
-     interface = create_interface()
-
-     # Launch with proper configuration
-     interface.launch(
-         server_name="0.0.0.0",
-         server_port=7860,
-         show_error=True,
-         share=False,
-         inbrowser=True,
-         debug=True
-     )

  if __name__ == "__main__":
      main()

+ """
+ Advanced URL & Text Processing Suite - Professional Edition
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ Enterprise-grade application with advanced features, real-time analytics,
+ parallel processing, and sophisticated UI components.
+ """
+
  import gradio as gr
  import logging
+ import json
  import os
+ import sys
+ import threading
+ import queue
  import time
+ from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
  from datetime import datetime
  from pathlib import Path
+ from typing import Dict, List, Optional, Union, Any, Tuple
+ from dataclasses import dataclass, asdict
+ import numpy as np
+ import pandas as pd
+
+ from url_processor import URLProcessor
+ from file_processor import FileProcessor
+ from qr_processor import QRProcessor
+
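The three module imports above replace the in-file URLProcessor, FileProcessor, and QRProcessor classes deleted from this file; the modules themselves are not part of this diff. A minimal sketch of the surface app2.py appears to rely on — assuming url_processor.py exports a URLProcessor whose process_urls(urls, mode=...) returns one result dict per URL, with file_processor.py and qr_processor.py following the same pattern:

    # url_processor.py -- hypothetical skeleton, not shown in this commit
    from datetime import datetime
    from typing import Dict, List

    class URLProcessor:
        """Fetches URLs and returns one result dict per URL."""

        def process_urls(self, urls: List[str], mode: str = "basic") -> List[Dict]:
            # A real implementation would fetch and parse each URL; this stub
            # only echoes its inputs in the dict shape the interface code expects.
            return [
                {"url": url, "mode": mode, "timestamp": datetime.now().isoformat()}
                for url in urls
            ]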
+ # Configure advanced logging with rotation
+ from logging.handlers import RotatingFileHandler
+ log_handler = RotatingFileHandler(
+     'urld_pro.log',
+     maxBytes=10*1024*1024,  # 10MB
+     backupCount=5
+ )
+ log_handler.setFormatter(
+     logging.Formatter('%(asctime)s.%(msecs)03d [%(levelname)s] %(name)s - %(message)s')
+ )
+ logger = logging.getLogger(__name__)
+ logger.addHandler(log_handler)
+ logger.setLevel(logging.INFO)
+
+ # Advanced Theme Configuration
+ THEME = gr.themes.Soft(
+     primary_hue=gr.themes.colors.indigo,
+     secondary_hue=gr.themes.colors.blue,
+     neutral_hue=gr.themes.colors.slate,
+     spacing_size=gr.themes.sizes.spacing_lg,
+     radius_size=gr.themes.sizes.radius_lg,
+     text_size=gr.themes.sizes.text_lg,
+ ).set(
+     body_background_fill="*background_fill_secondary",
+     button_primary_background_fill="*primary_500",
+     button_primary_background_fill_hover="*primary_600",
+     button_primary_text_color="white",
+     button_primary_border_color="*primary_500",
+     button_secondary_background_fill="*secondary_500",
+     button_secondary_background_fill_hover="*secondary_600",
+     button_secondary_text_color="white",
+     button_secondary_border_color="*secondary_500",
+ )
+
+ # Enhanced CSS with advanced animations and modern design
+ CUSTOM_CSS = """
+ @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap');
+
+ :root {
+     --primary-color: #4f46e5;
+     --secondary-color: #2563eb;
+     --success-color: #059669;
+     --error-color: #dc2626;
+     --warning-color: #d97706;
+     --info-color: #2563eb;
+ }
+
+ body {
+     font-family: 'Inter', sans-serif;
+ }
+
+ .container {
+     max-width: 1400px !important;
+     margin: auto !important;
+     padding: 2rem !important;
+ }
+
+ .pro-header {
+     text-align: center;
+     margin-bottom: 2rem;
+     padding: 2rem;
+     background: linear-gradient(135deg, var(--primary-color) 0%, var(--secondary-color) 100%);
+     border-radius: 1rem;
+     color: white;
+     box-shadow: 0 4px 6px -1px rgb(0 0 0 / 0.1), 0 2px 4px -2px rgb(0 0 0 / 0.1);
+ }
+
+ .pro-header h1 {
+     font-size: 2.5rem;
+     font-weight: 700;
+     margin-bottom: 1rem;
+ }
+
+ .pro-header p {
+     font-size: 1.25rem;
+     opacity: 0.9;
+ }
+
+ .dashboard {
+     display: grid;
+     grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
+     gap: 1rem;
+     margin-bottom: 2rem;
+ }
+
+ .stat-card {
+     background: white;
+     padding: 1.5rem;
+     border-radius: 1rem;
+     box-shadow: 0 1px 3px 0 rgb(0 0 0 / 0.1), 0 1px 2px -1px rgb(0 0 0 / 0.1);
+     transition: transform 0.2s ease;
+ }
+
+ .stat-card:hover {
+     transform: translateY(-2px);
+ }
+
+ .chart-container {
+     background: white;
+     padding: 1rem;
+     border-radius: 1rem;
+     box-shadow: 0 1px 3px 0 rgb(0 0 0 / 0.1);
+     margin-bottom: 1rem;
+ }
+
+ .pro-tab {
+     transition: all 0.3s ease;
+ }
+
+ .pro-tab:hover {
+     transform: translateY(-1px);
+ }
+
+ .pro-button {
+     transition: all 0.2s ease;
+ }
+
+ .pro-button:hover {
+     transform: translateY(-1px);
+     box-shadow: 0 4px 6px -1px rgb(0 0 0 / 0.1);
+ }
+
+ .pro-button:active {
+     transform: translateY(0);
+ }
+
+ .status-message {
+     padding: 1rem;
+     border-radius: 0.5rem;
+     margin: 1rem 0;
+     animation: slideIn 0.3s ease;
+ }
+
+ .status-message.success {
+     background: #ecfdf5;
+     border: 1px solid var(--success-color);
+     color: var(--success-color);
+ }
+
+ .status-message.error {
+     background: #fef2f2;
+     border: 1px solid var(--error-color);
+     color: var(--error-color);
+ }
+
+ .status-message.warning {
+     background: #fffbeb;
+     border: 1px solid var(--warning-color);
+     color: var(--warning-color);
+ }
+
+ @keyframes slideIn {
+     from {
+         opacity: 0;
+         transform: translateY(-10px);
+     }
+     to {
+         opacity: 1;
+         transform: translateY(0);
+     }
+ }
+
+ .loading {
+     position: relative;
+ }
+
+ .loading::after {
+     content: '';
+     position: absolute;
+     top: 0;
+     left: 0;
+     width: 100%;
+     height: 100%;
+     background: linear-gradient(
+         90deg,
+         rgba(255,255,255,0) 0%,
+         rgba(255,255,255,0.2) 50%,
+         rgba(255,255,255,0) 100%
+     );
+     animation: shimmer 1.5s infinite;
+ }
+
+ @keyframes shimmer {
+     0% {
+         transform: translateX(-100%);
+     }
+     100% {
+         transform: translateX(100%);
+     }
+ }
+
+ .pro-footer {
+     text-align: center;
+     margin-top: 3rem;
+     padding: 2rem;
+     background: #f8fafc;
+     border-radius: 1rem;
+     box-shadow: 0 -1px 3px 0 rgb(0 0 0 / 0.1);
+ }
+ """
+
+ @dataclass
+ class ProcessingStats:
+     """Data class for tracking processing statistics"""
+     total_urls: int = 0
+     successful_urls: int = 0
+     failed_urls: int = 0
+     total_files: int = 0
+     successful_files: int = 0
+     failed_files: int = 0
+     total_qr_codes: int = 0
+     successful_qr_codes: int = 0
+     failed_qr_codes: int = 0
+     processing_time: float = 0.0
+     last_updated: str = datetime.now().isoformat()
+
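Note that the last_updated default above is evaluated once, when the class body executes at import time, so every ProcessingStats instance starts with the same frozen timestamp until the monitoring thread overwrites it. A per-instance default would use dataclasses.field — a minimal sketch, not part of this commit:

    # Hypothetical variant: a fresh timestamp per instance.
    from dataclasses import dataclass, field
    from datetime import datetime

    @dataclass
    class ProcessingStatsPerInstance:
        total_urls: int = 0
        # default_factory runs at instantiation time, not at class-definition time
        last_updated: str = field(default_factory=lambda: datetime.now().isoformat())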
+ class AdvancedProInterface:
+     """Professional interface with advanced features and real-time analytics"""

+     def __init__(self):
+         """Initialize the professional interface with advanced components"""
+         self.url_processor = URLProcessor()
+         self.file_processor = FileProcessor()
+         self.qr_processor = QRProcessor()
+         self.stats = ProcessingStats()
+         self.processing_queue = queue.Queue()
+         self.thread_pool = ThreadPoolExecutor(max_workers=10)
+         self.process_pool = ProcessPoolExecutor(max_workers=4)
+         self.processing_history: List[Dict] = []

+         # Initialize real-time monitoring
+         self._start_monitoring()

+     def _start_monitoring(self):
+         """Start background monitoring thread"""
+         def monitor():
+             while True:
+                 try:
+                     # Update statistics
+                     self.stats.last_updated = datetime.now().isoformat()
+
+                     # Process queued items
+                     while not self.processing_queue.empty():
+                         item = self.processing_queue.get_nowait()
+                         self._process_queued_item(item)
+
+                     time.sleep(1)
+                 except Exception as e:
+                     logger.error(f"Monitoring error: {e}")

+         threading.Thread(target=monitor, daemon=True).start()
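The monitor above polls with empty()/get_nowait(); if a second consumer ever drains the queue between those two calls, get_nowait() raises queue.Empty. A blocking get(timeout=...) avoids both that race and the fixed one-second sleep — an illustrative variant, not from this commit:

    # Illustrative variant: block on the queue instead of polling it.
    import queue

    def drain_blocking(q: queue.Queue, handle) -> None:
        while True:
            try:
                item = q.get(timeout=1)  # wait up to 1s for work
            except queue.Empty:
                continue  # nothing queued; wait again
            handle(item)
            q.task_done()  # pairs with q.join() on the producer side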

+     def _process_queued_item(self, item: Dict):
+         """Process queued items with advanced error handling"""
          try:
+             item_type = item.get('type')
+             if item_type == 'url':
+                 self._process_url_item(item)
+             elif item_type == 'file':
+                 self._process_file_item(item)
+             elif item_type == 'qr':
+                 self._process_qr_item(item)
          except Exception as e:
+             logger.error(f"Queue processing error: {e}")

+     def _process_url_item(self, item: Dict):
+         """Process URL items with advanced features"""
          try:
+             start_time = time.time()
+             results = self.url_processor.process_urls([item['url']], mode=item['mode'])
+             processing_time = time.time() - start_time

+             self.stats.total_urls += 1
+             if any('error' in r for r in results):
+                 self.stats.failed_urls += 1
+             else:
+                 self.stats.successful_urls += 1
+
+             self.stats.processing_time += processing_time
+
+             # Update history
+             self.processing_history.append({
+                 'timestamp': datetime.now().isoformat(),
+                 'type': 'url',
+                 'url': item['url'],
+                 'results': results,
+                 'processing_time': processing_time
+             })
          except Exception as e:
+             logger.error(f"URL processing error: {e}")
+             self.stats.failed_urls += 1

+     async def process_urls_parallel(self, urls: str, mode: str) -> Tuple[str, str, str, Dict]:
+         """Process URLs in parallel with advanced features"""
          try:
+             url_list = [url.strip() for url in urls.split('\n') if url.strip()]

+             if not url_list:
+                 return "", "⚠️ No valid URLs provided", "", {}

+             start_time = time.time()

+             # Process URLs in parallel
+             futures = []
+             for url in url_list:
+                 future = self.thread_pool.submit(
+                     self.url_processor.process_urls,
+                     [url],
+                     mode=mode
+                 )
+                 futures.append(future)

+             # Collect results
+             results = []
+             for future in futures:
+                 try:
+                     result = future.result(timeout=30)
+                     results.extend(result)
+                 except Exception as e:
+                     logger.error(f"URL processing error: {e}")
+                     results.append({
+                         'error': str(e),
+                         'timestamp': datetime.now().isoformat()
+                     })

+             processing_time = time.time() - start_time

+             # Update statistics
+             self.stats.total_urls += len(url_list)
+             self.stats.successful_urls += len([r for r in results if 'error' not in r])
+             self.stats.failed_urls += len([r for r in results if 'error' in r])
+             self.stats.processing_time += processing_time

+             # Generate analytics
+             analytics = self._generate_analytics(results, processing_time)

+             # Format output
+             formatted_results = json.dumps(results, indent=2)
+             summary = self._generate_summary(results)

+             return (
+                 formatted_results,
+                 f"✅ Processed {len(url_list)} URLs in {processing_time:.2f}s",
+                 summary,
+                 analytics
+             )
          except Exception as e:
+             logger.error(f"Parallel processing error: {e}")
+             return "", f"❌ Error: {str(e)}", "", {}
+
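process_urls_parallel collects futures in submission order, so a slow first URL delays everything behind it even when later URLs have already finished, and future.result(timeout=30) is applied per future, so a pathological batch can wait up to 30 seconds per URL. concurrent.futures.as_completed harvests results as they arrive — a sketch of that alternative, with illustrative names:

    # Illustrative alternative: harvest futures as they finish.
    from concurrent.futures import ThreadPoolExecutor, as_completed

    def fetch_all(process_one, urls, max_workers=10, batch_timeout=300):
        results = []
        with ThreadPoolExecutor(max_workers=max_workers) as pool:
            future_to_url = {pool.submit(process_one, url): url for url in urls}
            for future in as_completed(future_to_url, timeout=batch_timeout):
                url = future_to_url[future]
                try:
                    results.extend(future.result())
                except Exception as exc:
                    # record the per-URL failure without aborting the batch
                    results.append({"url": url, "error": str(exc)})
        return results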
+     def _generate_analytics(self, results: List[Dict], processing_time: float) -> Dict:
+         """Generate detailed analytics from processing results"""
+         analytics = {
+             'processing_time': processing_time,
+             'total_items': len(results),
+             'success_rate': len([r for r in results if 'error' not in r]) / len(results) if results else 0,
+             'error_rate': len([r for r in results if 'error' in r]) / len(results) if results else 0,
+             'average_time_per_item': processing_time / len(results) if results else 0,
+             'timestamp': datetime.now().isoformat()
+         }

+         # Add historical trends
+         if self.processing_history:
+             historical_success_rates = [
+                 len([r for r in h['results'] if 'error' not in r]) / len(h['results'])
+                 for h in self.processing_history[-10:]  # Last 10 operations
+             ]
+             analytics['historical_success_rate'] = sum(historical_success_rates) / len(historical_success_rates)

+         return analytics
+
+     def create_interface(self) -> gr.Blocks:
+         """Create an advanced professional interface with real-time analytics"""

+         with gr.Blocks(theme=THEME, css=CUSTOM_CSS) as interface:
+             # Professional Header
+             gr.Markdown(
+                 """
+                 <div class="pro-header">
+                     <h1>🌐 Advanced URL & Text Processing Suite - Pro</h1>
+                     <p>Enterprise-grade toolkit with advanced features and real-time analytics</p>
+                 </div>
+                 """
+             )

+             # Real-time Dashboard
+             with gr.Row(elem_classes="dashboard"):
+                 with gr.Column(elem_classes="stat-card"):
+                     url_stats = gr.JSON(
+                         value={
+                             "Total URLs": 0,
+                             "Success Rate": "0%",
+                             "Avg. Processing Time": "0ms"
+                         },
+                         label="URL Processing Stats"
+                     )
+
+                 with gr.Column(elem_classes="stat-card"):
+                     file_stats = gr.JSON(
+                         value={
+                             "Total Files": 0,
+                             "Success Rate": "0%",
+                             "Avg. Processing Time": "0ms"
+                         },
+                         label="File Processing Stats"
+                     )
+
+                 with gr.Column(elem_classes="stat-card"):
+                     qr_stats = gr.JSON(
+                         value={
+                             "Total QR Codes": 0,
+                             "Success Rate": "0%",
+                             "Avg. Processing Time": "0ms"
+                         },
+                         label="QR Code Stats"
+                     )
+
+             # Main Interface Tabs
+             with gr.Tabs() as tabs:
+                 # Advanced URL Processing Tab
+                 with gr.Tab("🔗 URL Processing", elem_classes="pro-tab"):
+                     with gr.Row():
+                         with gr.Column(scale=2):
+                             url_input = gr.Textbox(
+                                 label="URLs",
+                                 placeholder="Enter URLs (one per line)",
+                                 lines=5
+                             )
+
+                             with gr.Row():
+                                 mode = gr.Radio(
+                                     choices=["basic", "interactive", "deep"],
+                                     value="basic",
+                                     label="Processing Mode"
+                                 )
+                                 parallel = gr.Checkbox(
+                                     label="Enable Parallel Processing",
+                                     value=True
+                                 )
+
+                             with gr.Row():
+                                 process_btn = gr.Button(
+                                     "🚀 Process URLs",
+                                     elem_classes="pro-button"
+                                 )
+                                 clear_btn = gr.Button(
+                                     "🗑️ Clear",
+                                     elem_classes="pro-button"
+                                 )
+
+                         with gr.Column(scale=1):
+                             gr.Markdown(
+                                 """
+                                 ### 📊 Processing Modes
+
+                                 #### Basic Mode
+                                 - Fast content retrieval
+                                 - Basic metadata extraction
+                                 - Suitable for simple URLs
+
+                                 #### Interactive Mode
+                                 - Handles JavaScript-rendered content
+                                 - Social media support
+                                 - Advanced rate limiting
+
+                                 #### Deep Mode
+                                 - Full content analysis
+                                 - Link following
+                                 - Comprehensive metadata
+                                 """
+                             )

+                     with gr.Row():
+                         status_output = gr.Textbox(
+                             label="Status",
+                             interactive=False
+                         )
+
+                     with gr.Tabs():
+                         with gr.Tab("Results"):
+                             json_output = gr.JSON(
+                                 label="Detailed Results"
+                             )

+                         with gr.Tab("Summary"):
+                             summary_output = gr.Textbox(
+                                 label="Processing Summary",
+                                 interactive=False,
+                                 lines=10
+                             )

+                         with gr.Tab("Analytics"):
+                             analytics_output = gr.Plot(
+                                 label="Processing Analytics"
+                             )
+
+                 # Advanced File Processing Tab
+                 with gr.Tab("📁 File Processing", elem_classes="pro-tab"):
+                     with gr.Row():
+                         with gr.Column(scale=2):
+                             file_input = gr.File(
+                                 label="Upload Files",
+                                 file_types=[
+                                     ".txt", ".pdf", ".doc", ".docx",
+                                     ".zip", ".tar.gz", ".jpg", ".png"
+                                 ],
+                                 multiple=True
+                             )
+
+                             with gr.Row():
+                                 process_file_btn = gr.Button(
+                                     "📥 Process Files",
+                                     elem_classes="pro-button"
+                                 )
+                                 batch_size = gr.Slider(
+                                     minimum=1,
+                                     maximum=10,
+                                     value=3,
+                                     step=1,
+                                     label="Batch Size"
+                                 )

+                         with gr.Column(scale=1):
+                             gr.Markdown(
+                                 """
+                                 ### 📑 Supported Formats

+                                 #### Documents
+                                 - PDF files (.pdf)
+                                 - Word documents (.doc, .docx)
+                                 - Text files (.txt)
+
+                                 #### Archives
+                                 - ZIP archives (.zip)
+                                 - TAR archives (.tar.gz)
+
+                                 #### Media
+                                 - Images (.jpg, .png)
+                                 - And more...
+                                 """
+                             )
+
+                     with gr.Row():
+                         file_status = gr.Textbox(
+                             label="Status",
+                             interactive=False
+                         )

+                     with gr.Tabs():
+                         with gr.Tab("Results"):
+                             file_output = gr.File(
+                                 label="Processed Files"
+                             )

+                         with gr.Tab("Details"):
+                             file_json_output = gr.JSON(
+                                 label="Processing Details"
+                             )
+
+                         with gr.Tab("Analytics"):
+                             file_analytics = gr.Plot(
+                                 label="File Processing Analytics"
+                             )
+
+                 # Advanced QR Code Tab
+                 with gr.Tab("📱 QR Code", elem_classes="pro-tab"):
+                     with gr.Row():
+                         with gr.Column(scale=2):
+                             qr_input = gr.Textbox(
+                                 label="Data",
+                                 placeholder="Enter data to encode",
+                                 lines=3
+                             )
+
+                             with gr.Row():
+                                 qr_size = gr.Slider(
+                                     minimum=5,
+                                     maximum=50,
+                                     value=10,
+                                     step=1,
+                                     label="QR Code Size"
+                                 )
+                                 qr_correction = gr.Dropdown(
+                                     choices=["L", "M", "Q", "H"],
+                                     value="M",
+                                     label="Error Correction"
+                                 )
+
+                             with gr.Row():
+                                 generate_qr_btn = gr.Button(
+                                     "✨ Generate QR",
+                                     elem_classes="pro-button"
+                                 )
+                                 customize_btn = gr.Button(
+                                     "🎨 Customize",
+                                     elem_classes="pro-button"
+                                 )
+
+                         with gr.Column(scale=1):
+                             qr_output = gr.Image(
+                                 label="Generated QR Code"
+                             )
+                             qr_status = gr.Textbox(
+                                 label="Status",
+                                 interactive=False
+                             )
+
+             # Professional Footer
+             gr.Markdown(
+                 """
+                 <div class="pro-footer">
+                     <p>Advanced URL & Text Processing Suite - Professional Edition</p>
+                     <p style="font-size: 0.9rem;">Version 1.0.0 Pro | © 2024 Advanced URL Processing Team</p>
+                 </div>
+                 """
              )

+             # Event Handlers
+             process_btn.click(
+                 fn=self.process_urls_parallel,
+                 inputs=[url_input, mode],
+                 outputs=[
+                     json_output,
+                     status_output,
+                     summary_output,
+                     analytics_output
+                 ]
+             )

+             clear_btn.click(
+                 fn=lambda: ("", "", "", None),
+                 inputs=[],
+                 outputs=[
+                     url_input,
+                     status_output,
+                     summary_output,
+                     analytics_output
+                 ]
+             )

+             process_file_btn.click(
+                 fn=self.process_file,
+                 inputs=[file_input],
+                 outputs=[
+                     file_json_output,
+                     file_status,
+                     file_output
+                 ]
+             )

+             generate_qr_btn.click(
+                 fn=self.generate_qr,
+                 inputs=[qr_input, qr_size],
+                 outputs=[qr_output, qr_status]
+             )

+             # Update statistics periodically
+             gr.Markdown.update(every=5)

+         return interface
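gr.Markdown.update(every=5) on the line above only constructs an update payload and discards it, so the dashboard cards never refresh on their own. Periodic refresh in Gradio is normally wired through an event carrying an every interval; a sketch under that assumption (Gradio 3.x Blocks.load, with a hypothetical _stats_snapshot helper), not part of this commit:

    # Hypothetical wiring for the url_stats card; _stats_snapshot is illustrative.
    def _stats_snapshot(self) -> dict:
        total = max(self.stats.total_urls, 1)
        return {
            "Total URLs": self.stats.total_urls,
            "Success Rate": f"{100 * self.stats.successful_urls / total:.0f}%",
            "Avg. Processing Time": f"{1000 * self.stats.processing_time / total:.0f}ms",
        }

    # inside create_interface(), before the final `return interface`:
    #     interface.load(fn=self._stats_snapshot, inputs=None,
    #                    outputs=url_stats, every=5)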

  def main():
+     """Main entry point with advanced error handling"""
+     try:
+         # Initialize interface
+         app = AdvancedProInterface()
+         interface = app.create_interface()
+
+         # Launch with professional configuration
+         interface.launch(
+             server_name="0.0.0.0",
+             server_port=8000,
+             share=False,
+             debug=True,
+             enable_queue=True,
+             max_threads=40,
+             auth=None,  # Add authentication if needed
+             ssl_keyfile=None,  # Add SSL if needed
+             ssl_certfile=None
+         )
+     except Exception as e:
+         logger.error(f"Application startup error: {e}", exc_info=True)
+         sys.exit(1)

  if __name__ == "__main__":
      main()