acecalisto3 committed
Commit ac0ad33 · verified · 1 Parent(s): 9814d50

Update app.py

Files changed (1): app.py (+522 -417)

app.py CHANGED
@@ -6,10 +6,11 @@ import logging
 import mimetypes
 import zipfile
 import tempfile
+import chardet
 from datetime import datetime
-from typing import List, Dict, Optional, Union
+from typing import List, Dict, Optional, Union, Tuple
 from pathlib import Path
-from urllib.parse import urlparse
+from urllib.parse import urlparse, urljoin

 import requests
 import validators
@@ -19,8 +20,10 @@ from bs4 import BeautifulSoup
 from fake_useragent import UserAgent
 from cleantext import clean
 import qrcode
+from PIL import Image, ImageDraw, ImageFont
+import numpy as np

-# Setup logging
+# Setup enhanced logging with more detailed formatting
 logging.basicConfig(
     level=logging.INFO,
     format='%(asctime)s - %(levelname)s - [%(filename)s:%(lineno)d] - %(message)s',
@@ -31,229 +34,164 @@ logging.basicConfig(
 )
 logger = logging.getLogger(__name__)

-# Ensure output directories exist
-Path('output/qr_codes').mkdir(parents=True, exist_ok=True)
+# Ensure output directories exist with modern structure
+OUTPUTS_DIR = Path('output')
+QR_CODES_DIR = OUTPUTS_DIR / 'qr_codes'
+TEMP_DIR = OUTPUTS_DIR / 'temp'
+for directory in [OUTPUTS_DIR, QR_CODES_DIR, TEMP_DIR]:
+    directory.mkdir(parents=True, exist_ok=True)

-class URLProcessor:
+class EnhancedURLProcessor:
+    """Advanced URL processing with complete content extraction"""
+
     def __init__(self):
         self.session = requests.Session()
-        self.timeout = 10  # seconds
+        self.timeout = 15  # Extended timeout for larger content
+        self.max_retries = 3
+        self.user_agent = UserAgent()
+
+        # Enhanced headers for better site compatibility
         self.session.headers.update({
-            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
-            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
-            'Accept-Language': 'en-US,en;q=0.5',
+            'User-Agent': self.user_agent.random,
+            'Accept': '*/*',  # Accept all content types
+            'Accept-Language': 'en-US,en;q=0.9',
             'Accept-Encoding': 'gzip, deflate, br',
             'Connection': 'keep-alive',
-            'Upgrade-Insecure-Requests': '1'
+            'Upgrade-Insecure-Requests': '1',
+            'Sec-Fetch-Dest': 'document',
+            'Sec-Fetch-Mode': 'navigate',
+            'Sec-Fetch-Site': 'none',
+            'Sec-Fetch-User': '?1',
+            'DNT': '1'
         })

     def validate_url(self, url: str) -> Dict:
-        """Validate URL format and accessibility"""
+        """Enhanced URL validation with detailed feedback"""
         try:
             if not validators.url(url):
-                return {'is_valid': False, 'message': 'Invalid URL format'}
+                return {'is_valid': False, 'message': 'Invalid URL format', 'details': 'URL must begin with http:// or https://'}

-            # Try a simple GET request to check if the URL is accessible
-            response = self.session.get(url, timeout=self.timeout)
-            response.raise_for_status()
-            return {'is_valid': True, 'message': 'URL is valid and accessible'}
-        except Exception as e:
-            return {'is_valid': False, 'message': f'URL validation failed: {str(e)}'}
+            parsed = urlparse(url)
+            if not all([parsed.scheme, parsed.netloc]):
+                return {'is_valid': False, 'message': 'Incomplete URL', 'details': 'Missing scheme or domain'}
+
+            # Try HEAD request first to check accessibility
+            try:
+                head_response = self.session.head(url, timeout=5)
+                head_response.raise_for_status()
+            except requests.exceptions.RequestException:
+                # If HEAD fails, try GET as some servers don't support HEAD
+                response = self.session.get(url, timeout=self.timeout)
+                response.raise_for_status()

-    def fetch_content(self, url: str) -> Optional[Dict]:
-        """Universal content fetcher with special case handling"""
-        try:
-            # Google Drive document handling
-            if 'drive.google.com' in url:
-                return self._handle_google_drive(url)
-
-            # Google Calendar ICS handling
-            if 'calendar.google.com' in url and '.ics' in url:
-                return self._handle_google_calendar(url)
-
-            logger.info(f"Fetching content from URL: {url}")
-            response = self.session.get(url, timeout=self.timeout)
-            response.raise_for_status()
-
-            # Return the raw HTML content
             return {
-                'content': response.text,
-                'content_type': response.headers.get('Content-Type', ''),
-                'timestamp': datetime.now().isoformat()
+                'is_valid': True,
+                'message': 'URL is valid and accessible',
+                'details': {
+                    'content_type': head_response.headers.get('Content-Type', 'unknown'),
+                    'server': head_response.headers.get('Server', 'unknown'),
+                    'size': head_response.headers.get('Content-Length', 'unknown')
+                }
             }
         except Exception as e:
-            logger.error(f"Content fetch failed: {e}")
-            return None
-
-    def process_all_inputs(urls, file, text, combine):
-        """Process all input types and generate QR codes"""
-        try:
-            results = []
-            file_processor = FileProcessor()  # Initialize file_processor here
-
-            # Process text input first (since it's direct JSON)
-            if text and text.strip():
-                try:
-                    json_data = json.loads(text)
-                    if isinstance(json_data, list):
-                        results.extend(json_data)
-                    else:
-                        results.append(json_data)
-                except json.JSONDecodeError as e:
-                    return None, [], f"❌ Invalid JSON format: {str(e)}"
-
-            # Process URLs if provided
-            if urls and urls.strip():
-                processor = URLProcessor()
-                url_list = re.split(r'[,\n]', urls)
-                url_list = [url.strip() for url in url_list if url.strip()]
-
-                for url in url_list:
-                    validation = processor.validate_url(url)
-                    if validation.get('is_valid'):
-                        content = processor.fetch_content(url)
-                        if content:
-                            # Convert HTML content to a proper JSON object
-                            url_data = {
-                                'source': 'url',
-                                'url': url,
-                                'content': content.get('content', ''),
-                                'content_type': content.get('content_type', ''),
-                                'timestamp': datetime.now().isoformat()
-                            }
-                            results.append(url_data)
-                    else:
-                        logger.warning(f"Invalid URL: {url} - {validation.get('message')}")
-
-            # Process files if provided
-            if file:
-                file_results = file_processor.process_file(file)
-                if file_results:
-                    results.extend(file_results)
-
-            # Generate QR codes
-            if results:
-                if combine:
-                    combined_data = {
-                        'type': 'combined_data',
-                        'items': results,
-                        'timestamp': datetime.now().isoformat()
-                    }
-                    qr_paths = generate_qr_code(combined_data, combined=True)
-                else:
-                    qr_paths = []
-                    for item in results:
-                        item_paths = generate_qr_code(item, combined=True)
-                        if item_paths:
-                            qr_paths.extend(item_paths)
-
-                if qr_paths:
-                    return (
-                        results,
-                        [str(path) for path in qr_paths],
-                        f"✅ Successfully processed {len(results)} items and generated {len(qr_paths)} QR code(s)!"
-                    )
-                else:
-                    return None, [], "❌ Failed to generate QR codes. Please check the input data."
-            else:
-                return None, [], "⚠️ No valid content to process. Please provide some input data."
-
-        except Exception as e:
-            logger.error(f"Processing error: {e}")
-            import traceback
-            logger.error(traceback.format_exc())  # Print the full stack trace
-            return None, [], f"❌ Error: {str(e)}"
+            return {'is_valid': False, 'message': f'URL validation failed: {str(e)}', 'details': str(e)}

-    def _handle_google_drive(self, url: str) -> Optional[Dict]:
-        """Process Google Drive file links"""
+    def fetch_content(self, url: str, retry_count: int = 0) -> Optional[Dict]:
+        """Enhanced content fetcher with retry mechanism and complete character extraction"""
         try:
-            file_id = re.search(r'/file/d/([a-zA-Z0-9_-]+)', url)
-            if not file_id:
-                logger.error(f"Invalid Google Drive URL: {url}")
-                return None
-
-            direct_url = f"https://drive.google.com/uc?export=download&id={file_id.group(1)}"
-            response = self.session.get(direct_url, timeout=self.timeout)
+            logger.info(f"Fetching content from URL: {url} (Attempt {retry_count + 1}/{self.max_retries})")
+
+            # Update User-Agent randomly for each request
+            self.session.headers.update({'User-Agent': self.user_agent.random})
+
+            response = self.session.get(url, timeout=self.timeout)
             response.raise_for_status()
+
+            # Detect encoding
+            if response.encoding is None:
+                encoding = chardet.detect(response.content)['encoding'] or 'utf-8'
+            else:
+                encoding = response.encoding

-            return {
-                'content': response.text,
+            # Decode content with fallback
+            try:
+                raw_content = response.content.decode(encoding, errors='replace')
+            except (UnicodeDecodeError, LookupError):
+                raw_content = response.content.decode('utf-8', errors='replace')
+
+            # Extract metadata
+            metadata = {
+                'url': url,
+                'timestamp': datetime.now().isoformat(),
+                'encoding': encoding,
                 'content_type': response.headers.get('Content-Type', ''),
-                'timestamp': datetime.now().isoformat()
+                'content_length': len(response.content),
+                'headers': dict(response.headers),
+                'status_code': response.status_code
             }
-        except Exception as e:
-            logger.error(f"Google Drive processing failed: {e}")
-            return None

-    def _handle_google_calendar(self, url: str) -> Optional[Dict]:
-        """Process Google Calendar ICS feeds"""
-        try:
-            response = self.session.get(url, timeout=self.timeout)
-            response.raise_for_status()
+            # Process based on content type
+            content_type = response.headers.get('Content-Type', '').lower()
+            if 'text/html' in content_type:
+                processed_content = self._process_html_content(raw_content, url)
+            else:
+                processed_content = raw_content
+
             return {
-                'content': response.text,
-                'content_type': 'text/calendar',
-                'timestamp': datetime.now().isoformat()
+                'content': processed_content,
+                'raw_content': raw_content,
+                'metadata': metadata
             }
+
+        except requests.exceptions.RequestException as e:
+            if retry_count < self.max_retries - 1:
+                logger.warning(f"Retry {retry_count + 1}/{self.max_retries} for URL: {url}")
+                time.sleep(2 ** retry_count)  # Exponential backoff
+                return self.fetch_content(url, retry_count + 1)
+            logger.error(f"Failed to fetch content after {self.max_retries} attempts: {e}")
+            return None
         except Exception as e:
-            logger.error(f"Calendar fetch failed: {e}")
+            logger.error(f"Unexpected error while fetching content: {e}")
             return None

-    def _fetch_html_content(self, url: str) -> Optional[Dict]:
-        """Standard HTML content processing"""
+    def _process_html_content(self, content: str, base_url: str) -> str:
+        """Process HTML content while preserving all characters"""
         try:
-            response = self.session.get(url, timeout=self.timeout)
-            response.raise_for_status()
-
-            soup = BeautifulSoup(response.text, 'html.parser')
-
-            # Remove unwanted elements
-            for element in soup(['script', 'style', 'nav', 'footer', 'header', 'meta', 'link']):
-                element.decompose()
-
-            # Extract main content
-            main_content = soup.find('main') or soup.find('article') or soup.body
-
-            if main_content is None:
-                logger.warning(f"No main content found for URL: {url}")
-                return {
-                    'content': '',
-                    'content_type': response.headers.get('Content-Type', ''),
-                    'timestamp': datetime.now().isoformat()
-                }
-
-            # Clean and structure content
-            text_content = main_content.get_text(separator='\n', strip=True)
-            cleaned_content = self.advanced_text_cleaning(text_content)
-
-            return {
-                'content': cleaned_content,
-                'content_type': response.headers.get('Content-Type', ''),
-                'timestamp': datetime.now().isoformat()
-            }
+            soup = BeautifulSoup(content, 'html.parser')
+
+            # Convert relative URLs to absolute
+            for tag in soup.find_all(['a', 'img', 'link', 'script']):
+                for attr in ['href', 'src']:
+                    if tag.get(attr):
+                        try:
+                            tag[attr] = urljoin(base_url, tag[attr])
+                        except Exception:
+                            pass
+
+            # Extract all text content
+            text_parts = []
+            for element in soup.stripped_strings:
+                text_parts.append(str(element))
+
+            return '\n'.join(text_parts)
         except Exception as e:
-            logger.error(f"HTML processing failed: {e}")
-            return None
+            logger.error(f"HTML processing error: {e}")
+            return content

-class FileProcessor:
-    """Class to handle file processing"""
+class EnhancedFileProcessor:
+    """Advanced file processing with complete content extraction"""

-    def __init__(self, max_file_size: int = 2 * 1024 * 1024 * 1024):  # 2GB default
+    def __init__(self, max_file_size: int = 5 * 1024 * 1024 * 1024):  # 5GB default
         self.max_file_size = max_file_size
-        self.supported_text_extensions = {'.txt', '.md', '.csv', '.json', '.xml', '.zip', '.pdf', '.rtf', '.tar'}
-
-    def is_text_file(self, filepath: str) -> bool:
-        """Check if file is a text file"""
-        try:
-            mime_type, _ = mimetypes.guess_type(filepath)
-            return (mime_type and mime_type.startswith('text/')) or \
-                   (os.path.splitext(filepath)[1].lower() in self.supported_text_extensions)
-        except Exception:
-            return False
+        self.supported_extensions = {
+            '.txt', '.md', '.csv', '.json', '.xml', '.html', '.htm',
+            '.log', '.yml', '.yaml', '.ini', '.conf', '.cfg',
+            '.zip', '.tar', '.gz', '.bz2', '.7z', '.rar',
+            '.pdf', '.doc', '.docx', '.rtf', '.odt'
+        }

     def process_file(self, file) -> List[Dict]:
-        """Process uploaded file with enhanced error handling"""
+        """Process uploaded file with enhanced error handling and complete extraction"""
         if not file:
             return []
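Reviewer note on the hunk above: the new fetch_content stacks three techniques, retry with exponential backoff, chardet-based encoding detection, and decode-with-fallback. A minimal standalone sketch of that pattern follows; fetch_with_retry is an illustrative name, not part of this commit. Also worth flagging: in validate_url, when the HEAD request fails and the GET fallback succeeds, head_response is never bound, so the head_response.headers lookups in the success payload will raise NameError (caught by the outer except and reported as a validation failure).

```python
import time
from typing import Optional

import chardet
import requests

def fetch_with_retry(url: str, max_retries: int = 3, timeout: int = 15) -> Optional[str]:
    """Fetch a URL, retrying transient failures with exponential backoff."""
    session = requests.Session()
    for attempt in range(max_retries):
        try:
            response = session.get(url, timeout=timeout)
            response.raise_for_status()
            # Fall back to chardet when the server declares no usable charset.
            encoding = response.encoding or chardet.detect(response.content)['encoding'] or 'utf-8'
            try:
                return response.content.decode(encoding, errors='replace')
            except LookupError:  # unknown codec name from detection
                return response.content.decode('utf-8', errors='replace')
        except requests.exceptions.RequestException:
            if attempt == max_retries - 1:
                return None
            time.sleep(2 ** attempt)  # back off 1s, 2s, 4s, ...
```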
@@ -265,8 +203,11 @@ class FileProcessor:
                 return []

         with tempfile.TemporaryDirectory() as temp_dir:
-            if zipfile.is_zipfile(file.name):
-                dataset.extend(self._process_zip_file(file.name, temp_dir))
+            temp_dir_path = Path(temp_dir)
+
+            # Handle different archive types
+            if self._is_archive(file.name):
+                dataset.extend(self._process_archive(file.name, temp_dir_path))
             else:
                 dataset.extend(self._process_single_file(file))
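The _is_archive predicate introduced in the next hunk accepts .tar/.gz/.bz2/.7z/.rar, but the accompanying _process_archive only implements ZIP and leaves the rest as a TODO. For reference, the tar branch could be filled in with only the standard library; this is a sketch (extract_tar_members is hypothetical, not part of this commit):

```python
import tarfile
from pathlib import Path
from typing import List

def extract_tar_members(archive_path: str, extract_to: Path) -> List[Path]:
    """Extract a (possibly compressed) tar archive and list the extracted files."""
    extracted: List[Path] = []
    if tarfile.is_tarfile(archive_path):
        with tarfile.open(archive_path, 'r:*') as tar:  # 'r:*' auto-detects gz/bz2/xz
            tar.extractall(extract_to)  # on newer Pythons, consider filter='data'
            extracted = [extract_to / m.name for m in tar.getmembers() if m.isfile()]
    return extracted
```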
@@ -276,261 +217,419 @@ class FileProcessor:

         return dataset

+    def _is_archive(self, filepath: str) -> bool:
+        """Check if file is an archive"""
+        return any(filepath.lower().endswith(ext) for ext in [
+            '.zip', '.tar', '.gz', '.bz2', '.7z', '.rar'
+        ])
+
     def _process_single_file(self, file) -> List[Dict]:
-        """Process a single file"""
+        """Process a single file with enhanced character extraction"""
         try:
             file_stat = os.stat(file.name)
+            file_size = file_stat.st_size
+
+            # Initialize content storage
+            content_parts = []
+
+            # Process file in chunks for large files
+            chunk_size = 10 * 1024 * 1024  # 10MB chunks
+            with open(file.name, 'rb') as f:
+                while True:
+                    chunk = f.read(chunk_size)
+                    if not chunk:
+                        break
+
+                    # Detect encoding for each chunk
+                    encoding = chardet.detect(chunk)['encoding'] or 'utf-8'
+                    try:
+                        decoded_chunk = chunk.decode(encoding, errors='replace')
+                        content_parts.append(decoded_chunk)
+                    except (UnicodeDecodeError, LookupError):
+                        decoded_chunk = chunk.decode('utf-8', errors='replace')
+                        content_parts.append(decoded_chunk)

-            # For very large files, read in chunks and summarize
-            if file_stat.st_size > 100 * 1024 * 1024:  # 100MB
-                logger.info(f"Processing large file: {file.name} ({file_stat.st_size} bytes)")
-
-                # Read first and last 1MB for extremely large files
-                content = ""
-                with open(file.name, 'r', encoding='utf-8', errors='ignore') as f:
-                    content = f.read(1 * 1024 * 1024)  # First 1MB
-                    content += "\n...[Content truncated due to large file size]...\n"
-
-                    # Seek to the last 1MB
-                    f.seek(max(0, file_stat.st_size - 1 * 1024 * 1024))
-                    content += f.read()  # Last 1MB
-            else:
-                # Regular file processing
-                with open(file.name, 'r', encoding='utf-8', errors='ignore') as f:
-                    content = f.read()
+            # Combine all chunks
+            complete_content = ''.join(content_parts)

             return [{
-                'source': 'filename',
+                'source': 'file',
                 'filename': os.path.basename(file.name),
-                'file_size': file_stat.st_size,
+                'file_size': file_size,
                 'mime_type': mimetypes.guess_type(file.name)[0],
                 'created': datetime.fromtimestamp(file_stat.st_ctime).isoformat(),
                 'modified': datetime.fromtimestamp(file_stat.st_mtime).isoformat(),
-                'content': content,
+                'content': complete_content,
                 'timestamp': datetime.now().isoformat()
             }]
         except Exception as e:
             logger.error(f"File processing error: {e}")
             return []

-    def _process_zip_file(self, zip_file_path: str, extract_to: str) -> List[Dict]:
-        """Process a zip file and extract its contents"""
+    def _process_archive(self, archive_path: str, extract_to: Path) -> List[Dict]:
+        """Process an archive file with enhanced extraction"""
         dataset = []
         try:
-            if not os.path.isfile(zip_file_path):
-                logger.error(f"Zip file does not exist: {zip_file_path}")
-                return dataset
-
-            with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
-                zip_ref.extractall(extract_to)
-                for file_info in zip_ref.infolist():
-                    if file_info.is_dir():
-                        continue
-                    extracted_file_path = os.path.join(extract_to, file_info.filename)
-                    dataset.extend(self._process_single_file(open(extracted_file_path, 'rb')))
-        except FileNotFoundError as e:
-            logger.error(f"File not found: {e}")
+            # Handle ZIP archives
+            if zipfile.is_zipfile(archive_path):
+                with zipfile.ZipFile(archive_path, 'r') as zip_ref:
+                    zip_ref.extractall(extract_to)
+                    for file_info in zip_ref.infolist():
+                        if file_info.file_size > 0 and not file_info.filename.endswith('/'):
+                            extracted_path = extract_to / file_info.filename
+                            if extracted_path.suffix.lower() in self.supported_extensions:
+                                with open(extracted_path, 'rb') as f:
+                                    dataset.extend(self._process_single_file(f))
+
+            # TODO: Add support for other archive types (tar, 7z, etc.)
+
         except Exception as e:
-            logger.error(f"Error processing zip file: {e}")
+            logger.error(f"Archive processing error: {e}")
+
         return dataset

-    def chunk_data(self, data, max_size=2953):  # 2953 is the max size for version 1 QR code
-        """Chunk data into smaller pieces if it exceeds max_size."""
-        json_str = json.dumps(data, ensure_ascii=False)
-        if len(json_str) <= max_size:
-            return [json_str]
-
-        # Split into chunks
-        chunks = []
-        while json_str:
-            chunk = json_str[:max_size]
-            chunks.append(chunk)
-            json_str = json_str[max_size:]
-
-        return chunks
+    def chunk_data(self, data: Union[Dict, List], max_size: int = 2953) -> List[Dict]:
+        """Enhanced data chunking with sequence metadata"""
+        try:
+            # Convert data to JSON string
+            json_str = json.dumps(data, ensure_ascii=False)
+            total_length = len(json_str)
+
+            # Calculate overhead for metadata
+            metadata_template = {
+                "chunk_index": 0,
+                "total_chunks": 1,
+                "total_length": total_length,
+                "chunk_hash": "",
+                "data": ""
+            }
+            overhead = len(json.dumps(metadata_template)) + 20  # Extra padding for safety
+
+            # Calculate effective chunk size
+            effective_chunk_size = max_size - overhead
+
+            if total_length <= effective_chunk_size:
+                # Data fits in one chunk
+                chunk = {
+                    "chunk_index": 0,
+                    "total_chunks": 1,
+                    "total_length": total_length,
+                    "chunk_hash": hash(json_str) & 0xFFFFFFFF,  # 32-bit hash
+                    "data": json_str
+                }
+                return [chunk]
+
+            # Calculate number of chunks needed
+            num_chunks = -(-total_length // effective_chunk_size)  # Ceiling division
+            chunk_size = -(-total_length // num_chunks)  # Even distribution
+
+            chunks = []
+            for i in range(num_chunks):
+                start_idx = i * chunk_size
+                end_idx = min(start_idx + chunk_size, total_length)
+                chunk_data = json_str[start_idx:end_idx]
+
+                chunk = {
+                    "chunk_index": i,
+                    "total_chunks": num_chunks,
+                    "total_length": total_length,
+                    "chunk_hash": hash(chunk_data) & 0xFFFFFFFF,
+                    "data": chunk_data
+                }
+                chunks.append(chunk)
+
+            return chunks
+
+        except Exception as e:
+            logger.error(f"Error chunking data: {e}")
+            return []

-def clean_json(data: Union[str, Dict]) -> Optional[Dict]:
-    """Clean and validate JSON data"""
+def generate_stylish_qr(data: Union[str, Dict],
+                        filename: str,
+                        size: int = 10,
+                        border: int = 4,
+                        fill_color: str = "#000000",
+                        back_color: str = "#FFFFFF") -> str:
+    """Generate a stylish QR code with enhanced visual appeal"""
     try:
-        if isinstance(data, str):
-            data = data.strip()
-            data = json.loads(data)
-
-        cleaned = json.loads(json.dumps(data))
-        return cleaned
-    except json.JSONDecodeError as e:
-        logger.error(f"JSON cleaning error: {e}")
-        return None
+        qr = qrcode.QRCode(
+            version=None,
+            error_correction=qrcode.constants.ERROR_CORRECT_H,
+            box_size=size,
+            border=border
+        )
+
+        # Add data to QR code
+        if isinstance(data, dict):
+            qr.add_data(json.dumps(data, ensure_ascii=False))
+        else:
+            qr.add_data(data)
+
+        qr.make(fit=True)
+
+        # Create QR code image with custom colors
+        qr_image = qr.make_image(fill_color=fill_color, back_color=back_color)
+
+        # Convert to RGBA for transparency support
+        qr_image = qr_image.convert('RGBA')
+
+        # Add subtle gradient overlay
+        gradient = Image.new('RGBA', qr_image.size, (0, 0, 0, 0))
+        draw = ImageDraw.Draw(gradient)
+        for i in range(qr_image.width):
+            alpha = int(255 * (1 - i/qr_image.width) * 0.1)  # 10% maximum opacity
+            draw.line([(i, 0), (i, qr_image.height)], fill=(255, 255, 255, alpha))
+
+        # Combine images
+        final_image = Image.alpha_composite(qr_image, gradient)
+
+        # Save the image
+        output_path = QR_CODES_DIR / filename
+        final_image.save(output_path, quality=95)
+
+        return str(output_path)
+
     except Exception as e:
-        logger.error(f"Unexpected error while cleaning JSON: {e}")
-        return None
+        logger.error(f"QR generation error: {e}")
+        return ""

-def generate_qr_code(data: Union[str, Dict], combined: bool = True) -> List[str]:
-    """Generate QR code(s) from data"""
+def generate_qr_codes(data: Union[str, Dict, List], combined: bool = True) -> List[str]:
+    """Generate QR codes with enhanced visual appeal and metadata"""
     try:
-        output_dir = Path('output/qr_codes')
-        output_dir.mkdir(parents=True, exist_ok=True)
-
+        file_processor = EnhancedFileProcessor()
+        paths = []
+
         if combined:
-            cleaned_data = clean_json(data)
-            if cleaned_data is None:
-                logger.error("Failed to clean data for QR code generation.")
-                return []
-
-            qr = qrcode.QRCode(
-                version=None,
-                error_correction=qrcode.constants.ERROR_CORRECT_L,
-                box_size=10,
-                border=4,
-            )
-            json_str = json.dumps(cleaned_data, ensure_ascii=False)
-            qr.add_data(json_str)
-            qr.make(fit=True)
-
-            img = qr.make_image(fill_color="black", back_color="white")
-            output_path = output_dir / f'combined_qr_{int(time.time())}.png'
-            img.save(str(output_path))
-            return [str(output_path)]
+            # Process combined data
+            chunks = file_processor.chunk_data(data)
+            for i, chunk in enumerate(chunks):
+                filename = f'combined_qr_{int(time.time())}_{i+1}_of_{len(chunks)}.png'
+                qr_path = generate_stylish_qr(
+                    data=chunk,
+                    filename=filename,
+                    fill_color="#1a365d",  # Deep blue
+                    back_color="#ffffff"
+                )
+                if qr_path:
+                    paths.append(qr_path)
         else:
+            # Process individual items
             if isinstance(data, list):
-                paths = []
                 for idx, item in enumerate(data):
-                    cleaned_item = clean_json(item)
-                    if cleaned_item is None:
-                        logger.error(f"Failed to clean item {idx} for QR code generation.")
-                        continue
-
-                    qr = qrcode.QRCode(
-                        version=None,
-                        error_correction=qrcode.constants.ERROR_CORRECT_L,
-                        box_size=10,
-                        border=4,
-                    )
-                    json_str = json.dumps(cleaned_item, ensure_ascii=False)
-                    qr.add_data(json_str)
-                    qr.make(fit=True)
-
-                    img = qr.make_image(fill_color="black", back_color="white")
-                    output_path = output_dir / f'item_{idx}_qr_{int(time.time())}.png'
-                    img.save(str(output_path))
-                    paths.append(str(output_path))
-                return paths
+                    chunks = file_processor.chunk_data(item)
+                    for chunk_idx, chunk in enumerate(chunks):
+                        filename = f'item_{idx+1}_chunk_{chunk_idx+1}_of_{len(chunks)}_{int(time.time())}.png'
+                        qr_path = generate_stylish_qr(
+                            data=chunk,
+                            filename=filename,
+                            fill_color="#1a365d",  # Deep blue
+                            back_color="#ffffff"
+                        )
+                        if qr_path:
+                            paths.append(qr_path)
             else:
-                cleaned_item = clean_json(data)
-                if cleaned_item is None:
-                    logger.error("Failed to clean single item for QR code generation.")
-                    return []
-
-                qr = qrcode.QRCode(
-                    version=None,
-                    error_correction=qrcode.constants.ERROR_CORRECT_L,
-                    box_size=10,
-                    border=4,
-                )
-                json_str = json.dumps(cleaned_item, ensure_ascii=False)
-                qr.add_data(json_str)
-                qr.make(fit=True)
-
-                img = qr.make_image(fill_color="black", back_color="white")
-                output_path = output_dir / f'single_qr_{int(time.time())}.png'
-                img.save(str(output_path))
-                return [str(output_path)]
-
-        return []
+                chunks = file_processor.chunk_data(data)
+                for i, chunk in enumerate(chunks):
+                    filename = f'single_qr_{i+1}_of_{len(chunks)}_{int(time.time())}.png'
+                    qr_path = generate_stylish_qr(
+                        data=chunk,
+                        filename=filename,
+                        fill_color="#1a365d",  # Deep blue
+                        back_color="#ffffff"
+                    )
+                    if qr_path:
+                        paths.append(qr_path)
+
+        return paths
     except Exception as e:
-        logger.error(f"QR generation error: {e}")
+        logger.error(f"QR code generation error: {e}")
         return []

-def create_interface():
-    """Create a comprehensive Gradio interface with advanced features"""
-
+def create_modern_interface():
+    """Create a modern and visually appealing Gradio interface"""
+
+    # Modern CSS styling
    css = """
-    .container { max-width: 1200px; margin: auto; }
-    .warning { background-color: #fff3cd; color: #856404; padding: 10px; border-radius: 4px; }
-    .error { background-color: #f8d7da; color: #721c24; padding: 10px; border-radius: 4px; }
-    .success { background-color: #d4edda; color: #155724; padding: 10px; border-radius: 4px; }
+    /* Modern color scheme */
+    :root {
+        --primary-color: #1a365d;
+        --secondary-color: #2d3748;
+        --accent-color: #4299e1;
+        --background-color: #f7fafc;
+        --success-color: #48bb78;
+        --error-color: #f56565;
+        --warning-color: #ed8936;
+    }
+
+    /* Container styling */
+    .container {
+        max-width: 1200px;
+        margin: auto;
+        padding: 2rem;
+        background-color: var(--background-color);
+        border-radius: 1rem;
+        box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
+    }
+
+    /* Component styling */
+    .input-container {
+        background-color: white;
+        padding: 1.5rem;
+        border-radius: 0.5rem;
+        border: 1px solid #e2e8f0;
+        margin-bottom: 1rem;
+    }
+
+    /* Button styling */
+    .primary-button {
+        background-color: var(--primary-color);
+        color: white;
+        padding: 0.75rem 1.5rem;
+        border-radius: 0.375rem;
+        border: none;
+        cursor: pointer;
+        transition: all 0.2s;
+    }
+
+    .primary-button:hover {
+        background-color: var(--accent-color);
+        transform: translateY(-1px);
+    }
+
+    /* Status messages */
+    .status {
+        padding: 1rem;
+        border-radius: 0.375rem;
+        margin: 1rem 0;
+    }
+
+    .status.success { background-color: #f0fff4; color: var(--success-color); }
+    .status.error { background-color: #fff5f5; color: var(--error-color); }
+    .status.warning { background-color: #fffaf0; color: var(--warning-color); }
+
+    /* Gallery styling */
+    .gallery {
+        display: grid;
+        grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
+        gap: 1rem;
+        padding: 1rem;
+        background-color: white;
+        border-radius: 0.5rem;
+        border: 1px solid #e2e8f0;
+    }
+
+    .gallery img {
+        width: 100%;
+        height: auto;
+        border-radius: 0.375rem;
+        transition: transform 0.2s;
+    }
+
+    .gallery img:hover {
+        transform: scale(1.05);
+    }
     """

+    # Create interface with modern design
     with gr.Blocks(css=css, title="Advanced Data Processor & QR Generator") as interface:
-        gr.Markdown("# 🌐 Advanced Data Processing & QR Code Generator")
+        gr.Markdown("""
+        # 🌐 Advanced Data Processing & QR Code Generator
+
+        Transform your data into beautifully designed, sequenced QR codes with our cutting-edge processor.
+        """)

-        with gr.Tab("URL Processing"):
+        with gr.Tab("📝 URL Processing"):
             url_input = gr.Textbox(
                 label="Enter URLs (comma or newline separated)",
                 lines=5,
                 placeholder="https://example1.com\nhttps://example2.com",
                 value=""
             )

-        with gr.Tab("File Input"):
+        with gr.Tab("📁 File Input"):
             file_input = gr.File(
-                label="Upload text file or ZIP archive",
-                file_types=[".txt", ".zip", ".md", ".csv", ".json", ".xml"]
+                label="Upload Files",
+                file_types=["text", "zip"],
+                file_count="multiple"
             )

-        with gr.Tab("Notepad"):
+        with gr.Tab("📋 JSON Input"):
             text_input = gr.TextArea(
-                label="JSON Data Input",
+                label="Direct JSON Input",
                 lines=15,
                 placeholder="Paste your JSON data here...",
                 value=""
             )

         with gr.Row():
-            example_btn = gr.Button("📝 Load Example JSON", variant="secondary")
-            clear_btn = gr.Button("🗑️ Clear Input", variant="secondary")
+            example_btn = gr.Button("📝 Load Example", variant="secondary")
+            clear_btn = gr.Button("🗑️ Clear", variant="secondary")

         with gr.Row():
             combine_data = gr.Checkbox(
-                label="Combine all data into single QR code",
+                label="Combine all data into sequence",
                 value=True,
-                info="Generate one QR code for all data, or separate QR codes for each item"
+                info="Generate sequential QR codes for combined data"
+            )
+            process_btn = gr.Button(
+                "🔄 Process & Generate QR",
+                variant="primary"
             )
-            process_btn = gr.Button("🔄 Process & Generate QR", variant="primary", scale=2)

-        output_json = gr.JSON(label="Processed JSON Data")
-        output_gallery = gr.Gallery(label="Generated QR Codes", columns=2, height=400)
-        output_text = gr.Textbox(label="Processing Status", interactive=False)
+        # Output components
+        output_json = gr.JSON(label="Processed Data")
+        output_gallery = gr.Gallery(
+            label="Generated QR Codes",
+            columns=3,
+            height=400,
+            show_label=True
+        )
+        output_text = gr.Textbox(
+            label="Processing Status",
+            interactive=False
+        )

+        # Load example data
         def load_example():
-            example_json = {
+            example = {
                 "type": "product_catalog",
                 "items": [
                     {
                         "id": "123",
-                        "name": "Test Product",
-                        "description": "This is a test product description",
-                        "price": 29.99,
+                        "name": "Premium Widget",
+                        "description": "High-quality widget with advanced features",
+                        "price": 299.99,
                         "category": "electronics",
-                        "tags": ["test", "sample", "demo"]
+                        "tags": ["premium", "featured", "new"]
                     },
                     {
                         "id": "456",
-                        "name": "Another Product",
-                        "description": "Another test product description",
-                        "price": 49.99,
-                        "category": "accessories",
-                        "tags": ["sample", "test"]
+                        "name": "Basic Widget",
+                        "description": "Reliable widget for everyday use",
+                        "price": 149.99,
+                        "category": "electronics",
+                        "tags": ["basic", "popular"]
                     }
                 ],
                 "metadata": {
                     "timestamp": datetime.now().isoformat(),
-                    "version": "1.0",
+                    "version": "2.0",
                     "source": "example"
                 }
             }
-            return json.dumps(example_json, indent=2)
+            return json.dumps(example, indent=2)

         def clear_input():
             return ""

-        def process_all_inputs(urls, file, text, combine):
-            """Process all input types and generate QR codes"""
+        def process_inputs(urls, files, text, combine):
+            """Process all inputs and generate QR codes"""
             try:
                 results = []
-                file_processor = FileProcessor()  # Initialize file_processor here
-
-                # Process text input first (since it's direct JSON)
+                url_processor = EnhancedURLProcessor()
+                file_processor = EnhancedFileProcessor()
+
+                # Process JSON input
                 if text and text.strip():
                     try:
                         json_data = json.loads(text)
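The chunk_data method above embeds chunk_index/total_chunks/chunk_hash metadata in every chunk, so a consumer can reorder and verify the pieces before joining them. A sketch of the inverse operation follows (reassemble_chunks is hypothetical; this commit ships no decoder). One caveat worth flagging: chunk_hash uses Python's built-in hash(), which is randomized per process for strings, so the check is only meaningful within the same interpreter run; a stable digest such as hashlib.sha256 would be portable across runs.

```python
import json
from typing import Dict, List

def reassemble_chunks(chunks: List[Dict]) -> Dict:
    """Reorder, verify, and join chunks produced by EnhancedFileProcessor.chunk_data."""
    ordered = sorted(chunks, key=lambda c: c['chunk_index'])
    if len(ordered) != ordered[0]['total_chunks']:
        raise ValueError("missing chunks")
    for c in ordered:
        # Matches the commit's 32-bit mask; only valid in the same interpreter run.
        if hash(c['data']) & 0xFFFFFFFF != c['chunk_hash']:
            raise ValueError(f"chunk {c['chunk_index']} failed its hash check")
    return json.loads(''.join(c['data'] for c in ordered))
```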
@@ -540,17 +639,16 @@ def create_interface():
                             results.append(json_data)
                     except json.JSONDecodeError as e:
                         return None, [], f"❌ Invalid JSON format: {str(e)}"
-
-                # Process URLs if provided
+
+                # Process URLs
                 if urls and urls.strip():
-                    processor = URLProcessor()
                     url_list = re.split(r'[,\n]', urls)
                     url_list = [url.strip() for url in url_list if url.strip()]
-
+
                     for url in url_list:
-                        validation = processor.validate_url(url)
-                        if validation.get('is_valid'):
-                            content = processor.fetch_content(url)
+                        validation = url_processor.validate_url(url)
+                        if validation['is_valid']:
+                            content = url_processor.fetch_content(url)
                             if content:
                                 results.append({
                                     'source': 'url',
@@ -558,34 +656,28 @@ def create_interface():
                                     'content': content,
                                     'timestamp': datetime.now().isoformat()
                                 })
-
-                # Process files if provided
-                if file:
-                    file_results = file_processor.process_file(file)
-                    if file_results:
-                        results.extend(file_results)
-
+
+                # Process files
+                if files:
+                    for file in files:
+                        file_results = file_processor.process_file(file)
+                        if file_results:
+                            results.extend(file_results)
+
                 # Generate QR codes
                 if results:
-                    if combine:
-                        combined_data = []
-                        for item in results:
-                            combined_data.extend(file_processor.chunk_data(item))
-                        qr_paths = generate_qr_code(combined_data, combined=False)
-                    else:
-                        qr_paths = generate_qr_code(results, combined=combine)
-
+                    qr_paths = generate_qr_codes(results, combine)
                     if qr_paths:
                         return (
                             results,
                             [str(path) for path in qr_paths],
-                            f"✅ Successfully processed {len(results)} items and generated {len(qr_paths)} QR code(s)!"
+                            f"✅ Successfully processed {len(results)} items and generated {len(qr_paths)} QR codes!"
                         )
                     else:
-                        return None, [], "❌ Failed to generate QR codes. Please check the input data."
+                        return None, [], "❌ Failed to generate QR codes"
                 else:
-                    return None, [], "⚠️ No valid content to process. Please provide some input data."
-
+                    return None, [], "⚠️ No valid content to process"
+
             except Exception as e:
                 logger.error(f"Processing error: {e}")
                 return None, [], f"❌ Error: {str(e)}"
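For reference, the pipeline wired up above can also be exercised without the Gradio UI. A rough round trip, assuming this file is importable as the module app (an assumption; importing it runs the module-level setup, including directory creation):

```python
from app import EnhancedURLProcessor, generate_qr_codes  # assumes this file is app.py

processor = EnhancedURLProcessor()
report = processor.validate_url("https://example.com")
if report['is_valid']:
    fetched = processor.fetch_content("https://example.com")
    if fetched:
        paths = generate_qr_codes([{'source': 'url', 'content': fetched['content']}], combined=True)
        print(f"Wrote {len(paths)} QR code(s): {paths}")
```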
@@ -594,44 +686,57 @@ def create_interface():
         example_btn.click(load_example, outputs=[text_input])
         clear_btn.click(clear_input, outputs=[text_input])
         process_btn.click(
-            process_all_inputs,
-            inputs=[url_input, file_input, text_input, combine_data],
+            process_inputs,
+            inputs=[url_input, file_input, text_input, combine_data],
             outputs=[output_json, output_gallery, output_text]
         )

+        # Add helpful documentation
         gr.Markdown("""
-        ### Features
-        - **URL Processing**: Extract content from websites
-        - **File Processing**: Handle text files and archives
-        - **Notepad**: Direct JSON data input/manipulation
-        - **JSON Cleaning**: Automatic JSON validation and formatting
-        - **QR Generation**: Generate QR codes with embedded JSON data
-        - **Flexible Output**: Choose between combined or separate QR codes
+        ### 🚀 Features
+
+        - **Complete URL Scraping**: Extracts every character from web pages
+        - **Advanced File Processing**: Full content extraction from text files and archives
+        - **Smart JSON Handling**: Processes any size JSON with automatic chunking
+        - **Sequential QR Codes**: Maintains data integrity across multiple codes
+        - **Modern Design**: Clean, responsive interface with visual feedback

-        ### Usage Tips
-        1. Use the **Notepad** tab for direct JSON input
-        2. Click "Load Example JSON" to see a sample format
-        3. Choose whether to combine all data into a single QR code
-        4. The generated QR codes will contain the complete JSON data
+        ### 💡 Tips
+
+        1. **URLs**: Enter multiple URLs separated by commas or newlines
+        2. **Files**: Upload text files or ZIP archives containing text files
+        3. **JSON**: Use the example button to see the expected format
+        4. **QR Codes**: Choose whether to combine data into sequential codes
+        5. **Processing**: Monitor the status for real-time feedback
+
+        ### 🎨 Output
+
+        - Generated QR codes are saved in the `output/qr_codes` directory
+        - Each QR code contains metadata for proper sequencing
+        - Hover over QR codes in the gallery to see details
         """)

     return interface

 def main():
-    # Configure system settings
-    mimetypes.init()
-
-    # Create output directories
-    Path('output/qr_codes').mkdir(parents=True, exist_ok=True)
-
-    # Create and launch interface
-    interface = create_interface()
+    """Initialize and launch the application"""
+    try:
+        # Configure system settings
+        mimetypes.init()

-    # Launch with proper configuration for Hugging Face
-    interface.launch(
-        share=False,
-        debug=False  # Set to False for production
-    )
+        # Create and launch interface
+        interface = create_modern_interface()
+
+        # Launch with configuration
+        interface.launch(
+            share=False,
+            debug=False,
+            show_error=True,
+            show_api=False
+        )
+    except Exception as e:
+        logger.error(f"Application startup error: {e}")
+        raise

 if __name__ == "__main__":
     main()
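Reading the generated PNGs back requires an external QR decoder; pyzbar is one option. A sketch (pyzbar is an assumed extra dependency, not something this commit imports):

```python
import json
from pathlib import Path

from PIL import Image
from pyzbar.pyzbar import decode  # pip install pyzbar (needs the zbar shared library)

chunks = []
for png in sorted(Path('output/qr_codes').glob('*.png')):
    for symbol in decode(Image.open(png)):
        chunks.append(json.loads(symbol.data.decode('utf-8')))
print(f"Recovered {len(chunks)} chunk(s)")
```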