acecalisto3 committed
Commit 71b0a3f · verified · 1 Parent(s): 62b5581

Update app.py

Files changed (1)
  1. app.py +125 -91
app.py CHANGED
@@ -11,7 +11,6 @@ from datetime import datetime
 from typing import List, Dict, Optional, Union, Tuple
 from pathlib import Path
 from urllib.parse import urlparse, urljoin
-
 import requests
 import validators
 import gradio as gr
@@ -22,6 +21,8 @@ from cleantext import clean
 import qrcode
 from PIL import Image, ImageDraw, ImageFont
 import numpy as np
+import tarfile
+import gzip

 # Setup enhanced logging with more detailed formatting
 logging.basicConfig(
@@ -30,8 +31,7 @@ logging.basicConfig(
     handlers=[
         logging.StreamHandler(),
         logging.FileHandler('app.log', encoding='utf-8')
-    ]
-)
+    ])
 logger = logging.getLogger(__name__)

 # Ensure output directories exist with modern structure
@@ -43,13 +43,13 @@ for directory in [OUTPUTS_DIR, QR_CODES_DIR, TEMP_DIR]:

 class EnhancedURLProcessor:
     """Advanced URL processing with complete content extraction"""
-
+
     def __init__(self):
         self.session = requests.Session()
         self.timeout = 15  # Extended timeout for larger content
         self.max_retries = 3
         self.user_agent = UserAgent()
-
+
         # Enhanced headers for better site compatibility
         self.session.headers.update({
             'User-Agent': self.user_agent.random,
@@ -70,11 +70,9 @@ class EnhancedURLProcessor:
         try:
             if not validators.url(url):
                 return {'is_valid': False, 'message': 'Invalid URL format', 'details': 'URL must begin with http:// or https://'}
-
             parsed = urlparse(url)
             if not all([parsed.scheme, parsed.netloc]):
                 return {'is_valid': False, 'message': 'Incomplete URL', 'details': 'Missing scheme or domain'}
-
             # Try HEAD request first to check accessibility
             try:
                 head_response = self.session.head(url, timeout=5)
@@ -100,19 +98,18 @@
         """Enhanced content fetcher with retry mechanism and complete character extraction"""
         try:
             logger.info(f"Fetching content from URL: {url} (Attempt {retry_count + 1}/{self.max_retries})")
-
+
             # Update User-Agent randomly for each request
             self.session.headers.update({'User-Agent': self.user_agent.random})
-
+
             response = self.session.get(url, timeout=self.timeout)
             response.raise_for_status()
-
+
             # Detect encoding
             if response.encoding is None:
                 encoding = chardet.detect(response.content)['encoding'] or 'utf-8'
             else:
                 encoding = response.encoding
-
             # Decode content with fallback
             try:
                 raw_content = response.content.decode(encoding, errors='replace')
@@ -136,13 +133,11 @@
                 processed_content = self._process_html_content(raw_content, url)
             else:
                 processed_content = raw_content
-
             return {
                 'content': processed_content,
                 'raw_content': raw_content,
                 'metadata': metadata
             }
-
         except requests.exceptions.RequestException as e:
             if retry_count < self.max_retries - 1:
                 logger.warning(f"Retry {retry_count + 1}/{self.max_retries} for URL: {url}")
@@ -158,7 +153,7 @@
         """Process HTML content while preserving all characters"""
         try:
             soup = BeautifulSoup(content, 'html.parser')
-
+
             # Convert relative URLs to absolute
             for tag in soup.find_all(['a', 'img', 'link', 'script']):
                 for attr in ['href', 'src']:
@@ -167,12 +162,10 @@
                             tag[attr] = urljoin(base_url, tag[attr])
                     except Exception:
                         pass
-
             # Extract all text content
             text_parts = []
             for element in soup.stripped_strings:
                 text_parts.append(str(element))
-
             return '\n'.join(text_parts)
         except Exception as e:
             logger.error(f"HTML processing error: {e}")
@@ -184,7 +177,7 @@ class EnhancedFileProcessor:
     def __init__(self, max_file_size: int = 5 * 1024 * 1024 * 1024):  # 5GB default
         self.max_file_size = max_file_size
         self.supported_extensions = {
-            '.txt', '.md', '.csv', '.json', '.xml', '.html', '.htm',
+            '.txt', '.md', '.csv', '.json', '.xml', '.html', '.htm',
             '.log', '.yml', '.yaml', '.ini', '.conf', '.cfg',
             '.zip', '.tar', '.gz', '.bz2', '.7z', '.rar',
             '.pdf', '.doc', '.docx', '.rtf', '.odt'
@@ -204,17 +197,18 @@

             with tempfile.TemporaryDirectory() as temp_dir:
                 temp_dir_path = Path(temp_dir)
-
+
                 # Handle different archive types
                 if self._is_archive(file.name):
                     dataset.extend(self._process_archive(file.name, temp_dir_path))
-                else:
+                elif Path(file.name).suffix.lower() in self.supported_extensions:
                     dataset.extend(self._process_single_file(file))
+                else:
+                    logger.warning(f"Unsupported file type: {file.name}")

         except Exception as e:
             logger.error(f"Error processing file: {str(e)}")
             return []
-
         return dataset

     def _is_archive(self, filepath: str) -> bool:
@@ -224,14 +218,14 @@
         ])

     def _process_single_file(self, file) -> List[Dict]:
-        """Process a single file with enhanced character extraction"""
+        """Process a single file with enhanced character extraction and JSON handling"""
         try:
             file_stat = os.stat(file.name)
             file_size = file_stat.st_size
-
+
             # Initialize content storage
             content_parts = []
-
+
             # Process file in chunks for large files
             chunk_size = 10 * 1024 * 1024  # 10MB chunks
             with open(file.name, 'rb') as f:
@@ -239,7 +233,7 @@
                     chunk = f.read(chunk_size)
                     if not chunk:
                         break
-
+
                     # Detect encoding for each chunk
                     encoding = chardet.detect(chunk)['encoding'] or 'utf-8'
                     try:
@@ -252,6 +246,43 @@
             # Combine all chunks
             complete_content = ''.join(content_parts)

+            # Check if the content is valid JSON regardless of file extension
+            try:
+                if mimetypes.guess_type(file.name)[0] == 'application/json' or file.name.lower().endswith('.json'):
+                    # It's a JSON file by type or extension
+                    json_data = json.loads(complete_content)
+                    return [{
+                        'source': 'json_file',
+                        'filename': os.path.basename(file.name),
+                        'file_size': file_size,
+                        'mime_type': 'application/json',
+                        'created': datetime.fromtimestamp(file_stat.st_ctime).isoformat(),
+                        'modified': datetime.fromtimestamp(file_stat.st_mtime).isoformat(),
+                        'content': json_data,  # Store the parsed JSON object
+                        'raw_content': complete_content,  # Store the original JSON string
+                        'timestamp': datetime.now().isoformat()
+                    }]
+                else:
+                    # Try to parse as JSON anyway
+                    try:
+                        json_data = json.loads(complete_content)
+                        # If we get here, it's valid JSON despite the extension
+                        return [{
+                            'source': 'json_content',
+                            'filename': os.path.basename(file.name),
+                            'file_size': file_size,
+                            'mime_type': 'application/json',
+                            'created': datetime.fromtimestamp(file_stat.st_ctime).isoformat(),
+                            'modified': datetime.fromtimestamp(file_stat.st_mtime).isoformat(),
+                            'content': json_data,  # Store the parsed JSON object
+                            'raw_content': complete_content,  # Store the original JSON string
+                            'timestamp': datetime.now().isoformat()
+                        }]
+                    except json.JSONDecodeError:
+                        logger.warning(f"File {file.name} is not valid JSON.")
+            except Exception as e:
+                logger.error(f"Error during JSON processing: {e}")
+
             return [{
                 'source': 'file',
                 'filename': os.path.basename(file.name),
@@ -280,22 +311,46 @@
                         if extracted_path.suffix.lower() in self.supported_extensions:
                             with open(extracted_path, 'rb') as f:
                                 dataset.extend(self._process_single_file(f))
+            # Handle TAR archives
+            elif archive_path.lower().endswith(('.tar', '.tar.gz', '.tgz')):
+                try:
+                    with tarfile.open(archive_path, 'r:*') as tar_ref:
+                        for member in tar_ref.getmembers():
+                            if member.isfile():
+                                extracted_path = extract_to / member.name
+                                tar_ref.extract(member, path=extract_to)
+                                if extracted_path.suffix.lower() in self.supported_extensions:
+                                    with open(extracted_path, 'rb') as f:
+                                        dataset.extend(self._process_single_file(f))
+                except tarfile.TarError as e:
+                    logger.error(f"Error processing TAR archive: {e}")
+            # Handle GZIP archives (single file)
+            elif archive_path.lower().endswith('.gz'):
+                extracted_path = extract_to / Path(archive_path).stem
+                try:
+                    with gzip.open(archive_path, 'rb') as gz_file, open(extracted_path, 'wb') as outfile:
+                        outfile.write(gz_file.read())
+                    if extracted_path.suffix.lower() in self.supported_extensions:
+                        with open(extracted_path, 'rb') as f:
+                            dataset.extend(self._process_single_file(f))
+                except gzip.GzipFile as e:
+                    logger.error(f"Error processing GZIP archive: {e}")
+            # TODO: Add support for other archive types (.bz2, .7z, .rar) - may require external libraries
+            elif archive_path.lower().endswith(('.bz2', '.7z', '.rar')):
+                logger.warning(f"Support for {Path(archive_path).suffix} archives is not yet fully implemented.")

-            # TODO: Add support for other archive types (tar, 7z, etc.)
-
         except Exception as e:
             logger.error(f"Archive processing error: {e}")
-
         return dataset

     def chunk_data(self, data: Union[Dict, List], max_size: int = 2953) -> List[Dict]:
+        """Enhanced data chunking with sequence metadata"""
         try:
-            # Convert data to JSON bytes
+            # Convert data to JSON string
             json_str = json.dumps(data, ensure_ascii=False)
-            json_bytes = json_str.encode('utf-8')
-            total_length = len(json_bytes)
-
-            # Calculate metadata overhead in bytes
+            total_length = len(json_str)
+
+            # Calculate overhead for metadata
             metadata_template = {
                 "chunk_index": 0,
                 "total_chunks": 1,
@@ -303,48 +358,32 @@
                 "chunk_hash": "",
                 "data": ""
             }
-            overhead_bytes = len(json.dumps(metadata_template).encode('utf-8')) + 20  # Add padding
-
-            effective_chunk_size = max_size - overhead_bytes
-
-            if effective_chunk_size <= 0:
-                raise ValueError("Max size is too small after accounting for metadata overhead")
-
-            chunks = []
-            start = 0
-            while start < total_length:
-                end = start + effective_chunk_size
-                # Ensure valid Unicode by decoding
-                chunk_str = json_bytes[start:end].decode('utf-8', errors='replace')
+            overhead = len(json.dumps(metadata_template)) + 20  # Extra padding for safety
+
+            # Calculate effective chunk size
+            effective_chunk_size = max_size - overhead
+
+            if total_length <= effective_chunk_size:
+                # Data fits in one chunk
                 chunk = {
-                    "chunk_index": len(chunks),
-                    "total_chunks": -1,  # To be set later
+                    "chunk_index": 0,
+                    "total_chunks": 1,
                     "total_length": total_length,
-                    "chunk_hash": hash(chunk_str) & 0xFFFFFFFF,
-                    "data": chunk_str
+                    "chunk_hash": hash(json_str) & 0xFFFFFFFF,  # 32-bit hash
+                    "data": json_str
                 }
-                chunks.append(chunk)
-                start = end
-
-            # Update total_chunks in each chunk
-            for i, chunk in enumerate(chunks):
-                chunk["total_chunks"] = len(chunks)
-
-            return chunks
-        except Exception as e:
-            logger.error(f"Error chunking data: {e}")
-            return []
-
+                return [chunk]
+
             # Calculate number of chunks needed
             num_chunks = -(-total_length // effective_chunk_size)  # Ceiling division
             chunk_size = -(-total_length // num_chunks)  # Even distribution
-
+
             chunks = []
             for i in range(num_chunks):
                 start_idx = i * chunk_size
                 end_idx = min(start_idx + chunk_size, total_length)
                 chunk_data = json_str[start_idx:end_idx]
-
+
                 chunk = {
                     "chunk_index": i,
                     "total_chunks": num_chunks,
@@ -353,58 +392,56 @@
                     "data": chunk_data
                 }
                 chunks.append(chunk)
-
+
             return chunks
-
         except Exception as e:
             logger.error(f"Error chunking data: {e}")
             return []

-def generate_stylish_qr(data: Union[str, Dict],
-                        filename: str,
-                        size: int = 10,
-                        border: int = 4,
-                        fill_color: str = "#000000",
-                        back_color: str = "#FFFFFF") -> str:
+def generate_stylish_qr(data: Union[str, Dict],
+                        filename: str,
+                        size: int = 10,
+                        border: int = 4,
+                        fill_color: str = "#000000",
+                        back_color: str = "#FFFFFF") -> str:
     """Generate a stylish QR code with enhanced visual appeal"""
     try:
         qr = qrcode.QRCode(
             version=None,
-            error_correction=qrcode.constants.ERROR_CORRECT_M,
+            error_correction=qrcode.constants.ERROR_CORRECT_H,
             box_size=size,
             border=border
         )
-
+
         # Add data to QR code
         if isinstance(data, dict):
            qr.add_data(json.dumps(data, ensure_ascii=False))
        else:
            qr.add_data(data)
-
+
        qr.make(fit=True)
-
+
        # Create QR code image with custom colors
        qr_image = qr.make_image(fill_color=fill_color, back_color=back_color)
-
+
        # Convert to RGBA for transparency support
        qr_image = qr_image.convert('RGBA')
-
+
        # Add subtle gradient overlay
        gradient = Image.new('RGBA', qr_image.size, (0, 0, 0, 0))
        draw = ImageDraw.Draw(gradient)
        for i in range(qr_image.width):
            alpha = int(255 * (1 - i/qr_image.width) * 0.1)  # 10% maximum opacity
            draw.line([(i, 0), (i, qr_image.height)], fill=(255, 255, 255, alpha))
-
+
        # Combine images
        final_image = Image.alpha_composite(qr_image, gradient)
-
+
        # Save the image
        output_path = QR_CODES_DIR / filename
        final_image.save(output_path, quality=95)
-
+
        return str(output_path)
-
    except Exception as e:
        logger.error(f"QR generation error: {e}")
        return ""
@@ -414,7 +451,7 @@ def generate_qr_codes(data: Union[str, Dict, List], combined: bool = True) -> Li
     try:
         file_processor = EnhancedFileProcessor()
         paths = []
-
+
         if combined:
             # Process combined data
             chunks = file_processor.chunk_data(data)
@@ -455,8 +492,7 @@
                 )
                 if qr_path:
                     paths.append(qr_path)
-
-        return paths
+        return paths
     except Exception as e:
         logger.error(f"QR code generation error: {e}")
         return []
@@ -553,9 +589,7 @@ def create_modern_interface():
         with gr.Tab("📁 File Input"):
             file_input = gr.File(
                 label="Upload Files",
-                file_types=["text", ".zip", ".json",
-                            ".tar", ".gz", ".bz2", ".7z", ".rar",
-                            ".pdf", ".doc", ".docx", ".rtf", ".odt"],
+                file_types=["*"],  # Accept all file types
                 file_count="multiple"
             )
         with gr.Tab("📋 JSON Input"):
@@ -695,15 +729,15 @@ def create_modern_interface():
         gr.Markdown("""
         ### 🚀 Features
         - **Complete URL Scraping**: Extracts every character from web pages
-        - **Advanced File Processing**: Full content extraction from various text-based files and common archives
-        - **Smart JSON Handling**: Processes any size JSON with automatic chunking
+        - **Advanced File Processing**: Full content extraction from various text-based files and common archives. Supports flexible JSON handling.
+        - **Smart JSON Handling**: Processes any size JSON with automatic chunking, either via direct input or file upload.
         - **Sequential QR Codes**: Maintains data integrity across multiple codes
         - **Modern Design**: Clean, responsive interface with visual feedback

         ### 💡 Tips
         1. **URLs**: Enter multiple URLs separated by commas or newlines
-        2. **Files**: Upload various text-based files (e.g., .txt, .md, .csv), JSON files (.json), or common archives (.zip, .tar, .gz)
-        3. **JSON**: Use the example button to see the expected format or upload a .json file
+        2. **Files**: Upload any type of file. The processor will attempt to handle supported text-based files, archives (.zip, .tar, .gz), and JSON files.
+        3. **JSON**: Use the example button to see the expected format or upload a .json file. The system will also try to detect JSON content in other file types.
         4. **QR Codes**: Choose whether to combine data into sequential codes
         5. **Processing**: Monitor the status for real-time feedback
743