acecalisto3 commited on
Commit
484ed5e
·
verified ·
1 Parent(s): 0b50840

Update app2.py

Browse files
Files changed (1) hide show
  1. app2.py +50 -146
app2.py CHANGED
@@ -18,6 +18,7 @@ import gradio as gr
18
  from bs4 import BeautifulSoup
19
  from fake_useragent import UserAgent
20
  from cleantext import clean
 
21
 
22
  # Setup logging
23
  logging.basicConfig(
@@ -33,22 +34,10 @@ logger = logging.getLogger(__name__)
33
  # Ensure output directories exist
34
  Path('output/qr_codes').mkdir(parents=True, exist_ok=True)
35
 
36
- # At the top of the file, remove these imports:
37
- # from config import Config
38
- # from proxy_handler import ProxyHandler
39
- # from robots_handler import RobotsHandler
40
-
41
  class URLProcessor:
42
  def __init__(self):
43
  self.session = requests.Session()
44
- self.timeout = 10
45
- self.max_retries = 3
46
- self.request_delay = 1.0
47
- self.respect_robots = True
48
- self.use_proxy = False
49
- self.proxy_url = None
50
-
51
- # Update session headers
52
  self.session.headers.update({
53
  'User-Agent': UserAgent().random,
54
  'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
@@ -57,55 +46,6 @@ class URLProcessor:
57
  'Connection': 'keep-alive',
58
  'Upgrade-Insecure-Requests': '1'
59
  })
60
-
61
- if self.use_proxy and self.proxy_url:
62
- self.session.proxies = {
63
- 'http': self.proxy_url,
64
- 'https': self.proxy_url
65
- }
66
-
67
- def check_robots_txt(self, url: str) -> bool:
68
- """Check if URL is allowed by robots.txt"""
69
- if not self.respect_robots:
70
- return True
71
-
72
- try:
73
- from urllib.parse import urlparse
74
- from urllib.robotparser import RobotFileParser
75
-
76
- parsed_url = urlparse(url)
77
- robots_url = f"{parsed_url.scheme}://{parsed_url.netloc}/robots.txt"
78
-
79
- rp = RobotFileParser()
80
- rp.set_url(robots_url)
81
- rp.read()
82
-
83
- return rp.can_fetch(self.session.headers['User-Agent'], url)
84
- except Exception as e:
85
- logger.warning(f"Error checking robots.txt: {e}")
86
- return True
87
-
88
- def fetch_content(self, url: str) -> Optional[Dict]:
89
- """Fetch content with built-in rate limiting and robots.txt checking"""
90
- if not self.check_robots_txt(url):
91
- logger.warning(f"URL {url} is disallowed by robots.txt")
92
- return None
93
-
94
- time.sleep(self.request_delay) # Basic rate limiting
95
-
96
- for attempt in range(self.max_retries):
97
- try:
98
- if 'drive.google.com' in url:
99
- return self._handle_google_drive(url)
100
- if 'calendar.google.com' in url:
101
- return self._handle_google_calendar(url)
102
- return self._fetch_html_content(url)
103
- except Exception as e:
104
- logger.error(f"Attempt {attempt + 1} failed: {e}")
105
- if attempt < self.max_retries - 1:
106
- time.sleep(self.request_delay * (attempt + 1))
107
-
108
- return None
109
 
110
  def advanced_text_cleaning(self, text: str) -> str:
111
  """Robust text cleaning with version compatibility"""
@@ -127,9 +67,9 @@ class URLProcessor:
127
  return cleaned_text
128
  except Exception as e:
129
  logger.warning(f"Text cleaning error: {e}. Using fallback method.")
130
- text = re.sub(r'[\x00 -\x1F\x7F-\x9F]', '', text) # Remove control characters
131
- text = text.encode('ascii', 'ignore').decode('ascii') # Remove non-ASCII characters
132
- text = re.sub(r'\s+', ' ', text) # Normalize whitespace
133
  return text.strip()
134
 
135
  def validate_url(self, url: str) -> Dict:
@@ -226,7 +166,7 @@ class URLProcessor:
226
  class FileProcessor:
227
  """Class to handle file processing"""
228
 
229
- def __init__(self, max_file_size: int = 2 * 1024 * 1024 * 1024): # 2GB default
230
  self.max_file_size = max_file_size
231
  self.supported_text_extensions = {'.txt', '.md', '.csv', '.json', '.xml'}
232
 
@@ -235,7 +175,7 @@ class FileProcessor:
235
  try:
236
  mime_type, _ = mimetypes.guess_type(filepath)
237
  return (mime_type and mime_type.startswith('text/')) or \
238
- (os.path.splitext(filepath)[1].lower() in self.supported_text_extensions)
239
  except Exception:
240
  return False
241
 
@@ -280,7 +220,7 @@ class FileProcessor:
280
  "source": "file",
281
  "filename": filename,
282
  "content": content,
283
- "timestamp": datetime.now ().isoformat()
284
  })
285
  except Exception as e:
286
  logger.error(f"Error reading file {filename}: {str(e)}")
@@ -319,6 +259,7 @@ class FileProcessor:
319
  logger.error(f"File processing error: {e}")
320
  return []
321
 
 
322
  def clean_json(data: Union[str, Dict]) -> Optional[Dict]:
323
  """Clean and validate JSON data"""
324
  try:
@@ -335,6 +276,7 @@ class FileProcessor:
335
  logger.error(f"Unexpected error while cleaning JSON: {e}")
336
  return None
337
 
 
338
  def generate_qr_code(data: Union[str, Dict], combined: bool = True) -> List[str]:
339
  """Generate QR code(s) from data"""
340
  try:
@@ -342,7 +284,7 @@ class FileProcessor:
342
  output_dir.mkdir(parents=True, exist_ok=True)
343
 
344
  if combined:
345
- cleaned_data = clean_json(data)
346
  if cleaned_data:
347
  qr = qrcode.QRCode(
348
  version=None,
@@ -359,10 +301,10 @@ class FileProcessor:
359
  img.save(str(output_path))
360
  return [str(output_path)]
361
  else:
 
362
  if isinstance(data, list):
363
- paths = []
364
  for idx, item in enumerate(data):
365
- cleaned_item = clean_json(item)
366
  if cleaned_item:
367
  qr = qrcode.QRCode(
368
  version=None,
@@ -378,9 +320,8 @@ class FileProcessor:
378
  output_path = output_dir / f'item_{idx}_qr_{int(time.time())}.png'
379
  img.save(str(output_path))
380
  paths.append(str(output_path))
381
- return paths
382
  else:
383
- cleaned_item = clean_json(data)
384
  if cleaned_item:
385
  qr = qrcode.QRCode(
386
  version=None,
@@ -395,71 +336,32 @@ class FileProcessor:
395
  img = qr.make_image(fill_color="black", back_color="white")
396
  output_path = output_dir / f'single_qr_{int(time.time())}.png'
397
  img.save(str(output_path))
398
- return [str(output_path)]
399
-
400
- return []
401
  except Exception as e:
402
  logger.error(f"QR generation error: {e}")
403
  return []
404
 
405
- def decode_qr_code(image_path: str) -> Optional[str]:
406
- """Decode QR code from an image file"""
407
- try:
408
- # Open and convert image to grayscale for better QR detection
409
- img = Image.open(image_path).convert('L')
410
- decoded_objects = decode(img)
411
-
412
- if decoded_objects:
413
- return decoded_objects[0].data.decode('utf-8')
414
- logger.warning("No QR code found in image")
415
- return None
416
- except Exception as e:
417
- logger.error(f"QR decoding error: {e}")
418
- return None
419
-
420
- # Replace the existing decode_qr function with this one
421
- def decode_qr(image) -> List[str]:
422
- """Decode all QR codes found in an image"""
423
- try:
424
- # Convert to PIL Image if needed
425
- if not isinstance(image, Image.Image):
426
- image = Image.fromarray(image)
427
-
428
- # Convert to grayscale for better detection
429
- image = image.convert('L')
430
-
431
- # Decode QR codes
432
- decoded_objects = decode(image)
433
- results = []
434
-
435
- for obj in decoded_objects:
436
- try:
437
- decoded_text = obj.data.decode('utf-8')
438
- results.append(decoded_text)
439
- except UnicodeDecodeError:
440
- logger.warning("Failed to decode QR code data as UTF-8")
441
- continue
442
-
443
- return results
444
- except Exception as e:
445
- logger.error(f"QR decoding error: {e}")
446
- return []
447
-
448
- raise ValueError("Unable to decode QR code")
449
- except Exception as e:
450
- logger.error(f"QR decoding error: {e}")
451
- return None, None # Return None for both data and resolution in case of error
452
 
453
  def datachat_trained(data_input: str, query: str) -> str:
454
  """Handle trained data interaction logic"""
455
- data = clean_json(data_input)
456
  if not data:
457
  return "Invalid JSON data provided."
458
  return f"[Trained Mode]\nData: {json.dumps(data, indent=2)}\nQuery: {query}"
459
 
460
  def datachat_simple(data_input: str, query: str) -> str:
461
  """Handle simple chat interaction logic"""
462
- data = clean_json(data_input)
463
  if not data:
464
  return "Invalid JSON data provided."
465
  return f"[Chat Mode]\nData: {json.dumps(data, indent=2)}\nQuestion: {query}"
@@ -467,15 +369,14 @@ def datachat_simple(data_input: str, query: str) -> str:
467
  def datachat_interface(mode: str, data_source: str, json_input: str, qr_image: str, query: str) -> str:
468
  """Interface for DataChat functionality"""
469
  data = None
470
- resolution = None # Initialize resolution variable
471
  if data_source == "JSON Input":
472
  data = json_input
473
  elif data_source == "QR Code":
474
- try:
475
- decoded_data, resolution = decode_qr_code(qr_image) # Get both data and resolution
476
- data = decoded_data
477
- except Exception as e:
478
- return f"Invalid QR code data provided: {e}"
479
  else:
480
  return "No valid data source selected."
481
 
@@ -504,34 +405,37 @@ def create_interface():
504
  json_input = gr.Textbox(lines=8, label="JSON Data")
505
  qr_image = gr.Image(label="QR Code Image", type="filepath")
506
  query = gr.Textbox(label="Query")
507
-
508
  submit_btn = gr.Button("Submit")
509
  output = gr.Textbox(label="Response")
510
-
511
  submit_btn.click(datachat_interface, [mode, data_source, json_input, qr_image, query], output)
512
 
513
  with gr.Tab("QR Generator"):
514
  qr_input = gr.Textbox(lines=8, label="Input JSON for QR")
515
  generate_btn = gr.Button("Generate QR")
516
  qr_output = gr.Image(label="Generated QR Code")
517
-
518
- def generate_qr(json_data):
519
- data = clean_json(json_data)
520
- if data:
521
- return generate_qr_code(data)
522
- return None
523
-
524
- generate_btn.click(generate_qr, qr_input, qr_output)
 
 
 
 
 
 
 
525
 
526
  return interface
527
 
528
  def main():
529
  mimetypes.init()
530
  Path('output/qr_codes').mkdir(parents=True, exist_ok=True)
531
-
532
- iface = gr.Interface(fn=decode_qr, inputs="image", outputs="text")
533
- iface.launch()
534
-
535
 
536
  if __name__ == "__main__":
537
- main()
 
18
  from bs4 import BeautifulSoup
19
  from fake_useragent import UserAgent
20
  from cleantext import clean
21
+ import qrcode # Added missing import
22
 
23
  # Setup logging
24
  logging.basicConfig(
 
34
  # Ensure output directories exist
35
  Path('output/qr_codes').mkdir(parents=True, exist_ok=True)
36
 
 
 
 
 
 
37
  class URLProcessor:
38
  def __init__(self):
39
  self.session = requests.Session()
40
+ self.timeout = 10 # seconds
 
 
 
 
 
 
 
41
  self.session.headers.update({
42
  'User-Agent': UserAgent().random,
43
  'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
 
46
  'Connection': 'keep-alive',
47
  'Upgrade-Insecure-Requests': '1'
48
  })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
  def advanced_text_cleaning(self, text: str) -> str:
51
  """Robust text cleaning with version compatibility"""
 
67
  return cleaned_text
68
  except Exception as e:
69
  logger.warning(f"Text cleaning error: {e}. Using fallback method.")
70
+ text = re.sub(r'[\x00-\x1F\x7F-\x9F]', '', text) # Fixed regex
71
+ text = text.encode('ascii', 'ignore').decode('ascii')
72
+ text = re.sub(r'\s+', ' ', text)
73
  return text.strip()
74
 
75
  def validate_url(self, url: str) -> Dict:
 
166
  class FileProcessor:
167
  """Class to handle file processing"""
168
 
169
+ def __init__(self, max_file_size: int = 2 * 1024 * 1024 * 1024):
170
  self.max_file_size = max_file_size
171
  self.supported_text_extensions = {'.txt', '.md', '.csv', '.json', '.xml'}
172
 
 
175
  try:
176
  mime_type, _ = mimetypes.guess_type(filepath)
177
  return (mime_type and mime_type.startswith('text/')) or \
178
+ (Path(filepath).suffix.lower() in self.supported_text_extensions)
179
  except Exception:
180
  return False
181
 
 
220
  "source": "file",
221
  "filename": filename,
222
  "content": content,
223
+ "timestamp": datetime.now().isoformat()
224
  })
225
  except Exception as e:
226
  logger.error(f"Error reading file {filename}: {str(e)}")
 
259
  logger.error(f"File processing error: {e}")
260
  return []
261
 
262
+ @staticmethod
263
  def clean_json(data: Union[str, Dict]) -> Optional[Dict]:
264
  """Clean and validate JSON data"""
265
  try:
 
276
  logger.error(f"Unexpected error while cleaning JSON: {e}")
277
  return None
278
 
279
+ @staticmethod
280
  def generate_qr_code(data: Union[str, Dict], combined: bool = True) -> List[str]:
281
  """Generate QR code(s) from data"""
282
  try:
 
284
  output_dir.mkdir(parents=True, exist_ok=True)
285
 
286
  if combined:
287
+ cleaned_data = FileProcessor.clean_json(data)
288
  if cleaned_data:
289
  qr = qrcode.QRCode(
290
  version=None,
 
301
  img.save(str(output_path))
302
  return [str(output_path)]
303
  else:
304
+ paths = []
305
  if isinstance(data, list):
 
306
  for idx, item in enumerate(data):
307
+ cleaned_item = FileProcessor.clean_json(item)
308
  if cleaned_item:
309
  qr = qrcode.QRCode(
310
  version=None,
 
320
  output_path = output_dir / f'item_{idx}_qr_{int(time.time())}.png'
321
  img.save(str(output_path))
322
  paths.append(str(output_path))
 
323
  else:
324
+ cleaned_item = FileProcessor.clean_json(data)
325
  if cleaned_item:
326
  qr = qrcode.QRCode(
327
  version=None,
 
336
  img = qr.make_image(fill_color="black", back_color="white")
337
  output_path = output_dir / f'single_qr_{int(time.time())}.png'
338
  img.save(str(output_path))
339
+ paths.append(str(output_path))
340
+ return paths
 
341
  except Exception as e:
342
  logger.error(f"QR generation error: {e}")
343
  return []
344
 
345
+ def decode_qr(image_path: str) -> List[str]:
346
+ """Decode QR code from image file"""
347
+ try:
348
+ image = Image.open(image_path)
349
+ decoded_objects = decode(image)
350
+ return [obj.data.decode('utf-8') for obj in decoded_objects]
351
+ except Exception as e:
352
+ logger.error(f"QR decoding error: {e}")
353
+ return []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
354
 
355
  def datachat_trained(data_input: str, query: str) -> str:
356
  """Handle trained data interaction logic"""
357
+ data = FileProcessor.clean_json(data_input)
358
  if not data:
359
  return "Invalid JSON data provided."
360
  return f"[Trained Mode]\nData: {json.dumps(data, indent=2)}\nQuery: {query}"
361
 
362
  def datachat_simple(data_input: str, query: str) -> str:
363
  """Handle simple chat interaction logic"""
364
+ data = FileProcessor.clean_json(data_input)
365
  if not data:
366
  return "Invalid JSON data provided."
367
  return f"[Chat Mode]\nData: {json.dumps(data, indent=2)}\nQuestion: {query}"
 
369
  def datachat_interface(mode: str, data_source: str, json_input: str, qr_image: str, query: str) -> str:
370
  """Interface for DataChat functionality"""
371
  data = None
 
372
  if data_source == "JSON Input":
373
  data = json_input
374
  elif data_source == "QR Code":
375
+ decoded_data = decode_qr(qr_image)
376
+ if decoded_data:
377
+ data = decoded_data[0]
378
+ else:
379
+ return "Invalid QR code data provided"
380
  else:
381
  return "No valid data source selected."
382
 
 
405
  json_input = gr.Textbox(lines=8, label="JSON Data")
406
  qr_image = gr.Image(label="QR Code Image", type="filepath")
407
  query = gr.Textbox(label="Query")
 
408
  submit_btn = gr.Button("Submit")
409
  output = gr.Textbox(label="Response")
 
410
  submit_btn.click(datachat_interface, [mode, data_source, json_input, qr_image, query], output)
411
 
412
  with gr.Tab("QR Generator"):
413
  qr_input = gr.Textbox(lines=8, label="Input JSON for QR")
414
  generate_btn = gr.Button("Generate QR")
415
  qr_output = gr.Image(label="Generated QR Code")
416
+ generate_btn.click(
417
+ lambda x: FileProcessor.generate_qr_code(x)[0] if x else None,
418
+ inputs=qr_input,
419
+ outputs=qr_output
420
+ )
421
+
422
+ with gr.Tab("QR Decoder"):
423
+ qr_upload = gr.Image(label="Upload QR Code", type="filepath")
424
+ decode_btn = gr.Button("Decode QR")
425
+ decoded_output = gr.Textbox(label="Decoded Data")
426
+ decode_btn.click(
427
+ lambda x: "\n".join(decode_qr(x)),
428
+ inputs=qr_upload,
429
+ outputs=decoded_output
430
+ )
431
 
432
  return interface
433
 
434
  def main():
435
  mimetypes.init()
436
  Path('output/qr_codes').mkdir(parents=True, exist_ok=True)
437
+ interface = create_interface()
438
+ interface.launch()
 
 
439
 
440
  if __name__ == "__main__":
441
+ main()