acecalisto3 committed on
Commit
328f861
·
verified ·
1 Parent(s): 484ed5e

Update app2.py

Browse files
Files changed (1) hide show
  1. app2.py +140 -50
app2.py CHANGED
@@ -18,7 +18,6 @@ import gradio as gr
18
  from bs4 import BeautifulSoup
19
  from fake_useragent import UserAgent
20
  from cleantext import clean
21
- import qrcode # Added missing import
22
 
23
  # Setup logging
24
  logging.basicConfig(
@@ -34,10 +33,22 @@ logger = logging.getLogger(__name__)
34
  # Ensure output directories exist
35
  Path('output/qr_codes').mkdir(parents=True, exist_ok=True)
36
 
 
 
 
 
 
37
  class URLProcessor:
38
  def __init__(self):
39
  self.session = requests.Session()
40
- self.timeout = 10 # seconds
 
 
 
 
 
 
 
41
  self.session.headers.update({
42
  'User-Agent': UserAgent().random,
43
  'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
@@ -46,6 +57,55 @@ class URLProcessor:
46
  'Connection': 'keep-alive',
47
  'Upgrade-Insecure-Requests': '1'
48
  })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
  def advanced_text_cleaning(self, text: str) -> str:
51
  """Robust text cleaning with version compatibility"""
@@ -67,7 +127,7 @@ class URLProcessor:
67
  return cleaned_text
68
  except Exception as e:
69
  logger.warning(f"Text cleaning error: {e}. Using fallback method.")
70
- text = re.sub(r'[\x00-\x1F\x7F-\x9F]', '', text) # Fixed regex
71
  text = text.encode('ascii', 'ignore').decode('ascii')
72
  text = re.sub(r'\s+', ' ', text)
73
  return text.strip()
@@ -166,7 +226,7 @@ class URLProcessor:
166
  class FileProcessor:
167
  """Class to handle file processing"""
168
 
169
- def __init__(self, max_file_size: int = 2 * 1024 * 1024 * 1024):
170
  self.max_file_size = max_file_size
171
  self.supported_text_extensions = {'.txt', '.md', '.csv', '.json', '.xml'}
172
 
@@ -175,7 +235,7 @@ class FileProcessor:
175
  try:
176
  mime_type, _ = mimetypes.guess_type(filepath)
177
  return (mime_type and mime_type.startswith('text/')) or \
178
- (Path(filepath).suffix.lower() in self.supported_text_extensions)
179
  except Exception:
180
  return False
181
 
@@ -220,7 +280,7 @@ class FileProcessor:
220
  "source": "file",
221
  "filename": filename,
222
  "content": content,
223
- "timestamp": datetime.now().isoformat()
224
  })
225
  except Exception as e:
226
  logger.error(f"Error reading file {filename}: {str(e)}")
@@ -259,7 +319,6 @@ class FileProcessor:
259
  logger.error(f"File processing error: {e}")
260
  return []
261
 
262
- @staticmethod
263
  def clean_json(data: Union[str, Dict]) -> Optional[Dict]:
264
  """Clean and validate JSON data"""
265
  try:
@@ -276,7 +335,6 @@ class FileProcessor:
276
  logger.error(f"Unexpected error while cleaning JSON: {e}")
277
  return None
278
 
279
- @staticmethod
280
  def generate_qr_code(data: Union[str, Dict], combined: bool = True) -> List[str]:
281
  """Generate QR code(s) from data"""
282
  try:
@@ -284,7 +342,7 @@ class FileProcessor:
284
  output_dir.mkdir(parents=True, exist_ok=True)
285
 
286
  if combined:
287
- cleaned_data = FileProcessor.clean_json(data)
288
  if cleaned_data:
289
  qr = qrcode.QRCode(
290
  version=None,
@@ -301,10 +359,10 @@ class FileProcessor:
301
  img.save(str(output_path))
302
  return [str(output_path)]
303
  else:
304
- paths = []
305
  if isinstance(data, list):
 
306
  for idx, item in enumerate(data):
307
- cleaned_item = FileProcessor.clean_json(item)
308
  if cleaned_item:
309
  qr = qrcode.QRCode(
310
  version=None,
@@ -320,8 +378,9 @@ class FileProcessor:
320
  output_path = output_dir / f'item_{idx}_qr_{int(time.time())}.png'
321
  img.save(str(output_path))
322
  paths.append(str(output_path))
 
323
  else:
324
- cleaned_item = FileProcessor.clean_json(data)
325
  if cleaned_item:
326
  qr = qrcode.QRCode(
327
  version=None,
@@ -333,35 +392,66 @@ class FileProcessor:
333
  qr.add_data(json_str)
334
  qr.make(fit=True)
335
 
336
- img = qr.make_image(fill_color="black", back_color="white")
337
  output_path = output_dir / f'single_qr_{int(time.time())}.png'
338
  img.save(str(output_path))
339
- paths.append(str(output_path))
340
- return paths
 
341
  except Exception as e:
342
  logger.error(f"QR generation error: {e}")
343
  return []
344
 
345
- def decode_qr(image_path: str) -> List[str]:
346
- """Decode QR code from image file"""
347
- try:
348
- image = Image.open(image_path)
349
- decoded_objects = decode(image)
350
- return [obj.data.decode('utf-8') for obj in decoded_objects]
351
- except Exception as e:
352
- logger.error(f"QR decoding error: {e}")
353
- return []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
354
 
355
  def datachat_trained(data_input: str, query: str) -> str:
356
  """Handle trained data interaction logic"""
357
- data = FileProcessor.clean_json(data_input)
358
  if not data:
359
  return "Invalid JSON data provided."
360
  return f"[Trained Mode]\nData: {json.dumps(data, indent=2)}\nQuery: {query}"
361
 
362
  def datachat_simple(data_input: str, query: str) -> str:
363
  """Handle simple chat interaction logic"""
364
- data = FileProcessor.clean_json(data_input)
365
  if not data:
366
  return "Invalid JSON data provided."
367
  return f"[Chat Mode]\nData: {json.dumps(data, indent=2)}\nQuestion: {query}"
@@ -369,14 +459,15 @@ def datachat_simple(data_input: str, query: str) -> str:
369
  def datachat_interface(mode: str, data_source: str, json_input: str, qr_image: str, query: str) -> str:
370
  """Interface for DataChat functionality"""
371
  data = None
 
372
  if data_source == "JSON Input":
373
  data = json_input
374
  elif data_source == "QR Code":
375
- decoded_data = decode_qr(qr_image)
376
- if decoded_data:
377
- data = decoded_data[0]
378
- else:
379
- return "Invalid QR code data provided"
380
  else:
381
  return "No valid data source selected."
382
 
@@ -405,29 +496,24 @@ def create_interface():
405
  json_input = gr.Textbox(lines=8, label="JSON Data")
406
  qr_image = gr.Image(label="QR Code Image", type="filepath")
407
  query = gr.Textbox(label="Query")
 
408
  submit_btn = gr.Button("Submit")
409
  output = gr.Textbox(label="Response")
 
410
  submit_btn.click(datachat_interface, [mode, data_source, json_input, qr_image, query], output)
411
 
412
  with gr.Tab("QR Generator"):
413
  qr_input = gr.Textbox(lines=8, label="Input JSON for QR")
414
  generate_btn = gr.Button("Generate QR")
415
  qr_output = gr.Image(label="Generated QR Code")
416
- generate_btn.click(
417
- lambda x: FileProcessor.generate_qr_code(x)[0] if x else None,
418
- inputs=qr_input,
419
- outputs=qr_output
420
- )
421
-
422
- with gr.Tab("QR Decoder"):
423
- qr_upload = gr.Image(label="Upload QR Code", type="filepath")
424
- decode_btn = gr.Button("Decode QR")
425
- decoded_output = gr.Textbox(label="Decoded Data")
426
- decode_btn.click(
427
- lambda x: "\n".join(decode_qr(x)),
428
- inputs=qr_upload,
429
- outputs=decoded_output
430
- )
431
 
432
  return interface
433
 
@@ -435,7 +521,11 @@ def main():
435
  mimetypes.init()
436
  Path('output/qr_codes').mkdir(parents=True, exist_ok=True)
437
  interface = create_interface()
438
- interface.launch()
439
-
440
- if __name__ == "__main__":
441
- main()
 
 
 
 
 
18
  from bs4 import BeautifulSoup
19
  from fake_useragent import UserAgent
20
  from cleantext import clean
 
21
 
22
  # Setup logging
23
  logging.basicConfig(
 
33
  # Ensure output directories exist
34
  Path('output/qr_codes').mkdir(parents=True, exist_ok=True)
35
 
36
+ # NOTE: the following imports must stay removed from the top of the file; URLProcessor below carries its own proxy and robots.txt settings:
37
+ # from config import Config
38
+ # from proxy_handler import ProxyHandler
39
+ # from robots_handler import RobotsHandler
40
+
41
  class URLProcessor:
42
  def __init__(self):
43
  self.session = requests.Session()
44
+ self.timeout = 10
45
+ self.max_retries = 3
46
+ self.request_delay = 1.0
47
+ self.respect_robots = True
48
+ self.use_proxy = False
49
+ self.proxy_url = None
50
+
51
+ # Update session headers
52
  self.session.headers.update({
53
  'User-Agent': UserAgent().random,
54
  'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
 
57
  'Connection': 'keep-alive',
58
  'Upgrade-Insecure-Requests': '1'
59
  })
60
+
61
+ if self.use_proxy and self.proxy_url:
62
+ self.session.proxies = {
63
+ 'http': self.proxy_url,
64
+ 'https': self.proxy_url
65
+ }
66
+
67
def check_robots_txt(self, url: str) -> bool:
    """Check if URL is allowed by robots.txt

    robots.txt is downloaded through ``self.session`` so the request is
    bounded by ``self.timeout`` and carries the same headers and proxies as
    the page fetches. ``RobotFileParser.read()`` would instead use urllib
    with no timeout and the default Python-urllib User-Agent.

    Args:
        url: Absolute URL that is about to be fetched.

    Returns:
        True when fetching is permitted, when ``self.respect_robots`` is
        False, or when the check itself fails (fail-open, logged as a
        warning). False when the rules disallow the URL or when robots.txt
        answers 401/403 or a 5xx status.
    """
    if not self.respect_robots:
        return True

    try:
        from urllib.parse import urlparse
        from urllib.robotparser import RobotFileParser

        parsed_url = urlparse(url)
        robots_url = f"{parsed_url.scheme}://{parsed_url.netloc}/robots.txt"

        response = self.session.get(robots_url, timeout=self.timeout)
        status = response.status_code

        # Same status policy as RobotFileParser.read(): access-denied means
        # "disallow everything", any other client error means "no rules".
        if status in (401, 403):
            return False
        if 400 <= status < 500:
            return True
        # Server errors leave the rules unknown; RobotFileParser answers
        # False from can_fetch() in that state, so stay conservative.
        if status >= 500:
            return False

        rp = RobotFileParser()
        rp.set_url(robots_url)
        rp.parse(response.text.splitlines())

        return rp.can_fetch(self.session.headers['User-Agent'], url)
    except Exception as e:
        logger.warning(f"Error checking robots.txt: {e}")
        return True
87
+
88
def fetch_content(self, url: str) -> Optional[Dict]:
    """Fetch content for ``url``, honoring robots.txt and retrying on errors.

    The URL is first checked with ``self.check_robots_txt``; a disallowed
    URL is logged and yields None. After one ``self.request_delay`` pause,
    up to ``self.max_retries`` attempts are made. Google Drive and Google
    Calendar URLs (matched by substring) go to their dedicated handlers and
    everything else to ``self._fetch_html_content``. Whatever the handler
    returns is returned as-is; an exception triggers a growing pause and
    another attempt.

    Returns:
        The handler's result, or None when the URL is disallowed or every
        attempt raised.
    """
    if not self.check_robots_txt(url):
        logger.warning(f"URL {url} is disallowed by robots.txt")
        return None

    time.sleep(self.request_delay)  # Basic rate limiting

    attempt_no = 0
    while attempt_no < self.max_retries:
        attempt_no += 1
        try:
            # Handler selection stays inside the try so that a failure
            # here is logged and retried like any other error.
            if 'drive.google.com' in url:
                handler = self._handle_google_drive
            elif 'calendar.google.com' in url:
                handler = self._handle_google_calendar
            else:
                handler = self._fetch_html_content
            return handler(url)
        except Exception as e:
            logger.error(f"Attempt {attempt_no} failed: {e}")
            # Linear back-off, skipped after the final attempt.
            if attempt_no < self.max_retries:
                time.sleep(self.request_delay * attempt_no)

    return None
109
 
110
  def advanced_text_cleaning(self, text: str) -> str:
111
  """Robust text cleaning with version compatibility"""
 
127
  return cleaned_text
128
  except Exception as e:
129
  logger.warning(f"Text cleaning error: {e}. Using fallback method.")
130
+ text = re.sub(r'[\x00-\x1F\x7F-\x9F]', '', text)
131
  text = text.encode('ascii', 'ignore').decode('ascii')
132
  text = re.sub(r'\s+', ' ', text)
133
  return text.strip()
 
226
  class FileProcessor:
227
  """Class to handle file processing"""
228
 
229
+ def __init__(self, max_file_size: int = 2 * 1024 * 1024 * 1024): # 2GB default
230
  self.max_file_size = max_file_size
231
  self.supported_text_extensions = {'.txt', '.md', '.csv', '.json', '.xml'}
232
 
 
235
  try:
236
  mime_type, _ = mimetypes.guess_type(filepath)
237
  return (mime_type and mime_type.startswith('text/')) or \
238
+ (os.path.splitext(filepath)[1].lower() in self.supported_text_extensions)
239
  except Exception:
240
  return False
241
 
 
280
  "source": "file",
281
  "filename": filename,
282
  "content": content,
283
+ "timestamp": datetime.now ().isoformat()
284
  })
285
  except Exception as e:
286
  logger.error(f"Error reading file {filename}: {str(e)}")
 
319
  logger.error(f"File processing error: {e}")
320
  return []
321
 
 
322
  def clean_json(data: Union[str, Dict]) -> Optional[Dict]:
323
  """Clean and validate JSON data"""
324
  try:
 
335
  logger.error(f"Unexpected error while cleaning JSON: {e}")
336
  return None
337
 
 
338
  def generate_qr_code(data: Union[str, Dict], combined: bool = True) -> List[str]:
339
  """Generate QR code(s) from data"""
340
  try:
 
342
  output_dir.mkdir(parents=True, exist_ok=True)
343
 
344
  if combined:
345
+ cleaned_data = clean_json(data)
346
  if cleaned_data:
347
  qr = qrcode.QRCode(
348
  version=None,
 
359
  img.save(str(output_path))
360
  return [str(output_path)]
361
  else:
 
362
  if isinstance(data, list):
363
+ paths = []
364
  for idx, item in enumerate(data):
365
+ cleaned_item = clean_json(item)
366
  if cleaned_item:
367
  qr = qrcode.QRCode(
368
  version=None,
 
378
  output_path = output_dir / f'item_{idx}_qr_{int(time.time())}.png'
379
  img.save(str(output_path))
380
  paths.append(str(output_path))
381
+ return paths
382
  else:
383
+ cleaned_item = clean_json(data)
384
  if cleaned_item:
385
  qr = qrcode.QRCode(
386
  version=None,
 
392
  qr.add_data(json_str)
393
  qr.make(fit=True)
394
 
395
+ img = qrcode.make_image(fill_color="black", back_color="white")
396
  output_path = output_dir / f'single_qr_{int(time.time())}.png'
397
  img.save(str(output_path))
398
+ return [str(output_path)]
399
+
400
+ return []
401
  except Exception as e:
402
  logger.error(f"QR generation error: {e}")
403
  return []
404
 
405
def decode_qr_code(image_path: str) -> Optional[str]:
    """Decode a QR code from an image file using ZXing.

    Args:
        image_path: Path of the image file handed to the ZXing reader.

    Returns:
        The parsed payload of the decoded barcode, or None when nothing
        was decoded (logged as a warning) or decoding raised (logged as an
        error).
    """
    try:
        barcode = zxing.BarCodeReader().decode(image_path)
        payload = barcode.parsed if barcode else None
        if not payload:
            logger.warning("No QR code found in image")
            return None
        return payload
    except Exception as e:
        logger.error(f"QR decoding error: {e}")
        return None
418
+
419
def decode_qr(image) -> List[str]:
    """Decode a QR code from an image path or an in-memory image using ZXing.

    Args:
        image: Either a filesystem path (``str``) or array-like image data
            accepted by ``Image.fromarray``. ``None`` (nothing supplied)
            yields an empty list.

    Returns:
        A list holding the parsed payload of the decoded barcode, or an
        empty list when nothing was decoded or decoding failed. At most one
        payload is returned, because a single reader result is inspected.
    """
    if image is None:
        return []

    temp_path = None
    try:
        if isinstance(image, str):
            image_path = image
        else:
            # Reserve a temp file name, then write after the handle is
            # closed: an open NamedTemporaryFile cannot be reopened by name
            # on every platform.
            with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp:
                temp_path = tmp.name
            Image.fromarray(image).save(temp_path)
            image_path = temp_path

        reader = zxing.BarCodeReader()
        result = reader.decode(image_path)

        if result and result.parsed:
            return [result.parsed]
        return []
    except Exception as e:
        logger.error(f"QR decoding error: {e}")
        return []
    finally:
        # The temp file was created with delete=False, so remove it here.
        if temp_path is not None:
            try:
                Path(temp_path).unlink()
            except OSError as cleanup_error:
                logger.warning(f"Could not remove temporary file {temp_path}: {cleanup_error}")
444
 
445
  def datachat_trained(data_input: str, query: str) -> str:
446
  """Handle trained data interaction logic"""
447
+ data = clean_json(data_input)
448
  if not data:
449
  return "Invalid JSON data provided."
450
  return f"[Trained Mode]\nData: {json.dumps(data, indent=2)}\nQuery: {query}"
451
 
452
  def datachat_simple(data_input: str, query: str) -> str:
453
  """Handle simple chat interaction logic"""
454
+ data = clean_json(data_input)
455
  if not data:
456
  return "Invalid JSON data provided."
457
  return f"[Chat Mode]\nData: {json.dumps(data, indent=2)}\nQuestion: {query}"
 
459
  def datachat_interface(mode: str, data_source: str, json_input: str, qr_image: str, query: str) -> str:
460
  """Interface for DataChat functionality"""
461
  data = None
462
+ resolution = None # Initialize resolution variable
463
  if data_source == "JSON Input":
464
  data = json_input
465
  elif data_source == "QR Code":
466
+ try:
467
+ decoded_data, resolution = decode_qr_code(qr_image) # Get both data and resolution
468
+ data = decoded_data
469
+ except Exception as e:
470
+ return f"Invalid QR code data provided: {e}"
471
  else:
472
  return "No valid data source selected."
473
 
 
496
  json_input = gr.Textbox(lines=8, label="JSON Data")
497
  qr_image = gr.Image(label="QR Code Image", type="filepath")
498
  query = gr.Textbox(label="Query")
499
+
500
  submit_btn = gr.Button("Submit")
501
  output = gr.Textbox(label="Response")
502
+
503
  submit_btn.click(datachat_interface, [mode, data_source, json_input, qr_image, query], output)
504
 
505
  with gr.Tab("QR Generator"):
506
  qr_input = gr.Textbox(lines=8, label="Input JSON for QR")
507
  generate_btn = gr.Button("Generate QR")
508
  qr_output = gr.Image(label="Generated QR Code")
509
+
510
def generate_qr(json_data):
    """Click handler for the "Generate QR" button.

    Validates the textbox content with ``clean_json`` and renders it with
    ``generate_qr_code``. The image output shows a single picture, so only
    the first generated file path is returned rather than the whole list.

    Returns:
        Path of the generated QR image, or None when the input is not
        usable JSON or no image was produced.
    """
    data = clean_json(json_data)
    if not data:
        return None
    paths = generate_qr_code(data)
    # generate_qr_code returns an empty list on failure; guard the index.
    return paths[0] if paths else None
515
+
516
+ generate_btn.click(generate_qr, qr_input, qr_output)
 
 
 
 
 
 
 
517
 
518
  return interface
519
 
 
521
  mimetypes.init()
522
  Path('output/qr_codes').mkdir(parents=True, exist_ok=True)
523
  interface = create_interface()
524
+ interface.launch(
525
+ server_name="0.0.0.0",
526
+ server_port=7860,
527
+ show_error=True,
528
+ share=False,
529
+ inbrowser=True,
530
+ debug=True
531
+ )