acecalisto3 committed on
Commit
1d25250
·
verified ·
1 Parent(s): 3f2b354

Update app2.py

Browse files
Files changed (1) hide show
  1. app2.py +229 -3
app2.py CHANGED
@@ -457,7 +457,7 @@ def datachat_interface(mode: str, data_source: str, json_input: str, qr_image: s
457
  data = json_input
458
  elif data_source == "QR Code":
459
  try:
460
- decoded_data = decode_qr_code(qr_image) # Updated to use new function
461
  data = decoded_data
462
  if not data:
463
  return "No QR code found in the provided image."
@@ -485,6 +485,231 @@ def create_interface():
485
  with gr.Blocks(css=css, title="Advanced Data Processor & QR Code Generator") as interface:
486
  gr.Markdown("# 🌐 Advanced Data Processing & QR Code Generator")
487
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
488
  with gr.Tab("DataChat"):
489
  mode = gr.Radio(["Trained with Data", "Chat about Data"], label="Mode")
490
  data_source = gr.Radio(["JSON Input", "QR Code"], label="Data Source")
@@ -497,15 +722,16 @@ def create_interface():
497
 
498
  submit_btn.click(datachat_interface, [mode, data_source, json_input, qr_image, query], output)
499
 
 
500
  with gr.Tab("QR Generator"):
501
  qr_input = gr.Textbox(lines=8, label="Input JSON for QR")
502
  generate_btn = gr.Button("Generate QR")
503
  qr_output = gr.Image(label="Generated QR Code")
504
 
505
  def generate_qr(json_data):
506
- data = clean_json(json_data)
507
  if data:
508
- return generate_qr_code(data)
509
  return None
510
 
511
  generate_btn.click(generate_qr, qr_input, qr_output)
 
457
  data = json_input
458
  elif data_source == "QR Code":
459
  try:
460
+ decoded_data = decode_qr_code(qr_image)
461
  data = decoded_data
462
  if not data:
463
  return "No QR code found in the provided image."
 
485
  with gr.Blocks(css=css, title="Advanced Data Processor & QR Code Generator") as interface:
486
  gr.Markdown("# 🌐 Advanced Data Processing & QR Code Generator")
487
 
488
+ # URL Extraction Tab
489
+ with gr.Tab("URL Extraction"):
490
+ url_input = gr.Textbox(label="URL to Process", placeholder="https://example.com")
491
+ depth_slider = gr.Slider(minimum=1, maximum=5, value=1, step=1, label="Crawl Depth (Higher values may affect performance)")
492
+ respect_robots = gr.Checkbox(label="Respect robots.txt", value=True)
493
+ extract_btn = gr.Button("Extract Content")
494
+ url_output = gr.JSON(label="Extracted Data")
495
+ download_btn = gr.Button("Download Results as ZIP")
496
+ download_output = gr.File(label="Download")
497
+
498
+ # Warning about depth
499
+ gr.Markdown("""
500
+ <div class="warning">
501
+ ⚠️ <strong>Warning:</strong> Higher depth values (>2) may significantly increase processing time and resource usage.
502
+ </div>
503
+ """)
504
+
505
+ # URL processor instance
506
+ url_processor = URLProcessor()
507
+
508
def process_url(url, depth, respect_robots):
    """Crawl ``url`` breadth-first and collect the content of each fetched page.

    Args:
        url: Starting URL; checked with ``url_processor.validate_url`` first.
        depth: Maximum link depth to follow. The start page is depth 0, and
            links are only extracted from pages whose depth is below ``depth``.
        respect_robots: Stored on ``url_processor.respect_robots`` before
            crawling.

    Returns:
        A list of ``{"url", "content", "content_type", "timestamp"}`` dicts in
        crawl order, or ``{"error": message}`` when validation fails or any
        exception is raised while crawling.
    """
    # Function-scope imports keep this handler self-contained.
    from collections import deque
    from urllib.parse import urldefrag, urljoin

    # NOTE(review): url_processor is a single instance shared by every call
    # to this handler, so this setting is visible to concurrent requests.
    url_processor.respect_robots = respect_robots
    results = []
    try:
        # Validate URL
        validation = url_processor.validate_url(url)
        if not validation['is_valid']:
            return {"error": validation['message']}

        # URLs are marked as seen when queued, so the queue never holds
        # duplicates and each URL is fetched at most once.
        seen = {url}
        queue = deque([(url, 0)])  # (url, current_depth)

        while queue:
            current_url, current_depth = queue.popleft()
            content = url_processor.fetch_content(current_url)
            if not content:
                continue

            body = content.get('content', '')
            results.append({
                "url": current_url,
                "content": body,
                "content_type": content.get('content_type', ''),
                "timestamp": datetime.now().isoformat()
            })

            # Only expand links while below the requested depth.
            if current_depth >= depth:
                continue

            soup = BeautifulSoup(body, 'html.parser')
            for link in soup.find_all('a', href=True):
                # urljoin resolves every relative form ("/x", "x.html",
                # "../x") against the current page and leaves absolute URLs
                # unchanged. The fragment is dropped because it never
                # reaches the server, so "#a" and "#b" are the same page.
                next_url, _fragment = urldefrag(urljoin(current_url, link['href']))
                if next_url in seen or not validators.url(next_url):
                    continue
                seen.add(next_url)
                queue.append((next_url, current_depth + 1))

        return results
    except Exception as e:
        logger.error(f"URL processing error: {e}")
        return {"error": str(e)}
557
+
558
def create_download_zip(results):
    """Package extraction results into a temporary ZIP archive.

    The archive always contains ``extracted_data.json`` (the full payload).
    When ``results`` is a list, each dict item with a ``"content"`` key is
    additionally written to its own ``content_<index>_<unix-time>.txt`` entry.

    Args:
        results: The JSON payload to package (a list of result dicts, or any
            other JSON-serialisable value).

    Returns:
        Path of the created ZIP file, or ``None`` when ``results`` is empty,
        is an ``{"error": ...}`` payload, or the archive could not be written.
    """
    import os

    if not results or (isinstance(results, dict) and 'error' in results):
        return None

    zip_path = None
    try:
        # delete=False: the file has to outlive this call so it can be served.
        with tempfile.NamedTemporaryFile(suffix='.zip', delete=False) as tmp:
            zip_path = tmp.name
            # Write through the already-open handle; reopening a still-open
            # NamedTemporaryFile by name is not portable across platforms.
            with zipfile.ZipFile(tmp, 'w') as zipf:
                # Add JSON data
                zipf.writestr('extracted_data.json', json.dumps(results, indent=2))

                # Add individual text files for each URL. Only a list has
                # per-URL items; iterating a dict would yield its string keys.
                stamp = int(time.time())
                entries = results if isinstance(results, list) else []
                for idx, item in enumerate(entries):
                    if isinstance(item, dict) and 'content' in item:
                        content = item['content']
                        if not isinstance(content, (str, bytes)):
                            # writestr() only accepts str or bytes.
                            content = json.dumps(content, indent=2)
                        zipf.writestr(f'content_{idx}_{stamp}.txt', content)

        return zip_path
    except Exception as e:
        logger.error(f"Error creating ZIP file: {e}")
        # Do not leave a partially written archive behind.
        if zip_path is not None:
            try:
                os.remove(zip_path)
            except OSError:
                pass
        return None
578
+
579
+ extract_btn.click(process_url, [url_input, depth_slider, respect_robots], url_output)
580
+ download_btn.click(create_download_zip, [url_output], download_output)
581
+
582
+ # ZIP File Extractor Tab
583
+ with gr.Tab("ZIP File Extractor"):
584
+ zip_file_input = gr.File(label="Upload ZIP File")
585
+ extract_zip_btn = gr.Button("Extract and Process")
586
+ zip_output = gr.JSON(label="Extracted Data")
587
+ zip_qr_btn = gr.Button("Generate QR Code")
588
+ zip_qr_output = gr.Image(label="QR Code")
589
+
590
+ file_processor = FileProcessor()
591
+
592
def process_zip_file(file):
    """Hand the uploaded file to ``file_processor.process_file``.

    Returns the processor's result, or an ``{"error": ...}`` dict when no file
    was supplied or processing raised an exception.
    """
    if file:
        try:
            return file_processor.process_file(file)
        except Exception as exc:
            logger.error(f"ZIP processing error: {exc}")
            return {"error": str(exc)}

    return {"error": "No file uploaded"}
602
+
603
def generate_zip_qr(data):
    """Build a QR code for ``data`` with ``file_processor.generate_qr_code``.

    Returns the first element of the ``combined=True`` result, or ``None``
    when ``data`` is empty, is an ``{"error": ...}`` payload, or generation
    raises.
    """
    is_error_payload = isinstance(data, dict) and 'error' in data
    if is_error_payload or not data:
        return None

    try:
        codes = file_processor.generate_qr_code(data, combined=True)
        return codes[0]
    except Exception as exc:
        logger.error(f"QR generation error: {exc}")
        return None
612
+
613
+ extract_zip_btn.click(process_zip_file, [zip_file_input], zip_output)
614
+ zip_qr_btn.click(generate_zip_qr, [zip_output], zip_qr_output)
615
+
616
+ # Raw Text to JSON Tab
617
+ with gr.Tab("Text to JSON"):
618
+ text_input = gr.Textbox(lines=10, label="Raw Text Input")
619
+ json_structure = gr.Dropdown(
620
+ choices=["Simple", "Structured", "Key-Value Pairs"],
621
+ label="JSON Structure",
622
+ value="Simple"
623
+ )
624
+ convert_btn = gr.Button("Convert to JSON")
625
+ json_output = gr.JSON(label="JSON Output")
626
+ combine_json_btn = gr.Button("Combine with Previous JSON")
627
+ previous_json = gr.Textbox(lines=5, label="Previous JSON (Optional)")
628
+ combined_output = gr.JSON(label="Combined JSON")
629
+ text_qr_btn = gr.Button("Generate QR Code")
630
+ text_qr_output = gr.Image(label="QR Code")
631
+
632
def convert_text_to_json(text, structure):
    """Convert raw text into one of three JSON-ready shapes.

    Args:
        text: Raw input text; ``None`` and whitespace-only input are rejected.
        structure: ``"Simple"`` (whole text under ``"text"``),
            ``"Structured"`` (blank-line separated paragraphs; the first one
            becomes ``"title"``) or ``"Key-Value Pairs"`` (one ``key: value``
            per line, split on the first colon).

    Returns:
        A dict in the requested shape with a ``"timestamp"`` entry, or
        ``{"error": message}`` for missing text, an unknown structure, or an
        unexpected failure.
    """
    # Guard against None as well as blank text, so a missing value yields the
    # normal error payload instead of an AttributeError on .strip().
    if not text or not text.strip():
        return {"error": "No text provided"}

    try:
        if structure == "Simple":
            return {
                "text": text,
                "timestamp": datetime.now().isoformat()
            }

        # splitlines() handles "\r\n" and "\r" as well as "\n", so pasted
        # Windows text does not leave stray carriage returns in the output.
        lines = text.splitlines()

        if structure == "Structured":
            paragraphs = []
            current_para = []

            for line in lines:
                if line.strip():
                    current_para.append(line)
                elif current_para:
                    # A blank line closes the paragraph being collected.
                    paragraphs.append(' '.join(current_para))
                    current_para = []

            if current_para:
                paragraphs.append(' '.join(current_para))

            return {
                "title": paragraphs[0] if paragraphs else "",
                "paragraphs": paragraphs[1:],
                "timestamp": datetime.now().isoformat()
            }

        if structure == "Key-Value Pairs":
            pairs = {}

            for line in lines:
                # Split on the first colon only, so values may contain colons.
                key, sep, value = line.partition(':')
                if sep:
                    pairs[key.strip()] = value.strip()

            # Set last: this replaces any "timestamp" key parsed from the text.
            pairs["timestamp"] = datetime.now().isoformat()
            return pairs

        return {"error": "Invalid structure selected"}
    except Exception as e:
        logger.error(f"Text to JSON conversion error: {e}")
        return {"error": str(e)}
678
+
679
def combine_json_data(current, previous):
    """Combine the current JSON value with a previously saved JSON string.

    Args:
        current: The current JSON value (already parsed).
        previous: JSON text to combine with; ``None`` or blank means there is
            nothing to combine.

    Returns:
        * ``current`` unchanged when ``previous`` is empty;
        * a concatenated list when either side is a list (the non-list side
          is wrapped as a single element);
        * a merged dict (keys from ``current`` win) with an added
          ``"combined_timestamp"`` when both sides are objects;
        * ``[previous, current]`` for any other mix, such as a scalar;
        * ``{"error": message}`` when ``current`` is missing or an error
          payload, when ``previous`` is not valid JSON, or on an unexpected
          failure.
    """
    if not current or (isinstance(current, dict) and 'error' in current):
        return {"error": "No valid current JSON"}

    try:
        # Treat None like an empty string instead of failing on .strip().
        if not previous or not previous.strip():
            return current

        prev_json = json.loads(previous)

        # Determine how to combine based on types
        if isinstance(prev_json, list) and isinstance(current, list):
            return prev_json + current
        if isinstance(prev_json, list):
            return prev_json + [current]
        if isinstance(current, list):
            return [prev_json] + current
        if isinstance(prev_json, dict) and isinstance(current, dict):
            # Both are objects, merge them
            combined = {**prev_json, **current}
            # Add a combined timestamp
            combined["combined_timestamp"] = datetime.now().isoformat()
            return combined

        # At least one side is a scalar (number, string, null, ...), which
        # cannot be merged key-by-key, so keep both values side by side.
        return [prev_json, current]
    except json.JSONDecodeError:
        return {"error": "Previous JSON is invalid"}
    except Exception as e:
        logger.error(f"JSON combination error: {e}")
        return {"error": str(e)}
707
+
708
+ convert_btn.click(convert_text_to_json, [text_input, json_structure], json_output)
709
+ combine_json_btn.click(combine_json_data, [json_output, previous_json], combined_output)
710
+ text_qr_btn.click(generate_zip_qr, [json_output], text_qr_output)
711
+
712
+ # DataChat Tab (existing)
713
  with gr.Tab("DataChat"):
714
  mode = gr.Radio(["Trained with Data", "Chat about Data"], label="Mode")
715
  data_source = gr.Radio(["JSON Input", "QR Code"], label="Data Source")
 
722
 
723
  submit_btn.click(datachat_interface, [mode, data_source, json_input, qr_image, query], output)
724
 
725
+ # QR Generator Tab (existing)
726
  with gr.Tab("QR Generator"):
727
  qr_input = gr.Textbox(lines=8, label="Input JSON for QR")
728
  generate_btn = gr.Button("Generate QR")
729
  qr_output = gr.Image(label="Generated QR Code")
730
 
731
  def generate_qr(json_data):
732
+ data = file_processor.clean_json(json_data)
733
  if data:
734
+ return file_processor.generate_qr_code(data)
735
  return None
736
 
737
  generate_btn.click(generate_qr, qr_input, qr_output)