acecalisto3 committed on
Commit
17fdb3b
·
verified ·
1 Parent(s): 32a4701

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +289 -1
app.py CHANGED
@@ -517,4 +517,292 @@ class FileProcessor:
517
  file_size = os.path.getsize(file.name)
518
  if file_size > self.max_file_size:
519
  logger.warning(f"File size ({file_size} bytes) exceeds maximum allowed size")
520
- ret
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
517
  file_size = os.path.getsize(file.name)
518
  if file_size > self.max_file_size:
519
  logger.warning(f"File size ({file_size} bytes) exceeds maximum allowed size")
520
+ return []
521
+ with tempfile.TemporaryDirectory() as temp_dir:
522
+ if zipfile.is_zipfile(file.name):
523
+ dataset.extend(self._process_zip_file(file.name, temp_dir))
524
+ else:
525
+ dataset.extend(self._process_single_file(file))
526
+ except Exception as e:
527
+ logger.error(f"Error processing file: {str(e)}")
528
+ return []
529
+ return dataset
530
+
531
def _process_zip_file(self, zip_path, temp_dir):
    """Extract a ZIP archive into ``temp_dir`` and read every extracted file.

    Args:
        zip_path: Path of the ZIP archive to open.
        temp_dir: Existing directory the archive is extracted into.

    Returns:
        A list with one dict per regular file found under ``temp_dir`` after
        extraction. Each dict has the keys ``source`` (always
        ``'file_from_zip'``), ``filename``, ``content`` and ``timestamp``.
        For files stored in sub-folders, ``filename`` is the path relative
        to ``temp_dir`` written with ``/`` separators; top-level files keep
        their plain name.
    """
    result = []
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(temp_dir)

    # Walk the whole tree: archives commonly wrap their files in folders,
    # and a flat os.listdir() would silently drop every nested file.
    for root, dirs, files in os.walk(temp_dir):
        dirs.sort()  # in-place sort makes the traversal order deterministic
        for name in sorted(files):
            extracted_file_path = os.path.join(root, name)
            if not os.path.isfile(extracted_file_path):
                continue
            relative_name = os.path.relpath(extracted_file_path, temp_dir)
            with open(extracted_file_path, 'r', encoding='utf-8', errors='ignore') as f:
                content = f.read()
            result.append({
                'source': 'file_from_zip',
                'filename': relative_name.replace(os.sep, '/'),
                'content': content,
                'timestamp': datetime.now().isoformat()
            })
    return result
547
+
548
def _process_single_file(self, file, large_file_threshold=100 * 1024 * 1024,
                         excerpt_size=1 * 1024 * 1024) -> List[Dict]:
    """Read one uploaded file and describe it as a single dataset entry.

    Args:
        file: Object whose ``name`` attribute is the path of the file on disk.
        large_file_threshold: Size in bytes above which only an excerpt of
            the file is read (default 100 MB).
        excerpt_size: Number of bytes kept from the start and from the end
            of a large file (default 1 MB).

    Returns:
        A one-element list holding a dict with the keys ``source``,
        ``filename``, ``file_size``, ``mime_type``, ``created``,
        ``modified``, ``content`` and ``timestamp``; an empty list if
        anything goes wrong (the error is logged, not raised).
    """
    def _decode(raw):
        # Mirror what text-mode reading does for the regular branch:
        # drop undecodable bytes and normalise line endings to "\n".
        text = raw.decode('utf-8', errors='ignore')
        return text.replace('\r\n', '\n').replace('\r', '\n')

    try:
        file_stat = os.stat(file.name)
        if file_stat.st_size > large_file_threshold:
            logger.info(f"Processing large file: {file.name} ({file_stat.st_size} bytes)")
            # Read in binary: sizes and offsets here are byte counts, and a
            # text-mode file neither counts read() in bytes nor supports
            # seeking to an arbitrary byte offset.
            with open(file.name, 'rb') as f:
                head = f.read(excerpt_size)
                # Never start the tail before the end of the head, so the
                # two excerpts cannot overlap.
                f.seek(max(len(head), file_stat.st_size - excerpt_size))
                tail = f.read()
            content = (
                _decode(head)
                + "\n...[Content truncated due to large file size]...\n"
                + _decode(tail)
            )
        else:
            # Regular file processing: the whole file is read as text.
            with open(file.name, 'r', encoding='utf-8', errors='ignore') as f:
                content = f.read()
        return [{
            'source': 'file',
            'filename': os.path.basename(file.name),
            'file_size': file_stat.st_size,
            'mime_type': mimetypes.guess_type(file.name)[0],
            # st_ctime is creation time on Windows but the last metadata
            # change on Unix, so "created" is only approximate there.
            'created': datetime.fromtimestamp(file_stat.st_ctime).isoformat(),
            'modified': datetime.fromtimestamp(file_stat.st_mtime).isoformat(),
            'content': content,
            'timestamp': datetime.now().isoformat()
        }]
    except Exception as e:
        logger.error(f"File processing error: {e}")
        return []
579
+
580
+
581
+ # process_all_inputs is a module-level function, defined outside the FileProcessor class.
582
def process_all_inputs(urls, file, text, notes):
    """Gather content from URLs, an uploaded file and raw text, then save it as JSON.

    Args:
        urls: Comma- or newline-separated URLs; skipped when falsy.
        file: Uploaded file passed to ``FileProcessor.process_file``;
            skipped when falsy.
        text: Raw text to clean and include; skipped when falsy.
        notes: Accepted for the UI wiring; this function does not read it.

    Returns:
        A ``(output_path, summary, json_data)`` tuple. On success
        ``output_path`` is the written JSON file as a string and
        ``json_data`` is the same data serialised for the JSON editor.
        When nothing was collected, or on any error, ``output_path`` is
        ``None`` and ``json_data`` is an empty string.
    """
    try:
        url_handler = URLProcessor()
        upload_handler = FileProcessor()
        collected = []

        # URLs: split on commas/newlines, ignore blanks, keep only pages
        # that validate and actually return content.
        if urls:
            stripped = (piece.strip() for piece in re.split(r'[,\n]', urls))
            for address in filter(None, stripped):
                if not url_handler.validate_url(address).get('is_valid'):
                    continue
                page = url_handler.fetch_content(address)
                if page:
                    collected.append({
                        'source': 'url',
                        'url': address,
                        'content': page,
                        'timestamp': datetime.now().isoformat()
                    })

        # Uploaded file (plain file or ZIP archive).
        if file:
            collected.extend(upload_handler.process_file(file))

        # Directly pasted text.
        if text:
            collected.append({
                'source': 'direct_input',
                'content': url_handler.advanced_text_cleaning(text),
                'timestamp': datetime.now().isoformat()
            })

        if not collected:
            return None, "No valid content to process.", ""

        # Results are stored under output/<YYYY-MM-DD>/processed_<epoch>.json
        day_folder = Path('output') / datetime.now().strftime('%Y-%m-%d')
        day_folder.mkdir(parents=True, exist_ok=True)
        target = day_folder / f'processed_{int(time.time())}.json'
        with open(target, 'w', encoding='utf-8') as sink:
            json.dump(collected, sink, ensure_ascii=False, indent=2)

        return (
            str(target),
            f"Processed {len(collected)} items successfully!",
            json.dumps(collected, indent=2),  # JSON shown in the editor / used for the QR code
        )
    except Exception as e:
        logger.error(f"Processing error: {e}")
        return None, f"Error: {str(e)}", ""
632
+
633
+
634
+ # generate_qr_code is also a module-level function, defined outside the FileProcessor class.
635
def generate_qr_code(json_data):
    """Return the path of a QR code image for ``json_data``.

    Delegates to ``generate_qr``. Empty or missing input produces no image
    and the function returns ``None``.
    """
    if not json_data:
        return None
    return generate_qr(json_data)
639
+
640
+
641
+ # generate_qr is a module-level function as well, defined outside the FileProcessor class.
642
def _save_qr_png(payload, version=None):
    """Encode ``payload`` as a QR code, save it as a temporary PNG, return its path."""
    qr = qrcode.QRCode(
        version=version,  # None lets the library pick the smallest fitting version
        error_correction=qrcode.constants.ERROR_CORRECT_L,
        box_size=10,
        border=4,
    )
    qr.add_data(payload)
    qr.make(fit=True)

    # make_image is a method of the QRCode instance; the qrcode module itself
    # has no make_image function.
    img = qr.make_image(fill_color="black", back_color="white")

    # Only the unique name is needed: close the handle before writing so it
    # is not leaked and the path can be reopened on every platform.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_file:
        png_path = temp_file.name
    img.save(png_path)
    return png_path


def generate_qr(json_data):
    """Generate a QR code from JSON data and return the image file path.

    Args:
        json_data: The text to encode.

    Returns:
        Path of a temporary ``.png`` file (not deleted automatically). If
        encoding ``json_data`` fails, for example because it is too large
        for a QR code, the error is logged and the returned image instead
        encodes the message ``"Error: Data too large for QR code"``.
    """
    try:
        # Try first with automatic version selection.
        return _save_qr_png(json_data)
    except Exception as e:
        logger.error(f"QR generation error: {e}")
        # Fall back to a small QR code that carries an error message.
        return _save_qr_png("Error: Data too large for QR code", version=1)
676
+
677
+
678
def create_interface():
    """Build the Gradio Blocks UI and return it without launching.

    The layout has five tabs (URL, file, text, JSON editor, scratchpad),
    a "Process Input" button wired to ``process_all_inputs`` and a
    "Generate QR Code" button wired to ``generate_qr_code``.

    Returns:
        The configured ``gr.Blocks`` interface.
    """
    css = """
    .container { max-width: 1200px; margin: auto; }
    .warning { background-color: #fff3cd; color: #856404; }
    .error { background-color: #f8d7da; color: #721c24; }
    """
    with gr.Blocks(css=css, title="Advanced Text & URL Processing") as interface:
        gr.Markdown("# 🌐 Advanced URL & Text Processing Toolkit")

        with gr.Tab("URL Processing"):
            url_input = gr.Textbox(
                label="Enter URLs (comma or newline separated)",
                lines=5,
                placeholder="https://example1.com\nhttps://example2.com"
            )

        with gr.Tab("File Input"):
            file_input = gr.File(
                label="Upload text file or ZIP archive",
                file_types=[".txt", ".zip", ".md", ".csv", ".json", ".xml"]
            )

        with gr.Tab("Text Input"):
            text_input = gr.Textbox(
                label="Raw Text Input",
                lines=5,
                placeholder="Paste your text here..."
            )

        with gr.Tab("JSON Editor"):
            json_editor = gr.Textbox(
                label="JSON Editor",
                lines=20,
                placeholder="View and edit your JSON data here...",
                interactive=True,
                elem_id="json-editor"  # Optional: for custom styling
            )

        with gr.Tab("Scratchpad"):
            scratchpad = gr.Textbox(
                label="Scratchpad",
                lines=10,
                placeholder="Quick notes or text collections...",
                interactive=True
            )

        process_btn = gr.Button("Process Input", variant="primary")
        qr_btn = gr.Button("Generate QR Code", variant="secondary")

        output_text = gr.Textbox(label="Processing Results", interactive=False)
        output_file = gr.File(label="Processed Output")
        qr_output = gr.Image(label="QR Code", type="filepath")  # Displays the generated QR code

        # Processing fills the download, the summary box and the JSON editor.
        process_btn.click(
            process_all_inputs,
            inputs=[url_input, file_input, text_input, scratchpad],
            outputs=[output_file, output_text, json_editor]
        )
        # The QR code is built from whatever is currently in the JSON editor,
        # so manual edits made there are included.
        qr_btn.click(
            generate_qr_code,
            inputs=json_editor,
            outputs=qr_output
        )
        # "**Text Input**" must have no space after the opening "**",
        # otherwise Markdown does not render it as bold.
        gr.Markdown("""
        ### Usage Guidelines
        - **URL Processing**: Enter valid HTTP/HTTPS URLs
        - **File Input**: Upload text files or ZIP archives
        - **Text Input**: Direct text processing
        - **JSON Editor**: View and edit your JSON data
        - **Scratchpad**: Quick notes or text collections
        - Advanced cleaning and validation included
        """)
    return interface
752
+
753
+
754
def check_network_connectivity():
    """Probe a few well-known sites to see whether outbound HTTP works.

    Returns:
        A ``(network_ok, results)`` tuple. ``results`` holds one dict per
        probed site with the keys ``site``, ``status`` (``"OK"`` for HTTP
        200, otherwise a string starting with ``"Error"``) and
        ``response_time`` (seconds, or ``None`` when the request raised).
        ``network_ok`` is ``False`` only when every probe ended in an error.
    """
    test_sites = ["https://www.google.com", "https://www.cloudflare.com", "https://www.amazon.com"]
    results = []

    for site in test_sites:
        try:
            response = requests.get(site, timeout=5)
            if response.status_code == 200:
                status = "OK"
            else:
                status = f"Error: {response.status_code}"
            outcome = {
                "site": site,
                "status": status,
                "response_time": response.elapsed.total_seconds()
            }
        except Exception as e:
            outcome = {
                "site": site,
                "status": f"Error: {str(e)}",
                "response_time": None
            }
        results.append(outcome)

    # A single working probe is enough to consider the network usable.
    network_ok = any(not entry["status"].startswith("Error") for entry in results)
    if not network_ok:
        logger.error("Network connectivity issue detected. All test sites failed.")
        return False, results

    return True, results
779
+
780
+
781
+ # Application entry point: checks connectivity, then builds and launches the UI.
782
def main():
    """Initialise MIME types, report connectivity problems, then launch the UI."""
    # Configure system settings
    mimetypes.init()

    # A failed connectivity check is only logged; the interface still starts.
    online, probes = check_network_connectivity()
    if not online:
        logger.warning("Network connectivity issues detected. Some features may not work properly.")
        for probe in probes:
            logger.warning(f"Test site {probe['site']}: {probe['status']}")

    # Server configuration for the Gradio app.
    launch_options = {
        "server_name": "0.0.0.0",  # listen on all network interfaces
        "server_port": 7860,
        "show_error": True,
        "share": False,
        "inbrowser": True,
        "debug": True,
    }
    app = create_interface()
    app.launch(**launch_options)


# Run the application only when this file is executed as a script.
if __name__ == "__main__":
    main()
808
+