acecalisto3 committed on
Commit
da4162d
·
verified ·
1 Parent(s): b80ce43

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +125 -119
app.py CHANGED
@@ -308,8 +308,11 @@ class EnhancedFileProcessor:
308
  if file_info.file_size > 0 and not file_info.filename.endswith('/'):
309
  extracted_path = extract_to / file_info.filename
310
  if extracted_path.suffix.lower() in self.supported_extensions:
311
- with open(extracted_path, 'rb') as f:
312
- dataset.extend(self._process_single_file(f))
 
 
 
313
  # Handle TAR archives
314
  elif archive_path.lower().endswith(('.tar', '.tar.gz', '.tgz')):
315
  try:
@@ -317,10 +320,13 @@ class EnhancedFileProcessor:
317
  for member in tar_ref.getmembers():
318
  if member.isfile():
319
  extracted_path = extract_to / member.name
320
- tar_ref.extract(member, path=extract_to)
321
- if extracted_path.suffix.lower() in self.supported_extensions:
322
- with open(extracted_path, 'rb') as f:
323
- dataset.extend(self._process_single_file(f))
 
 
 
324
  except tarfile.TarError as e:
325
  logger.error(f"Error processing TAR archive: {e}")
326
  # Handle GZIP archives (single file)
@@ -606,7 +612,7 @@ def create_qr_sequence_visualizer(output_gallery):
606
  qr_visualization.update(value=composite_image)
607
 
608
  # Event handlers
609
- visualize_btn.click(process_qr_codes, inputs=qr_input, outputs=[visualization_status, qr_visualization, qr_preview])
610
  reset_btn.click(lambda: (None, None, None, "⚠️ Visualization reset."), outputs=[visualization_status, qr_visualization, qr_preview])
611
 
612
  # Integrate the visualizer into the main application
@@ -779,120 +785,120 @@ def create_modern_interface():
779
  return ""
780
 
781
  def process_inputs(urls, files, text, combine):
782
- """Process all inputs and generate QR codes"""
783
- try:
784
- results = []
785
- url_processor = EnhancedURLProcessor()
786
- file_processor = EnhancedFileProcessor()
787
 
788
- # Process JSON input
789
- if text and text.strip():
790
- try:
791
- json_data = json.loads(text)
792
- if isinstance(json_data, list):
793
- results.extend(json_data)
794
- else:
795
- results.append(json_data)
796
- except json.JSONDecodeError as e:
797
- return None, [], f"❌ Invalid JSON format: {str(e)}"
798
-
799
- # Process URLs
800
- if urls and urls.strip():
801
- url_list = re.split(r'[,\n]', urls)
802
- url_list = [url.strip() for url in url_list if url.strip()]
803
- for url in url_list:
804
- validation = url_processor.validate_url(url)
805
- if validation['is_valid']:
806
- content = url_processor.fetch_content(url)
807
- if content:
808
- results.append({
809
- 'source': 'url',
810
- 'url': url,
811
- 'content': content,
812
- 'timestamp': datetime.now().isoformat()
813
- })
814
-
815
- # Process files
816
- if files:
817
- for file in files:
818
- file_results = file_processor.process_file(file)
819
- if file_results:
820
- results.extend(file_results)
821
-
822
- # Generate QR codes
823
- if results:
824
- qr_paths = generate_qr_codes(results, combine)
825
- if qr_paths:
826
- return (
827
- results,
828
- [str(path) for path in qr_paths],
829
- f"✅ Successfully processed {len(results)} items and generated {len(qr_paths)} QR codes!"
830
- )
831
- else:
832
- return None, [], "❌ Failed to generate QR codes"
833
- else:
834
- return None, [], "⚠️ No valid content to process"
835
- except Exception as e:
836
- logger.error(f"Processing error: {e}")
837
- return None, [], f"❌ Error: {str(e)}"
838
-
839
- # Set up event handlers
840
- example_btn.click(load_example, outputs=[text_input])
841
- clear_btn.click(clear_input, outputs=[text_input])
842
- process_btn.click(
843
- process_inputs,
844
- inputs=[url_input, file_input, text_input, combine_data],
845
- outputs=[output_json, output_gallery, output_text]
846
- )
847
 
848
- # Add the visualization button and its click event within the interface scope
849
- visualize_btn = gr.Button("🔍 Visualize QR Codes")
850
- visualize_btn.click(visualize_qr_codes, inputs=output_gallery, outputs=None)
851
 
852
- # Add helpful documentation
853
- gr.Markdown("""
854
- ### 🚀 Features
855
- - **Complete URL Scraping**: Extracts every character from web pages
856
- - **Advanced File Processing**: Full content extraction from various text-based files and common archives. Supports flexible JSON handling.
857
- - **Smart JSON Handling**: Processes any size JSON with automatic chunking, either via direct input or file upload.
858
- - **Sequential QR Codes**: Maintains data integrity across multiple codes
859
- - **Modern Design**: Clean, responsive interface with visual feedback
860
- ### 💡 Tips
861
- 1. **URLs**: Enter multiple URLs separated by commas or newlines
862
- 2. **Files**: Upload any type of file. The processor will attempt to handle supported text-based files, archives (.zip, .tar, .gz), and JSON files.
863
- 3. **JSON**: Use the example button to see the expected format or upload a .json file. The system will also try to detect JSON content in other file types.
864
- 4. **QR Codes**: Choose whether to combine data into sequential codes
865
- 5. **Processing**: Monitor the status for real-time feedback
866
- ### 🎨 Output
867
- - Generated QR codes are saved in the `output/qr_codes` directory
868
- - Each QR code contains metadata for proper sequencing
869
- - Hover over QR codes in the gallery to see details
870
- """)
871
- return interface
 
 
 
 
 
 
872
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
873
  def main():
874
- """Initialize and launch the application"""
875
- try:
876
- # Configure system settings
877
- mimetypes.init()
878
-
879
- # Create and launch interface
880
- interface = create_modern_interface()
881
-
882
- # Add the QR sequence visualizer tab
883
- with interface:
884
- create_qr_sequence_visualizer(None) # output_gallery might not be relevant here
885
-
886
- # Launch with configuration
887
- interface.launch(
888
- share=False,
889
- debug=False,
890
- show_error=True,
891
- show_api=False
892
- )
893
- except Exception as e:
894
- logger.error(f"Application startup error: {e}")
895
- raise
896
-
897
- if __name__ == "__main__":
898
- main()
 
308
  if file_info.file_size > 0 and not file_info.filename.endswith('/'):
309
  extracted_path = extract_to / file_info.filename
310
  if extracted_path.suffix.lower() in self.supported_extensions:
311
+ try:
312
+ with open(extracted_path, 'rb') as f:
313
+ dataset.extend(self._process_single_file(f))
314
+ except Exception as e:
315
+ logger.error(f"Error processing extracted file {extracted_path}: {e}")
316
  # Handle TAR archives
317
  elif archive_path.lower().endswith(('.tar', '.tar.gz', '.tgz')):
318
  try:
 
320
  for member in tar_ref.getmembers():
321
  if member.isfile():
322
  extracted_path = extract_to / member.name
323
+ try:
324
+ tar_ref.extract(member, path=extract_to)
325
+ if extracted_path.suffix.lower() in self.supported_extensions:
326
+ with open(extracted_path, 'rb') as f:
327
+ dataset.extend(self._process_single_file(f))
328
+ except Exception as e:
329
+ logger.error(f"Error extracting or processing TAR member {member.name}: {e}")
330
  except tarfile.TarError as e:
331
  logger.error(f"Error processing TAR archive: {e}")
332
  # Handle GZIP archives (single file)
 
612
  qr_visualization.update(value=composite_image)
613
 
614
  # Event handlers
615
+ visualize_btn.click(process_qr_codes, inputs=qr_input, outputs=[visualization_status, qr_paths, qr_preview])
616
  reset_btn.click(lambda: (None, None, None, "⚠️ Visualization reset."), outputs=[visualization_status, qr_visualization, qr_preview])
617
 
618
  # Integrate the visualizer into the main application
 
785
  return ""
786
 
787
  def process_inputs(urls, files, text, combine):
788
+ """Process all inputs and generate QR codes"""
789
+ try:
790
+ results = []
791
+ url_processor = EnhancedURLProcessor()
792
+ file_processor = EnhancedFileProcessor()
793
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
794
 
 
 
 
795
 
796
+ # Process JSON input
797
+ if text and text.strip():
798
+ try:
799
+ json_data = json.loads(text)
800
+ if isinstance(json_data, list):
801
+ results.extend(json_data)
802
+ else:
803
+ results.append(json_data)
804
+ except json.JSONDecodeError as e:
805
+ return None, [], f"❌ Invalid JSON format: {str(e)}"
806
+
807
+ # Process URLs
808
+ if urls and urls.strip():
809
+ url_list = re.split(r'[,\n]', urls)
810
+ url_list = [url.strip() for url in url_list if url.strip()]
811
+ for url in url_list:
812
+ validation = url_processor.validate_url(url)
813
+ if validation['is_valid']:
814
+ content = url_processor.fetch_content(url)
815
+ if content:
816
+ results.append({
817
+ 'source': 'url',
818
+ 'url': url,
819
+ 'content': content,
820
+ 'timestamp': datetime.now().isoformat()
821
+ })
822
 
823
+ # Process files
824
+ if files:
825
+ for file in files:
826
+ file_results = file_processor.process_file(file)
827
+ if file_results:
828
+ results.extend(file_results)
829
+
830
+ # Generate QR codes
831
+ if results:
832
+ qr_paths = generate_qr_codes(results, combine)
833
+ if qr_paths:
834
+ return (
835
+ results,
836
+ [str(path) for path in qr_paths],
837
+ f"✅ Successfully processed {len(results)} items and generated {len(qr_paths)} QR codes!"
838
+ )
839
+ else:
840
+ return None, [], "❌ Failed to generate QR codes"
841
+ else:
842
+ return None, [], "⚠️ No valid content to process"
843
+ except Exception as e:
844
+ logger.error(f"Processing error: {e}")
845
+ return None, [], f"❌ Error: {str(e)}"
846
+
847
+ # Set up event handlers
848
+ example_btn.click(load_example, outputs=[text_input])
849
+ clear_btn.click(clear_input, outputs=[text_input])
850
+ process_btn.click(
851
+ process_inputs,
852
+ inputs=[url_input, file_input, text_input, combine_data],
853
+ outputs=[output_json, output_gallery, output_text]
854
+ )
855
+
856
+ # Add the visualization button and its click event within the interface scope
857
+ #visualize_btn = gr.Button("🔍 Visualize QR Codes")
858
+ #visualize_btn.click(visualize_qr_codes, inputs=output_gallery, outputs=None)
859
+
860
+ # Add helpful documentation
861
+ gr.Markdown("""
862
+ ### 🚀 Features
863
+ - **Complete URL Scraping**: Extracts every character from web pages
864
+ - **Advanced File Processing**: Full content extraction from various text-based files and common archives. Supports flexible JSON handling.
865
+ - **Smart JSON Handling**: Processes any size JSON with automatic chunking, either via direct input or file upload.
866
+ - **Sequential QR Codes**: Maintains data integrity across multiple codes
867
+ - **Modern Design**: Clean, responsive interface with visual feedback
868
+ ### 💡 Tips
869
+ 1. **URLs**: Enter multiple URLs separated by commas or newlines
870
+ 2. **Files**: Upload any type of file. The processor will attempt to handle supported text-based files, archives (.zip, .tar, .gz), and JSON files.
871
+ 3. **JSON**: Use the example button to see the expected format or upload a .json file. The system will also try to detect JSON content in other file types.
872
+ 4. **QR Codes**: Choose whether to combine data into sequential codes
873
+ 5. **Processing**: Monitor the status for real-time feedback
874
+ ### 🎨 Output
875
+ - Generated QR codes are saved in the `output/qr_codes` directory
876
+ - Each QR code contains metadata for proper sequencing
877
+ - Hover over QR codes in the gallery to see details
878
+ """)
879
+ return interface
880
  def main():
881
+ """Initialize and launch the application"""
882
+ try:
883
+ # Configure system settings
884
+ mimetypes.init()
885
+
886
+ # Create and launch interface
887
+ interface = create_modern_interface()
888
+
889
+ # Add the QR sequence visualizer tab
890
+ #with interface:
891
+ create_qr_sequence_visualizer(None) # output_gallery might not be relevant here
892
+
893
+ # Launch with configuration
894
+ interface.launch(
895
+ share=False,
896
+ debug=False,
897
+ show_error=True,
898
+ show_api=False
899
+ )
900
+ except Exception as e:
901
+ logger.error(f"Application startup error: {e}")
902
+ raise
903
+ if name == "main":
904
+ main()